summaryrefslogtreecommitdiffstats
path: root/src/rgw
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/rgw
parentInitial commit. (diff)
downloadceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/rgw/CMakeLists.txt407
-rw-r--r--src/rgw/librgw.cc729
-rw-r--r--src/rgw/librgw_admin_user.cc186
-rwxr-xr-xsrc/rgw/rgw-orphan-list144
-rw-r--r--src/rgw/rgw_acl.cc174
-rw-r--r--src/rgw/rgw_acl.h469
-rw-r--r--src/rgw/rgw_acl_s3.cc616
-rw-r--r--src/rgw/rgw_acl_s3.h111
-rw-r--r--src/rgw/rgw_acl_swift.cc430
-rw-r--r--src/rgw/rgw_acl_swift.h55
-rw-r--r--src/rgw/rgw_admin.cc8463
-rw-r--r--src/rgw/rgw_admin_user.cc91
-rw-r--r--src/rgw/rgw_admin_user.h43
-rw-r--r--src/rgw/rgw_aio.h80
-rw-r--r--src/rgw/rgw_aio_throttle.cc157
-rw-r--r--src/rgw/rgw_aio_throttle.h83
-rw-r--r--src/rgw/rgw_amqp.cc1035
-rw-r--r--src/rgw/rgw_amqp.h77
-rw-r--r--src/rgw/rgw_arn.cc385
-rw-r--r--src/rgw/rgw_arn.h121
-rw-r--r--src/rgw/rgw_asio_client.cc188
-rw-r--r--src/rgw/rgw_asio_client.h62
-rw-r--r--src/rgw/rgw_asio_frontend.cc834
-rw-r--r--src/rgw/rgw_asio_frontend.h28
-rw-r--r--src/rgw/rgw_auth.cc722
-rw-r--r--src/rgw/rgw_auth.h696
-rw-r--r--src/rgw/rgw_auth_filters.h290
-rw-r--r--src/rgw/rgw_auth_keystone.cc491
-rw-r--r--src/rgw/rgw_auth_keystone.h130
-rw-r--r--src/rgw/rgw_auth_registry.h101
-rw-r--r--src/rgw/rgw_auth_s3.cc1135
-rw-r--r--src/rgw/rgw_auth_s3.h615
-rw-r--r--src/rgw/rgw_b64.h87
-rw-r--r--src/rgw/rgw_basic_types.cc44
-rw-r--r--src/rgw/rgw_basic_types.h213
-rw-r--r--src/rgw/rgw_bucket.cc3178
-rw-r--r--src/rgw/rgw_bucket.h575
-rw-r--r--src/rgw/rgw_cache.cc353
-rw-r--r--src/rgw/rgw_cache.h219
-rw-r--r--src/rgw/rgw_civetweb.cc248
-rw-r--r--src/rgw/rgw_civetweb.h59
-rw-r--r--src/rgw/rgw_civetweb_frontend.cc153
-rw-r--r--src/rgw/rgw_civetweb_log.cc24
-rw-r--r--src/rgw/rgw_civetweb_log.h10
-rw-r--r--src/rgw/rgw_client_io.cc34
-rw-r--r--src/rgw/rgw_client_io.h439
-rw-r--r--src/rgw/rgw_client_io_filters.h456
-rw-r--r--src/rgw/rgw_common.cc1921
-rw-r--r--src/rgw/rgw_common.h2742
-rw-r--r--src/rgw/rgw_compression.cc201
-rw-r--r--src/rgw/rgw_compression.h60
-rw-r--r--src/rgw/rgw_coroutine.cc1058
-rw-r--r--src/rgw/rgw_coroutine.h674
-rw-r--r--src/rgw/rgw_cors.cc194
-rw-r--r--src/rgw/rgw_cors.h136
-rw-r--r--src/rgw/rgw_cors_s3.cc245
-rw-r--r--src/rgw/rgw_cors_s3.h56
-rw-r--r--src/rgw/rgw_cors_swift.h76
-rw-r--r--src/rgw/rgw_cr_rados.cc916
-rw-r--r--src/rgw/rgw_cr_rados.h1351
-rw-r--r--src/rgw/rgw_cr_rest.cc349
-rw-r--r--src/rgw/rgw_cr_rest.h593
-rw-r--r--src/rgw/rgw_cr_tools.cc275
-rw-r--r--src/rgw/rgw_cr_tools.h75
-rw-r--r--src/rgw/rgw_crypt.cc1317
-rw-r--r--src/rgw/rgw_crypt.h152
-rw-r--r--src/rgw/rgw_crypt_sanitize.cc88
-rw-r--r--src/rgw/rgw_crypt_sanitize.h71
-rw-r--r--src/rgw/rgw_data_sync.cc3709
-rw-r--r--src/rgw/rgw_data_sync.h625
-rw-r--r--src/rgw/rgw_dencoder.cc564
-rw-r--r--src/rgw/rgw_dmclock.h54
-rw-r--r--src/rgw/rgw_dmclock_async_scheduler.cc175
-rw-r--r--src/rgw/rgw_dmclock_async_scheduler.h217
-rw-r--r--src/rgw/rgw_dmclock_scheduler.h89
-rw-r--r--src/rgw/rgw_dmclock_scheduler_ctx.cc177
-rw-r--r--src/rgw/rgw_dmclock_scheduler_ctx.h118
-rw-r--r--src/rgw/rgw_dmclock_sync_scheduler.cc114
-rw-r--r--src/rgw/rgw_dmclock_sync_scheduler.h79
-rw-r--r--src/rgw/rgw_env.cc141
-rw-r--r--src/rgw/rgw_es_main.cc76
-rw-r--r--src/rgw/rgw_es_query.cc694
-rw-r--r--src/rgw/rgw_es_query.h165
-rw-r--r--src/rgw/rgw_etag_verifier.cc185
-rw-r--r--src/rgw/rgw_etag_verifier.h85
-rw-r--r--src/rgw/rgw_fcgi.cc91
-rw-r--r--src/rgw/rgw_fcgi.h57
-rw-r--r--src/rgw/rgw_fcgi_process.cc138
-rw-r--r--src/rgw/rgw_file.cc2436
-rw-r--r--src/rgw/rgw_file.h2806
-rw-r--r--src/rgw/rgw_formats.cc374
-rw-r--r--src/rgw/rgw_formats.h136
-rw-r--r--src/rgw/rgw_frontend.cc82
-rw-r--r--src/rgw/rgw_frontend.h285
-rw-r--r--src/rgw/rgw_gc.cc528
-rw-r--r--src/rgw/rgw_gc.h77
-rw-r--r--src/rgw/rgw_http_client.cc1255
-rw-r--r--src/rgw/rgw_http_client.h370
-rw-r--r--src/rgw/rgw_http_client_curl.cc122
-rw-r--r--src/rgw/rgw_http_client_curl.h32
-rw-r--r--src/rgw/rgw_http_errors.h46
-rw-r--r--src/rgw/rgw_iam_policy.cc1432
-rw-r--r--src/rgw/rgw_iam_policy.h480
-rw-r--r--src/rgw/rgw_iam_policy_keywords.gperf130
-rw-r--r--src/rgw/rgw_iam_policy_keywords.h139
-rw-r--r--src/rgw/rgw_json_enc.cc1777
-rw-r--r--src/rgw/rgw_jsonparser.cc132
-rw-r--r--src/rgw/rgw_kafka.cc719
-rw-r--r--src/rgw/rgw_kafka.h81
-rw-r--r--src/rgw/rgw_keystone.cc713
-rw-r--r--src/rgw/rgw_keystone.h373
-rw-r--r--src/rgw/rgw_lc.cc1678
-rw-r--r--src/rgw/rgw_lc.h539
-rw-r--r--src/rgw/rgw_lc_s3.cc344
-rw-r--r--src/rgw/rgw_lc_s3.h102
-rw-r--r--src/rgw/rgw_ldap.cc128
-rw-r--r--src/rgw/rgw_ldap.h143
-rw-r--r--src/rgw/rgw_lib.h225
-rw-r--r--src/rgw/rgw_lib_frontend.h115
-rw-r--r--src/rgw/rgw_loadgen.cc128
-rw-r--r--src/rgw/rgw_loadgen.h75
-rw-r--r--src/rgw/rgw_loadgen_process.cc149
-rw-r--r--src/rgw/rgw_log.cc467
-rw-r--r--src/rgw/rgw_log.h144
-rw-r--r--src/rgw/rgw_main.cc637
-rw-r--r--src/rgw/rgw_meta_sync_status.h124
-rw-r--r--src/rgw/rgw_metadata.cc1178
-rw-r--r--src/rgw/rgw_metadata.h426
-rw-r--r--src/rgw/rgw_multi.cc384
-rw-r--r--src/rgw/rgw_multi.h114
-rw-r--r--src/rgw/rgw_multi_del.cc73
-rw-r--r--src/rgw/rgw_multi_del.h66
-rw-r--r--src/rgw/rgw_multiparser.cc46
-rw-r--r--src/rgw/rgw_notify.cc141
-rw-r--r--src/rgw/rgw_notify.h27
-rw-r--r--src/rgw/rgw_notify_event_type.cc82
-rw-r--r--src/rgw/rgw_notify_event_type.h35
-rw-r--r--src/rgw/rgw_object_expirer.cc107
-rw-r--r--src/rgw/rgw_object_expirer_core.cc294
-rw-r--r--src/rgw/rgw_object_expirer_core.h100
-rw-r--r--src/rgw/rgw_object_lock.cc96
-rw-r--r--src/rgw/rgw_object_lock.h221
-rw-r--r--src/rgw/rgw_op.cc7942
-rw-r--r--src/rgw/rgw_op.h2346
-rw-r--r--src/rgw/rgw_opa.cc82
-rw-r--r--src/rgw/rgw_opa.h14
-rw-r--r--src/rgw/rgw_orphan.cc1523
-rw-r--r--src/rgw/rgw_orphan.h290
-rw-r--r--src/rgw/rgw_os_lib.cc62
-rw-r--r--src/rgw/rgw_os_lib.h12
-rw-r--r--src/rgw/rgw_otp.cc158
-rw-r--r--src/rgw/rgw_otp.h15
-rw-r--r--src/rgw/rgw_perf_counters.cc60
-rw-r--r--src/rgw/rgw_perf_counters.h50
-rw-r--r--src/rgw/rgw_period_history.cc354
-rw-r--r--src/rgw/rgw_period_history.h114
-rw-r--r--src/rgw/rgw_period_puller.cc114
-rw-r--r--src/rgw/rgw_period_puller.h20
-rw-r--r--src/rgw/rgw_period_pusher.cc307
-rw-r--r--src/rgw/rgw_period_pusher.h56
-rw-r--r--src/rgw/rgw_policy_s3.cc303
-rw-r--r--src/rgw/rgw_policy_s3.h59
-rw-r--r--src/rgw/rgw_process.cc323
-rw-r--r--src/rgw/rgw_process.h199
-rw-r--r--src/rgw/rgw_pubsub.cc872
-rw-r--r--src/rgw/rgw_pubsub.h812
-rw-r--r--src/rgw/rgw_pubsub_push.cc749
-rw-r--r--src/rgw/rgw_pubsub_push.h57
-rw-r--r--src/rgw/rgw_putobj.cc99
-rw-r--r--src/rgw/rgw_putobj.h79
-rw-r--r--src/rgw/rgw_putobj_processor.cc670
-rw-r--r--src/rgw/rgw_putobj_processor.h263
-rw-r--r--src/rgw/rgw_quota.cc1034
-rw-r--r--src/rgw/rgw_quota.h123
-rw-r--r--src/rgw/rgw_rados.cc10734
-rw-r--r--src/rgw/rgw_rados.h2633
-rw-r--r--src/rgw/rgw_realm_reloader.cc176
-rw-r--r--src/rgw/rgw_realm_reloader.h63
-rw-r--r--src/rgw/rgw_realm_watcher.cc148
-rw-r--r--src/rgw/rgw_realm_watcher.h69
-rw-r--r--src/rgw/rgw_request.h64
-rw-r--r--src/rgw/rgw_reshard.cc1177
-rw-r--r--src/rgw/rgw_reshard.h211
-rw-r--r--src/rgw/rgw_resolve.cc44
-rw-r--r--src/rgw/rgw_resolve.h27
-rw-r--r--src/rgw/rgw_rest.cc2302
-rw-r--r--src/rgw/rgw_rest.h816
-rw-r--r--src/rgw/rgw_rest_admin.h15
-rw-r--r--src/rgw/rgw_rest_bucket.cc350
-rw-r--r--src/rgw/rgw_rest_bucket.h38
-rw-r--r--src/rgw/rgw_rest_client.cc999
-rw-r--r--src/rgw/rgw_rest_client.h226
-rw-r--r--src/rgw/rgw_rest_config.cc85
-rw-r--r--src/rgw/rgw_rest_config.h88
-rw-r--r--src/rgw/rgw_rest_conn.cc466
-rw-r--r--src/rgw/rgw_rest_conn.h521
-rw-r--r--src/rgw/rgw_rest_iam.cc147
-rw-r--r--src/rgw/rgw_rest_iam.h49
-rw-r--r--src/rgw/rgw_rest_log.cc1060
-rw-r--r--src/rgw/rgw_rest_log.h336
-rw-r--r--src/rgw/rgw_rest_metadata.cc363
-rw-r--r--src/rgw/rgw_rest_metadata.h135
-rw-r--r--src/rgw/rgw_rest_pubsub.cc729
-rw-r--r--src/rgw/rgw_rest_pubsub.h41
-rw-r--r--src/rgw/rgw_rest_pubsub_common.cc259
-rw-r--r--src/rgw/rgw_rest_pubsub_common.h287
-rw-r--r--src/rgw/rgw_rest_realm.cc367
-rw-r--r--src/rgw/rgw_rest_realm.h18
-rw-r--r--src/rgw/rgw_rest_role.cc489
-rw-r--r--src/rgw/rgw_rest_role.h131
-rw-r--r--src/rgw/rgw_rest_s3.cc5133
-rw-r--r--src/rgw/rgw_rest_s3.h1045
-rw-r--r--src/rgw/rgw_rest_s3website.h103
-rw-r--r--src/rgw/rgw_rest_sts.cc459
-rw-r--r--src/rgw/rgw_rest_sts.h202
-rw-r--r--src/rgw/rgw_rest_swift.cc3093
-rw-r--r--src/rgw/rgw_rest_swift.h681
-rw-r--r--src/rgw/rgw_rest_usage.cc108
-rw-r--r--src/rgw/rgw_rest_usage.h36
-rw-r--r--src/rgw/rgw_rest_user.cc999
-rw-r--r--src/rgw/rgw_rest_user.h38
-rw-r--r--src/rgw/rgw_rest_user_policy.cc363
-rw-r--r--src/rgw/rgw_rest_user_policy.h76
-rw-r--r--src/rgw/rgw_role.cc502
-rw-r--r--src/rgw/rgw_role.h161
-rw-r--r--src/rgw/rgw_service.cc191
-rw-r--r--src/rgw/rgw_service.h112
-rw-r--r--src/rgw/rgw_string.cc45
-rw-r--r--src/rgw/rgw_string.h236
-rw-r--r--src/rgw/rgw_sts.cc427
-rw-r--r--src/rgw/rgw_sts.h222
-rw-r--r--src/rgw/rgw_swift_auth.cc759
-rw-r--r--src/rgw/rgw_swift_auth.h341
-rw-r--r--src/rgw/rgw_sync.cc3136
-rw-r--r--src/rgw/rgw_sync.h534
-rw-r--r--src/rgw/rgw_sync_counters.cc28
-rw-r--r--src/rgw/rgw_sync_counters.h25
-rw-r--r--src/rgw/rgw_sync_log_trim.cc1094
-rw-r--r--src/rgw/rgw_sync_log_trim.h110
-rw-r--r--src/rgw/rgw_sync_module.cc91
-rw-r--r--src/rgw/rgw_sync_module.h197
-rw-r--r--src/rgw/rgw_sync_module_aws.cc1807
-rw-r--r--src/rgw/rgw_sync_module_aws.h111
-rw-r--r--src/rgw/rgw_sync_module_es.cc918
-rw-r--r--src/rgw/rgw_sync_module_es.h62
-rw-r--r--src/rgw/rgw_sync_module_es_rest.cc423
-rw-r--r--src/rgw/rgw_sync_module_es_rest.h20
-rw-r--r--src/rgw/rgw_sync_module_log.cc74
-rw-r--r--src/rgw/rgw_sync_module_log.h18
-rw-r--r--src/rgw/rgw_sync_module_pubsub.cc1578
-rw-r--r--src/rgw/rgw_sync_module_pubsub.h40
-rw-r--r--src/rgw/rgw_sync_module_pubsub_rest.cc526
-rw-r--r--src/rgw/rgw_sync_module_pubsub_rest.h13
-rw-r--r--src/rgw/rgw_sync_trace.cc288
-rw-r--r--src/rgw/rgw_sync_trace.h142
-rw-r--r--src/rgw/rgw_tag.cc59
-rw-r--r--src/rgw/rgw_tag.h46
-rw-r--r--src/rgw/rgw_tag_s3.cc65
-rw-r--r--src/rgw/rgw_tag_s3.h53
-rw-r--r--src/rgw/rgw_tar.h156
-rw-r--r--src/rgw/rgw_token.cc143
-rw-r--r--src/rgw/rgw_token.h169
-rw-r--r--src/rgw/rgw_tools.cc527
-rw-r--r--src/rgw/rgw_tools.h202
-rw-r--r--src/rgw/rgw_torrent.cc266
-rw-r--r--src/rgw/rgw_torrent.h142
-rw-r--r--src/rgw/rgw_url.cc49
-rw-r--r--src/rgw/rgw_url.h12
-rw-r--r--src/rgw/rgw_usage.cc151
-rw-r--r--src/rgw/rgw_usage.h30
-rw-r--r--src/rgw/rgw_user.cc2958
-rw-r--r--src/rgw/rgw_user.h774
-rw-r--r--src/rgw/rgw_web_idp.h29
-rw-r--r--src/rgw/rgw_website.cc127
-rw-r--r--src/rgw/rgw_website.h246
-rwxr-xr-xsrc/rgw/rgw_xml.cc500
-rw-r--r--src/rgw/rgw_xml.h352
-rw-r--r--src/rgw/rgw_xml_enc.cc152
-rw-r--r--src/rgw/rgw_zone.cc1937
-rw-r--r--src/rgw/rgw_zone.h1145
-rw-r--r--src/rgw/services/svc_finisher.cc53
-rw-r--r--src/rgw/services/svc_finisher.h45
-rw-r--r--src/rgw/services/svc_notify.cc484
-rw-r--r--src/rgw/services/svc_notify.h100
-rw-r--r--src/rgw/services/svc_quota.cc15
-rw-r--r--src/rgw/services/svc_quota.h23
-rw-r--r--src/rgw/services/svc_rados.cc308
-rw-r--r--src/rgw/services/svc_rados.h178
-rw-r--r--src/rgw/services/svc_sync_modules.cc15
-rw-r--r--src/rgw/services/svc_sync_modules.h26
-rw-r--r--src/rgw/services/svc_sys_obj.cc192
-rw-r--r--src/rgw/services/svc_sys_obj.h275
-rw-r--r--src/rgw/services/svc_sys_obj_cache.cc506
-rw-r--r--src/rgw/services/svc_sys_obj_cache.h176
-rw-r--r--src/rgw/services/svc_sys_obj_core.cc595
-rw-r--r--src/rgw/services/svc_sys_obj_core.h201
-rw-r--r--src/rgw/services/svc_zone.cc1250
-rw-r--r--src/rgw/services/svc_zone.h134
-rw-r--r--src/rgw/services/svc_zone_utils.cc59
-rw-r--r--src/rgw/services/svc_zone_utils.h39
300 files changed, 154611 insertions, 0 deletions
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
new file mode 100644
index 00000000..12f831fb
--- /dev/null
+++ b/src/rgw/CMakeLists.txt
@@ -0,0 +1,407 @@
+add_custom_target(civetweb_h
+ COMMAND ${CMAKE_COMMAND} -E make_directory
+ "${CMAKE_BINARY_DIR}/src/include/civetweb"
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${CMAKE_SOURCE_DIR}/src/civetweb/include/civetweb.h"
+ "${CMAKE_BINARY_DIR}/src/include/civetweb"
+ COMMENT "keep civetweb.h up-to-date")
+
+find_program(GPERF gperf)
+if(NOT GPERF)
+ message(FATAL_ERROR "Can't find gperf")
+endif()
+function(gperf_generate input output) # custom command: build ${output} from ${input} with gperf
+ add_custom_command(
+ OUTPUT ${output}
+ COMMAND ${GPERF} ${input} | sed "s/register //g" > ${output} # every "register " is stripped from gperf's output
+ DEPENDS ${input} # regenerate whenever the input file changes
+ COMMENT "Generate ${output}"
+ )
+endfunction()
+
+if(Boost_VERSION VERSION_GREATER 1.73)
+ add_definitions(-DBOOST_ASIO_USE_TS_EXECUTOR_AS_DEFAULT)
+endif()
+
+set(librgw_common_srcs
+ services/svc_finisher.cc
+ services/svc_notify.cc
+ services/svc_quota.cc
+ services/svc_sync_modules.cc
+ services/svc_rados.cc
+ services/svc_sys_obj.cc
+ services/svc_sys_obj_cache.cc
+ services/svc_sys_obj_core.cc
+ services/svc_zone.cc
+ services/svc_zone_utils.cc
+ rgw_service.cc
+ rgw_acl.cc
+ rgw_acl_s3.cc
+ rgw_acl_swift.cc
+ rgw_aio_throttle.cc
+ rgw_auth.cc
+ rgw_auth_s3.cc
+ rgw_arn.cc
+ rgw_basic_types.cc
+ rgw_bucket.cc
+ rgw_cache.cc
+ rgw_common.cc
+ rgw_compression.cc
+ rgw_etag_verifier.cc
+ rgw_cors.cc
+ rgw_cors_s3.cc
+ rgw_dencoder.cc
+ rgw_env.cc
+ rgw_es_query.cc
+ rgw_formats.cc
+ rgw_gc.cc
+ rgw_http_client.cc
+ rgw_json_enc.cc
+ rgw_keystone.cc
+ rgw_ldap.cc
+ rgw_lc.cc
+ rgw_lc_s3.cc
+ rgw_metadata.cc
+ rgw_multi.cc
+ rgw_multi_del.cc
+ rgw_pubsub.cc
+ rgw_sync.cc
+ rgw_data_sync.cc
+ rgw_sync_counters.cc
+ rgw_sync_module.cc
+ rgw_sync_module_aws.cc
+ rgw_sync_module_es.cc
+ rgw_sync_module_es_rest.cc
+ rgw_sync_module_log.cc
+ rgw_sync_module_pubsub.cc
+ rgw_pubsub_push.cc
+ rgw_notify.cc
+ rgw_notify_event_type.cc
+ rgw_sync_module_pubsub_rest.cc
+ rgw_sync_log_trim.cc
+ rgw_sync_trace.cc
+ rgw_period_history.cc
+ rgw_period_puller.cc
+ rgw_reshard.cc
+ rgw_coroutine.cc
+ rgw_cr_rados.cc
+ rgw_cr_rest.cc
+ rgw_cr_tools.cc
+ rgw_object_expirer_core.cc
+ rgw_op.cc
+ rgw_otp.cc
+ rgw_policy_s3.cc
+ rgw_putobj.cc
+ rgw_putobj_processor.cc
+ rgw_quota.cc
+ rgw_rados.cc
+ rgw_resolve.cc
+ rgw_rest.cc
+ rgw_rest_client.cc
+ rgw_rest_conn.cc
+ rgw_rest_log.cc
+ rgw_rest_metadata.cc
+ rgw_rest_pubsub.cc
+ rgw_rest_pubsub_common.cc
+ rgw_rest_realm.cc
+ rgw_rest_role.cc
+ rgw_rest_s3.cc
+ rgw_role.cc
+ rgw_string.cc
+ rgw_tag.cc
+ rgw_tag_s3.cc
+ rgw_tools.cc
+ rgw_user.cc
+ rgw_website.cc
+ rgw_xml.cc
+ rgw_xml_enc.cc
+ rgw_torrent.cc
+ rgw_crypt.cc
+ rgw_crypt_sanitize.cc
+ rgw_iam_policy.cc
+ rgw_rest_user_policy.cc
+ rgw_zone.cc
+ rgw_sts.cc
+ rgw_rest_sts.cc
+ rgw_perf_counters.cc
+ rgw_object_lock.cc
+ rgw_rest_iam.cc
+ rgw_url.cc)
+
+if(WITH_RADOSGW_AMQP_ENDPOINT)
+ find_package(RabbitMQ REQUIRED)
+endif()
+if(WITH_RADOSGW_KAFKA_ENDPOINT)
+ find_package(RDKafka 1.9.2) # not REQUIRED: a miss is handled below. NOTE(review): confirm 1.9.2 is the intended minimum
+ if(NOT RDKafka_FOUND) # soft failure: force the option OFF in the cache instead of aborting the configure
+ set(WITH_RADOSGW_KAFKA_ENDPOINT OFF CACHE BOOL "Rados Gateway's pubsub support for Kafka push endpoint" FORCE)
+ message(STATUS "Disabling Kafka endpoint support")
+ endif()
+endif()
+
+if(WITH_RADOSGW_AMQP_ENDPOINT)
+ list(APPEND librgw_common_srcs rgw_amqp.cc)
+endif()
+if(WITH_RADOSGW_KAFKA_ENDPOINT)
+ list(APPEND librgw_common_srcs rgw_kafka.cc)
+endif()
+
+add_library(rgw_common OBJECT ${librgw_common_srcs})
+
+target_include_directories(rgw_common SYSTEM PUBLIC "services")
+target_include_directories(rgw_common PUBLIC "${CMAKE_SOURCE_DIR}/src/dmclock/support/src")
+
+if(WITH_LTTNG)
+ # rgw/rgw_op.cc includes "tracing/rgw_op.h"
+ # rgw/rgw_rados.cc includes "tracing/rgw_rados.h"
+ add_dependencies(rgw_common rgw_op-tp rgw_rados-tp)
+endif()
+
+set(rgw_a_srcs
+ rgw_auth_keystone.cc
+ rgw_client_io.cc
+ rgw_frontend.cc
+ rgw_http_client_curl.cc
+ rgw_loadgen.cc
+ rgw_log.cc
+ rgw_period_pusher.cc
+ rgw_realm_reloader.cc
+ rgw_realm_watcher.cc
+ rgw_os_lib.cc
+ rgw_process.cc
+ rgw_rest_bucket.cc
+ rgw_rest_config.cc
+ rgw_rest_log.cc
+ rgw_rest_metadata.cc
+ rgw_rest_realm.cc
+ rgw_rest_swift.cc
+ rgw_rest_usage.cc
+ rgw_rest_user.cc
+ rgw_swift_auth.cc
+ rgw_usage.cc
+ rgw_opa.cc
+ rgw_sts.cc
+ rgw_rest_sts.cc)
+
+gperf_generate(${CMAKE_SOURCE_DIR}/src/rgw/rgw_iam_policy_keywords.gperf
+ rgw_iam_policy_keywords.frag.cc)
+set_source_files_properties(rgw_iam_policy.cc PROPERTIES
+ OBJECT_DEPENDS ${CMAKE_BINARY_DIR}/src/rgw/rgw_iam_policy_keywords.frag.cc
+ COMPILE_FLAGS -I${CMAKE_BINARY_DIR}/src/rgw)
+
+
+if (WITH_RADOSGW_FCGI_FRONTEND)
+ list(APPEND rgw_a_srcs rgw_fcgi.cc)
+endif()
+
+add_library(rgw_a STATIC
+ ${rgw_a_srcs}
+ $<TARGET_OBJECTS:rgw_common>)
+
+add_dependencies(rgw_a civetweb_h)
+
+target_include_directories(rgw_a PUBLIC "${CMAKE_SOURCE_DIR}/src/dmclock/support/src")
+target_include_directories(rgw_a SYSTEM PUBLIC "../rapidjson/include")
+
+target_link_libraries(rgw_a
+ PRIVATE
+ librados cls_otp_client cls_lock_client cls_rgw_client cls_refcount_client
+ cls_log_client cls_timeindex_client cls_version_client
+ cls_user_client ceph-common common_utf8 global
+ ${CURL_LIBRARIES}
+ ${EXPAT_LIBRARIES}
+ ${OPENLDAP_LIBRARIES} ${CRYPTO_LIBS}
+ OATH::OATH)
+
+if(WITH_CURL_OPENSSL)
+ # used by rgw_http_client_curl.cc
+ target_link_libraries(rgw_a PRIVATE OpenSSL::Crypto)
+endif()
+
+if(WITH_BOOST_CONTEXT)
+ target_link_libraries(rgw_a PRIVATE Boost::coroutine Boost::context)
+endif()
+
+set(rgw_libs rgw_a)
+if(WITH_RADOSGW_AMQP_ENDPOINT)
+ # used by rgw_amqp.cc
+ list(APPEND rgw_libs RabbitMQ::RabbitMQ)
+endif()
+if(WITH_RADOSGW_KAFKA_ENDPOINT)
+ # used by rgw_kafka.cc
+ list(APPEND rgw_libs RDKafka::RDKafka)
+endif()
+
+set(radosgw_srcs
+ rgw_loadgen_process.cc
+ rgw_civetweb.cc
+ rgw_civetweb_frontend.cc
+ rgw_civetweb_log.cc
+ rgw_dmclock_scheduler_ctx.cc
+ rgw_dmclock_sync_scheduler.cc)
+
+if (WITH_RADOSGW_FCGI_FRONTEND)
+ list(APPEND radosgw_srcs rgw_fcgi_process.cc)
+endif()
+
+if(WITH_RADOSGW_BEAST_FRONTEND)
+ list(APPEND radosgw_srcs
+ rgw_asio_client.cc
+ rgw_asio_frontend.cc
+ rgw_dmclock_async_scheduler.cc)
+endif()
+
+add_library(radosgw_a STATIC ${radosgw_srcs}
+ $<TARGET_OBJECTS:civetweb_common_objs>)
+target_link_libraries(radosgw_a PRIVATE ${rgw_libs})
+if(WITH_RADOSGW_BEAST_FRONTEND AND WITH_RADOSGW_BEAST_OPENSSL)
+ # used by rgw_asio_frontend.cc
+ target_link_libraries(radosgw_a PRIVATE OpenSSL::SSL)
+endif()
+
+add_executable(radosgw rgw_main.cc)
+target_link_libraries(radosgw radosgw_a librados
+ cls_rgw_client cls_otp_client cls_lock_client cls_refcount_client
+ cls_log_client cls_timeindex_client
+ cls_version_client cls_user_client
+ global dmclock::dmclock
+ ${FCGI_LIBRARY} ${LIB_RESOLV}
+ ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${BLKID_LIBRARIES}
+ ${ALLOC_LIBS})
+install(TARGETS radosgw DESTINATION bin)
+
+set(radosgw_admin_srcs
+ rgw_admin.cc
+ rgw_orphan.cc)
+add_executable(radosgw-admin ${radosgw_admin_srcs})
+target_link_libraries(radosgw-admin ${rgw_libs} librados
+ cls_rgw_client cls_otp_client cls_lock_client cls_refcount_client
+ cls_log_client cls_timeindex_client
+ cls_version_client cls_user_client
+ global ${FCGI_LIBRARY} ${LIB_RESOLV}
+ ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${BLKID_LIBRARIES})
+install(TARGETS radosgw-admin DESTINATION bin)
+
+set(radosgw_es_srcs
+ rgw_es_main.cc)
+add_executable(radosgw-es ${radosgw_es_srcs})
+target_link_libraries(radosgw-es ${rgw_libs} librados
+ cls_rgw_client cls_otp_client cls_lock_client cls_refcount_client
+ cls_log_client cls_timeindex_client
+ cls_version_client cls_user_client
+ global ${FCGI_LIBRARY} ${LIB_RESOLV}
+ ${CURL_LIBRARIES} ${EXPAT_LIBRARIES} ${BLKID_LIBRARIES})
+install(TARGETS radosgw-es DESTINATION bin)
+
+set(radosgw_token_srcs
+ rgw_token.cc)
+add_executable(radosgw-token ${radosgw_token_srcs})
+target_link_libraries(radosgw-token librados
+ global ${ALLOC_LIBS})
+install(TARGETS radosgw-token DESTINATION bin)
+
+set(radosgw_object_expirer_srcs
+ rgw_object_expirer.cc)
+add_executable(radosgw-object-expirer ${radosgw_object_expirer_srcs})
+target_link_libraries(radosgw-object-expirer ${rgw_libs} librados
+ cls_rgw_client cls_otp_client cls_lock_client cls_refcount_client
+ cls_log_client cls_timeindex_client
+ cls_version_client cls_user_client
+ global ${FCGI_LIBRARY} ${LIB_RESOLV}
+ ${CURL_LIBRARIES} ${EXPAT_LIBRARIES})
+install(TARGETS radosgw-object-expirer DESTINATION bin)
+
+set(librgw_srcs
+ librgw.cc
+ rgw_file.cc)
+add_library(rgw SHARED ${librgw_srcs})
+target_link_libraries(rgw
+ PRIVATE
+ ${rgw_libs}
+ librados
+ cls_rgw_client
+ cls_otp_client
+ cls_lock_client
+ cls_refcount_client
+ cls_log_client
+ cls_timeindex_client
+ cls_version_client
+ cls_user_client
+ global
+ ${LIB_RESOLV}
+ ${CURL_LIBRARIES}
+ ${EXPAT_LIBRARIES}
+ PUBLIC
+ dmclock::dmclock)
+
+if(WITH_RADOSGW_AMQP_ENDPOINT)
+ target_link_libraries(rgw PRIVATE RabbitMQ::RabbitMQ)
+endif()
+
+if(WITH_RADOSGW_KAFKA_ENDPOINT)
+ target_link_libraries(rgw PRIVATE RDKafka::RDKafka)
+endif()
+
+set_target_properties(rgw PROPERTIES OUTPUT_NAME rgw VERSION 2.0.0
+ SOVERSION 2)
+install(TARGETS rgw DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+set(librgw_admin_user_srcs
+ librgw_admin_user.cc
+ rgw_admin_user.cc
+)
+add_library(rgw_admin_user SHARED
+ ${librgw_admin_user_srcs}
+ $<TARGET_OBJECTS:rgw_common>)
+
+add_dependencies(rgw_admin_user civetweb_h)
+
+target_link_libraries(rgw_admin_user PRIVATE
+ librados
+ cls_rgw_client
+ cls_otp_client
+ cls_lock_client
+ cls_refcount_client
+ cls_log_client
+ cls_timeindex_client
+ cls_version_client
+ cls_user_client
+ global
+ ${CURL_LIBRARIES}
+ ${EXPAT_LIBRARIES}
+ ${OPENLDAP_LIBRARIES}
+ dmclock::dmclock)
+set_target_properties(rgw_admin_user PROPERTIES OUTPUT_NAME rgw_admin_user VERSION 1.0.0
+ SOVERSION 0)
+install(TARGETS rgw_admin_user DESTINATION ${CMAKE_INSTALL_LIBDIR})
+if(WITH_RADOSGW_AMQP_ENDPOINT)
+ target_link_libraries(rgw_admin_user PRIVATE RabbitMQ::RabbitMQ)
+endif()
+if(WITH_RADOSGW_KAFKA_ENDPOINT)
+ target_link_libraries(rgw_admin_user PRIVATE RDKafka::RDKafka)
+endif()
+if(WITH_BOOST_CONTEXT)
+ target_link_libraries(rgw_admin_user PRIVATE Boost::coroutine Boost::context)
+endif()
+
+if(WITH_TESTS)
+ add_executable(ceph_rgw_jsonparser
+ rgw_jsonparser.cc)
+ target_link_libraries(ceph_rgw_jsonparser
+ ${rgw_libs}
+ global)
+
+ add_executable(ceph_rgw_multiparser
+ rgw_multiparser.cc)
+ target_link_libraries(ceph_rgw_multiparser
+ ${rgw_libs}
+ global)
+
+ install(TARGETS
+ ceph_rgw_jsonparser
+ ceph_rgw_multiparser
+ DESTINATION bin)
+endif(WITH_TESTS)
+
+install(PROGRAMS rgw-orphan-list
+ DESTINATION bin)
diff --git a/src/rgw/librgw.cc b/src/rgw/librgw.cc
new file mode 100644
index 00000000..1dd88982
--- /dev/null
+++ b/src/rgw/librgw.cc
@@ -0,0 +1,729 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/compat.h"
+#include <sys/types.h>
+#include <string.h>
+#include <chrono>
+
+#include "include/types.h"
+#include "include/rados/librgw.h"
+#include "rgw/rgw_acl_s3.h"
+#include "rgw_acl.h"
+
+#include "include/str_list.h"
+#include "include/stringify.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/Throttle.h"
+#include "common/WorkQueue.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+
+#include "rgw_rados.h"
+#include "rgw_resolve.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_frontend.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_rest_user.h"
+#include "rgw_rest_s3.h"
+#include "rgw_os_lib.h"
+#include "rgw_auth.h"
+#include "rgw_auth_s3.h"
+#include "rgw_lib.h"
+#include "rgw_lib_frontend.h"
+#include "rgw_http_client.h"
+#include "rgw_http_client_curl.h"
+#include "rgw_perf_counters.h"
+
+#include <errno.h>
+#include <thread>
+#include <string>
+#include <mutex>
+
+
+#define dout_subsys ceph_subsys_rgw
+
+bool global_stop = false;
+
+static void handle_sigterm(int signum) /* signal handler that only logs; signum is unused */
+{
+ dout(20) << __func__ << " SIGUSR1 ignored" << dendl; /* NOTE(review): name says SIGTERM, text says SIGUSR1--check the registration site */
+}
+
+namespace rgw {
+
+ using std::string;
+
+ static std::mutex librgw_mtx;
+
+ RGWLib rgwlib;
+
+ class C_InitTimeout : public Context { /* Context whose finish() reports an init timeout and exits */
+ public:
+ C_InitTimeout() {}
+ void finish(int r) override { /* r is ignored */
+ derr << "Initialization timeout, failed to initialize" << dendl;
+ exit(1); /* terminates the whole process with status 1 */
+ }
+ };
+
+ void RGWLibProcess::checkpoint() /* drains req_wq through m_tp */
+ {
+ m_tp.drain(&req_wq);
+ }
+
+#define MIN_EXPIRE_S 120
+
+ void RGWLibProcess::run() /* loop until shutdown: gc() and update_user() on every mounted fs, then wait */
+ {
+ /* publish the configured write completion interval to RGWLibFS */
+ RGWLibFS::write_completion_interval_s =
+ cct->_conf->rgw_nfs_write_completion_interval_s;
+
+ /* start write timer */
+ RGWLibFS::write_timer.resume();
+
+ /* gc loop */
+ while (! shutdown) {
+ lsubdout(cct, rgw, 5) << "RGWLibProcess GC" << dendl;
+
+ /* dirent invalidate timeout--basically, the upper-bound on
+ * inconsistency with the S3 namespace */
+ auto expire_s = cct->_conf->rgw_nfs_namespace_expire_secs;
+
+ /* delay between gc cycles: expire_s/2 clamped to [1, MIN_EXPIRE_S] seconds */
+ auto delay_s = std::max(int64_t(1), std::min(int64_t(MIN_EXPIRE_S), expire_s/2));
+
+ unique_lock uniq(mtx); /* mtx is held while iterating mounted_fs */
+ restart:
+ int cur_gen = gen; /* snapshot of gen, re-checked after every relock */
+ for (auto iter = mounted_fs.begin(); iter != mounted_fs.end();
+ ++iter) {
+ RGWLibFS* fs = iter->first->ref(); /* take a reference before dropping the lock */
+ uniq.unlock(); /* gc() and update_user() run without mtx held */
+ fs->gc();
+ fs->update_user();
+ fs->rele();
+ uniq.lock();
+ if (cur_gen != gen)
+ goto restart; /* gen changed while unlocked: treat iter as invalidated, rescan from begin() */
+ }
+ cv.wait_for(uniq, std::chrono::seconds(delay_s)); /* wait on cv for at most delay_s seconds */
+ uniq.unlock();
+ }
+ }
+
+ void RGWLibProcess::handle_request(RGWRequest* r) /* process r with a local RGWLibIO, then delete it */
+ {
+ /*
+ * invariant: valid requests are derived from RGWLibRequest
+ */
+ RGWLibRequest* req = static_cast<RGWLibRequest*>(r); /* unchecked downcast, relies on the invariant above */
+
+ // XXX move RGWLibIO and timing setup into process_request
+
+#if 0 /* XXX */
+ utime_t tm = ceph_clock_now();
+#endif
+
+ RGWLibIO io_ctx;
+
+ int ret = process_request(req, &io_ctx);
+ if (ret < 0) {
+ /* failure is only logged here; the code is not propagated */
+ dout(20) << "process_request() returned " << ret << dendl;
+
+ }
+ delete req; /* the request is freed here whether or not processing succeeded */
+ } /* handle_request */
+
+ int RGWLibProcess::process_request(RGWLibRequest* req) /* one-arg overload: supplies a local RGWLibIO */
+ {
+ // XXX move RGWLibIO and timing setup into process_request
+
+#if 0 /* XXX */
+ utime_t tm = ceph_clock_now();
+#endif
+
+ RGWLibIO io_ctx;
+
+ int ret = process_request(req, &io_ctx);
+ if (ret < 0) {
+ /* logged here and also returned to the caller below */
+ dout(20) << "process_request() returned " << ret << dendl;
+ }
+ return ret; /* unlike handle_request(), req is not deleted here */
+ } /* process_request */
+
+ static inline void abort_req(struct req_state *s, RGWOp *op, int err_no) /* op and err_no are currently unused */
+ {
+ if (!s)
+ return; /* no request state: nothing is counted */
+
+ /* XXX the dump_errno and dump_bucket_from_state behaviors in
+ * the abort_early (rgw_rest.cc) might be valuable, but aren't
+ * safe to call presently as they return HTTP data */
+
+ perfcounter->inc(l_rgw_failed_req); /* the only effect: bump the failed-request counter */
+ } /* abort_req */
+
+ int RGWLibProcess::process_request(RGWLibRequest* req, RGWLibIO* io) /* run req through init, auth, checks and execution using io */
+ {
+ int ret = 0;
+ bool should_log = true; // XXX
+
+ dout(1) << "====== " << __func__
+ << " starting new request req=" << hex << req << dec
+ << " ======" << dendl;
+
+ /*
+ * invariant: valid requests are derived from RGWOp--well-formed
+ * requests should have assigned RGWRequest::op in their descendant
+ * constructor--if not, the compiler can find it, at the cost of
+ * a runtime check
+ */
+ RGWOp *op = (req->op) ? req->op : dynamic_cast<RGWOp*>(req);
+ if (! op) {
+ dout(1) << "failed to derive cognate RGWOp (invalid op?)" << dendl;
+ return -EINVAL; /* early return: nothing below (io, logging) runs */
+ }
+
+ io->init(req->cct);
+
+ perfcounter->inc(l_rgw_req);
+
+ RGWEnv& rgw_env = io->get_env();
+
+ /* XXX
+ * until major refactoring of req_state and req_info, we need
+ * to build their RGWEnv boilerplate from the RGWLibRequest,
+ * pre-staging any strings (HTTP_HOST) that provoke a crash when
+ * not found
+ */
+
+ /* XXX for now, use ""; could be a legit hostname, or, in future,
+ * perhaps a tenant (Yehuda) */
+ rgw_env.set("HTTP_HOST", "");
+
+ /* XXX and -then- bloat up req_state with string copies from it */
+ struct req_state rstate(req->cct, &rgw_env, req->get_user(), req->id);
+ struct req_state *s = &rstate; /* s points at the stack-local rstate for the rest of the call */
+
+ // XXX fix this
+ s->cio = io;
+
+ RGWObjectCtx rados_ctx(store, s); // XXX holds std::map
+
+ auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+ s->sysobj_ctx = &sysobj_ctx;
+
+ /* XXX and -then- stash req_state pointers everywhere they are needed */
+ ret = req->init(rgw_env, &rados_ctx, io, s);
+ if (ret < 0) {
+ dout(10) << "failed to initialize request" << dendl;
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ /* req is-a RGWOp, currently initialized separately */
+ ret = req->op_init();
+ if (ret < 0) {
+ dout(10) << "failed to initialize RGWOp" << dendl;
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ /* now expected by rgw_log_op() */
+ rgw_env.set("REQUEST_METHOD", s->info.method);
+ rgw_env.set("REQUEST_URI", s->info.request_uri);
+ rgw_env.set("QUERY_STRING", "");
+
+ try {
+ /* XXX authorize does less here than in the REST path, e.g.,
+ * the user's info is cached, but still incomplete */
+ ldpp_dout(s, 2) << "authorizing" << dendl;
+ ret = req->authorize(op);
+ if (ret < 0) {
+ dout(10) << "failed to authorize request" << dendl;
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ /* FIXME: remove this after switching all handlers to the new
+ * authentication infrastructure. */
+ if (! s->auth.identity) {
+ s->auth.identity = rgw::auth::transform_old_authinfo(s);
+ }
+
+ ldpp_dout(s, 2) << "reading op permissions" << dendl;
+ ret = req->read_permissions(op);
+ if (ret < 0) {
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ ldpp_dout(s, 2) << "init op" << dendl;
+ ret = op->init_processing();
+ if (ret < 0) {
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ ldpp_dout(s, 2) << "verifying op mask" << dendl;
+ ret = op->verify_op_mask();
+ if (ret < 0) {
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ ldpp_dout(s, 2) << "verifying op permissions" << dendl;
+ ret = op->verify_permission();
+ if (ret < 0) { /* a denial is overridden for system requests and for admins of s->user */
+ if (s->system_request) {
+ dout(2) << "overriding permissions due to system operation" << dendl;
+ } else if (s->auth.identity->is_admin_of(s->user->user_id)) {
+ dout(2) << "overriding permissions due to admin operation" << dendl;
+ } else {
+ abort_req(s, op, ret);
+ goto done;
+ }
+ }
+
+ ldpp_dout(s, 2) << "verifying op params" << dendl;
+ ret = op->verify_params(); /* also overwrites a negative ret left by an overridden permission check */
+ if (ret < 0) {
+ abort_req(s, op, ret);
+ goto done;
+ }
+
+ ldpp_dout(s, 2) << "executing" << dendl;
+ op->pre_exec();
+ op->execute();
+ op->complete();
+
+ } catch (const ceph::crypto::DigestException& e) {
+ dout(0) << "authentication failed" << e.what() << dendl;
+ abort_req(s, op, -ERR_INVALID_SECRET_KEY); /* NOTE(review): ret is not updated on this path--confirm intended */
+ }
+
+ done: /* common exit for every stage after io->init(): complete io, log, report */
+ try {
+ io->complete_request();
+ } catch (rgw::io::Exception& e) {
+ dout(0) << "ERROR: io->complete_request() returned "
+ << e.what() << dendl;
+ }
+ if (should_log) {
+ rgw_log_op(store, nullptr /* !rest */, s,
+ (op ? op->name() : "unknown"), olog);
+ }
+
+ int http_ret = s->err.http_ret;
+
+ ldpp_dout(s, 2) << "http status=" << http_ret << dendl;
+
+ dout(1) << "====== " << __func__
+ << " req done req=" << hex << req << dec << " http_status="
+ << http_ret
+ << " ======" << dendl;
+
+ return (ret < 0 ? ret : s->err.ret); /* a negative stage result wins; otherwise the code recorded in s->err */
+ } /* process_request */
+
+ int RGWLibProcess::start_request(RGWLibContinuedReq* req)
+ {
+   /* First half of a continued request: initialize the op, authorize,
+    * read and verify permissions and params, then call pre_exec() and
+    * the request's exec_start().  Returns a negative errno on failure,
+    * otherwise s->err.ret. */
+   dout(1) << "====== " << __func__
+           << " starting new continued request req=" << hex << req << dec
+           << " ======" << dendl;
+
+   /*
+    * invariant: valid requests are derived from RGWOp--well-formed
+    * requests should have assigned RGWRequest::op in their descendant
+    * constructor--if not, the compiler can find it, at the cost of
+    * a runtime check
+    */
+   RGWOp* op = req->op;
+   if (! op) {
+     op = dynamic_cast<RGWOp*>(req);
+   }
+   if (! op) {
+     dout(1) << "failed to derive cognate RGWOp (invalid op?)" << dendl;
+     return -EINVAL;
+   }
+
+   struct req_state* s = req->get_state();
+
+   /* req is-a RGWOp, currently initialized separately */
+   int ret = req->op_init();
+
+   /* value every exit path hands back to the caller */
+   const auto status = [&]() -> int {
+     return (ret < 0 ? ret : s->err.ret);
+   };
+   /* record the failure on the request, then produce the exit status */
+   const auto bail = [&]() -> int {
+     abort_req(s, op, ret);
+     return status();
+   };
+
+   if (ret < 0) {
+     dout(10) << "failed to initialize RGWOp" << dendl;
+     return bail();
+   }
+
+   /* XXX authorize does less here than in the REST path, e.g.,
+    * the user's info is cached, but still incomplete */
+   ldpp_dout(s, 2) << "authorizing" << dendl;
+   ret = req->authorize(op);
+   if (ret < 0) {
+     dout(10) << "failed to authorize request" << dendl;
+     return bail();
+   }
+
+   /* FIXME: remove this after switching all handlers to the new authentication
+    * infrastructure. */
+   if (! s->auth.identity) {
+     s->auth.identity = rgw::auth::transform_old_authinfo(s);
+   }
+
+   ldpp_dout(s, 2) << "reading op permissions" << dendl;
+   ret = req->read_permissions(op);
+   if (ret < 0) {
+     return bail();
+   }
+
+   ldpp_dout(s, 2) << "init op" << dendl;
+   ret = op->init_processing();
+   if (ret < 0) {
+     return bail();
+   }
+
+   ldpp_dout(s, 2) << "verifying op mask" << dendl;
+   ret = op->verify_op_mask();
+   if (ret < 0) {
+     return bail();
+   }
+
+   ldpp_dout(s, 2) << "verifying op permissions" << dendl;
+   ret = op->verify_permission();
+   if (ret < 0) {
+     /* a denial is overridden for system requests and for admins of
+      * the requesting user; everyone else is rejected here */
+     if (s->system_request) {
+       dout(2) << "overriding permissions due to system operation" << dendl;
+     } else if (s->auth.identity->is_admin_of(s->user->user_id)) {
+       dout(2) << "overriding permissions due to admin operation" << dendl;
+     } else {
+       return bail();
+     }
+   }
+
+   ldpp_dout(s, 2) << "verifying op params" << dendl;
+   ret = op->verify_params();
+   if (ret < 0) {
+     return bail();
+   }
+
+   op->pre_exec();
+   req->exec_start();
+
+   return status();
+ }
+
+ int RGWLibProcess::finish_request(RGWLibContinuedReq* req)
+ {
+   /* Second half of a continued request: run exec_finish() and log the
+    * op's status.  Returns exec_finish()'s result, or -EINVAL when no
+    * RGWOp can be derived from req. */
+   RGWOp* op = req->op;
+   if (! op) {
+     op = dynamic_cast<RGWOp*>(req);
+   }
+   if (! op) {
+     dout(1) << "failed to derive cognate RGWOp (invalid op?)" << dendl;
+     return -EINVAL;
+   }
+
+   const int finish_rc = req->exec_finish();
+   const int op_status = op->get_ret();
+
+   dout(1) << "====== " << __func__
+           << " finishing continued request req=" << hex << req << dec
+           << " op status=" << op_status
+           << " ======" << dendl;
+
+   return finish_rc;
+ }
+
+ int RGWLibFrontend::init()
+ {
+   /* Create the request processor, sized by rgw_thread_pool_size;
+    * always reports success. */
+   const auto pool_size = g_conf()->rgw_thread_pool_size;
+   pprocess = new RGWLibProcess(g_ceph_context, &env, pool_size, conf);
+   return 0;
+ }
+
+ int RGWLib::init()
+ {
+   /* Convenience overload: initialize with an empty argument list. */
+   vector<const char*> no_args;
+   const int rc = init(no_args);
+   return rc;
+ }
+
+ int RGWLib::init(vector<const char*>& args)
+ {
+   /* Bring up the whole library: global Ceph context, storage backend,
+    * perf counters, REST tables, LDAP helper, usage/ops logging, the
+    * library frontend and the async signal handler.
+    *
+    * args: command-line style arguments forwarded to global_init().
+    * Returns 0 on success, -EIO when the storage provider or perf
+    * counters cannot be started, or the frontend's negative error code.
+    * A failure to register in the service map is logged and ignored. */
+   int r = 0;
+
+   /* alternative default for module */
+   map<string,string> defaults = {
+     { "debug_rgw", "1/5" },
+     { "keyring", "$rgw_data/keyring" },
+     { "log_file", "/var/log/radosgw/$cluster-$name.log" }
+   };
+
+   cct = global_init(&defaults, args,
+                     CEPH_ENTITY_TYPE_CLIENT,
+                     CODE_ENVIRONMENT_DAEMON,
+                     CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+
+   /* watchdog: fire C_InitTimeout if initialization takes longer than
+    * rgw_init_timeout */
+   Mutex mutex("main");
+   SafeTimer init_timer(g_ceph_context, mutex);
+   init_timer.init();
+   mutex.Lock();
+   init_timer.add_event_after(g_conf()->rgw_init_timeout, new C_InitTimeout);
+   mutex.Unlock();
+
+   /* the failure path and the success path both disarm the watchdog the
+    * same way */
+   auto disarm_init_timer = [&mutex, &init_timer]() {
+     mutex.Lock();
+     init_timer.cancel_all_events();
+     init_timer.shutdown();
+     mutex.Unlock();
+   };
+
+   common_init_finish(g_ceph_context);
+
+   rgw_tools_init(g_ceph_context);
+
+   rgw_init_resolver();
+   rgw::curl::setup_curl(boost::none);
+   rgw_http_client_init(g_ceph_context);
+
+   store = RGWStoreManager::get_storage(g_ceph_context,
+                                        g_conf()->rgw_enable_gc_threads,
+                                        g_conf()->rgw_enable_lc_threads,
+                                        g_conf()->rgw_enable_quota_threads,
+                                        g_conf()->rgw_run_sync_thread,
+                                        g_conf().get_val<bool>("rgw_dynamic_resharding"));
+
+   if (!store) {
+     disarm_init_timer();
+
+     derr << "Couldn't init storage provider (RADOS)" << dendl;
+     return -EIO;
+   }
+
+   r = rgw_perf_start(g_ceph_context);
+
+   rgw_rest_init(g_ceph_context, store, store->svc.zone->get_zonegroup());
+
+   disarm_init_timer();
+
+   if (r)
+     return -EIO;
+
+   const string& ldap_uri = store->ctx()->_conf->rgw_ldap_uri;
+   const string& ldap_binddn = store->ctx()->_conf->rgw_ldap_binddn;
+   const string& ldap_searchdn = store->ctx()->_conf->rgw_ldap_searchdn;
+   const string& ldap_searchfilter = store->ctx()->_conf->rgw_ldap_searchfilter;
+   const string& ldap_dnattr =
+     store->ctx()->_conf->rgw_ldap_dnattr;
+   std::string ldap_bindpw = parse_rgw_ldap_bindpw(store->ctx());
+
+   ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_bindpw.c_str(),
+                             ldap_searchdn, ldap_searchfilter, ldap_dnattr);
+   ldh->init();
+   ldh->bind();
+
+   rgw_user_init(store);
+   rgw_bucket_init(store->meta_mgr);
+   rgw_log_usage_init(g_ceph_context, store);
+
+   // XXX ex-RGWRESTMgr_lib, mgr->set_logging(true)
+
+   if (!g_conf()->rgw_ops_log_socket_path.empty()) {
+     olog = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog);
+     olog->init(g_conf()->rgw_ops_log_socket_path);
+   }
+
+   int port = 80;
+   RGWProcessEnv env = { store, &rest, olog, port };
+
+   string fe_count{"0"};
+   fec = new RGWFrontendConfig("rgwlib");
+   fe = new RGWLibFrontend(env, fec);
+
+   init_async_signal_handler();
+   register_async_signal_handler(SIGUSR1, handle_sigterm);
+
+   map<string, string> service_map_meta;
+   service_map_meta["pid"] = stringify(getpid());
+   service_map_meta["frontend_type#" + fe_count] = "rgw-nfs";
+   service_map_meta["frontend_config#" + fe_count] = fec->get_config();
+
+   /* capture the frontend's result; previously the return value was
+    * dropped and the check below tested a stale r, so it never fired */
+   r = fe->init();
+   if (r < 0) {
+     derr << "ERROR: failed initializing frontend" << dendl;
+     return r;
+   }
+
+   fe->run();
+
+   r = store->register_to_service_map("rgw-nfs", service_map_meta);
+   if (r < 0) {
+     derr << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl;
+     /* ignore error */
+   }
+
+   return 0;
+ } /* RGWLib::init() */
+
+ int RGWLib::stop()
+ { // tears down what RGWLib::init(args) set up; always returns 0
+ derr << "shutting down" << dendl;
+
+ fe->stop(); // ask the frontend to stop ...
+
+ fe->join(); // ... and join it before anything it uses is deleted
+
+ delete fe;
+ delete fec;
+ delete ldh;
+
+ unregister_async_signal_handler(SIGUSR1, handle_sigterm); // undo the registration made in init()
+ shutdown_async_signal_handler();
+
+ rgw_log_usage_finalize();
+
+ delete olog; // may still be null when no ops-log socket path was configured
+
+ RGWStoreManager::close_storage(store);
+
+ rgw_tools_cleanup();
+ rgw_shutdown_resolver();
+ rgw_http_client_cleanup();
+ rgw::curl::cleanup_curl();
+
+ rgw_perf_stop(g_ceph_context);
+
+ dout(1) << "final shutdown" << dendl;
+ cct.reset(); // release the reference obtained from global_init()
+
+ return 0;
+ } /* RGWLib::stop() */
+
+ int RGWLibIO::set_uid(RGWRados *store, const rgw_user& uid)
+ {
+   /* Load the info record for uid into user_info; the lookup's return
+    * code is passed through unchanged and failures are logged. */
+   const int ret = rgw_get_user_info_by_uid(store, uid, user_info, NULL);
+   if (ret >= 0) {
+     return ret;
+   }
+   derr << "ERROR: failed reading user info: uid=" << uid << " ret="
+        << ret << dendl;
+   return ret;
+ }
+
+ int RGWLibRequest::read_permissions(RGWOp* op) {
+   /* Build the bucket policies and, unless this is a bucket-only
+    * request, the object policies as well.  -ENODATA from either step
+    * is reported to the caller as -EACCES. */
+   const auto remap = [](int rc) {
+     return (rc == -ENODATA) ? -EACCES : rc;
+   };
+
+   /* bucket and object ops */
+   const int bucket_rc =
+     rgw_build_bucket_policies(rgwlib.get_store(), get_state());
+   if (bucket_rc < 0) {
+     ldout(get_state()->cct, 10) << "read_permissions (bucket policy) on "
+                                 << get_state()->bucket << ":"
+                                 << get_state()->object
+                                 << " only_bucket=" << only_bucket()
+                                 << " ret=" << bucket_rc << dendl;
+     return remap(bucket_rc);
+   }
+
+   if (only_bucket()) {
+     return bucket_rc;
+   }
+
+   /* object ops */
+   const int object_rc =
+     rgw_build_object_policies(rgwlib.get_store(), get_state(),
+                               op->prefetch_data());
+   if (object_rc < 0) {
+     ldout(get_state()->cct, 10) << "read_permissions (object policy) on"
+                                 << get_state()->bucket << ":"
+                                 << get_state()->object
+                                 << " ret=" << object_rc << dendl;
+     return remap(object_rc);
+   }
+   return object_rc;
+ } /* RGWLibRequest::read_permissions */
+
+ int RGWHandler_Lib::authorize(const DoutPrefixProvider *dpp)
+ {
+   /* TODO: handle
+    *  1. subusers
+    *  2. anonymous access
+    *  3. system access
+    *  4. ?
+    *
+    *  Much or all of this depends on handling the cached authorization
+    *  correctly (e.g., dealing with keystone) at mount time.
+    */
+
+   /* no real checks yet: the request gets full control and always
+    * succeeds */
+   s->perm_mask = RGW_PERM_FULL_CONTROL;
+
+   // populate the owner info
+   auto& owner = s->owner;
+   owner.set_id(s->user->user_id);
+   owner.set_name(s->user->display_name);
+
+   return 0;
+ } /* RGWHandler_Lib::authorize */
+
+} /* namespace rgw */
+
+extern "C" {
+
+int librgw_create(librgw_t* rgw, int argc, char **argv)
+{
+  using namespace rgw;
+
+  /* Initialize the library once per process and hand back a counted
+   * reference to the global CephContext through *rgw.
+   *
+   * Returns the result of rgwlib.init() when this call performed the
+   * initialization.
+   * NOTE(review): when a context already exists rc stays -EINVAL even
+   * though *rgw is populated -- confirm that callers expect this. */
+  int rc = -EINVAL;
+
+  if (! g_ceph_context) {
+    std::lock_guard<std::mutex> lg(librgw_mtx);
+    if (! g_ceph_context) {
+      /* owns the strings that the pointers appended to args refer to,
+       * so it has to stay alive across the init() call */
+      std::vector<std::string> extra_words;
+      // last non-0 argument will be split and consumed
+      if (argc > 1) {
+        argc -= 1;
+        const std::string packed{argv[argc]};
+        get_str_vec(packed, " \t", extra_words);
+      }
+      vector<const char*> args;
+      argv_to_vec(argc, const_cast<const char**>(argv), args);
+      // append split args, if any
+      for (auto it = extra_words.begin(); it != extra_words.end(); ++it) {
+        args.push_back(it->c_str());
+      }
+      rc = rgwlib.init(args);
+    }
+  }
+
+  *rgw = g_ceph_context->get();
+
+  return rc;
+}
+
+void librgw_shutdown(librgw_t rgw)
+{
+  using namespace rgw;
+
+  /* Stop the library, then drop the context reference that
+   * librgw_create() handed out through the handle. */
+  CephContext* const handle_ctx = static_cast<CephContext*>(rgw);
+  rgwlib.stop();
+  handle_ctx->put();
+}
+
+} /* extern "C" */
diff --git a/src/rgw/librgw_admin_user.cc b/src/rgw/librgw_admin_user.cc
new file mode 100644
index 00000000..928f04cb
--- /dev/null
+++ b/src/rgw/librgw_admin_user.cc
@@ -0,0 +1,186 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * create rgw admin user
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/compat.h"
+#include <sys/types.h>
+#include <string.h>
+#include <chrono>
+#include <errno.h>
+#include <thread>
+#include <string>
+#include <mutex>
+
+#include "include/types.h"
+#include "include/rgw/librgw_admin_user.h"
+#include "include/str_list.h"
+#include "include/stringify.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/Throttle.h"
+#include "common/WorkQueue.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+
+#include "rgw_admin_user.h"
+#include "rgw_rados.h"
+#include "rgw_os_lib.h"
+#include "rgw_auth.h"
+#include "rgw_auth_s3.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+bool global_stop = false;
+
+static void handle_sigterm(int signum)
+{ // despite the name, this is registered for SIGUSR1 (see RGWLibAdmin::init) and only logs
+ dout(20) << __func__ << " SIGUSR1 ignored" << dendl;
+}
+
+namespace rgw {
+
+ using std::string;
+
+ static std::mutex librgw_admin_user_mtx; // held by librgw_admin_user_create() around first-time initialization
+
+ RGWLibAdmin rgw_lib_admin; // instance driven by the extern "C" create/shutdown entry points
+
+ class C_InitTimeout : public Context { // timer callback armed by RGWLibAdmin::init(args)
+ public:
+ C_InitTimeout() {}
+ void finish(int r) override { // r is unused
+ derr << "Initialization timeout, failed to initialize" << dendl;
+ exit(1); // terminates the whole process when initialization times out
+ }
+ };
+
+ int RGWLibAdmin::init()
+ {
+   /* Convenience overload: initialize with an empty argument list. */
+   vector<const char*> no_args;
+   const int rc = init(no_args);
+   return rc;
+ }
+
+ int RGWLibAdmin::init(vector<const char*>& args)
+ {
+   /* Set up the global context and storage backend (all background
+    * threads disabled), the user subsystem and the async signal handler.
+    * Returns 0 on success or -EIO when the storage provider cannot be
+    * opened. */
+
+   /* alternative default for module */
+   map<string,string> defaults = {
+     { "debug_rgw", "1/5" },
+     { "keyring", "$rgw_data/keyring" },
+     { "log_file", "/var/log/radosgw/$cluster-$name.log" }
+   };
+
+   cct = global_init(&defaults, args,
+                     CEPH_ENTITY_TYPE_CLIENT,
+                     CODE_ENVIRONMENT_UTILITY, 0);
+
+   /* watchdog: C_InitTimeout exits the process if initialization takes
+    * longer than rgw_init_timeout */
+   Mutex mutex("main");
+   SafeTimer init_timer(g_ceph_context, mutex);
+   init_timer.init();
+   mutex.Lock();
+   init_timer.add_event_after(g_conf()->rgw_init_timeout, new C_InitTimeout);
+   mutex.Unlock();
+
+   common_init_finish(g_ceph_context);
+
+   store = RGWStoreManager::get_storage(g_ceph_context, false, false, false, false, false);
+
+   /* whether or not the store came up, the watchdog is disarmed before
+    * anything else happens */
+   mutex.Lock();
+   init_timer.cancel_all_events();
+   init_timer.shutdown();
+   mutex.Unlock();
+
+   if (!store) {
+     derr << "Couldn't init storage provider (RADOS)" << dendl;
+     return -EIO;
+   }
+
+   rgw_user_init(store);
+
+   init_async_signal_handler();
+   register_async_signal_handler(SIGUSR1, handle_sigterm);
+
+   return 0;
+ } /* RGWLibAdmin::init() */
+
+ int RGWLibAdmin::stop()
+ { // reverses RGWLibAdmin::init(args); always returns 0
+ derr << "shutting down" << dendl;
+
+ unregister_async_signal_handler(SIGUSR1, handle_sigterm); // undo the registration made in init()
+ shutdown_async_signal_handler();
+
+ RGWStoreManager::close_storage(store);
+
+ dout(1) << "final shutdown" << dendl;
+ cct.reset(); // release the reference obtained from global_init()
+
+ return 0;
+ } /* RGWLibAdmin::stop() */
+
+} /* namespace rgw */
+
+extern "C" {
+
+int librgw_admin_user_create(librgw_admin_user_t* rgw_admin_user, int argc, char **argv)
+{
+  using namespace rgw;
+
+  /* Initialize the admin-user library once per process and hand back a
+   * counted reference to the global CephContext through *rgw_admin_user.
+   *
+   * Returns the result of rgw_lib_admin.init() when this call performed
+   * the initialization.
+   * NOTE(review): when a context already exists rc stays -EINVAL even
+   * though the handle is populated -- confirm that callers expect this. */
+  int rc = -EINVAL;
+
+  if (! g_ceph_context) {
+    std::lock_guard<std::mutex> lg(librgw_admin_user_mtx);
+    if (! g_ceph_context) {
+      /* owns the strings that the pointers appended to args refer to,
+       * so it has to stay alive across the init() call */
+      std::vector<std::string> extra_words;
+      // last non-0 argument will be split and consumed
+      if (argc > 1) {
+        argc -= 1;
+        const std::string packed{argv[argc]};
+        get_str_vec(packed, " \t", extra_words);
+      }
+      vector<const char*> args;
+      argv_to_vec(argc, const_cast<const char**>(argv), args);
+      // append split args, if any
+      for (auto it = extra_words.begin(); it != extra_words.end(); ++it) {
+        args.push_back(it->c_str());
+      }
+      rc = rgw_lib_admin.init(args);
+    }
+  }
+
+  *rgw_admin_user = g_ceph_context->get();
+
+  return rc;
+}
+
+void librgw_admin_user_shutdown(librgw_admin_user_t rgw_admin_user)
+{
+  using namespace rgw;
+
+  /* Stop the admin library, then drop the context reference that
+   * librgw_admin_user_create() handed out through the handle. */
+  CephContext* const handle_ctx = static_cast<CephContext*>(rgw_admin_user);
+  rgw_lib_admin.stop();
+  handle_ctx->put();
+}
+
+} /* extern "C" */
+
+
diff --git a/src/rgw/rgw-orphan-list b/src/rgw/rgw-orphan-list
new file mode 100755
index 00000000..7f60c651
--- /dev/null
+++ b/src/rgw/rgw-orphan-list
@@ -0,0 +1,144 @@
+#!/usr/bin/env bash
+
+# version 2020-10-20
+
+# IMPORTANT: LANG affects the order produced by 'sort', and
+# 'ceph-diff-sorted' relies on that ordering
+export LANG=C
+
+out_dir="." # every output file is written under this directory
+temp_file=/tmp/temp.$$ # scratch file named after this shell's PID; NOTE(review): predictable name, consider mktemp
+timestamp=$(date -u +%Y%m%d%H%M%S) # UTC tag shared by all output file names of this run
+lspools_err="${out_dir}/lspools-${timestamp}.error" # stderr of 'rados lspools'
+rados_out="${out_dir}/rados-${timestamp}.intermediate" # 'rados ls' output, later reduced to sorted plain oids
+rados_odd="${out_dir}/rados-${timestamp}.issues" # entries that have a namespace and/or locator
+rados_err="${out_dir}/rados-${timestamp}.error" # stderr of 'rados ls'
+rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate" # sorted 'radosgw-admin bucket radoslist' output
+rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error" # stderr of 'radosgw-admin bucket radoslist'
+delta_out="${out_dir}/orphan-list-${timestamp}.out" # final list of potential orphans
+
+error_out() {
+ echo "An error was encountered while running '$1'. Aborting."
+ if [ $# -gt 2 ] ;then
+ echo "Error: $3"
+ fi
+ if [ $# -gt 1 ] ;then
+ echo "Review file '$2' for details."
+ fi
+ echo "***"
+ echo "*** WARNING: The results are incomplete. Do not use! ***"
+ echo "***"
+ exit 1
+}
+
+prompt_pool() {
+ # note: all prompts go to stderr so stdout contains just the result
+ >&2 echo "Available pools:"
+ rados lspools >"$temp_file" 2>"$lspools_err"
+ if [ "$?" -ne 0 ] ;then
+ error_out "rados lspools" "$lspools_err"
+ fi
+ >&2 sed 's/^/ /' "$temp_file" # list pools and indent
+ >&2 printf "Which pool do you want to search for orphans? "
+ local mypool
+ read mypool
+ echo $mypool
+}
+
+# Determine the pool to examine: prompt when no argument is given,
+# otherwise take the single argument.
+if [ $# -eq 0 ] ;then
+    pool="$(prompt_pool)"
+    # prompt_pool runs in a command-substitution subshell, so an 'exit'
+    # inside it cannot terminate this script; check its status here
+    # instead of carrying on with captured error text as the pool name
+    if [ "$?" -ne 0 ] ;then
+	if [ -n "$pool" ] ;then
+	    echo "$pool"
+	fi
+	exit 1
+    fi
+elif [ $# -eq 1 ] ;then
+    pool="$1"
+else
+    # print a plain usage line; routing it through error_out would wrap
+    # it in "An error was encountered while running '...'"
+    >&2 echo "Usage: $0 [pool]"
+    exit 1
+fi
+
+# an empty name cannot refer to a pool; stop before 'rados ls' fails on it
+if [ -z "$pool" ] ;then
+    >&2 echo "No pool was specified. Aborting."
+    exit 1
+fi
+
+echo "Pool is \"$pool\"."
+
+echo "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."
+
+echo "running 'rados ls' at $(date)"
+# since --format is not specified, plain should be used
+rados ls --pool="$pool" --all >"$rados_out" 2>"$rados_err"
+if [ "$?" -ne 0 ] ;then
+    error_out "rados ls" "$rados_err"
+fi
+
+# NOTE: Each entry (line of output) of `rados ls --all` should be in
+# one of four formats depending on whether or not an entry has a
+# namespace and/or locator:
+#
+# <TAB>oid
+# <TAB>oid<TAB>locator
+# namespace<TAB>oid
+# namespace<TAB>oid<TAB>locator
+#
+# Any occurrences of the 2nd, 3rd, or 4th (i.e., existence of
+# namespace and/or locator) should cause the creation of the "odd" file
+# and an explanation in the output, and those entries will not be
+# retained, and therefore they will not be called out as orphans. They
+# will need special handling by the end-user as we do not expect
+# namespaces or locators.
+
+# check for namespaces -- any line that does not begin with a tab
+# indicates a namespace; add those to "odd" file and set flag; note:
+# this also picks up entries with namespace and locator
+grep $'^[^\t]' "$rados_out" >"$rados_odd"
+if [ "${PIPESTATUS[0]}" -eq 0 ] ;then
+    namespace_found=1
+fi
+
+# check for locators (w/o namespace); we identify them by skipping
+# past the empty namespace (i.e., one TAB), skipping past the oid,
+# then looking for a TAB; note we use 'grep -E' (the replacement for
+# the obsolescent 'egrep') to get the '+' character and the $ in
+# front of the ' allows the \t to be interpreted as a TAB
+grep -E $'^\t[[:graph:]]+\t' "$rados_out" >>"$rados_odd"
+if [ "${PIPESTATUS[0]}" -eq 0 ] ;then
+    locator_found=1
+fi
+
+# extract the entries that are just oids (i.e., no namespace or
+# locator) for further processing; only look at lines that begin with
+# a TAB and do not contain a second TAB, and then grab everything
+# after the initial TAB
+grep $'^\t' "$rados_out" | grep -v $'^\t.*\t' | sed -E 's/^\t//' >"$temp_file"
+mv -f "$temp_file" "$rados_out"
+
+# sort and de-duplicate so the list can be fed to ceph-diff-sorted
+sort -u "$rados_out" >"$temp_file"
+mv -f "$temp_file" "$rados_out"
+
+# collect the rados objects that radosgw knows about
+echo "running 'radosgw-admin bucket radoslist' at $(date)"
+radosgw-admin bucket radoslist >"$rgwadmin_out" 2>"$rgwadmin_err"
+if [ "$?" -ne 0 ] ;then
+    # name the command exactly as it was run
+    error_out "radosgw-admin bucket radoslist" "$rgwadmin_err"
+fi
+# sort and de-duplicate so the list can be fed to ceph-diff-sorted
+sort -u "$rgwadmin_out" >"$temp_file"
+mv -f "$temp_file" "$rgwadmin_out"
+
+echo "computing delta at $(date)"
+ceph-diff-sorted "$rados_out" "$rgwadmin_out" | grep "^<" | sed 's/^< *//' >"$delta_out"
+# use PIPESTATUS to get at exit status of first process in above pipe;
+# 0 means same, 1 means different, >1 means error
+if [ "${PIPESTATUS[0]}" -gt 1 ] ;then
+ error_out "ceph-diff-sorted"
+fi
+
+found=$(wc -l < "$delta_out")
+possible=$(wc -l < "$rados_out")
+percentage=0
+if [ $possible -ne 0 ] ;then
+ percentage=$(expr 100 \* $found / $possible)
+fi
+
+echo "$found potential orphans found out of a possible $possible (${percentage}%)."
+echo "The results can be found in '${delta_out}'."
+echo " Intermediate files are '${rados_out}' and '${rgwadmin_out}'."
+if [ -n "$namespace_found" -o -n "$locator_found" ] ;then
+ echo " Note: 'rados ls' found entries that might be in a namespace or might"
+ echo " have a locator; see '${rados_odd}' for those entries."
+fi
+echo "***"
+echo "*** WARNING: This is EXPERIMENTAL code and the results should be used"
+echo "*** only with CAUTION!"
+echo "***"
+echo "Done at $(date)."
diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc
new file mode 100644
index 00000000..8c02f8e3
--- /dev/null
+++ b/src/rgw/rgw_acl.cc
@@ -0,0 +1,174 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "common/Formatter.h"
+
+#include "rgw_acl.h"
+#include "rgw_user.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+void RGWAccessControlList::_add_grant(ACLGrant *grant)
+{
+  /* Fold one grant into the lookup table that matches its grantee
+   * type: referer list, group map, or user map. */
+  const uint32_t granted = grant->get_permission().get_permissions();
+  const ACLGranteeTypeEnum kind = grant->get_type().get_type();
+
+  if (kind == ACL_TYPE_REFERER) {
+    referer_list.emplace_back(grant->get_referer(), granted);
+
+    /* We're specially handling the Swift's .r:* as the S3 API has a similar
+     * concept and thus we can have a small portion of compatibility here. */
+    if (grant->get_referer() == RGW_REFERER_WILDCARD) {
+      acl_group_map[ACL_GROUP_ALL_USERS] |= granted;
+    }
+    return;
+  }
+
+  if (kind == ACL_TYPE_GROUP) {
+    acl_group_map[grant->get_group()] |= granted;
+    return;
+  }
+
+  /* every remaining type is keyed by user id; a failed lookup is only
+   * logged and the (default) id is still recorded */
+  rgw_user grantee;
+  if (!grant->get_id(grantee)) {
+    ldout(cct, 0) << "ERROR: grant->get_id() failed" << dendl;
+  }
+  acl_user_map[grantee.to_str()] |= granted;
+}
+
+void RGWAccessControlList::add_grant(ACLGrant *grant)
+{
+  /* Store a copy of the grant keyed by its grantee id, then update the
+   * permission lookup tables. */
+  rgw_user grantee;
+  // note that this will return false for groups, but that's ok, we won't search groups
+  grant->get_id(grantee);
+  grant_map.emplace(grantee.to_str(), *grant);
+  _add_grant(grant);
+}
+
+uint32_t RGWAccessControlList::get_perm(const DoutPrefixProvider* dpp,
+                                        const rgw::auth::Identity& auth_identity,
+                                        const uint32_t perm_mask)
+{
+  /* Permissions the identity derives from the per-user ACL entries,
+   * restricted to perm_mask. */
+  ldpp_dout(dpp, 5) << "Searching permissions for identity=" << auth_identity
+                    << " mask=" << perm_mask << dendl;
+
+  const uint32_t from_aclspec =
+    auth_identity.get_perms_from_aclspec(dpp, acl_user_map);
+  return perm_mask & from_aclspec;
+}
+
+uint32_t RGWAccessControlList::get_group_perm(ACLGroupTypeEnum group,
+                                              const uint32_t perm_mask)
+{
+  /* Permissions granted to the given group, restricted to perm_mask;
+   * 0 when the group has no entry. */
+  ldout(cct, 5) << "Searching permissions for group=" << (int)group
+                << " mask=" << perm_mask << dendl;
+
+  const auto found = acl_group_map.find(static_cast<uint32_t>(group));
+  if (found == acl_group_map.end()) {
+    ldout(cct, 5) << "Permissions for group not found" << dendl;
+    return 0;
+  }
+
+  ldout(cct, 5) << "Found permission: " << found->second << dendl;
+  return found->second & perm_mask;
+}
+
+uint32_t RGWAccessControlList::get_referer_perm(const uint32_t current_perm,
+                                                const std::string http_referer,
+                                                const uint32_t perm_mask)
+{
+  /* Returns current_perm, or the permission of the last referer entry
+   * that matches http_referer, restricted to perm_mask. */
+  ldout(cct, 5) << "Searching permissions for referer=" << http_referer
+                << " mask=" << perm_mask << dendl;
+
+  /* This function is basically a transformation from current perm to
+   * a new one that takes into consideration the Swift's HTTP referer-
+   * based ACLs. We need to go through all items to respect negative
+   * grants. */
+  uint32_t effective = current_perm;
+  for (auto it = referer_list.cbegin(); it != referer_list.cend(); ++it) {
+    if (!it->is_match(http_referer)) {
+      continue;
+    }
+    /* a later match replaces (not ORs) the earlier result */
+    effective = it->perm;
+  }
+
+  ldout(cct, 5) << "Found referer permission=" << effective << dendl;
+  return effective & perm_mask;
+}
+
+uint32_t RGWAccessControlPolicy::get_perm(const DoutPrefixProvider* dpp,
+                                          const rgw::auth::Identity& auth_identity,
+                                          const uint32_t perm_mask,
+                                          const char * const http_referer)
+{
+  /* Collect the permissions the identity holds under this policy,
+   * consulting in order: per-user grants, ownership, group grants and,
+   * when a referer is supplied, referer grants.  Later sources are
+   * consulted only while perm_mask is not fully covered. */
+  ldpp_dout(dpp, 20) << "-- Getting permissions begin with perm_mask=" << perm_mask
+                     << dendl;
+
+  uint32_t perm = acl.get_perm(dpp, auth_identity, perm_mask);
+
+  /* the owner always gets the ACP bits that were asked for */
+  const bool is_owner = auth_identity.is_owner_of(owner.get_id());
+  if (is_owner) {
+    perm |= perm_mask & (RGW_PERM_READ_ACP | RGW_PERM_WRITE_ACP);
+  }
+
+  if (perm == perm_mask) {
+    return perm;
+  }
+
+  const auto fully_covered = [&perm, perm_mask]() {
+    return (perm & perm_mask) == perm_mask;
+  };
+
+  /* should we continue looking up? */
+  if (!fully_covered()) {
+    perm |= acl.get_group_perm(ACL_GROUP_ALL_USERS, perm_mask);
+
+    const bool is_anonymous =
+      auth_identity.is_owner_of(rgw_user(RGW_USER_ANON_ID));
+    if (!is_anonymous) {
+      /* this is not the anonymous user */
+      perm |= acl.get_group_perm(ACL_GROUP_AUTHENTICATED_USERS, perm_mask);
+    }
+  }
+
+  /* Should we continue looking up even deeper? */
+  if (http_referer != nullptr && !fully_covered()) {
+    perm = acl.get_referer_perm(perm, http_referer, perm_mask);
+  }
+
+  ldpp_dout(dpp, 5) << "-- Getting permissions done for identity=" << auth_identity
+                    << ", owner=" << owner.get_id()
+                    << ", perm=" << perm << dendl;
+
+  return perm;
+}
+
+bool RGWAccessControlPolicy::verify_permission(const DoutPrefixProvider* dpp,
+                                               const rgw::auth::Identity& auth_identity,
+                                               const uint32_t user_perm_mask,
+                                               const uint32_t perm,
+                                               const char * const http_referer)
+{
+  /* True when every bit of perm is granted by the policy and is also
+   * allowed by user_perm_mask. */
+  const uint32_t probe_mask = perm | RGW_PERM_READ_OBJS | RGW_PERM_WRITE_OBJS;
+
+  uint32_t policy_perm = get_perm(dpp, auth_identity, probe_mask, http_referer);
+
+  /* the swift WRITE_OBJS perm is equivalent to the WRITE obj, just
+     convert those bits. Note that these bits will only be set on
+     buckets, so the swift READ permission on bucket will allow listing
+     the bucket content */
+  const auto widen = [&policy_perm](const uint32_t swift_bit,
+                                    const uint32_t implied_bits) {
+    if (policy_perm & swift_bit) {
+      policy_perm |= implied_bits;
+    }
+  };
+  widen(RGW_PERM_WRITE_OBJS, (RGW_PERM_WRITE | RGW_PERM_WRITE_ACP));
+  widen(RGW_PERM_READ_OBJS, (RGW_PERM_READ | RGW_PERM_READ_ACP));
+
+  const uint32_t acl_perm = policy_perm & perm & user_perm_mask;
+
+  ldpp_dout(dpp, 10) << " identity=" << auth_identity
+                     << " requested perm (type)=" << perm
+                     << ", policy perm=" << policy_perm
+                     << ", user_perm_mask=" << user_perm_mask
+                     << ", acl perm=" << acl_perm << dendl;
+
+  return (acl_perm == perm);
+}
+
+
diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h
new file mode 100644
index 00000000..0e84d75e
--- /dev/null
+++ b/src/rgw/rgw_acl.h
@@ -0,0 +1,469 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ACL_H
+#define CEPH_RGW_ACL_H
+
+#include <map>
+#include <string>
+#include <include/types.h>
+
+#include <boost/optional.hpp>
+#include <boost/utility/string_ref.hpp>
+
+#include "common/debug.h"
+
+#include "rgw_basic_types.h"
+
+#define RGW_PERM_NONE 0x00
+#define RGW_PERM_READ 0x01
+#define RGW_PERM_WRITE 0x02
+#define RGW_PERM_READ_ACP 0x04
+#define RGW_PERM_WRITE_ACP 0x08
+#define RGW_PERM_READ_OBJS 0x10 // Swift bit; verify_permission() expands it to READ | READ_ACP
+#define RGW_PERM_WRITE_OBJS 0x20 // Swift bit; verify_permission() expands it to WRITE | WRITE_ACP
+#define RGW_PERM_FULL_CONTROL ( RGW_PERM_READ | RGW_PERM_WRITE | \
+ RGW_PERM_READ_ACP | RGW_PERM_WRITE_ACP ) // the four bits above; the *_OBJS bits are not included
+#define RGW_PERM_ALL_S3 RGW_PERM_FULL_CONTROL
+#define RGW_PERM_INVALID 0xFF00
+
+static constexpr char RGW_REFERER_WILDCARD[] = "*"; // referer spec that ACLReferer::is_match() treats as matching any host
+
+enum ACLGranteeTypeEnum {
+/* numbers are encoded, should not change */
+ ACL_TYPE_CANON_USER = 0, // grantee is identified by ACLGrant::id
+ ACL_TYPE_EMAIL_USER = 1, // grantee is identified by ACLGrant::email
+ ACL_TYPE_GROUP = 2, // grantee is an ACLGroupTypeEnum group
+ ACL_TYPE_UNKNOWN = 3, // default of a freshly constructed ACLGranteeType
+ ACL_TYPE_REFERER = 4, // grantee is an HTTP referer spec (ACLGrant::url_spec)
+};
+
+enum ACLGroupTypeEnum {
+/* numbers are encoded should not change */
+ ACL_GROUP_NONE = 0, // default of a freshly constructed ACLGrant
+ ACL_GROUP_ALL_USERS = 1, // consulted for every identity, anonymous included
+ ACL_GROUP_AUTHENTICATED_USERS = 2, // consulted only when the identity is not the anonymous user
+};
+
+class ACLPermission
+{
+protected:
+ int flags; // permission bits as passed to set_permissions(); 0 until set
+public:
+ ACLPermission() : flags(0) {}
+ ~ACLPermission() {}
+ uint32_t get_permissions() const { return flags; }
+ void set_permissions(uint32_t perm) { flags = perm; } // replaces (does not OR into) the stored bits
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(flags, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ decode(flags, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ACLPermission*>& o);
+};
+WRITE_CLASS_ENCODER(ACLPermission)
+
+class ACLGranteeType
+{
+protected:
+ __u32 type; // holds an ACLGranteeTypeEnum value
+public:
+ ACLGranteeType() : type(ACL_TYPE_UNKNOWN) {}
+ virtual ~ACLGranteeType() {}
+// virtual const char *to_string() = 0;
+ ACLGranteeTypeEnum get_type() const { return (ACLGranteeTypeEnum)type; }
+ void set(ACLGranteeTypeEnum t) { type = t; }
+// virtual void set(const char *s) = 0;
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(type, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ decode(type, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ACLGranteeType*>& o);
+};
+WRITE_CLASS_ENCODER(ACLGranteeType)
+
+class ACLGrantee // empty type: no state and no behavior is declared here
+{
+public:
+ ACLGrantee() {}
+ ~ACLGrantee() {}
+};
+
+
+/* One ACL entry: a grantee (user id, email, group or HTTP referer spec,
+ * selected by `type`) together with the permission bits granted to it. */
+class ACLGrant
+{
+protected:
+  ACLGranteeType type;
+  rgw_user id;
+  string email;
+  ACLPermission permission;
+  string name;
+  ACLGroupTypeEnum group;
+  string url_spec;
+
+public:
+  ACLGrant() : group(ACL_GROUP_NONE) {}
+  virtual ~ACLGrant() {}
+
+  /* there's an assumption here that email/uri/id encodings are
+     different and there can't be any overlap */
+  /* Writes the grantee's user id into _id and returns true; returns
+   * false, leaving _id untouched, for group and referer grants. */
+  bool get_id(rgw_user& _id) const {
+    const ACLGranteeTypeEnum kind = type.get_type();
+    if (kind == ACL_TYPE_GROUP || kind == ACL_TYPE_REFERER) {
+      return false;
+    }
+    if (kind == ACL_TYPE_EMAIL_USER) {
+      _id = email; // implies from_str() that parses the 't:u' syntax
+    } else {
+      _id = id;
+    }
+    return true;
+  }
+  ACLGranteeType& get_type() { return type; }
+  const ACLGranteeType& get_type() const { return type; }
+  ACLPermission& get_permission() { return permission; }
+  const ACLPermission& get_permission() const { return permission; }
+  ACLGroupTypeEnum get_group() const { return group; }
+  const string& get_referer() const { return url_spec; }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(5, 3, bl);
+    encode(type, bl);
+    string id_str;
+    id.to_str(id_str);
+    encode(id_str, bl);
+    /* always written empty; decode() consults this field only for
+     * struct_v <= 1 */
+    string legacy_uri;
+    encode(legacy_uri, bl);
+    encode(email, bl);
+    encode(permission, bl);
+    encode(name, bl);
+    const __u32 raw_group = static_cast<__u32>(group);
+    encode(raw_group, bl);
+    encode(url_spec, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(5, 3, 3, bl);
+    decode(type, bl);
+    string id_str;
+    decode(id_str, bl);
+    id.from_str(id_str);
+    string legacy_uri;
+    decode(legacy_uri, bl);
+    decode(email, bl);
+    decode(permission, bl);
+    decode(name, bl);
+    if (struct_v <= 1) {
+      /* oldest encoding: the group has to be derived from the uri */
+      group = uri_to_group(legacy_uri);
+    } else {
+      __u32 raw_group;
+      decode(raw_group, bl);
+      group = static_cast<ACLGroupTypeEnum>(raw_group);
+    }
+    if (struct_v < 5) {
+      /* referer specs exist only from version 5 on */
+      url_spec.clear();
+    } else {
+      decode(url_spec, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<ACLGrant*>& o);
+
+  ACLGroupTypeEnum uri_to_group(string& uri);
+
+  /* The setters below each select the grantee type and store the
+   * permission bits alongside the identifying value. */
+  void set_canon(const rgw_user& _id, const string& _name, const uint32_t perm) {
+    id = _id;
+    name = _name;
+    type.set(ACL_TYPE_CANON_USER);
+    permission.set_permissions(perm);
+  }
+  void set_group(ACLGroupTypeEnum _group, const uint32_t perm) {
+    group = _group;
+    type.set(ACL_TYPE_GROUP);
+    permission.set_permissions(perm);
+  }
+  void set_referer(const std::string& _url_spec, const uint32_t perm) {
+    url_spec = _url_spec;
+    type.set(ACL_TYPE_REFERER);
+    permission.set_permissions(perm);
+  }
+};
+WRITE_CLASS_ENCODER(ACLGrant)
+
+/* A referer-based ACL entry: a host spec plus the permission bits that
+ * apply to requests whose HTTP referer matches it. */
+struct ACLReferer {
+  std::string url_spec;
+  uint32_t perm;
+
+  ACLReferer() : perm(0) {}
+  ACLReferer(const std::string& url_spec,
+             const uint32_t perm)
+    : url_spec(url_spec),
+      perm(perm) {
+  }
+
+  /* True when the host part of http_referer matches url_spec: the spec
+   * is "*", equals the host, or starts with '.' and is a suffix of the
+   * host.  A referer without a parsable host, or with a host shorter
+   * than the spec, never matches. */
+  bool is_match(boost::string_ref http_referer) const {
+    const auto host = get_http_host(http_referer);
+    if (!host) {
+      return false;
+    }
+    if (host->length() < url_spec.length()) {
+      return false;
+    }
+
+    const bool any_host = ("*" == url_spec);
+    if (any_host || host->compare(url_spec) == 0) {
+      return true;
+    }
+
+    /* Wildcard support: a referer matches the spec when its last char are
+     * perfectly equal to spec. */
+    const bool is_suffix_spec = ('.' == url_spec[0]);
+    return is_suffix_spec && host->ends_with(url_spec);
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(url_spec, bl);
+    encode(perm, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, bl);
+    decode(url_spec, bl);
+    decode(perm, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+
+private:
+  /* Extracts the host from "scheme://[userinfo@]host[:port][/path]".
+   * Returns boost::none when there is no "://", when the url starts or
+   * ends with it, or when the url ends with '@'. */
+  boost::optional<boost::string_ref> get_http_host(const boost::string_ref url) const {
+    const size_t scheme_pos = url.find("://");
+    const bool malformed = scheme_pos == boost::string_ref::npos ||
+                           url.starts_with("://") ||
+                           url.ends_with("://") ||
+                           url.ends_with('@');
+    if (malformed) {
+      return boost::none;
+    }
+
+    boost::string_ref rest = url.substr(scheme_pos + strlen("://"));
+
+    /* skip the part up to and including the first '@' */
+    const size_t at_pos = rest.find('@');
+    if (at_pos != boost::string_ref::npos) {
+      rest = rest.substr(at_pos + 1);
+    }
+
+    const size_t host_end = rest.find_first_of("/:");
+    if (host_end != boost::string_ref::npos) {
+      return rest.substr(0, host_end);
+    }
+    /* no port or path exists */
+    return rest;
+  }
+};
+WRITE_CLASS_ENCODER(ACLReferer)
+
+namespace rgw {
+namespace auth {
+ class Identity;
+}
+}
+
+/* The grants of one ACL plus the lookup tables derived from them:
+ * permission bits per user id, per group, and per referer spec. */
+class RGWAccessControlList
+{
+protected:
+  CephContext *cct;
+  /* FIXME: in the future we should consider switching to uint32_t also
+   * in data structures. */
+  map<string, int> acl_user_map;
+  map<uint32_t, int> acl_group_map;
+  list<ACLReferer> referer_list;
+  multimap<string, ACLGrant> grant_map;
+  void _add_grant(ACLGrant *grant);
+public:
+  explicit RGWAccessControlList(CephContext *_cct) : cct(_cct) {}
+  RGWAccessControlList() : cct(NULL) {}
+
+  void set_ctx(CephContext *ctx) {
+    cct = ctx;
+  }
+
+  virtual ~RGWAccessControlList() {}
+
+  uint32_t get_perm(const DoutPrefixProvider* dpp,
+                    const rgw::auth::Identity& auth_identity,
+                    uint32_t perm_mask);
+  uint32_t get_group_perm(ACLGroupTypeEnum group, uint32_t perm_mask);
+  uint32_t get_referer_perm(uint32_t current_perm,
+                            std::string http_referer,
+                            uint32_t perm_mask);
+  void encode(bufferlist& bl) const {
+    ENCODE_START(4, 3, bl);
+    /* always written as true; decode() consults it only for
+     * struct_v < 2 */
+    const bool maps_initialized = true;
+    encode(maps_initialized, bl);
+    encode(acl_user_map, bl);
+    encode(grant_map, bl);
+    encode(acl_group_map, bl);
+    encode(referer_list, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl);
+    bool maps_initialized;
+    decode(maps_initialized, bl);
+    decode(acl_user_map, bl);
+    decode(grant_map, bl);
+    if (struct_v >= 2) {
+      decode(acl_group_map, bl);
+    } else if (!maps_initialized) {
+      /* old encoding without stored lookup tables: rebuild them from
+       * the grants themselves */
+      for (auto& entry : grant_map) {
+        _add_grant(&entry.second);
+      }
+    }
+    if (struct_v >= 4) {
+      decode(referer_list, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<RGWAccessControlList*>& o);
+
+  void add_grant(ACLGrant *grant);
+
+  multimap<string, ACLGrant>& get_grant_map() { return grant_map; }
+  const multimap<string, ACLGrant>& get_grant_map() const { return grant_map; }
+
+  /* Clear the lookup tables and add a single full-control grant for
+   * the given user.
+   * NOTE(review): grant_map itself is not cleared here -- confirm that
+   * this is intended. */
+  void create_default(const rgw_user& id, string name) {
+    acl_user_map.clear();
+    acl_group_map.clear();
+    referer_list.clear();
+
+    ACLGrant owner_grant;
+    owner_grant.set_canon(id, name, RGW_PERM_FULL_CONTROL);
+    add_grant(&owner_grant);
+  }
+};
+WRITE_CLASS_ENCODER(RGWAccessControlList)
+
+ // Owner of an ACL policy: a user id plus a display name.
+ class ACLOwner
+ {
+ protected:
+ rgw_user id;
+ string display_name;
+ public:
+ ACLOwner() {}
+ ~ACLOwner() {}
+
+ // Serialises the id in its string form (rgw_user::to_str) followed by
+ // the display name.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(3, 2, bl);
+ string s;
+ id.to_str(s);
+ encode(s, bl);
+ encode(display_name, bl);
+ ENCODE_FINISH(bl);
+ }
+ // Counterpart of encode(): reads the id string, converts it back with
+ // rgw_user::from_str, then reads the display name.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
+ string s;
+ decode(s, bl);
+ id.from_str(s);
+ decode(display_name, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<ACLOwner*>& o);
+ void set_id(const rgw_user& _id) { id = _id; }
+ void set_name(const string& name) { display_name = name; }
+
+ rgw_user& get_id() { return id; }
+ const rgw_user& get_id() const { return id; }
+ // Only a mutable accessor is provided for the display name.
+ string& get_display_name() { return display_name; }
+ };
+
+ // An access control policy: an owner plus the access control list.
+ class RGWAccessControlPolicy
+ {
+ protected:
+ CephContext *cct;
+ RGWAccessControlList acl;
+ ACLOwner owner;
+
+ public:
+ explicit RGWAccessControlPolicy(CephContext *_cct) : cct(_cct), acl(_cct) {}
+ RGWAccessControlPolicy() : cct(NULL), acl(NULL) {}
+ virtual ~RGWAccessControlPolicy() {}
+
+ // Updates the context pointer here and in the contained ACL.
+ void set_ctx(CephContext *ctx) {
+ cct = ctx;
+ acl.set_ctx(ctx);
+ }
+
+ // Permission evaluation; implemented outside this header.
+ uint32_t get_perm(const DoutPrefixProvider* dpp,
+ const rgw::auth::Identity& auth_identity,
+ uint32_t perm_mask,
+ const char * http_referer);
+ uint32_t get_group_perm(ACLGroupTypeEnum group, uint32_t perm_mask);
+ bool verify_permission(const DoutPrefixProvider* dpp,
+ const rgw::auth::Identity& auth_identity,
+ uint32_t user_perm_mask,
+ uint32_t perm,
+ const char * http_referer = nullptr);
+
+ // Serialises the owner first, then the ACL; decode() reads them back in
+ // the same order.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(owner, bl);
+ encode(acl, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ decode(owner, bl);
+ decode(acl, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWAccessControlPolicy*>& o);
+ // Decodes only the owner, which is the first encoded field.
+ // NOTE(review): relies on DECODE_FINISH coping with the undecoded ACL
+ // that follows -- confirm against the macro definition.
+ void decode_owner(bufferlist::const_iterator& bl) { // sometimes we only need that, should be faster
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ decode(owner, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void set_owner(ACLOwner& o) { owner = o; }
+ ACLOwner& get_owner() {
+ return owner;
+ }
+
+ // Gives the ACL its default content for (id, name) and records the same
+ // pair as the owner.
+ void create_default(const rgw_user& id, string& name) {
+ acl.create_default(id, name);
+ owner.set_id(id);
+ owner.set_name(name);
+ }
+ RGWAccessControlList& get_acl() {
+ return acl;
+ }
+ const RGWAccessControlList& get_acl() const {
+ return acl;
+ }
+
+ // Base implementation matches nothing; subclasses may override.
+ virtual bool compare_group_name(string& id, ACLGroupTypeEnum group) { return false; }
+ };
+
+#endif
diff --git a/src/rgw/rgw_acl_s3.cc b/src/rgw/rgw_acl_s3.cc
new file mode 100644
index 00000000..5f026ff3
--- /dev/null
+++ b/src/rgw/rgw_acl_s3.cc
@@ -0,0 +1,616 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "rgw_acl_s3.h"
+#include "rgw_user.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+
+#define RGW_URI_ALL_USERS "http://acs.amazonaws.com/groups/global/AllUsers"
+#define RGW_URI_AUTH_USERS "http://acs.amazonaws.com/groups/global/AuthenticatedUsers"
+
+static string rgw_uri_all_users = RGW_URI_ALL_USERS;
+static string rgw_uri_auth_users = RGW_URI_AUTH_USERS;
+
+ /* Write the permission flags as S3 <Permission> elements. When every
+  * FULL_CONTROL bit is set a single FULL_CONTROL element is written;
+  * otherwise one element per individual bit that is set. */
+ void ACLPermission_S3::to_xml(ostream& out)
+ {
+   const bool full_control =
+     (flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL;
+   if (full_control) {
+     out << "<Permission>FULL_CONTROL</Permission>";
+     return;
+   }
+
+   if (flags & RGW_PERM_READ) {
+     out << "<Permission>READ</Permission>";
+   }
+   if (flags & RGW_PERM_WRITE) {
+     out << "<Permission>WRITE</Permission>";
+   }
+   if (flags & RGW_PERM_READ_ACP) {
+     out << "<Permission>READ_ACP</Permission>";
+   }
+   if (flags & RGW_PERM_WRITE_ACP) {
+     out << "<Permission>WRITE_ACP</Permission>";
+   }
+ }
+
+ /* Called when a <Permission> element is closed: maps its text
+  * (case-insensitively) to the matching permission bits and ORs them into
+  * `flags`. Returns false for an unrecognised permission name. */
+ bool ACLPermission_S3::
+ xml_end(const char *el)
+ {
+   static const struct {
+     const char *name;
+     int perm;
+   } known_perms[] = {
+     {"READ", RGW_PERM_READ},
+     {"WRITE", RGW_PERM_WRITE},
+     {"READ_ACP", RGW_PERM_READ_ACP},
+     {"WRITE_ACP", RGW_PERM_WRITE_ACP},
+     {"FULL_CONTROL", RGW_PERM_FULL_CONTROL},
+   };
+
+   const char *text = data.c_str();
+   for (const auto& entry : known_perms) {
+     if (strcasecmp(text, entry.name) == 0) {
+       flags |= entry.perm;
+       return true;
+     }
+   }
+   return false;
+ }
+
+
+ /* Converts between ACLGranteeType values and the strings S3 uses in the
+  * xsi:type attribute of a <Grantee> element. */
+ class ACLGranteeType_S3 {
+ public:
+   /* Returns the S3 name for the grantee type, or "unknown". */
+   static const char *to_string(ACLGranteeType& type) {
+     const char *label = "unknown";
+     switch (type.get_type()) {
+     case ACL_TYPE_CANON_USER:
+       label = "CanonicalUser";
+       break;
+     case ACL_TYPE_EMAIL_USER:
+       label = "AmazonCustomerByEmail";
+       break;
+     case ACL_TYPE_GROUP:
+       label = "Group";
+       break;
+     default:
+       break;
+     }
+     return label;
+   }
+
+   /* Sets `type` from its S3 name (case-sensitive); a null or unrecognised
+    * name yields ACL_TYPE_UNKNOWN. */
+   static void set(const char *s, ACLGranteeType& type) {
+     if (s && strcmp(s, "CanonicalUser") == 0) {
+       type.set(ACL_TYPE_CANON_USER);
+       return;
+     }
+     if (s && strcmp(s, "AmazonCustomerByEmail") == 0) {
+       type.set(ACL_TYPE_EMAIL_USER);
+       return;
+     }
+     if (s && strcmp(s, "Group") == 0) {
+       type.set(ACL_TYPE_GROUP);
+       return;
+     }
+     type.set(ACL_TYPE_UNKNOWN);
+   }
+ };
+
+ // XML node type allocated for <ID> elements (see alloc_obj in this file).
+ class ACLID_S3 : public XMLObj
+ {
+ public:
+ ACLID_S3() {}
+ ~ACLID_S3() override {}
+ // Returns the element's text content.
+ string& to_str() { return data; }
+ };
+
+ // XML node type allocated for <URI> elements.
+ class ACLURI_S3 : public XMLObj
+ {
+ public:
+ ACLURI_S3() {}
+ ~ACLURI_S3() override {}
+ };
+
+ // XML node type allocated for <EmailAddress> elements.
+ class ACLEmail_S3 : public XMLObj
+ {
+ public:
+ ACLEmail_S3() {}
+ ~ACLEmail_S3() override {}
+ };
+
+ // XML node type allocated for <DisplayName> elements.
+ class ACLDisplayName_S3 : public XMLObj
+ {
+ public:
+ ACLDisplayName_S3() {}
+ ~ACLDisplayName_S3() override {}
+ };
+
+ /* Called when an <Owner> element is closed: copies the child <ID> and
+  * optional <DisplayName> into this owner.
+  * Returns false when the mandatory <ID> child is missing. */
+ bool ACLOwner_S3::xml_end(const char *el) {
+   ACLID_S3 *acl_id = static_cast<ACLID_S3 *>(find_first("ID"));
+   /* <DisplayName> nodes are allocated as ACLDisplayName_S3 by the parser,
+    * so cast to that type rather than to the unrelated ACLID_S3. */
+   ACLDisplayName_S3 *acl_name =
+     static_cast<ACLDisplayName_S3 *>(find_first("DisplayName"));
+
+   // ID is mandatory
+   if (!acl_id)
+     return false;
+   id = acl_id->get_data();
+
+   // DisplayName is optional
+   if (acl_name)
+     display_name = acl_name->get_data();
+   else
+     display_name = "";
+
+   return true;
+ }
+
+ /* Write the <Owner> element. Nothing is written when the id's string form
+  * is empty; <DisplayName> is omitted when the display name is empty.
+  * NOTE(review): id and display name are written without XML escaping --
+  * confirm they cannot contain markup characters. */
+ void ACLOwner_S3::to_xml(ostream& out) {
+   string id_str;
+   id.to_str(id_str);
+   if (id_str.empty()) {
+     return;
+   }
+
+   out << "<Owner>";
+   out << "<ID>" << id_str << "</ID>";
+   if (!display_name.empty()) {
+     out << "<DisplayName>" << display_name << "</DisplayName>";
+   }
+   out << "</Owner>";
+ }
+
+ /* Called when a <Grant> element is closed: fills in this grant from its
+  * <Grantee> (type taken from the xsi:type attribute) and <Permission>
+  * children. Returns false when a required child or attribute is missing
+  * or the grantee type is not recognised. */
+ bool ACLGrant_S3::xml_end(const char *el) {
+   ACLGrantee_S3 *grantee = static_cast<ACLGrantee_S3 *>(find_first("Grantee"));
+   if (!grantee) {
+     return false;
+   }
+
+   string xsi_type;
+   if (!grantee->get_attr("xsi:type", xsi_type)) {
+     return false;
+   }
+   ACLGranteeType_S3::set(xsi_type.c_str(), type);
+
+   ACLPermission_S3 *perm_obj =
+     static_cast<ACLPermission_S3 *>(find_first("Permission"));
+   if (!perm_obj) {
+     return false;
+   }
+   permission = *perm_obj;
+
+   /* start from a clean identity before reading the type-specific child */
+   id.clear();
+   name.clear();
+   email.clear();
+
+   switch (type.get_type()) {
+   case ACL_TYPE_CANON_USER: {
+     ACLID_S3 *id_obj = static_cast<ACLID_S3 *>(grantee->find_first("ID"));
+     if (!id_obj) {
+       return false;
+     }
+     id = id_obj->to_str();
+
+     /* the display name is optional */
+     ACLDisplayName_S3 *name_obj =
+       static_cast<ACLDisplayName_S3 *>(grantee->find_first("DisplayName"));
+     if (name_obj) {
+       name = name_obj->get_data();
+     }
+     return true;
+   }
+   case ACL_TYPE_GROUP: {
+     ACLURI_S3 *uri_obj = static_cast<ACLURI_S3 *>(grantee->find_first("URI"));
+     if (!uri_obj) {
+       return false;
+     }
+     string uri = uri_obj->get_data();
+     group = uri_to_group(uri);
+     return true;
+   }
+   case ACL_TYPE_EMAIL_USER: {
+     ACLEmail_S3 *email_obj =
+       static_cast<ACLEmail_S3 *>(grantee->find_first("EmailAddress"));
+     if (!email_obj) {
+       return false;
+     }
+     email = email_obj->get_data();
+     return true;
+   }
+   default:
+     // unknown user type
+     return false;
+   }
+ }
+
+ /* Write this grant as a <Grant> element. Grants that carry none of the
+  * S3 permission bits produce no output. For a group that has no URI an
+  * error is logged and the <Grantee> element is written without a <URI>.
+  * NOTE(review): id, name and email are written without XML escaping. */
+ void ACLGrant_S3::to_xml(CephContext *cct, ostream& out) {
+   ACLPermission_S3& s3_perm = static_cast<ACLPermission_S3 &>(permission);
+
+   /* only show s3 compatible permissions */
+   if ((s3_perm.get_permissions() & RGW_PERM_ALL_S3) == 0) {
+     return;
+   }
+
+   out << "<Grant>";
+   out << "<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"";
+   out << ACLGranteeType_S3::to_string(type);
+   out << "\">";
+
+   switch (type.get_type()) {
+   case ACL_TYPE_CANON_USER:
+     out << "<ID>" << id << "</ID>";
+     if (name.size()) {
+       out << "<DisplayName>" << name << "</DisplayName>";
+     }
+     break;
+   case ACL_TYPE_EMAIL_USER:
+     out << "<EmailAddress>" << email << "</EmailAddress>";
+     break;
+   case ACL_TYPE_GROUP: {
+     string uri;
+     if (group_to_uri(group, uri)) {
+       out << "<URI>" << uri << "</URI>";
+     } else {
+       ldout(cct, 0) << "ERROR: group_to_uri failed with group=" << (int)group << dendl;
+     }
+     break;
+   }
+   default:
+     break;
+   }
+
+   out << "</Grantee>";
+   s3_perm.to_xml(out);
+   out << "</Grant>";
+ }
+
+ /* Look up the S3 group URI for `group`. Returns true and sets `uri` for
+  * the all-users and authenticated-users groups; returns false (leaving
+  * `uri` untouched) for any other group. */
+ bool ACLGrant_S3::group_to_uri(ACLGroupTypeEnum group, string& uri)
+ {
+   if (group == ACL_GROUP_ALL_USERS) {
+     uri = rgw_uri_all_users;
+     return true;
+   }
+   if (group == ACL_GROUP_AUTHENTICATED_USERS) {
+     uri = rgw_uri_auth_users;
+     return true;
+   }
+   return false;
+ }
+
+ /* Called when an <AccessControlList> element is closed: adds every child
+  * <Grant> to this list. Always returns true. */
+ bool RGWAccessControlList_S3::xml_end(const char *el) {
+   XMLObjIter grants = find("Grant");
+   for (ACLGrant_S3 *g = static_cast<ACLGrant_S3 *>(grants.get_next());
+        g != NULL;
+        g = static_cast<ACLGrant_S3 *>(grants.get_next())) {
+     add_grant(g);
+   }
+   return true;
+ }
+
+ /* Write the list as an <AccessControlList> element containing every
+  * stored grant in grant_map order. */
+ void RGWAccessControlList_S3::to_xml(ostream& out) {
+   out << "<AccessControlList>";
+   for (auto& entry : grant_map) {
+     ACLGrant_S3& s3_grant = static_cast<ACLGrant_S3 &>(entry.second);
+     s3_grant.to_xml(cct, out);
+   }
+   out << "</AccessControlList>";
+ }
+
+ // Associates a permission value with the name of the request-environment
+ // entry (an x-amz-grant-* header) that grants it.
+ struct s3_acl_header {
+ int rgw_perm;
+ const char *http_header;
+ };
+
+ /* Fetch the value of the header named by `perm` from the request
+  * environment; returns NULL when it is absent. */
+ static const char *get_acl_header(const RGWEnv *env,
+                                   const struct s3_acl_header *perm)
+ {
+   return env->get(perm->http_header, NULL);
+ }
+
+ /* Parse one "key=value" grantee item from an x-amz-grant-* header and
+  * fill in `grant` with the permission carried by `perm`.
+  *
+  * The key (case-insensitive) may be "emailAddress", "id" or "uri"; users
+  * are resolved through the store and groups through uri_to_group().
+  * Returns 0 on success, the error from the parser or user lookup, or
+  * -EINVAL for an unknown key or an unknown group URI. */
+ static int parse_grantee_str(RGWRados *store, string& grantee_str,
+                              const struct s3_acl_header *perm, ACLGrant& grant)
+ {
+   const int rgw_perm = perm->rgw_perm;
+   RGWUserInfo info;
+
+   string id_type, id_val_quoted;
+   int ret = parse_key_value(grantee_str, id_type, id_val_quoted);
+   if (ret < 0) {
+     return ret;
+   }
+
+   string id_val = rgw_trim_quotes(id_val_quoted);
+   const char *key = id_type.c_str();
+
+   if (strcasecmp(key, "emailAddress") == 0) {
+     ret = rgw_get_user_info_by_email(store, id_val, info);
+     if (ret < 0) {
+       return ret;
+     }
+     grant.set_canon(info.user_id, info.display_name, rgw_perm);
+     return 0;
+   }
+
+   if (strcasecmp(key, "id") == 0) {
+     rgw_user user(id_val);
+     ret = rgw_get_user_info_by_uid(store, user, info);
+     if (ret < 0) {
+       return ret;
+     }
+     grant.set_canon(info.user_id, info.display_name, rgw_perm);
+     return 0;
+   }
+
+   if (strcasecmp(key, "uri") == 0) {
+     ACLGroupTypeEnum gid = grant.uri_to_group(id_val);
+     if (gid == ACL_GROUP_NONE) {
+       return -EINVAL;
+     }
+     grant.set_group(gid, rgw_perm);
+     return 0;
+   }
+
+   return -EINVAL;
+ }
+
+ /* Read the header described by `perm` from the request environment, split
+  * it on commas and append one grant per item to `_grants`.
+  * A missing header is not an error (returns 0). The first item that fails
+  * to parse aborts with its error code; grants appended before that remain
+  * in `_grants`. */
+ static int parse_acl_header(RGWRados *store, const RGWEnv *env,
+                             const struct s3_acl_header *perm, std::list<ACLGrant>& _grants)
+ {
+   const char *raw = get_acl_header(env, perm);
+   if (raw == NULL) {
+     return 0;
+   }
+
+   std::string header_value = raw;
+   std::list<string> items;
+   get_str_list(header_value, ",", items);
+
+   for (auto& item : items) {
+     ACLGrant parsed;
+     const int ret = parse_grantee_str(store, item, perm, parsed);
+     if (ret < 0) {
+       return ret;
+     }
+     _grants.push_back(parsed);
+   }
+
+   return 0;
+ }
+
+ /* Replace the content of this list with the grants implied by an S3
+  * canned ACL.
+  *
+  * `owner` always receives FULL_CONTROL. Depending on `canned_acl` an
+  * additional group grant or a grant for `bucket_owner` (only when it
+  * differs from `owner`) is added. An empty name is treated like
+  * "private". Returns 0 on success and -EINVAL for an unknown canned ACL
+  * name (the owner grant has already been added in that case).
+  *
+  * Every auxiliary structure is reset up front -- the group map and the
+  * referer list as well as the user map -- so that nothing from a previous
+  * ACL outlives the grants it was built from. */
+ int RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl)
+ {
+   acl_user_map.clear();
+   acl_group_map.clear();
+   referer_list.clear();
+   grant_map.clear();
+
+   ACLGrant owner_grant;
+
+   rgw_user bid = bucket_owner.get_id();
+   string bname = bucket_owner.get_display_name();
+
+   /* owner gets full control */
+   owner_grant.set_canon(owner.get_id(), owner.get_display_name(), RGW_PERM_FULL_CONTROL);
+   add_grant(&owner_grant);
+
+   if (canned_acl.size() == 0 || canned_acl.compare("private") == 0) {
+     return 0;
+   }
+
+   ACLGrant bucket_owner_grant;
+   ACLGrant group_grant;
+   if (canned_acl.compare("public-read") == 0) {
+     group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
+     add_grant(&group_grant);
+   } else if (canned_acl.compare("public-read-write") == 0) {
+     /* two separate grants: one for READ, one for WRITE */
+     group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
+     add_grant(&group_grant);
+     group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_WRITE);
+     add_grant(&group_grant);
+   } else if (canned_acl.compare("authenticated-read") == 0) {
+     group_grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ);
+     add_grant(&group_grant);
+   } else if (canned_acl.compare("bucket-owner-read") == 0) {
+     bucket_owner_grant.set_canon(bid, bname, RGW_PERM_READ);
+     /* the owner already has full control; skip if it owns the bucket */
+     if (bid.compare(owner.get_id()) != 0)
+       add_grant(&bucket_owner_grant);
+   } else if (canned_acl.compare("bucket-owner-full-control") == 0) {
+     bucket_owner_grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL);
+     if (bid.compare(owner.get_id()) != 0)
+       add_grant(&bucket_owner_grant);
+   } else {
+     return -EINVAL;
+   }
+
+   return 0;
+ }
+
+ /* Replace the content of this list with copies of `grants`.
+  * Returns -EINVAL (leaving the list untouched) when `grants` is empty,
+  * 0 otherwise.
+  *
+  * Every auxiliary structure is reset first -- the group map and the
+  * referer list as well as the user map -- so that nothing from a previous
+  * ACL outlives the grants it was built from. */
+ int RGWAccessControlList_S3::create_from_grants(std::list<ACLGrant>& grants)
+ {
+   if (grants.empty())
+     return -EINVAL;
+
+   acl_user_map.clear();
+   acl_group_map.clear();
+   referer_list.clear();
+   grant_map.clear();
+
+   for (std::list<ACLGrant>::iterator it = grants.begin(); it != grants.end(); ++it) {
+     ACLGrant g = *it;
+     add_grant(&g);
+   }
+
+   return 0;
+ }
+
+ /* Called when an <AccessControlPolicy> element is closed: copies the
+  * child <AccessControlList> and <Owner> into this policy. Both children
+  * are required. The ACL is copied before the owner is looked up, so a
+  * missing <Owner> returns false with the ACL already replaced. */
+ bool RGWAccessControlPolicy_S3::xml_end(const char *el) {
+   RGWAccessControlList_S3 *parsed_acl =
+     static_cast<RGWAccessControlList_S3 *>(find_first("AccessControlList"));
+   if (parsed_acl == NULL) {
+     return false;
+   }
+   acl = *parsed_acl;
+
+   ACLOwner *parsed_owner = static_cast<ACLOwner_S3 *>(find_first("Owner"));
+   if (parsed_owner == NULL) {
+     return false;
+   }
+   owner = *parsed_owner;
+
+   return true;
+ }
+
+ /* Write the policy as an <AccessControlPolicy> element: the owner first,
+  * then the access control list. */
+ void RGWAccessControlPolicy_S3::to_xml(ostream& out) {
+   out << "<AccessControlPolicy xmlns=\"" << XMLNS_AWS_S3 << "\">";
+   static_cast<ACLOwner_S3 &>(owner).to_xml(out);
+   static_cast<RGWAccessControlList_S3 &>(acl).to_xml(out);
+   out << "</AccessControlPolicy>";
+ }
+
+ // Grant headers recognised by create_from_headers(), each paired with the
+ // permission it confers. The {0, NULL} entry terminates the table: the
+ // consumer loops while rgw_perm is non-zero.
+ static const s3_acl_header acl_header_perms[] = {
+ {RGW_PERM_READ, "HTTP_X_AMZ_GRANT_READ"},
+ {RGW_PERM_WRITE, "HTTP_X_AMZ_GRANT_WRITE"},
+ {RGW_PERM_READ_ACP,"HTTP_X_AMZ_GRANT_READ_ACP"},
+ {RGW_PERM_WRITE_ACP, "HTTP_X_AMZ_GRANT_WRITE_ACP"},
+ {RGW_PERM_FULL_CONTROL, "HTTP_X_AMZ_GRANT_FULL_CONTROL"},
+ {0, NULL}
+ };
+
+ /* Build this policy from the grant headers found in `env`.
+  * Every entry of acl_header_perms is parsed in turn; the first parse
+  * error is returned immediately. Otherwise the collected grants replace
+  * the ACL, `_owner` becomes the owner, and the result of
+  * create_from_grants() is returned (the owner is set even if that call
+  * fails). */
+ int RGWAccessControlPolicy_S3::create_from_headers(RGWRados *store, const RGWEnv *env, ACLOwner& _owner)
+ {
+   std::list<ACLGrant> collected;
+
+   const struct s3_acl_header *entry = acl_header_perms;
+   while (entry->rgw_perm) {
+     const int ret = parse_acl_header(store, env, entry, collected);
+     if (ret < 0) {
+       return ret;
+     }
+     ++entry;
+   }
+
+   RGWAccessControlList_S3& s3_acl = static_cast<RGWAccessControlList_S3 &>(acl);
+   const int result = s3_acl.create_from_grants(collected);
+
+   owner = _owner;
+
+   return result;
+ }
+
+/*
+ can only be called on object that was parsed
+ */
+ // Builds `dest` from this parsed policy: verifies the requested owner,
+ // then re-creates each grant with user data looked up in the store.
+ //
+ // Returns 0 on success; -EINVAL when `owner` is null, the owner or a
+ // grantee cannot be looked up, or a group is unknown; -EPERM when the
+ // parsed <Owner> names a different id than `owner`;
+ // -ERR_UNRESOLVABLE_EMAIL when an e-mail grantee cannot be resolved.
+ // On error `dest` may already have been partially modified.
+ int RGWAccessControlPolicy_S3::rebuild(RGWRados *store, ACLOwner *owner, RGWAccessControlPolicy& dest)
+ {
+ if (!owner)
+ return -EINVAL;
+
+ // A non-empty owner id in the request must match the effective owner.
+ ACLOwner *requested_owner = static_cast<ACLOwner_S3 *>(find_first("Owner"));
+ if (requested_owner) {
+ rgw_user& requested_id = requested_owner->get_id();
+ if (!requested_id.empty() && requested_id.compare(owner->get_id()) != 0)
+ return -EPERM;
+ }
+
+ // The display name stored in dest comes from the store, not the request.
+ RGWUserInfo owner_info;
+ if (rgw_get_user_info_by_uid(store, owner->get_id(), owner_info) < 0) {
+ ldout(cct, 10) << "owner info does not exist" << dendl;
+ return -EINVAL;
+ }
+ ACLOwner& dest_owner = dest.get_owner();
+ dest_owner.set_id(owner->get_id());
+ dest_owner.set_name(owner_info.display_name);
+
+ ldout(cct, 20) << "owner id=" << owner->get_id() << dendl;
+ ldout(cct, 20) << "dest owner id=" << dest.get_owner().get_id() << dendl;
+
+ RGWAccessControlList& dst_acl = dest.get_acl();
+
+ multimap<string, ACLGrant>& grant_map = acl.get_grant_map();
+ multimap<string, ACLGrant>::iterator iter;
+ for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) {
+ ACLGrant& src_grant = iter->second;
+ ACLGranteeType& type = src_grant.get_type();
+ ACLGrant new_grant;
+ bool grant_ok = false;
+ rgw_user uid;
+ RGWUserInfo grant_user;
+ switch (type.get_type()) {
+ case ACL_TYPE_EMAIL_USER:
+ {
+ // For e-mail grantees get_id() yields the address in u.id.
+ string email;
+ rgw_user u;
+ if (!src_grant.get_id(u)) {
+ ldout(cct, 0) << "ERROR: src_grant.get_id() failed" << dendl;
+ return -EINVAL;
+ }
+ email = u.id;
+ ldout(cct, 10) << "grant user email=" << email << dendl;
+ if (rgw_get_user_info_by_email(store, email, grant_user) < 0) {
+ ldout(cct, 10) << "grant user email not found or other error" << dendl;
+ return -ERR_UNRESOLVABLE_EMAIL;
+ }
+ uid = grant_user.user_id;
+ }
+ /* fall through: the resolved e-mail user is stored as a canonical user */
+ case ACL_TYPE_CANON_USER:
+ {
+ if (type.get_type() == ACL_TYPE_CANON_USER) {
+ if (!src_grant.get_id(uid)) {
+ ldout(cct, 0) << "ERROR: src_grant.get_id() failed" << dendl;
+ return -EINVAL;
+ }
+ }
+
+ // grant_user is already populated on the e-mail path, so the uid
+ // lookup only runs when its user_id is still empty.
+ if (grant_user.user_id.empty() && rgw_get_user_info_by_uid(store, uid, grant_user) < 0) {
+ ldout(cct, 10) << "grant user does not exist:" << uid << dendl;
+ return -EINVAL;
+ } else {
+ ACLPermission& perm = src_grant.get_permission();
+ new_grant.set_canon(uid, grant_user.display_name, perm.get_permissions());
+ grant_ok = true;
+ rgw_user new_id;
+ new_grant.get_id(new_id);
+ ldout(cct, 10) << "new grant: " << new_id << ":" << grant_user.display_name << dendl;
+ }
+ }
+ break;
+ case ACL_TYPE_GROUP:
+ {
+ // Group grants are copied as-is once the group maps to a known URI.
+ string uri;
+ if (ACLGrant_S3::group_to_uri(src_grant.get_group(), uri)) {
+ new_grant = src_grant;
+ grant_ok = true;
+ ldout(cct, 10) << "new grant: " << uri << dendl;
+ } else {
+ ldout(cct, 10) << "bad grant group:" << (int)src_grant.get_group() << dendl;
+ return -EINVAL;
+ }
+ }
+ /* fall through: harmless, the default case only breaks */
+ default:
+ break;
+ }
+ // Grants of any other type are silently dropped.
+ if (grant_ok) {
+ dst_acl.add_grant(&new_grant);
+ }
+ }
+
+ return 0;
+ }
+
+ /* Tell whether `id` denotes `group`: the all-users group is matched
+  * against the anonymous user id, the authenticated-users group against
+  * its S3 URI, and any other group only by an empty id. */
+ bool RGWAccessControlPolicy_S3::compare_group_name(string& id, ACLGroupTypeEnum group)
+ {
+   if (group == ACL_GROUP_ALL_USERS) {
+     return id.compare(RGW_USER_ANON_ID) == 0;
+   }
+   if (group == ACL_GROUP_AUTHENTICATED_USERS) {
+     return id.compare(rgw_uri_auth_users) == 0;
+   }
+   return id.empty();
+ }
+
+ /* Allocate the XML node object for element `el`. Returns a newly
+  * allocated object of the matching *_S3 type, or NULL for an element name
+  * this parser does not know. Names are matched case-sensitively. */
+ XMLObj *RGWACLXMLParser_S3::alloc_obj(const char *el)
+ {
+   if (strcmp(el, "AccessControlPolicy") == 0) {
+     return new RGWAccessControlPolicy_S3(cct);
+   }
+   if (strcmp(el, "Owner") == 0) {
+     return new ACLOwner_S3();
+   }
+   if (strcmp(el, "AccessControlList") == 0) {
+     return new RGWAccessControlList_S3(cct);
+   }
+   if (strcmp(el, "ID") == 0) {
+     return new ACLID_S3();
+   }
+   if (strcmp(el, "DisplayName") == 0) {
+     return new ACLDisplayName_S3();
+   }
+   if (strcmp(el, "Grant") == 0) {
+     return new ACLGrant_S3();
+   }
+   if (strcmp(el, "Grantee") == 0) {
+     return new ACLGrantee_S3();
+   }
+   if (strcmp(el, "Permission") == 0) {
+     return new ACLPermission_S3();
+   }
+   if (strcmp(el, "URI") == 0) {
+     return new ACLURI_S3();
+   }
+   if (strcmp(el, "EmailAddress") == 0) {
+     return new ACLEmail_S3();
+   }
+   return NULL;
+ }
+
diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h
new file mode 100644
index 00000000..41877667
--- /dev/null
+++ b/src/rgw/rgw_acl_s3.h
@@ -0,0 +1,111 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ACL_S3_H
+#define CEPH_RGW_ACL_S3_H
+
+#include <map>
+#include <string>
+#include <iosfwd>
+#include <include/types.h>
+
+#include "include/str_list.h"
+#include "rgw_xml.h"
+#include "rgw_acl.h"
+
+class RGWRados;
+
+ // ACLPermission that can be parsed from, and written as, an S3
+ // <Permission> XML element.
+ class ACLPermission_S3 : public ACLPermission, public XMLObj
+ {
+ public:
+ ACLPermission_S3() {}
+ ~ACLPermission_S3() override {}
+
+ // Maps the element text to permission flags; false if unrecognised.
+ bool xml_end(const char *el) override;
+ // Writes the flags as <Permission> elements.
+ void to_xml(ostream& out);
+ };
+
+ // XML node type allocated for <Grantee> elements.
+ class ACLGrantee_S3 : public ACLGrantee, public XMLObj
+ {
+ public:
+ ACLGrantee_S3() {}
+ ~ACLGrantee_S3() override {}
+
+ // NOTE(review): declared without `override`, and no definition is visible
+ // in this part of the change -- confirm it is implemented and used.
+ bool xml_start(const char *el, const char **attr);
+ };
+
+
+ // ACLGrant that can be parsed from, and written as, an S3 <Grant> XML
+ // element.
+ class ACLGrant_S3 : public ACLGrant, public XMLObj
+ {
+ public:
+ ACLGrant_S3() {}
+ ~ACLGrant_S3() override {}
+
+ // Writes the grant as XML; `cct` is only used for error logging.
+ void to_xml(CephContext *cct, ostream& out);
+ // Fills the grant from its <Grantee> and <Permission> children.
+ bool xml_end(const char *el) override;
+ // NOTE(review): declared without `override`, and no definition is visible
+ // in this part of the change -- confirm it is implemented and used.
+ bool xml_start(const char *el, const char **attr);
+
+ // Conversions between group identifiers and S3 group URIs.
+ static ACLGroupTypeEnum uri_to_group(string& uri);
+ static bool group_to_uri(ACLGroupTypeEnum group, string& uri);
+ };
+
+ // Access control list that can be parsed from, and written as, an S3
+ // <AccessControlList> XML element, or built from canned ACLs and grants.
+ class RGWAccessControlList_S3 : public RGWAccessControlList, public XMLObj
+ {
+ public:
+ explicit RGWAccessControlList_S3(CephContext *_cct) : RGWAccessControlList(_cct) {}
+ ~RGWAccessControlList_S3() override {}
+
+ // Adds every parsed child <Grant> to the list.
+ bool xml_end(const char *el) override;
+ void to_xml(ostream& out);
+
+ // Replaces the content with the grants of a canned ACL name; -EINVAL for
+ // an unknown name.
+ int create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl);
+ // Replaces the content with copies of `grants`; -EINVAL if it is empty.
+ int create_from_grants(std::list<ACLGrant>& grants);
+ };
+
+ // ACLOwner that can be parsed from, and written as, an S3 <Owner> XML
+ // element.
+ class ACLOwner_S3 : public ACLOwner, public XMLObj
+ {
+ public:
+ ACLOwner_S3() {}
+ ~ACLOwner_S3() override {}
+
+ // Reads the <ID> (mandatory) and <DisplayName> (optional) children.
+ bool xml_end(const char *el) override;
+ void to_xml(ostream& out);
+ };
+
+class RGWEnv;
+
+ // Access control policy with S3 XML parsing and serialisation, plus
+ // construction from canned ACL names and from grant headers.
+ class RGWAccessControlPolicy_S3 : public RGWAccessControlPolicy, public XMLObj
+ {
+ public:
+ explicit RGWAccessControlPolicy_S3(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
+ ~RGWAccessControlPolicy_S3() override {}
+
+ // Copies the parsed <AccessControlList> and <Owner> into this policy.
+ bool xml_end(const char *el) override;
+
+ void to_xml(ostream& out);
+ // Builds `dest` from this parsed policy, validating owner and grantees
+ // against the store.
+ int rebuild(RGWRados *store, ACLOwner *owner, RGWAccessControlPolicy& dest);
+ bool compare_group_name(string& id, ACLGroupTypeEnum group) override;
+
+ // Fills the ACL from a canned ACL name and records `_owner` as the owner.
+ // The owner is assigned even when the ACL call reports an error.
+ virtual int create_canned(ACLOwner& _owner, ACLOwner& bucket_owner, const string& canned_acl) {
+ RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl);
+ int ret = _acl.create_canned(_owner, bucket_owner, canned_acl);
+ owner = _owner;
+ return ret;
+ }
+ // Builds the policy from the grant headers found in `env`.
+ int create_from_headers(RGWRados *store, const RGWEnv *env, ACLOwner& _owner);
+ };
+
+/**
+ * Interfaces with the webserver's XML handling code
+ * to parse it in a way that makes sense for the rgw.
+ */
+ // XML parser that instantiates the S3 ACL node types for the element
+ // names it recognises.
+ class RGWACLXMLParser_S3 : public RGWXMLParser
+ {
+ CephContext *cct;
+
+ // Returns a new node object for element `el`, or NULL if unknown.
+ XMLObj *alloc_obj(const char *el) override;
+ public:
+ explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {}
+ };
+
+#endif
diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc
new file mode 100644
index 00000000..18a99912
--- /dev/null
+++ b/src/rgw/rgw_acl_swift.cc
@@ -0,0 +1,430 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <vector>
+
+#include <boost/algorithm/string/predicate.hpp>
+
+#include "common/ceph_json.h"
+#include "rgw_common.h"
+#include "rgw_user.h"
+#include "rgw_acl_swift.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+#define SWIFT_PERM_READ RGW_PERM_READ_OBJS
+#define SWIFT_PERM_WRITE RGW_PERM_WRITE_OBJS
+/* FIXME: do we really need separate RW? */
+#define SWIFT_PERM_RWRT (SWIFT_PERM_READ | SWIFT_PERM_WRITE)
+#define SWIFT_PERM_ADMIN RGW_PERM_FULL_CONTROL
+
+#define SWIFT_GROUP_ALL_USERS ".r:*"
+
+ /* Split `uid_list` on spaces and commas and append every non-empty token
+  * to `uids`. Runs of delimiters are collapsed, so no empty entries are
+  * produced. `uid_list` must not be NULL. Always returns 0.
+  *
+  * The input is scanned in place with strspn()/strcspn() rather than being
+  * copied for strtok_r(): there is no temporary buffer that could leak if
+  * appending to `uids` throws, and no allocation-failure path. */
+ static int parse_list(const char* uid_list,
+                       std::vector<std::string>& uids) /* out */
+ {
+   static const char delims[] = " ,";
+
+   const char *p = uid_list;
+   while (*p) {
+     /* skip leading delimiters, then measure the token that follows */
+     p += strspn(p, delims);
+     const size_t len = strcspn(p, delims);
+     if (len > 0) {
+       uids.push_back(std::string(p, len));
+     }
+     p += len;
+   }
+   return 0;
+ }
+
+ /* True when `designator` is one of the spellings accepted for an HTTP
+  * referrer ACL element: ".r", ".ref", ".referer" or ".referrer". */
+ static bool is_referrer(const std::string& designator)
+ {
+   if (designator == ".r" || designator == ".ref") {
+     return true;
+   }
+   return designator == ".referer" || designator == ".referrer";
+ }
+
+ /* True when `uid` is a public-access element of the form
+  * "<referrer designator>:*", e.g. ".r:*" or ".referrer:*".
+  *
+  * The length is checked before the first two characters are inspected,
+  * so an empty or one-character uid is rejected without indexing past the
+  * end of the string. The position of ':' is kept as size_t and compared
+  * with npos instead of being narrowed to int. */
+ static bool uid_is_public(const string& uid)
+ {
+   if (uid.size() < 2 || uid[0] != '.' || uid[1] != 'r')
+     return false;
+
+   const size_t pos = uid.find(':');
+   if (pos == string::npos)
+     return false;
+
+   const string sub = uid.substr(0, pos);
+   const string after = uid.substr(pos + 1);
+
+   if (after.compare("*") != 0)
+     return false;
+
+   return is_referrer(sub);
+ }
+
+ /* Turn the value part of a Swift referrer ACL element into a grant.
+  *
+  * A leading '-' marks a negative entry, stored with permission 0. Unless
+  * the spec is the referer wildcard, one leading '*' is stripped; an empty
+  * or "." spec is rejected with boost::none.
+  *
+  * url_spec is taken by value because it is trimmed in place, which is
+  * needed for compliance with Swift and not easily done on a
+  * boost::string_ref. */
+ static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec,
+                                                    const uint32_t perm)
+ {
+   try {
+     const bool is_negative = ('-' == url_spec[0]);
+     if (is_negative) {
+       url_spec = url_spec.substr(1);
+       boost::algorithm::trim(url_spec);
+     }
+
+     /* The wildcard (.r:*) passes through unchanged: _add_grant() of
+      * RGWAccessControlList handles it specially, as the S3 API has a
+      * similar concept, giving a small portion of compatibility. */
+     const bool is_wildcard = (url_spec == RGW_REFERER_WILDCARD);
+     if (!is_wildcard) {
+       if ('*' == url_spec[0]) {
+         url_spec = url_spec.substr(1);
+         boost::algorithm::trim(url_spec);
+       }
+
+       if (url_spec.empty() || url_spec == ".") {
+         return boost::none;
+       }
+     }
+
+     ACLGrant grant;
+     grant.set_referer(url_spec, is_negative ? 0 : perm);
+     return grant;
+   } catch (const std::out_of_range&) {
+     return boost::none;
+   }
+ }
+
+ /* Build a canonical-user grant for `uid` with permission `perm`.
+  * The display name comes from the store; if the user cannot be looked up
+  * this is logged and the grant is still created, with an empty display
+  * name. */
+ static ACLGrant user_to_grant(CephContext* const cct,
+                               RGWRados* const store,
+                               const std::string& uid,
+                               const uint32_t perm)
+ {
+   rgw_user user(uid);
+   RGWUserInfo grant_user;
+
+   const bool found = rgw_get_user_info_by_uid(store, user, grant_user) >= 0;
+   if (!found) {
+     ldout(cct, 10) << "grant user does not exist: " << uid << dendl;
+   }
+
+   ACLGrant grant;
+   grant.set_canon(user, found ? grant_user.display_name : std::string(), perm);
+   return grant;
+ }
+
+ /* Add one grant with permission `perm` for every element of `uids`.
+  *
+  * An element without ':' or whose part before ':' does not start with '.'
+  * is a plain user id. A ".r"-style designator is a referrer ACL and is
+  * accepted only when `perm` carries no write bit. Anything else -- or a
+  * referrer spec that cannot be converted -- stops processing with
+  * -EINVAL; grants added before that point stay in the ACL.
+  * Returns 0 when every element was added. */
+ int RGWAccessControlPolicy_SWIFT::add_grants(RGWRados* const store,
+                                              const std::vector<std::string>& uids,
+                                              const uint32_t perm)
+ {
+   for (const auto& uid : uids) {
+     ldout(cct, 20) << "trying to add grant for ACL uid=" << uid << dendl;
+
+     boost::optional<ACLGrant> grant;
+
+     /* A separator potentially indicates a special meaning (like an HTTP
+      * referral-based grant). */
+     const size_t sep = uid.find(':');
+     if (sep == std::string::npos) {
+       /* No separator -- just a regular user identifier. */
+       grant = user_to_grant(cct, store, uid, perm);
+     } else {
+       /* Swift strips whitespaces at both beginning and end. */
+       std::string designator = uid.substr(0, sep);
+       std::string designatee = uid.substr(sep + 1);
+       boost::algorithm::trim(designator);
+       boost::algorithm::trim(designatee);
+
+       const bool special = boost::algorithm::starts_with(designator, ".");
+       if (!special) {
+         grant = user_to_grant(cct, store, uid, perm);
+       } else if ((perm & SWIFT_PERM_WRITE) == 0 && is_referrer(designator)) {
+         /* HTTP referrer-based ACLs aren't acceptable for writes. */
+         grant = referrer_to_grant(designatee, perm);
+       }
+     }
+
+     if (!grant) {
+       return -EINVAL;
+     }
+     acl.add_grant(&*grant);
+   }
+
+   return 0;
+ }
+
+
+ // Initialises the policy for owner (id, name) and adds grants parsed from
+ // the read and write lists. Either list may be null, in which case it is
+ // skipped. On return rw_mask has SWIFT_PERM_READ and/or SWIFT_PERM_WRITE
+ // set for each list that was supplied and processed successfully.
+ // Returns 0 on success, or the negative error from parse_list() or
+ // add_grants().
+ int RGWAccessControlPolicy_SWIFT::create(RGWRados* const store,
+ const rgw_user& id,
+ const std::string& name,
+ const char* read_list,
+ const char* write_list,
+ uint32_t& rw_mask)
+ {
+ acl.create_default(id, name);
+ owner.set_id(id);
+ owner.set_name(name);
+ rw_mask = 0;
+
+ if (read_list) {
+ std::vector<std::string> uids;
+ int r = parse_list(read_list, uids);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: parse_list for read returned r="
+ << r << dendl;
+ return r;
+ }
+
+ r = add_grants(store, uids, SWIFT_PERM_READ);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: add_grants for read returned r="
+ << r << dendl;
+ return r;
+ }
+ // mark the read ACL as explicitly set by this request
+ rw_mask |= SWIFT_PERM_READ;
+ }
+ if (write_list) {
+ std::vector<std::string> uids;
+ int r = parse_list(write_list, uids);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: parse_list for write returned r="
+ << r << dendl;
+ return r;
+ }
+
+ r = add_grants(store, uids, SWIFT_PERM_WRITE);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: add_grants for write returned r="
+ << r << dendl;
+ return r;
+ }
+ // mark the write ACL as explicitly set by this request
+ rw_mask |= SWIFT_PERM_WRITE;
+ }
+ return 0;
+ }
+
+ // Copies into this policy those grants of `old` that belong to the ACL
+ // side (read or write) which the current request did NOT set.
+ void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask,
+ RGWAccessControlPolicy_SWIFT *old)
+ {
+ /* rw_mask&SWIFT_PERM_READ => setting read acl,
+ * rw_mask&SWIFT_PERM_WRITE => setting write acl
+ * when bit is cleared, copy matching elements from old.
+ */
+ if (rw_mask == (SWIFT_PERM_READ|SWIFT_PERM_WRITE)) {
+ // both sides were set by the request: nothing to carry over
+ return;
+ }
+ // invert the two bits: rw_mask now selects the side(s) to copy from old
+ rw_mask ^= (SWIFT_PERM_READ|SWIFT_PERM_WRITE);
+ for (auto &iter: old->acl.get_grant_map()) {
+ ACLGrant& grant = iter.second;
+ uint32_t perm = grant.get_permission().get_permissions();
+ rgw_user id;
+ string url_spec;
+ if (!grant.get_id(id)) {
+ // not a user grant: either the all-users group or a referer grant
+ if (grant.get_group() != ACL_GROUP_ALL_USERS) {
+ url_spec = grant.get_referer();
+ if (url_spec.empty()) {
+ continue;
+ }
+ if (perm == 0) {
+ /* We need to carry also negative, HTTP referrer-based ACLs. */
+ perm = SWIFT_PERM_READ;
+ }
+ }
+ }
+ if (perm & rw_mask) {
+ acl.add_grant(&grant);
+ }
+ }
+ }
+
+ // Appends the grants to the comma-separated `read` and `write` strings in
+ // Swift ACL syntax. Existing content of either string is kept and
+ // extended. Grants that are neither a user, the all-users group, nor a
+ // referer grant are skipped.
+ void RGWAccessControlPolicy_SWIFT::to_str(string& read, string& write)
+ {
+ multimap<string, ACLGrant>& m = acl.get_grant_map();
+ multimap<string, ACLGrant>::iterator iter;
+
+ for (iter = m.begin(); iter != m.end(); ++iter) {
+ ACLGrant& grant = iter->second;
+ const uint32_t perm = grant.get_permission().get_permissions();
+ rgw_user id;
+ string url_spec;
+ if (!grant.get_id(id)) {
+ if (grant.get_group() == ACL_GROUP_ALL_USERS) {
+ id = SWIFT_GROUP_ALL_USERS;
+ } else {
+ url_spec = grant.get_referer();
+ if (url_spec.empty()) {
+ continue;
+ }
+ // permission 0 marks a negative referer entry, written as ".r:-<spec>"
+ id = (perm != 0) ? ".r:" + url_spec : ".r:-" + url_spec;
+ }
+ }
+ // The else-if chain lists each grant once: a grant with both read and
+ // write bits appears in `read` only.
+ if (perm & SWIFT_PERM_READ) {
+ if (!read.empty()) {
+ read.append(",");
+ }
+ read.append(id.to_str());
+ } else if (perm & SWIFT_PERM_WRITE) {
+ if (!write.empty()) {
+ write.append(",");
+ }
+ write.append(id.to_str());
+ } else if (perm == 0 && !url_spec.empty()) {
+ /* only X-Container-Read headers support referers */
+ if (!read.empty()) {
+ read.append(",");
+ }
+ read.append(id.to_str());
+ }
+ }
+ }
+
+ /* Add one grant with permission `perm` for every element of `uids`.
+  * A public element (e.g. ".r:*") becomes a grant for the all-users group;
+  * anything else is treated as a user id. A user that cannot be looked up
+  * is logged and still granted, with an empty display name. */
+ void RGWAccessControlPolicy_SWIFTAcct::add_grants(RGWRados * const store,
+                                                   const std::vector<std::string>& uids,
+                                                   const uint32_t perm)
+ {
+   for (const auto& uid : uids) {
+     ACLGrant grant;
+
+     if (uid_is_public(uid)) {
+       grant.set_group(ACL_GROUP_ALL_USERS, perm);
+     } else {
+       rgw_user user(uid);
+       RGWUserInfo grant_user;
+
+       if (rgw_get_user_info_by_uid(store, user, grant_user) < 0) {
+         ldout(cct, 10) << "grant user does not exist:" << uid << dendl;
+         grant.set_canon(user, std::string(), perm);
+       } else {
+         grant.set_canon(user, grant_user.display_name, perm);
+       }
+     }
+
+     acl.add_grant(&grant);
+   }
+ }
+
+ // Initialises the policy for owner (id, name) and adds grants from a JSON
+ // account ACL. The arrays under "admin", "read-write" and "read-only" are
+ // each optional; entries that are missing or not arrays are ignored.
+ // Returns false only when `acl_str` is not valid JSON; the default ACL
+ // and owner have already been set by then.
+ bool RGWAccessControlPolicy_SWIFTAcct::create(RGWRados * const store,
+ const rgw_user& id,
+ const std::string& name,
+ const std::string& acl_str)
+ {
+ acl.create_default(id, name);
+ owner.set_id(id);
+ owner.set_name(name);
+
+ JSONParser parser;
+
+ // NOTE(review): the message ends in "error=" but no value is appended.
+ if (!parser.parse(acl_str.c_str(), acl_str.length())) {
+ ldout(cct, 0) << "ERROR: JSONParser::parse returned error=" << dendl;
+ return false;
+ }
+
+ // NOTE(review): the three lists below are logged at level 0 -- confirm
+ // that this verbosity is intended.
+ JSONObjIter iter = parser.find_first("admin");
+ if (!iter.end() && (*iter)->is_array()) {
+ std::vector<std::string> admin;
+ decode_json_obj(admin, *iter);
+ ldout(cct, 0) << "admins: " << admin << dendl;
+
+ add_grants(store, admin, SWIFT_PERM_ADMIN);
+ }
+
+ iter = parser.find_first("read-write");
+ if (!iter.end() && (*iter)->is_array()) {
+ std::vector<std::string> readwrite;
+ decode_json_obj(readwrite, *iter);
+ ldout(cct, 0) << "read-write: " << readwrite << dendl;
+
+ add_grants(store, readwrite, SWIFT_PERM_RWRT);
+ }
+
+ iter = parser.find_first("read-only");
+ if (!iter.end() && (*iter)->is_array()) {
+ std::vector<std::string> readonly;
+ decode_json_obj(readonly, *iter);
+ ldout(cct, 0) << "read-only: " << readonly << dendl;
+
+ add_grants(store, readonly, SWIFT_PERM_READ);
+ }
+
+ return true;
+ }
+
+ /* Serialise the account ACL as a JSON object with optional "read-only",
+  * "read-write" and "admin" arrays.
+  *
+  * The owner's own grant is left out, as are grants that are neither a
+  * user nor the all-users group. Each remaining grant goes into the most
+  * privileged category its permission bits fully cover. Returns
+  * boost::none when nothing is left to serialise. */
+ boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const
+ {
+   std::vector<std::string> admin;
+   std::vector<std::string> readwrite;
+   std::vector<std::string> readonly;
+
+   /* Partition the grant map into three non-overlapping groups. */
+   for (const auto& item : get_acl().get_grant_map()) {
+     const ACLGrant& grant = item.second;
+     const uint32_t perm = grant.get_permission().get_permissions();
+
+     rgw_user id;
+     const bool is_user = grant.get_id(id);
+     if (is_user) {
+       if (owner.get_id() == id) {
+         continue;
+       }
+     } else {
+       if (grant.get_group() != ACL_GROUP_ALL_USERS) {
+         continue;
+       }
+       id = SWIFT_GROUP_ALL_USERS;
+     }
+
+     if ((perm & SWIFT_PERM_ADMIN) == SWIFT_PERM_ADMIN) {
+       admin.push_back(id.to_str());
+     } else if ((perm & SWIFT_PERM_RWRT) == SWIFT_PERM_RWRT) {
+       readwrite.push_back(id.to_str());
+     } else if ((perm & SWIFT_PERM_READ) == SWIFT_PERM_READ) {
+       readonly.push_back(id.to_str());
+     } else {
+       // FIXME: print a warning
+     }
+   }
+
+   /* With no grant to serialise, exit early rather than return an empty
+    * JSON object, which breaks the functional tests of Swift. */
+   const bool nothing_to_emit =
+     admin.empty() && readwrite.empty() && readonly.empty();
+   if (nothing_to_emit) {
+     return boost::none;
+   }
+
+   /* Serialize the groups. */
+   JSONFormatter formatter;
+   formatter.open_object_section("acl");
+   if (!readonly.empty()) {
+     encode_json("read-only", readonly, &formatter);
+   }
+   if (!readwrite.empty()) {
+     encode_json("read-write", readwrite, &formatter);
+   }
+   if (!admin.empty()) {
+     encode_json("admin", admin, &formatter);
+   }
+   formatter.close_section();
+
+   std::ostringstream oss;
+   formatter.flush(oss);
+   return oss.str();
+ }
diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h
new file mode 100644
index 00000000..f5365b04
--- /dev/null
+++ b/src/rgw/rgw_acl_swift.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ACL_SWIFT_H
+#define CEPH_RGW_ACL_SWIFT_H
+
+#include <map>
+#include <vector>
+#include <string>
+#include <include/types.h>
+
+#include <boost/optional.hpp>
+
+#include "rgw_acl.h"
+
+/* Swift flavour of RGWAccessControlPolicy.
+ * NOTE(review): the member definitions live in rgw_acl_swift.cc and are not
+ * visible next to this declaration; the per-member notes below only restate
+ * what the signatures show. */
+class RGWAccessControlPolicy_SWIFT : public RGWAccessControlPolicy
+{
+public:
+  explicit RGWAccessControlPolicy_SWIFT(CephContext* const cct)
+    : RGWAccessControlPolicy(cct)
+  {}
+  ~RGWAccessControlPolicy_SWIFT() override = default;
+
+  /* Build the policy for owner `id` / `name` from two C-string lists.
+   * `rw_mask` is passed by non-const reference, so the callee can report
+   * back through it.
+   * NOTE(review): presumably the lists are the Swift read/write ACL header
+   * values and the int is 0 or a negative errno -- confirm in the .cc. */
+  int create(RGWRados *store,
+             const rgw_user& id,
+             const std::string& name,
+             const char* read_list,
+             const char* write_list,
+             uint32_t& rw_mask);
+
+  /* Combine `policy` into this one under control of `mask`. */
+  void filter_merge(uint32_t mask, RGWAccessControlPolicy_SWIFT *policy);
+
+  /* Render the policy into two output strings, `read` and `write`. */
+  void to_str(std::string& read, std::string& write);
+
+private:
+  /* Helper: add grants with permission `perm` for each entry of `uids`. */
+  int add_grants(RGWRados *store,
+                 const std::vector<std::string>& uids,
+                 uint32_t perm);
+};
+
+/* Account-level Swift access control policy. The ACL is exchanged as a JSON
+ * document whose lists are named "admin", "read-write" and "read-only". */
+class RGWAccessControlPolicy_SWIFTAcct : public RGWAccessControlPolicy
+{
+public:
+  explicit RGWAccessControlPolicy_SWIFTAcct(CephContext * const cct)
+    : RGWAccessControlPolicy(cct) {
+  }
+  /* Defaulted, matching RGWAccessControlPolicy_SWIFT. */
+  ~RGWAccessControlPolicy_SWIFTAcct() override = default;
+
+  /* Add grants with permission `perm` for each entry of `uids`. */
+  void add_grants(RGWRados *store,
+                  const std::vector<std::string>& uids,
+                  uint32_t perm);
+
+  /* Build the policy for owner `id` / `name` from the JSON text `acl_str`.
+   * Returns true once the document has been processed.
+   * NOTE(review): the failure paths are not visible from this header --
+   * presumably false means the JSON could not be parsed; confirm in
+   * rgw_acl_swift.cc. */
+  bool create(RGWRados *store,
+              const rgw_user& id,
+              const std::string& name,
+              const std::string& acl_str);
+
+  /* Serialize the grants back to JSON. Yields boost::none when there is no
+   * grant to report, so callers never receive an empty JSON object. */
+  boost::optional<std::string> to_str() const;
+};
+#endif
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
new file mode 100644
index 00000000..675cce34
--- /dev/null
+++ b/src/rgw/rgw_admin.cc
@@ -0,0 +1,8463 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include <boost/optional.hpp>
+
+extern "C" {
+#include <liboath/oath.h>
+}
+
+#include "auth/Crypto.h"
+#include "compressor/Compressor.h"
+
+#include "common/armor.h"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+
+#include "include/util.h"
+
+#include "cls/rgw/cls_rgw_types.h"
+#include "cls/rgw/cls_rgw_client.h"
+
+#include "global/global_init.h"
+
+#include "include/utime.h"
+#include "include/str_list.h"
+
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_otp.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_lc.h"
+#include "rgw_log.h"
+#include "rgw_formats.h"
+#include "rgw_usage.h"
+#include "rgw_orphan.h"
+#include "rgw_sync.h"
+#include "rgw_sync_log_trim.h"
+#include "rgw_data_sync.h"
+#include "rgw_rest_conn.h"
+#include "rgw_realm_watcher.h"
+#include "rgw_role.h"
+#include "rgw_reshard.h"
+#include "rgw_http_client_curl.h"
+#include "rgw_zone.h"
+#include "rgw_pubsub.h"
+#include "rgw_sync_module_pubsub.h"
+
+#include "services/svc_sync_modules.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+#define SECRET_KEY_LEN 40
+#define PUBLIC_ID_LEN 20
+
+/* File-local RGWRados handle; stays null until it is assigned elsewhere in
+ * this file. */
+static RGWRados *store = nullptr;
+
+/* Returns the DoutPrefixProvider used for log output from this file. It adds
+ * no prefix of its own and reports the rgw subsystem (dout_subsys). */
+static const DoutPrefixProvider* dpp() {
+  struct GlobalPrefix : public DoutPrefixProvider {
+    /* `store` starts out null, so fall back to the global context instead
+     * of dereferencing a null pointer when something logs before the store
+     * has been set up. */
+    CephContext *get_cct() const override {
+      return store ? store->ctx() : dout_context;
+    }
+    unsigned get_subsys() const override { return dout_subsys; }
+    /* Pass the stream through untouched: no prefix. */
+    std::ostream& gen_prefix(std::ostream& out) const override { return out; }
+  };
+  static GlobalPrefix global_dpp;
+  return &global_dpp;
+}
+
+/* Print the radosgw-admin help text (commands first, then options) to
+ * stdout, followed by the generic client options from
+ * generic_client_usage().
+ *
+ * Note that "zonegroup rm" deletes a zonegroup, whereas "zonegroup remove"
+ * takes a zone out of a zonegroup; the two must stay separate entries. */
+void usage()
+{
+  cout << "usage: radosgw-admin <cmd> [options...]\n";
+  cout << "commands:\n";
+  cout << " user create create a new user\n";
+  cout << " user modify modify user\n";
+  cout << " user info get user info\n";
+  cout << " user rm remove user\n";
+  cout << " user suspend suspend a user\n";
+  cout << " user enable re-enable user after suspension\n";
+  cout << " user check check user info\n";
+  cout << " user stats show user stats as accounted by quota subsystem\n";
+  cout << " user list list users\n";
+  cout << " caps add add user capabilities\n";
+  cout << " caps rm remove user capabilities\n";
+  cout << " subuser create create a new subuser\n";
+  cout << " subuser modify modify subuser\n";
+  cout << " subuser rm remove subuser\n";
+  cout << " key create create access key\n";
+  cout << " key rm remove access key\n";
+  cout << " bucket list list buckets (specify --allow-unordered for\n";
+  cout << " faster, unsorted listing)\n";
+  cout << " bucket limit check show bucket sharding stats\n";
+  cout << " bucket link link bucket to specified user\n";
+  cout << " bucket unlink unlink bucket from specified user\n";
+  cout << " bucket stats returns bucket statistics\n";
+  cout << " bucket rm remove bucket\n";
+  cout << " bucket check check bucket index\n";
+  cout << " bucket reshard reshard bucket\n";
+  cout << " bucket rewrite rewrite all objects in the specified bucket\n";
+  cout << " bucket sync disable disable bucket sync\n";
+  cout << " bucket sync enable enable bucket sync\n";
+  cout << " bucket radoslist list rados objects backing bucket's objects\n";
+  cout << " bi get retrieve bucket index object entries\n";
+  cout << " bi put store bucket index object entries\n";
+  cout << " bi list list raw bucket index entries\n";
+  cout << " bi purge purge bucket index entries\n";
+  cout << " object rm remove object\n";
+  cout << " object put put object\n";
+  cout << " object stat stat an object for its metadata\n";
+  cout << " object unlink unlink object from bucket index\n";
+  cout << " object rewrite rewrite the specified object\n";
+  cout << " objects expire run expired objects cleanup\n";
+  cout << " objects expire-stale list list stale expired objects (caused by reshard)\n";
+  cout << " objects expire-stale rm remove stale expired objects\n";
+  cout << " period rm remove a period\n";
+  cout << " period get get period info\n";
+  cout << " period get-current get current period info\n";
+  cout << " period pull pull a period\n";
+  cout << " period push push a period\n";
+  cout << " period list list all periods\n";
+  cout << " period update update the staging period\n";
+  cout << " period commit commit the staging period\n";
+  cout << " quota set set quota params\n";
+  cout << " quota enable enable quota\n";
+  cout << " quota disable disable quota\n";
+  cout << " global quota get view global quota params\n";
+  cout << " global quota set set global quota params\n";
+  cout << " global quota enable enable a global quota\n";
+  cout << " global quota disable disable a global quota\n";
+  cout << " realm create create a new realm\n";
+  cout << " realm rm remove a realm\n";
+  cout << " realm get show realm info\n";
+  cout << " realm get-default get default realm name\n";
+  cout << " realm list list realms\n";
+  cout << " realm list-periods list all realm periods\n";
+  cout << " realm rename rename a realm\n";
+  cout << " realm set set realm info (requires infile)\n";
+  cout << " realm default set realm as default\n";
+  cout << " realm pull pull a realm and its current period\n";
+  cout << " zonegroup add add a zone to a zonegroup\n";
+  cout << " zonegroup create create a new zone group info\n";
+  cout << " zonegroup default set default zone group\n";
+  cout << " zonegroup rm remove a zone group info\n";
+  cout << " zonegroup get show zone group info\n";
+  cout << " zonegroup modify modify an existing zonegroup\n";
+  cout << " zonegroup set set zone group info (requires infile)\n";
+  cout << " zonegroup remove remove a zone from a zonegroup\n";
+  cout << " zonegroup rename rename a zone group\n";
+  cout << " zonegroup list list all zone groups set on this cluster\n";
+  cout << " zonegroup placement list list zonegroup's placement targets\n";
+  cout << " zonegroup placement get get a placement target of a specific zonegroup\n";
+  cout << " zonegroup placement add add a placement target id to a zonegroup\n";
+  cout << " zonegroup placement modify modify a placement target of a specific zonegroup\n";
+  cout << " zonegroup placement rm remove a placement target from a zonegroup\n";
+  cout << " zonegroup placement default set a zonegroup's default placement target\n";
+  cout << " zone create create a new zone\n";
+  cout << " zone rm remove a zone\n";
+  cout << " zone get show zone cluster params\n";
+  cout << " zone modify modify an existing zone\n";
+  cout << " zone set set zone cluster params (requires infile)\n";
+  cout << " zone list list all zones set on this cluster\n";
+  cout << " zone rename rename a zone\n";
+  cout << " zone placement list list zone's placement targets\n";
+  cout << " zone placement get get a zone placement target\n";
+  cout << " zone placement add add a zone placement target\n";
+  cout << " zone placement modify modify a zone placement target\n";
+  cout << " zone placement rm remove a zone placement target\n";
+  cout << " metadata sync status get metadata sync status\n";
+  cout << " metadata sync init init metadata sync\n";
+  cout << " metadata sync run run metadata sync\n";
+  cout << " data sync status get data sync status of the specified source zone\n";
+  cout << " data sync init init data sync for the specified source zone\n";
+  cout << " data sync run run data sync for the specified source zone\n";
+  cout << " pool add add an existing pool for data placement\n";
+  cout << " pool rm remove an existing pool from data placement set\n";
+  cout << " pools list list placement active set\n";
+  cout << " policy read bucket/object policy\n";
+  cout << " log list list log objects\n";
+  cout << " log show dump a log from specific object or (bucket + date\n";
+  cout << " + bucket-id)\n";
+  cout << " (NOTE: required to specify formatting of date\n";
+  cout << " to \"YYYY-MM-DD-hh\")\n";
+  cout << " log rm remove log object\n";
+  cout << " usage show show usage (by user, by bucket, date range)\n";
+  cout << " usage trim trim usage (by user, by bucket, date range)\n";
+  cout << " usage clear reset all the usage stats for the cluster\n";
+  cout << " gc list dump expired garbage collection objects (specify\n";
+  cout << " --include-all to list all entries, including unexpired)\n";
+  cout << " gc process manually process garbage (specify\n";
+  cout << " --include-all to process all entries, including unexpired)\n";
+  cout << " lc list list all bucket lifecycle progress\n";
+  cout << " lc get get a lifecycle bucket configuration\n";
+  cout << " lc process manually process lifecycle\n";
+  cout << " lc reshard fix fix LC for a resharded bucket\n";
+  cout << " metadata get get metadata info\n";
+  cout << " metadata put put metadata info\n";
+  cout << " metadata rm remove metadata info\n";
+  cout << " metadata list list metadata info\n";
+  cout << " mdlog list list metadata log\n";
+  cout << " mdlog trim trim metadata log (use start-date, end-date or\n";
+  cout << " start-marker, end-marker)\n";
+  cout << " mdlog status read metadata log status\n";
+  cout << " bilog list list bucket index log\n";
+  cout << " bilog trim trim bucket index log (use start-marker, end-marker)\n";
+  cout << " datalog list list data log\n";
+  cout << " datalog trim trim data log\n";
+  cout << " datalog status read data log status\n";
+  cout << " orphans find init and run search for leaked rados objects (use job-id, pool)\n";
+  cout << " orphans finish clean up search for leaked rados objects\n";
+  cout << " orphans list-jobs list the current job-ids for orphans search\n";
+  cout << " role create create an AWS role for use with STS\n";
+  cout << " role rm remove a role\n";
+  cout << " role get get a role\n";
+  cout << " role list list roles with specified path prefix\n";
+  cout << " role modify modify the assume role policy of an existing role\n";
+  cout << " role-policy put add/update permission policy to role\n";
+  cout << " role-policy list list policies attached to a role\n";
+  cout << " role-policy get get the specified inline policy document embedded with the given role\n";
+  cout << " role-policy rm remove policy attached to a role\n";
+  cout << " reshard add schedule a resharding of a bucket\n";
+  cout << " reshard list list all bucket resharding or scheduled to be resharded\n";
+  cout << " reshard status read bucket resharding status\n";
+  cout << " reshard process process of scheduled reshard jobs\n";
+  cout << " reshard cancel cancel resharding a bucket\n";
+  cout << " reshard stale-instances list list stale-instances from bucket resharding\n";
+  cout << " reshard stale-instances rm cleanup stale-instances from bucket resharding\n";
+  cout << " sync error list list sync error\n";
+  cout << " sync error trim trim sync error\n";
+  cout << " mfa create create a new MFA TOTP token\n";
+  cout << " mfa list list MFA TOTP tokens\n";
+  cout << " mfa get show MFA TOTP token\n";
+  cout << " mfa remove delete MFA TOTP token\n";
+  cout << " mfa check check MFA TOTP token\n";
+  cout << " mfa resync re-sync MFA TOTP token\n";
+  cout << "options:\n";
+  cout << " --tenant=<tenant> tenant name\n";
+  cout << " --uid=<id> user id\n";
+  cout << " --subuser=<name> subuser name\n";
+  cout << " --access-key=<key> S3 access key\n";
+  cout << " --email=<email> user's email address\n";
+  cout << " --secret/--secret-key=<key>\n";
+  cout << " specify secret key\n";
+  cout << " --gen-access-key generate random access key (for S3)\n";
+  cout << " --gen-secret generate random secret key\n";
+  cout << " --key-type=<type> key type, options are: swift, s3\n";
+  cout << " --temp-url-key[-2]=<key> temp url key\n";
+  cout << " --access=<access> Set access permissions for sub-user, should be one\n";
+  cout << " of read, write, readwrite, full\n";
+  cout << " --display-name=<name> user's display name\n";
+  cout << " --max-buckets max number of buckets for a user\n";
+  cout << " --admin set the admin flag on the user\n";
+  cout << " --system set the system flag on the user\n";
+  cout << " --op-mask set the op mask on the user\n";
+  cout << " --bucket=<bucket> Specify the bucket name. Also used by the quota command.\n";
+  cout << " --pool=<pool> Specify the pool name. Also used to scan for leaked rados objects.\n";
+  cout << " --object=<object> object name\n";
+  cout << " --date=<date> date in the format yyyy-mm-dd\n";
+  cout << " --start-date=<date> start date in the format yyyy-mm-dd\n";
+  cout << " --end-date=<date> end date in the format yyyy-mm-dd\n";
+  cout << " --bucket-id=<bucket-id> bucket id\n";
+  cout << " --shard-id=<shard-id> optional for: \n";
+  cout << " mdlog list\n";
+  cout << " data sync status\n";
+  cout << " required for: \n";
+  cout << " mdlog trim\n";
+  cout << " --max-entries=<entries> max entries for listing operations\n";
+  cout << " --metadata-key=<key> key to retrieve metadata from with metadata get\n";
+  cout << " --remote=<remote> zone or zonegroup id of remote gateway\n";
+  cout << " --period=<id> period id\n";
+  cout << " --url=<url> url for pushing/pulling period/realm\n";
+  cout << " --epoch=<number> period epoch\n";
+  cout << " --commit commit the period during 'period update'\n";
+  cout << " --staging get staging period info\n";
+  cout << " --master set as master\n";
+  cout << " --master-zone=<id> master zone id\n";
+  cout << " --rgw-realm=<name> realm name\n";
+  cout << " --realm-id=<id> realm id\n";
+  cout << " --realm-new-name=<name> realm new name\n";
+  cout << " --rgw-zonegroup=<name> zonegroup name\n";
+  cout << " --zonegroup-id=<id> zonegroup id\n";
+  cout << " --zonegroup-new-name=<name>\n";
+  cout << " zonegroup new name\n";
+  cout << " --rgw-zone=<name> name of zone in which radosgw is running\n";
+  cout << " --zone-id=<id> zone id\n";
+  cout << " --zone-new-name=<name> zone new name\n";
+  cout << " --source-zone specify the source zone (for data sync)\n";
+  cout << " --default set entity (realm, zonegroup, zone) as default\n";
+  cout << " --read-only set zone as read-only (when adding to zonegroup)\n";
+  cout << " --redirect-zone specify zone id to redirect when response is 404 (not found)\n";
+  cout << " --placement-id placement id for zonegroup placement commands\n";
+  cout << " --storage-class storage class for zonegroup placement commands\n";
+  cout << " --tags=<list> list of tags for zonegroup placement add and modify commands\n";
+  cout << " --tags-add=<list> list of tags to add for zonegroup placement modify command\n";
+  cout << " --tags-rm=<list> list of tags to remove for zonegroup placement modify command\n";
+  cout << " --endpoints=<list> zone endpoints\n";
+  cout << " --index-pool=<pool> placement target index pool\n";
+  cout << " --data-pool=<pool> placement target data pool\n";
+  cout << " --data-extra-pool=<pool> placement target data extra (non-ec) pool\n";
+  cout << " --placement-index-type=<type>\n";
+  cout << " placement target index type (normal, indexless, or #id)\n";
+  cout << " --compression=<type> placement target compression type (plugin name or empty/none)\n";
+  cout << " --tier-type=<type> zone tier type\n";
+  cout << " --tier-config=<k>=<v>[,...]\n";
+  cout << " set zone tier config keys, values\n";
+  cout << " --tier-config-rm=<k>[,...]\n";
+  cout << " unset zone tier config keys\n";
+  cout << " --sync-from-all[=false] set/reset whether zone syncs from all zonegroup peers\n";
+  cout << " --sync-from=[zone-name][,...]\n";
+  cout << " set list of zones to sync from\n";
+  cout << " --sync-from-rm=[zone-name][,...]\n";
+  cout << " remove zones from list of zones to sync from\n";
+  cout << " --fix besides checking bucket index, will also fix it\n";
+  cout << " --check-objects bucket check: rebuilds bucket index according to\n";
+  cout << " actual objects state\n";
+  cout << " --format=<format> specify output format for certain operations: xml,\n";
+  cout << " json\n";
+  cout << " --purge-data when specified, user removal will also purge all the\n";
+  cout << " user data\n";
+  cout << " --purge-keys when specified, subuser removal will also purge all the\n";
+  cout << " subuser keys\n";
+  cout << " --purge-objects remove a bucket's objects before deleting it\n";
+  cout << " (NOTE: required to delete a non-empty bucket)\n";
+  cout << " --sync-stats option to 'user stats', update user stats with current\n";
+  cout << " stats reported by user's buckets indexes\n";
+  cout << " --reset-stats option to 'user stats', reset stats in accordance with user buckets\n";
+  cout << " --show-log-entries=<flag> enable/disable dump of log entries on log show\n";
+  cout << " --show-log-sum=<flag> enable/disable dump of log summation on log show\n";
+  cout << " --skip-zero-entries log show only dumps entries that don't have zero value\n";
+  cout << " in one of the numeric fields\n";
+  cout << " --infile=<file> specify a file to read in when setting data\n";
+  cout << " --categories=<list> comma separated list of categories, used in usage show\n";
+  cout << " --caps=<caps> list of caps (e.g., \"usage=read, write; user=read\")\n";
+  cout << " --yes-i-really-mean-it required for certain operations\n";
+  cout << " --warnings-only when specified with bucket limit check, list\n";
+  cout << " only buckets nearing or over the current max\n";
+  cout << " objects per shard value\n";
+  cout << " --bypass-gc when specified with bucket deletion, triggers\n";
+  cout << " object deletions by not involving GC\n";
+  cout << " --inconsistent-index when specified with bucket deletion and bypass-gc set to true,\n";
+  cout << " ignores bucket index consistency\n";
+  cout << " --min-rewrite-size min object size for bucket rewrite (default 4M)\n";
+  cout << " --max-rewrite-size max object size for bucket rewrite (default ULLONG_MAX)\n";
+  cout << " --min-rewrite-stripe-size min stripe size for object rewrite (default 0)\n";
+  cout << " --trim-delay-ms time interval in msec to limit the frequency of sync error log entries trimming operations,\n";
+  cout << " the trimming process will sleep the specified msec for every 1000 entries trimmed\n";
+  cout << "\n";
+  cout << "<date> := \"YYYY-MM-DD[ hh:mm:ss]\"\n";
+  cout << "\nQuota options:\n";
+  cout << " --max-objects specify max objects (negative value to disable)\n";
+  cout << " --max-size specify max size (in B/K/M/G/T, negative value to disable)\n";
+  cout << " --quota-scope scope of quota (bucket, user)\n";
+  cout << "\nOrphans search options:\n";
+  cout << " --num-shards num of shards to use for keeping the temporary scan info\n";
+  cout << " --orphan-stale-secs num of seconds to wait before declaring an object to be an orphan (default: 86400)\n";
+  cout << " --job-id set the job id (for orphans find)\n";
+  cout << " --max-concurrent-ios maximum concurrent ios for orphans find (default: 32)\n";
+  cout << " --detail detailed mode, log and stat head objects as well\n";
+  cout << "\nOrphans list-jobs options:\n";
+  cout << " --extra-info provide extra info in job list\n";
+  cout << "\nRole options:\n";
+  cout << " --role-name name of the role to create\n";
+  cout << " --path path to the role\n";
+  cout << " --assume-role-policy-doc the trust relationship policy document that grants an entity permission to assume the role\n";
+  cout << " --policy-name name of the policy document\n";
+  cout << " --policy-doc permission policy document\n";
+  cout << " --path-prefix path prefix for filtering roles\n";
+  cout << "\nMFA options:\n";
+  cout << " --totp-serial a string that represents the ID of a TOTP token\n";
+  cout << " --totp-seed the secret seed that is used to calculate the TOTP\n";
+  cout << " --totp-seconds the time resolution that is being used for TOTP generation\n";
+  cout << " --totp-window the number of TOTP tokens that are checked before and after the current token when validating token\n";
+  cout << " --totp-pin the valid value of a TOTP token at a certain time\n";
+  cout << "\n";
+  generic_client_usage();
+}
+
+enum { // Command identifiers; get_cmd() turns the command-line words into one of these. Values are implicit and sequential, so order matters.
+ OPT_NO_CMD = 0, // no command; get_cmd() also returns 0 when it sets *need_more
+ OPT_USER_CREATE,
+ OPT_USER_INFO,
+ OPT_USER_MODIFY,
+ OPT_USER_RM,
+ OPT_USER_SUSPEND,
+ OPT_USER_ENABLE,
+ OPT_USER_CHECK,
+ OPT_USER_STATS,
+ OPT_USER_LIST,
+ OPT_SUBUSER_CREATE,
+ OPT_SUBUSER_MODIFY,
+ OPT_SUBUSER_RM,
+ OPT_KEY_CREATE,
+ OPT_KEY_RM,
+ OPT_BUCKETS_LIST, // returned for both "bucket list" and "buckets list"
+ OPT_BUCKET_LIMIT_CHECK, // three-word command: "bucket limit check"
+ OPT_BUCKET_LINK,
+ OPT_BUCKET_UNLINK,
+ OPT_BUCKET_STATS,
+ OPT_BUCKET_CHECK,
+ OPT_BUCKET_SYNC_STATUS, // "bucket sync <status|markers|init|run|disable|enable>"
+ OPT_BUCKET_SYNC_MARKERS,
+ OPT_BUCKET_SYNC_INIT,
+ OPT_BUCKET_SYNC_RUN,
+ OPT_BUCKET_SYNC_DISABLE,
+ OPT_BUCKET_SYNC_ENABLE,
+ OPT_BUCKET_RM,
+ OPT_BUCKET_REWRITE,
+ OPT_BUCKET_RESHARD,
+ OPT_BUCKET_CHOWN,
+ OPT_BUCKET_RADOS_LIST, // spelled "bucket radoslist" on the command line
+ OPT_POLICY, // single-word command "policy"
+ OPT_POOL_ADD,
+ OPT_POOL_RM,
+ OPT_POOLS_LIST, // returned for both "pool list" and "pools list"
+ OPT_LOG_LIST,
+ OPT_LOG_SHOW,
+ OPT_LOG_RM,
+ OPT_USAGE_SHOW,
+ OPT_USAGE_TRIM,
+ OPT_USAGE_CLEAR,
+ OPT_OBJECT_PUT,
+ OPT_OBJECT_RM,
+ OPT_OBJECT_UNLINK,
+ OPT_OBJECT_STAT,
+ OPT_OBJECT_REWRITE,
+ OPT_OBJECTS_EXPIRE,
+ OPT_OBJECTS_EXPIRE_STALE_LIST, // "objects expire-stale list"
+ OPT_OBJECTS_EXPIRE_STALE_RM, // "objects expire-stale rm"
+ OPT_BI_GET,
+ OPT_BI_PUT,
+ OPT_BI_LIST,
+ OPT_BI_PURGE,
+ OPT_OLH_GET,
+ OPT_OLH_READLOG,
+ OPT_QUOTA_SET,
+ OPT_QUOTA_ENABLE,
+ OPT_QUOTA_DISABLE,
+ OPT_GC_LIST,
+ OPT_GC_PROCESS,
+ OPT_LC_LIST,
+ OPT_LC_GET,
+ OPT_LC_PROCESS,
+ OPT_LC_RESHARD_FIX,
+ OPT_ORPHANS_FIND,
+ OPT_ORPHANS_FINISH,
+ OPT_ORPHANS_LIST_JOBS,
+ OPT_ZONEGROUP_ADD,
+ OPT_ZONEGROUP_CREATE,
+ OPT_ZONEGROUP_DEFAULT,
+ OPT_ZONEGROUP_DELETE, // distinct from OPT_ZONEGROUP_REMOVE below
+ OPT_ZONEGROUP_GET,
+ OPT_ZONEGROUP_MODIFY,
+ OPT_ZONEGROUP_SET,
+ OPT_ZONEGROUP_LIST,
+ OPT_ZONEGROUP_REMOVE, // distinct from OPT_ZONEGROUP_DELETE above
+ OPT_ZONEGROUP_RENAME,
+ OPT_ZONEGROUP_PLACEMENT_ADD, // "zonegroup placement <add|modify|rm|list|get|...>"
+ OPT_ZONEGROUP_PLACEMENT_MODIFY,
+ OPT_ZONEGROUP_PLACEMENT_RM,
+ OPT_ZONEGROUP_PLACEMENT_LIST,
+ OPT_ZONEGROUP_PLACEMENT_GET,
+ OPT_ZONEGROUP_PLACEMENT_DEFAULT,
+ OPT_ZONE_CREATE,
+ OPT_ZONE_DELETE,
+ OPT_ZONE_GET,
+ OPT_ZONE_MODIFY,
+ OPT_ZONE_SET,
+ OPT_ZONE_LIST,
+ OPT_ZONE_RENAME,
+ OPT_ZONE_DEFAULT,
+ OPT_ZONE_PLACEMENT_ADD,
+ OPT_ZONE_PLACEMENT_MODIFY,
+ OPT_ZONE_PLACEMENT_RM,
+ OPT_ZONE_PLACEMENT_LIST,
+ OPT_ZONE_PLACEMENT_GET,
+ OPT_CAPS_ADD,
+ OPT_CAPS_RM,
+ OPT_METADATA_GET,
+ OPT_METADATA_PUT,
+ OPT_METADATA_RM,
+ OPT_METADATA_LIST,
+ OPT_METADATA_SYNC_STATUS,
+ OPT_METADATA_SYNC_INIT,
+ OPT_METADATA_SYNC_RUN,
+ OPT_MDLOG_LIST,
+ OPT_MDLOG_AUTOTRIM,
+ OPT_MDLOG_TRIM,
+ OPT_MDLOG_FETCH,
+ OPT_MDLOG_STATUS,
+ OPT_SYNC_ERROR_LIST,
+ OPT_SYNC_ERROR_TRIM,
+ OPT_BILOG_LIST,
+ OPT_BILOG_TRIM,
+ OPT_BILOG_STATUS,
+ OPT_BILOG_AUTOTRIM,
+ OPT_DATA_SYNC_STATUS,
+ OPT_DATA_SYNC_INIT,
+ OPT_DATA_SYNC_RUN,
+ OPT_DATALOG_LIST,
+ OPT_DATALOG_STATUS,
+ OPT_DATALOG_AUTOTRIM,
+ OPT_DATALOG_TRIM,
+ OPT_REALM_CREATE,
+ OPT_REALM_DELETE, // get_cmd() selects this via match_str(cmd, "rm", "delete")
+ OPT_REALM_GET,
+ OPT_REALM_GET_DEFAULT,
+ OPT_REALM_LIST,
+ OPT_REALM_LIST_PERIODS,
+ OPT_REALM_RENAME,
+ OPT_REALM_SET,
+ OPT_REALM_DEFAULT,
+ OPT_REALM_PULL,
+ OPT_PERIOD_DELETE, // get_cmd() selects this via match_str(cmd, "rm", "delete")
+ OPT_PERIOD_GET,
+ OPT_PERIOD_GET_CURRENT,
+ OPT_PERIOD_PULL,
+ OPT_PERIOD_PUSH,
+ OPT_PERIOD_LIST,
+ OPT_PERIOD_UPDATE,
+ OPT_PERIOD_COMMIT,
+ OPT_GLOBAL_QUOTA_GET, // "global quota <get|set|enable|disable>"
+ OPT_GLOBAL_QUOTA_SET,
+ OPT_GLOBAL_QUOTA_ENABLE,
+ OPT_GLOBAL_QUOTA_DISABLE,
+ OPT_SYNC_STATUS,
+ OPT_ROLE_CREATE,
+ OPT_ROLE_DELETE,
+ OPT_ROLE_GET,
+ OPT_ROLE_MODIFY,
+ OPT_ROLE_LIST,
+ OPT_ROLE_POLICY_PUT,
+ OPT_ROLE_POLICY_LIST,
+ OPT_ROLE_POLICY_GET,
+ OPT_ROLE_POLICY_DELETE,
+ OPT_RESHARD_ADD,
+ OPT_RESHARD_LIST,
+ OPT_RESHARD_STATUS,
+ OPT_RESHARD_PROCESS,
+ OPT_RESHARD_CANCEL,
+ OPT_MFA_CREATE,
+ OPT_MFA_REMOVE,
+ OPT_MFA_GET,
+ OPT_MFA_LIST,
+ OPT_MFA_CHECK,
+ OPT_MFA_RESYNC,
+ OPT_RESHARD_STALE_INSTANCES_LIST,
+ OPT_RESHARD_STALE_INSTANCES_DELETE,
+ OPT_PUBSUB_TOPICS_LIST,
+ OPT_PUBSUB_TOPIC_CREATE,
+ OPT_PUBSUB_TOPIC_GET,
+ OPT_PUBSUB_TOPIC_RM,
+ OPT_PUBSUB_NOTIFICATION_CREATE,
+ OPT_PUBSUB_NOTIFICATION_RM,
+ OPT_PUBSUB_SUB_GET,
+ OPT_PUBSUB_SUB_CREATE,
+ OPT_PUBSUB_SUB_RM,
+ OPT_PUBSUB_SUB_PULL,
+ OPT_PUBSUB_EVENT_RM,
+};
+
+static int get_cmd(const char *cmd, const char *prev_cmd, const char *prev_prev_cmd, bool *need_more)
+{
+ using ceph::util::match_str;
+
+ *need_more = false;
+ // NOTE: please keep the checks in alphabetical order !!!
+ if (strcmp(cmd, "bi") == 0 ||
+ strcmp(cmd, "bilog") == 0 ||
+ strcmp(cmd, "buckets") == 0 ||
+ strcmp(cmd, "caps") == 0 ||
+ strcmp(cmd, "data") == 0 ||
+ strcmp(cmd, "datalog") == 0 ||
+ strcmp(cmd, "error") == 0 ||
+ strcmp(cmd, "event") == 0 ||
+ strcmp(cmd, "expire-stale") == 0 ||
+ strcmp(cmd, "gc") == 0 ||
+ strcmp(cmd, "global") == 0 ||
+ strcmp(cmd, "key") == 0 ||
+ strcmp(cmd, "log") == 0 ||
+ strcmp(cmd, "lc") == 0 ||
+ strcmp(cmd, "mdlog") == 0 ||
+ strcmp(cmd, "metadata") == 0 ||
+ strcmp(cmd, "mfa") == 0 ||
+ strcmp(cmd, "notification") == 0 ||
+ strcmp(cmd, "object") == 0 ||
+ strcmp(cmd, "objects") == 0 ||
+ strcmp(cmd, "olh") == 0 ||
+ strcmp(cmd, "orphans") == 0 ||
+ strcmp(cmd, "period") == 0 ||
+ strcmp(cmd, "placement") == 0 ||
+ strcmp(cmd, "pool") == 0 ||
+ strcmp(cmd, "pools") == 0 ||
+ strcmp(cmd, "pubsub") == 0 ||
+ strcmp(cmd, "quota") == 0 ||
+ strcmp(cmd, "realm") == 0 ||
+ strcmp(cmd, "role") == 0 ||
+ strcmp(cmd, "role-policy") == 0 ||
+ strcmp(cmd, "stale-instances") == 0 ||
+ strcmp(cmd, "sub") == 0 ||
+ strcmp(cmd, "subuser") == 0 ||
+ strcmp(cmd, "sync") == 0 ||
+ strcmp(cmd, "topic") == 0 ||
+ strcmp(cmd, "topics") == 0 ||
+ strcmp(cmd, "usage") == 0 ||
+ strcmp(cmd, "user") == 0 ||
+ strcmp(cmd, "zone") == 0 ||
+ strcmp(cmd, "zonegroup") == 0 ||
+ strcmp(cmd, "zonegroups") == 0) {
+ *need_more = true;
+ return 0;
+ }
+
+ /*
+ * can do both radosgw-admin bucket reshard, and radosgw-admin reshard bucket
+ */
+ if (strcmp(cmd, "reshard") == 0 &&
+ !(prev_cmd && strcmp(prev_cmd, "bucket") == 0)) {
+ *need_more = true;
+ return 0;
+ }
+ if (strcmp(cmd, "bucket") == 0 &&
+ !(prev_cmd && strcmp(prev_cmd, "reshard") == 0)) {
+ *need_more = true;
+ return 0;
+ }
+
+ if (strcmp(cmd, "policy") == 0)
+ return OPT_POLICY;
+
+ if (!prev_cmd)
+ return -EINVAL;
+
+ if (strcmp(prev_cmd, "user") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_USER_CREATE;
+ if (strcmp(cmd, "info") == 0)
+ return OPT_USER_INFO;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_USER_MODIFY;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_USER_RM;
+ if (strcmp(cmd, "suspend") == 0)
+ return OPT_USER_SUSPEND;
+ if (strcmp(cmd, "enable") == 0)
+ return OPT_USER_ENABLE;
+ if (strcmp(cmd, "check") == 0)
+ return OPT_USER_CHECK;
+ if (strcmp(cmd, "stats") == 0)
+ return OPT_USER_STATS;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_USER_LIST;
+ } else if (strcmp(prev_cmd, "subuser") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_SUBUSER_CREATE;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_SUBUSER_MODIFY;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_SUBUSER_RM;
+ } else if (strcmp(prev_cmd, "key") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_KEY_CREATE;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_KEY_RM;
+ } else if (strcmp(prev_cmd, "buckets") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_BUCKETS_LIST;
+ } else if (strcmp(prev_cmd, "bucket") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_BUCKETS_LIST;
+ if (strcmp(cmd, "link") == 0)
+ return OPT_BUCKET_LINK;
+ if (strcmp(cmd, "unlink") == 0)
+ return OPT_BUCKET_UNLINK;
+ if (strcmp(cmd, "stats") == 0)
+ return OPT_BUCKET_STATS;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_BUCKET_RM;
+ if (strcmp(cmd, "rewrite") == 0)
+ return OPT_BUCKET_REWRITE;
+ if (strcmp(cmd, "reshard") == 0)
+ return OPT_BUCKET_RESHARD;
+ if (strcmp(cmd, "check") == 0)
+ return OPT_BUCKET_CHECK;
+ if (strcmp(cmd, "radoslist") == 0)
+ return OPT_BUCKET_RADOS_LIST;
+ if (strcmp(cmd, "sync") == 0) {
+ *need_more = true;
+ return 0;
+ }
+ if (strcmp(cmd, "limit") == 0) {
+ *need_more = true;
+ return 0;
+ }
+ } else if (prev_prev_cmd && strcmp(prev_prev_cmd, "bucket") == 0) {
+ if (strcmp(prev_cmd, "sync") == 0) {
+ if (strcmp(cmd, "status") == 0)
+ return OPT_BUCKET_SYNC_STATUS;
+ if (strcmp(cmd, "markers") == 0)
+ return OPT_BUCKET_SYNC_MARKERS;
+ if (strcmp(cmd, "init") == 0)
+ return OPT_BUCKET_SYNC_INIT;
+ if (strcmp(cmd, "run") == 0)
+ return OPT_BUCKET_SYNC_RUN;
+ if (strcmp(cmd, "disable") == 0)
+ return OPT_BUCKET_SYNC_DISABLE;
+ if (strcmp(cmd, "enable") == 0)
+ return OPT_BUCKET_SYNC_ENABLE;
+ } else if ((strcmp(prev_cmd, "limit") == 0) &&
+ (strcmp(cmd, "check") == 0)) {
+ return OPT_BUCKET_LIMIT_CHECK;
+ }
+ } else if (strcmp(prev_cmd, "log") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_LOG_LIST;
+ if (strcmp(cmd, "show") == 0)
+ return OPT_LOG_SHOW;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_LOG_RM;
+ } else if (strcmp(prev_cmd, "usage") == 0) {
+ if (strcmp(cmd, "show") == 0)
+ return OPT_USAGE_SHOW;
+ if (strcmp(cmd, "trim") == 0)
+ return OPT_USAGE_TRIM;
+ if (strcmp(cmd, "clear") == 0)
+ return OPT_USAGE_CLEAR;
+ } else if (strcmp(prev_cmd, "caps") == 0) {
+ if (strcmp(cmd, "add") == 0)
+ return OPT_CAPS_ADD;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_CAPS_RM;
+ } else if (strcmp(prev_cmd, "pool") == 0) {
+ if (strcmp(cmd, "add") == 0)
+ return OPT_POOL_ADD;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_POOL_RM;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_POOLS_LIST;
+ } else if (strcmp(prev_cmd, "pools") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_POOLS_LIST;
+ } else if (strcmp(prev_cmd, "object") == 0) {
+ if (strcmp(cmd, "put") == 0)
+ return OPT_OBJECT_PUT;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_OBJECT_RM;
+ if (strcmp(cmd, "unlink") == 0)
+ return OPT_OBJECT_UNLINK;
+ if (strcmp(cmd, "stat") == 0)
+ return OPT_OBJECT_STAT;
+ if (strcmp(cmd, "rewrite") == 0)
+ return OPT_OBJECT_REWRITE;
+ } else if (strcmp(prev_cmd, "objects") == 0) {
+ if (strcmp(cmd, "expire") == 0)
+ return OPT_OBJECTS_EXPIRE;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "objects") == 0) &&
+ (strcmp(prev_cmd, "expire-stale") == 0)) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_OBJECTS_EXPIRE_STALE_LIST;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_OBJECTS_EXPIRE_STALE_RM;
+ } else if (strcmp(prev_cmd, "olh") == 0) {
+ if (strcmp(cmd, "get") == 0)
+ return OPT_OLH_GET;
+ if (strcmp(cmd, "readlog") == 0)
+ return OPT_OLH_READLOG;
+ } else if (strcmp(prev_cmd, "bi") == 0) {
+ if (strcmp(cmd, "get") == 0)
+ return OPT_BI_GET;
+ if (strcmp(cmd, "put") == 0)
+ return OPT_BI_PUT;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_BI_LIST;
+ if (strcmp(cmd, "purge") == 0)
+ return OPT_BI_PURGE;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "global") == 0) &&
+ (strcmp(prev_cmd, "quota") == 0)) {
+ if (strcmp(cmd, "get") == 0)
+ return OPT_GLOBAL_QUOTA_GET;
+ if (strcmp(cmd, "set") == 0)
+ return OPT_GLOBAL_QUOTA_SET;
+ if (strcmp(cmd, "enable") == 0)
+ return OPT_GLOBAL_QUOTA_ENABLE;
+ if (strcmp(cmd, "disable") == 0)
+ return OPT_GLOBAL_QUOTA_DISABLE;
+ } else if (strcmp(prev_cmd, "period") == 0) {
+ if (match_str(cmd, "rm", "delete"))
+ return OPT_PERIOD_DELETE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_PERIOD_GET;
+ if (strcmp(cmd, "get-current") == 0)
+ return OPT_PERIOD_GET_CURRENT;
+ if (strcmp(cmd, "pull") == 0)
+ return OPT_PERIOD_PULL;
+ if (strcmp(cmd, "push") == 0)
+ return OPT_PERIOD_PUSH;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_PERIOD_LIST;
+ if (strcmp(cmd, "update") == 0)
+ return OPT_PERIOD_UPDATE;
+ if (strcmp(cmd, "commit") == 0)
+ return OPT_PERIOD_COMMIT;
+ } else if (strcmp(prev_cmd, "realm") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_REALM_CREATE;
+ if (match_str(cmd, "rm", "delete"))
+ return OPT_REALM_DELETE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_REALM_GET;
+ if (strcmp(cmd, "get-default") == 0)
+ return OPT_REALM_GET_DEFAULT;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_REALM_LIST;
+ if (strcmp(cmd, "list-periods") == 0)
+ return OPT_REALM_LIST_PERIODS;
+ if (strcmp(cmd, "rename") == 0)
+ return OPT_REALM_RENAME;
+ if (strcmp(cmd, "set") == 0)
+ return OPT_REALM_SET;
+ if (strcmp(cmd, "default") == 0)
+ return OPT_REALM_DEFAULT;
+ if (strcmp(cmd, "pull") == 0)
+ return OPT_REALM_PULL;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "zonegroup") == 0) &&
+ (strcmp(prev_cmd, "placement") == 0)) {
+ if (strcmp(cmd, "add") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_ADD;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_MODIFY;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_RM;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_LIST;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_GET;
+ if (strcmp(cmd, "default") == 0)
+ return OPT_ZONEGROUP_PLACEMENT_DEFAULT;
+ } else if (strcmp(prev_cmd, "zonegroup") == 0) {
+ if (strcmp(cmd, "add") == 0)
+ return OPT_ZONEGROUP_ADD;
+ if (strcmp(cmd, "create")== 0)
+ return OPT_ZONEGROUP_CREATE;
+ if (strcmp(cmd, "default") == 0)
+ return OPT_ZONEGROUP_DEFAULT;
+ if (strcmp(cmd, "delete") == 0)
+ return OPT_ZONEGROUP_DELETE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ZONEGROUP_GET;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_ZONEGROUP_MODIFY;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONEGROUP_LIST;
+ if (strcmp(cmd, "set") == 0)
+ return OPT_ZONEGROUP_SET;
+ if (match_str(cmd, "rm", "remove"))
+ return OPT_ZONEGROUP_REMOVE;
+ if (strcmp(cmd, "rename") == 0)
+ return OPT_ZONEGROUP_RENAME;
+ } else if (strcmp(prev_cmd, "quota") == 0) {
+ if (strcmp(cmd, "set") == 0)
+ return OPT_QUOTA_SET;
+ if (strcmp(cmd, "enable") == 0)
+ return OPT_QUOTA_ENABLE;
+ if (strcmp(cmd, "disable") == 0)
+ return OPT_QUOTA_DISABLE;
+ } else if (strcmp(prev_cmd, "zonegroups") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONEGROUP_LIST;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "zone") == 0) &&
+ (strcmp(prev_cmd, "placement") == 0)) {
+ if (strcmp(cmd, "add") == 0)
+ return OPT_ZONE_PLACEMENT_ADD;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_ZONE_PLACEMENT_MODIFY;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_ZONE_PLACEMENT_RM;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONE_PLACEMENT_LIST;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ZONE_PLACEMENT_GET;
+ } else if (strcmp(prev_cmd, "zone") == 0) {
+ if (match_str(cmd, "rm", "delete"))
+ return OPT_ZONE_DELETE;
+ if (strcmp(cmd, "create") == 0)
+ return OPT_ZONE_CREATE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ZONE_GET;
+ if (strcmp(cmd, "set") == 0)
+ return OPT_ZONE_SET;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONE_LIST;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_ZONE_MODIFY;
+ if (strcmp(cmd, "rename") == 0)
+ return OPT_ZONE_RENAME;
+ if (strcmp(cmd, "default") == 0)
+ return OPT_ZONE_DEFAULT;
+ } else if (strcmp(prev_cmd, "zones") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ZONE_LIST;
+ } else if (strcmp(prev_cmd, "gc") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_GC_LIST;
+ if (strcmp(cmd, "process") == 0)
+ return OPT_GC_PROCESS;
+ } else if (strcmp(prev_cmd, "lc") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_LC_LIST;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_LC_GET;
+ if (strcmp(cmd, "process") == 0)
+ return OPT_LC_PROCESS;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "lc") == 0) &&
+ strcmp(prev_cmd, "reshard") == 0) {
+ if (strcmp(cmd, "fix") == 0)
+ return OPT_LC_RESHARD_FIX;
+ } else if (strcmp(prev_cmd, "orphans") == 0) {
+ if (strcmp(cmd, "find") == 0)
+ return OPT_ORPHANS_FIND;
+ if (strcmp(cmd, "finish") == 0)
+ return OPT_ORPHANS_FINISH;
+ if (strcmp(cmd, "list-jobs") == 0)
+ return OPT_ORPHANS_LIST_JOBS;
+ } else if (strcmp(prev_cmd, "metadata") == 0) {
+ if (strcmp(cmd, "get") == 0)
+ return OPT_METADATA_GET;
+ if (strcmp(cmd, "put") == 0)
+ return OPT_METADATA_PUT;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_METADATA_RM;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_METADATA_LIST;
+ if (strcmp(cmd, "sync") == 0) {
+ *need_more = true;
+ return 0;
+ }
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "metadata") == 0) &&
+ (strcmp(prev_cmd, "sync") == 0)) {
+ if (strcmp(cmd, "status") == 0)
+ return OPT_METADATA_SYNC_STATUS;
+ if (strcmp(cmd, "init") == 0)
+ return OPT_METADATA_SYNC_INIT;
+ if (strcmp(cmd, "run") == 0)
+ return OPT_METADATA_SYNC_RUN;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "sync") == 0) &&
+ (strcmp(prev_cmd, "error") == 0)) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_SYNC_ERROR_LIST;
+ if (strcmp(cmd, "trim") == 0)
+ return OPT_SYNC_ERROR_TRIM;
+ } else if (strcmp(prev_cmd, "mdlog") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_MDLOG_LIST;
+ if (strcmp(cmd, "autotrim") == 0)
+ return OPT_MDLOG_AUTOTRIM;
+ if (strcmp(cmd, "trim") == 0)
+ return OPT_MDLOG_TRIM;
+ if (strcmp(cmd, "fetch") == 0)
+ return OPT_MDLOG_FETCH;
+ if (strcmp(cmd, "status") == 0)
+ return OPT_MDLOG_STATUS;
+ } else if (strcmp(prev_cmd, "bilog") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_BILOG_LIST;
+ if (strcmp(cmd, "trim") == 0)
+ return OPT_BILOG_TRIM;
+ if (strcmp(cmd, "status") == 0)
+ return OPT_BILOG_STATUS;
+ if (strcmp(cmd, "autotrim") == 0)
+ return OPT_BILOG_AUTOTRIM;
+ } else if (strcmp(prev_cmd, "data") == 0) {
+ if (strcmp(cmd, "sync") == 0) {
+ *need_more = true;
+ return 0;
+ }
+ } else if (strcmp(prev_cmd, "datalog") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_DATALOG_LIST;
+ if (strcmp(cmd, "autotrim") == 0)
+ return OPT_DATALOG_AUTOTRIM;
+ if (strcmp(cmd, "trim") == 0)
+ return OPT_DATALOG_TRIM;
+ if (strcmp(cmd, "status") == 0)
+ return OPT_DATALOG_STATUS;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "data") == 0) &&
+ (strcmp(prev_cmd, "sync") == 0)) {
+ if (strcmp(cmd, "status") == 0)
+ return OPT_DATA_SYNC_STATUS;
+ if (strcmp(cmd, "init") == 0)
+ return OPT_DATA_SYNC_INIT;
+ if (strcmp(cmd, "run") == 0)
+ return OPT_DATA_SYNC_RUN;
+ } else if (strcmp(prev_cmd, "sync") == 0) {
+ if (strcmp(cmd, "status") == 0)
+ return OPT_SYNC_STATUS;
+ } else if (strcmp(prev_cmd, "role") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_ROLE_CREATE;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_ROLE_DELETE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ROLE_GET;
+ if (strcmp(cmd, "modify") == 0)
+ return OPT_ROLE_MODIFY;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ROLE_LIST;
+ } else if (strcmp(prev_cmd, "role-policy") == 0) {
+ if (strcmp(cmd, "put") == 0)
+ return OPT_ROLE_POLICY_PUT;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_ROLE_POLICY_LIST;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_ROLE_POLICY_GET;
+ if (match_str(cmd, "rm", "delete"))
+ return OPT_ROLE_POLICY_DELETE;
+ } else if (strcmp(prev_cmd, "reshard") == 0) {
+ if (strcmp(cmd, "bucket") == 0)
+ return OPT_BUCKET_RESHARD;
+ if (strcmp(cmd, "add") == 0)
+ return OPT_RESHARD_ADD;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_RESHARD_LIST;
+ if (strcmp(cmd, "status") == 0)
+ return OPT_RESHARD_STATUS;
+ if (strcmp(cmd, "process") == 0)
+ return OPT_RESHARD_PROCESS;
+ if (strcmp(cmd, "cancel") == 0)
+ return OPT_RESHARD_CANCEL;
+ } else if (strcmp(prev_cmd, "mfa") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_MFA_CREATE;
+ if (strcmp(cmd, "remove") == 0)
+ return OPT_MFA_REMOVE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_MFA_GET;
+ if (strcmp(cmd, "list") == 0)
+ return OPT_MFA_LIST;
+ if (strcmp(cmd, "check") == 0)
+ return OPT_MFA_CHECK;
+ if (strcmp(cmd, "resync") == 0)
+ return OPT_MFA_RESYNC;
+ } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "reshard") == 0) &&
+ (strcmp(prev_cmd, "stale-instances") == 0)) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_RESHARD_STALE_INSTANCES_LIST;
+ if (match_str(cmd, "rm", "delete"))
+ return OPT_RESHARD_STALE_INSTANCES_DELETE;
+ } else if (prev_prev_cmd && strcmp(prev_prev_cmd, "pubsub") == 0) {
+ if (strcmp(prev_cmd, "topics") == 0) {
+ if (strcmp(cmd, "list") == 0)
+ return OPT_PUBSUB_TOPICS_LIST;
+ } else if (strcmp(prev_cmd, "topic") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_PUBSUB_TOPIC_CREATE;
+ if (strcmp(cmd, "get") == 0)
+ return OPT_PUBSUB_TOPIC_GET;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_PUBSUB_TOPIC_RM;
+ } else if (strcmp(prev_cmd, "notification") == 0) {
+ if (strcmp(cmd, "create") == 0)
+ return OPT_PUBSUB_NOTIFICATION_CREATE;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_PUBSUB_NOTIFICATION_RM;
+ } else if (strcmp(prev_cmd, "sub") == 0) {
+ if (strcmp(cmd, "get") == 0)
+ return OPT_PUBSUB_SUB_GET;
+ if (strcmp(cmd, "create") == 0)
+ return OPT_PUBSUB_SUB_CREATE;
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_PUBSUB_SUB_RM;
+ if (strcmp(cmd, "pull") == 0)
+ return OPT_PUBSUB_SUB_PULL;
+ } else if (strcmp(prev_cmd, "event") == 0) {
+ if (strcmp(cmd, "rm") == 0)
+ return OPT_PUBSUB_EVENT_RM;
+ }
+ }
+ return -EINVAL;
+}
+
+ /// Map a bucket index type name to its BIIndexType value.
+ ///
+ /// Recognized names are "plain", "instance" and "olh" (exact match);
+ /// any other string yields BIIndexType::Invalid.
+ BIIndexType get_bi_index_type(const string& type_str) {
+   static const struct {
+     const char *name;
+     BIIndexType type;
+   } known_types[] = {
+     { "plain",    BIIndexType::Plain },
+     { "instance", BIIndexType::Instance },
+     { "olh",      BIIndexType::OLH },
+   };
+
+   for (const auto& candidate : known_types) {
+     if (type_str == candidate.name) {
+       return candidate.type;
+     }
+   }
+   return BIIndexType::Invalid;
+ }
+
+ /// Decode one raw bucket index entry from @p bl and dump it as JSON under
+ /// the key "entry".
+ ///
+ /// Plain and Instance entries are decoded as rgw_bucket_dir_entry, OLH
+ /// entries as rgw_bucket_olh_entry. Any other index type aborts the process
+ /// via ceph_abort(). decode() is not wrapped in a try block here.
+ void dump_bi_entry(bufferlist& bl, BIIndexType index_type, Formatter *formatter)
+ {
+   const bool is_dir_entry = (index_type == BIIndexType::Plain ||
+                              index_type == BIIndexType::Instance);
+   if (!is_dir_entry && index_type != BIIndexType::OLH) {
+     ceph_abort();
+   }
+
+   auto pos = bl.cbegin();
+   if (is_dir_entry) {
+     rgw_bucket_dir_entry dir_entry;
+     decode(dir_entry, pos);
+     encode_json("entry", dir_entry, formatter);
+   } else {
+     rgw_bucket_olh_entry olh_entry;
+     decode(olh_entry, pos);
+     encode_json("entry", olh_entry, formatter);
+   }
+ }
+
+ /// Dump @p info as a "user_info" JSON section, flush the formatter to
+ /// stdout and finish the output with a newline.
+ static void show_user_info(RGWUserInfo& info, Formatter *formatter)
+ {
+   encode_json("user_info", info, formatter);
+
+   formatter->flush(cout);
+   std::endl(cout);
+ }
+
+ /// Print a role's permission policy document as a "role" object with a
+ /// single "Permission policy" string field, then flush to stdout.
+ ///
+ /// The policy text is only read, so it is taken by const reference rather
+ /// than copied on every call.
+ static void show_perm_policy(const string& perm_policy, Formatter* formatter)
+ {
+   formatter->open_object_section("role");
+   formatter->dump_string("Permission policy", perm_policy);
+   formatter->close_section();
+   formatter->flush(cout);
+ }
+
+ /// Print the given policy names as a "PolicyNames" array, one "policyname"
+ /// string per element, then flush to stdout.
+ ///
+ /// The list is only iterated, so it is taken by const reference to avoid
+ /// copying the vector and every string in it.
+ static void show_policy_names(const std::vector<string>& policy_names, Formatter* formatter)
+ {
+   formatter->open_array_section("PolicyNames");
+   for (const auto& policy_name : policy_names) {
+     formatter->dump_string("policyname", policy_name);
+   }
+   formatter->close_section();
+   formatter->flush(cout);
+ }
+
+ /// Dump a single role inside a "role" object section and flush it to stdout.
+ static void show_role_info(RGWRole& role, Formatter* formatter)
+ {
+   Formatter& out = *formatter;
+
+   out.open_object_section("role");
+   role.dump(&out);
+   out.close_section();
+
+   out.flush(cout);
+ }
+
+ /// Dump every role in @p roles as a "role" object nested in a "Roles" array,
+ /// then flush the result to stdout.
+ static void show_roles_info(vector<RGWRole>& roles, Formatter* formatter)
+ {
+   Formatter& out = *formatter;
+
+   out.open_array_section("Roles");
+   for (const auto& role : roles) {
+     out.open_object_section("role");
+     role.dump(&out);
+     out.close_section();
+   }
+   out.close_section();
+
+   out.flush(cout);
+ }
+
+ /// Dump the reshard state of each bucket instance entry as an "entry"
+ /// object (reshard_status, new_bucket_instance_id, num_shards) inside a
+ /// "status" array, then flush to stdout.
+ static void show_reshard_status(
+   const list<cls_rgw_bucket_instance_entry>& status, Formatter *formatter)
+ {
+   Formatter& out = *formatter;
+
+   out.open_array_section("status");
+   for (const auto& instance : status) {
+     out.open_object_section("entry");
+     out.dump_string("reshard_status", to_string(instance.reshard_status));
+     out.dump_string("new_bucket_instance_id",
+                     instance.new_bucket_instance_id);
+     out.dump_int("num_shards", instance.num_shards);
+     out.close_section();
+   }
+   out.close_section();
+
+   out.flush(cout);
+ }
+
+ /// Scope guard: when it goes out of scope it closes the given store through
+ /// RGWStoreManager::close_storage() and then calls rgw_http_client_cleanup().
+ class StoreDestructor {
+   RGWRados *store;
+ public:
+   explicit StoreDestructor(RGWRados *_s) : store(_s) {}
+
+   // The destructor releases the store, so a copy of the guard would release
+   // the same store a second time. Forbid copying.
+   StoreDestructor(const StoreDestructor&) = delete;
+   StoreDestructor& operator=(const StoreDestructor&) = delete;
+
+   ~StoreDestructor() {
+     RGWStoreManager::close_storage(store);
+     rgw_http_client_cleanup();
+   }
+ };
+
+ /// Look up a bucket and fill in @p bucket_info and @p bucket.
+ ///
+ /// With an empty @p bucket_name nothing is looked up and 0 is returned.
+ /// With an empty @p bucket_id the bucket is resolved by tenant and name;
+ /// otherwise the instance "<bucket_name>:<bucket_id>" is read directly.
+ /// If @p pattrs is non-null it is passed through to the lookup call.
+ ///
+ /// @return 0 on success, or the negative error code from the lookup.
+ static int init_bucket(const string& tenant_name,
+                        const string& bucket_name,
+                        const string& bucket_id,
+                        RGWBucketInfo& bucket_info,
+                        rgw_bucket& bucket,
+                        map<string, bufferlist> *pattrs = nullptr)
+ {
+   if (bucket_name.empty()) {
+     return 0;
+   }
+
+   auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+   int r;
+   if (!bucket_id.empty()) {
+     string bucket_instance_id = bucket_name + ":" + bucket_id;
+     r = store->get_bucket_instance_info(obj_ctx, bucket_instance_id, bucket_info, NULL, pattrs);
+   } else {
+     r = store->get_bucket_info(obj_ctx, tenant_name, bucket_name, bucket_info, nullptr, pattrs);
+   }
+
+   if (r < 0) {
+     cerr << "could not get bucket info for bucket=" << bucket_name << std::endl;
+     return r;
+   }
+
+   bucket = bucket_info.bucket;
+   return 0;
+ }
+
+ /// Read the whole input into @p bl.
+ ///
+ /// @param infile  path of the file to read; when empty, file descriptor 0
+ ///                (standard input) is read instead
+ /// @param bl      buffer the data is appended to
+ /// @return 0 on success, a negative errno value on failure
+ static int read_input(const string& infile, bufferlist& bl)
+ {
+   int fd = 0;
+   if (infile.size()) {
+     fd = open(infile.c_str(), O_RDONLY);
+     if (fd < 0) {
+       int err = -errno;
+       cerr << "error reading input file " << infile << std::endl;
+       return err;
+     }
+   }
+
+ // Kept as a macro: code outside this function may rely on the name.
+ #define READ_CHUNK 8196
+   int r;
+   int err = 0;
+
+   do {
+     char buf[READ_CHUNK];
+
+     r = safe_read(fd, buf, READ_CHUNK);
+     if (r < 0) {
+       // safe_read() reports failure through its (negative) return value, so
+       // use that instead of re-reading errno, which may no longer match.
+       err = r;
+       cerr << "error while reading input" << std::endl;
+       break;
+     }
+     bl.append(buf, r);
+   } while (r > 0);
+
+   // Only close descriptors opened here; never close stdin.
+   if (infile.size()) {
+     close(fd);
+   }
+   return err;
+ }
+
+ /// Read JSON from @p infile (or stdin when it is empty, see read_input())
+ /// and decode it into @p t with decode_json_obj().
+ ///
+ /// All diagnostics go to stderr so they cannot mix with formatter output
+ /// written to stdout.
+ ///
+ /// @return 0 on success, the read_input() error if reading failed, or
+ ///         -EINVAL if the data could not be parsed or decoded.
+ template <class T>
+ static int read_decode_json(const string& infile, T& t)
+ {
+   bufferlist bl;
+   int ret = read_input(infile, bl);
+   if (ret < 0) {
+     cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+   JSONParser p;
+   if (!p.parse(bl.c_str(), bl.length())) {
+     cerr << "failed to parse JSON" << std::endl;
+     return -EINVAL;
+   }
+
+   try {
+     decode_json_obj(t, &p);
+   } catch (JSONDecoder::err& e) {
+     cerr << "failed to decode JSON input: " << e.message << std::endl;
+     return -EINVAL;
+   }
+   return 0;
+ }
+
+ /// Read JSON from @p infile (or stdin when it is empty, see read_input())
+ /// and decode it by calling t.decode_json(&parser, k).
+ ///
+ /// All diagnostics go to stderr so they cannot mix with formatter output
+ /// written to stdout.
+ ///
+ /// @return 0 on success, the read_input() error if reading failed, or
+ ///         -EINVAL if the data could not be parsed or decoded.
+ template <class T, class K>
+ static int read_decode_json(const string& infile, T& t, K *k)
+ {
+   bufferlist bl;
+   int ret = read_input(infile, bl);
+   if (ret < 0) {
+     cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+   JSONParser p;
+   if (!p.parse(bl.c_str(), bl.length())) {
+     cerr << "failed to parse JSON" << std::endl;
+     return -EINVAL;
+   }
+
+   try {
+     t.decode_json(&p, k);
+   } catch (JSONDecoder::err& e) {
+     cerr << "failed to decode JSON input: " << e.message << std::endl;
+     return -EINVAL;
+   }
+   return 0;
+ }
+
+ /// Convert a date string into a utime_t.
+ ///
+ /// An empty @p date_str is not an error: @p ut is then set from zero
+ /// seconds and zero nanoseconds.
+ ///
+ /// @return 0 on success, -EINVAL if utime_t::parse_date() rejects the string
+ ///         (in which case @p ut is left untouched).
+ static int parse_date_str(const string& date_str, utime_t& ut)
+ {
+   uint64_t secs = 0;
+   uint64_t nanos = 0;
+
+   if (!date_str.empty() &&
+       utime_t::parse_date(date_str, &secs, &nanos) < 0) {
+     cerr << "ERROR: failed to parse date: " << date_str << std::endl;
+     return -EINVAL;
+   }
+
+   ut = utime_t(secs, nanos);
+   return 0;
+ }
+
+ /// Decode a value of type T from the start of @p bl and, if that works,
+ /// dump it as JSON under @p field_name.
+ ///
+ /// @return true if the value was decoded and dumped, false if decoding
+ ///         threw buffer::error (nothing is dumped in that case).
+ template <class T>
+ static bool decode_dump(const char *field_name, bufferlist& bl, Formatter *f)
+ {
+   T decoded;
+   auto pos = bl.cbegin();
+
+   try {
+     decode(decoded, pos);
+   } catch (buffer::error&) {
+     return false;
+   }
+
+   encode_json(field_name, decoded, f);
+   return true;
+ }
+
+ /// Dump the contents of @p bl as a string field named @p field_name.
+ /// Always returns true.
+ static bool dump_string(const char *field_name, bufferlist& bl, Formatter *f)
+ {
+   const string text = bl.to_str();
+
+   // Going through c_str() hides encoded null termination chars.
+   f->dump_string(field_name, text.c_str());
+   return true;
+ }
+
+ /// Apply a quota sub-command to @p quota.
+ ///
+ /// - disable commands clear quota.enabled and touch nothing else;
+ /// - enable commands set quota.enabled and then behave like "set";
+ /// - set commands update max_objects / max_size, but only for the values
+ ///   flagged by @p have_max_objects / @p have_max_size. Negative inputs are
+ ///   stored as -1; a non-negative size is stored as
+ ///   rgw_rounded_kb(max_size) * 1024.
+ ///
+ /// Any other @p opt_cmd leaves @p quota unchanged.
+ void set_quota_info(RGWQuotaInfo& quota, int opt_cmd, int64_t max_size, int64_t max_objects,
+                     bool have_max_size, bool have_max_objects)
+ {
+   const bool disabling = (opt_cmd == OPT_QUOTA_DISABLE ||
+                           opt_cmd == OPT_GLOBAL_QUOTA_DISABLE);
+   if (disabling) {
+     quota.enabled = false;
+     return;
+   }
+
+   const bool enabling = (opt_cmd == OPT_QUOTA_ENABLE ||
+                          opt_cmd == OPT_GLOBAL_QUOTA_ENABLE);
+   const bool setting = (opt_cmd == OPT_QUOTA_SET ||
+                         opt_cmd == OPT_GLOBAL_QUOTA_SET);
+   if (!enabling && !setting) {
+     return;
+   }
+
+   if (enabling) {
+     quota.enabled = true;
+   }
+
+   // Enabling also applies any limits supplied with the command.
+   if (have_max_objects) {
+     quota.max_objects = (max_objects < 0) ? -1 : max_objects;
+   }
+   if (have_max_size) {
+     if (max_size < 0) {
+       quota.max_size = -1;
+     } else {
+       quota.max_size = rgw_rounded_kb(max_size) * 1024;
+     }
+   }
+ }
+
+ /// Load a bucket's info, apply the quota command to its quota settings
+ /// (see set_quota_info()) and write the bucket instance info back.
+ ///
+ /// @return 0 on success; on failure the error code negated, i.e. a
+ ///         positive value.
+ int set_bucket_quota(RGWRados *store, int opt_cmd,
+                      const string& tenant_name, const string& bucket_name,
+                      int64_t max_size, int64_t max_objects,
+                      bool have_max_size, bool have_max_objects)
+ {
+   RGWBucketInfo info;
+   map<string, bufferlist> bucket_attrs;
+   auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+   int rc = store->get_bucket_info(obj_ctx, tenant_name, bucket_name, info, NULL, &bucket_attrs);
+   if (rc < 0) {
+     cerr << "could not get bucket info for bucket=" << bucket_name << ": " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+
+   set_quota_info(info.quota, opt_cmd, max_size, max_objects, have_max_size, have_max_objects);
+
+   rc = store->put_bucket_instance_info(info, false, real_time(), &bucket_attrs);
+   if (rc < 0) {
+     cerr << "ERROR: failed writing bucket instance info: " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+
+   return 0;
+ }
+
+ /// Apply a quota command to the user's per-bucket quota and persist it
+ /// through user.modify().
+ ///
+ /// @return 0 on success; on failure the error code negated, i.e. a
+ ///         positive value.
+ int set_user_bucket_quota(int opt_cmd, RGWUser& user, RGWUserAdminOpState& op_state, int64_t max_size, int64_t max_objects,
+                           bool have_max_size, bool have_max_objects)
+ {
+   RGWUserInfo& info = op_state.get_user_info();
+   set_quota_info(info.bucket_quota, opt_cmd, max_size, max_objects, have_max_size, have_max_objects);
+   op_state.set_bucket_quota(info.bucket_quota);
+
+   string err_msg;
+   const int rc = user.modify(op_state, &err_msg);
+   if (rc < 0) {
+     cerr << "ERROR: failed updating user info: " << cpp_strerror(-rc) << ": " << err_msg << std::endl;
+     return -rc;
+   }
+
+   return 0;
+ }
+
+ /// Apply a quota command to the user's overall quota and persist it
+ /// through user.modify().
+ ///
+ /// @return 0 on success; on failure the error code negated, i.e. a
+ ///         positive value.
+ int set_user_quota(int opt_cmd, RGWUser& user, RGWUserAdminOpState& op_state, int64_t max_size, int64_t max_objects,
+                    bool have_max_size, bool have_max_objects)
+ {
+   RGWUserInfo& info = op_state.get_user_info();
+   set_quota_info(info.user_quota, opt_cmd, max_size, max_objects, have_max_size, have_max_objects);
+   op_state.set_user_quota(info.user_quota);
+
+   string err_msg;
+   const int rc = user.modify(op_state, &err_msg);
+   if (rc < 0) {
+     cerr << "ERROR: failed updating user info: " << cpp_strerror(-rc) << ": " << err_msg << std::endl;
+     return -rc;
+   }
+
+   return 0;
+ }
+
+ /// Return whatever rgw_obj_key::oid_to_key_in_ns() reports for @p name
+ /// when asked to parse it in the empty namespace; the parsed key itself
+ /// is discarded.
+ static bool bucket_object_check_filter(const string& name)
+ {
+   string empty_ns; /* empty namespace */
+   rgw_obj_key parsed_key;
+
+   return rgw_obj_key::oid_to_key_in_ns(name, &parsed_key, empty_ns);
+ }
+
+ /// Decide whether an object needs rewriting because of its stripe size.
+ ///
+ /// The object's attrs and size are read first. Without a manifest
+ /// attribute, *need_rewrite is true when the object size is at least
+ /// @p min_stripe_size. With a manifest, *need_rewrite is true when any
+ /// explicit manifest part is at least @p min_stripe_size.
+ ///
+ /// @return 0 on success (with *need_rewrite set), the negative error from
+ ///         preparing the read, or -EIO if the manifest cannot be decoded.
+ int check_min_obj_stripe_size(RGWRados *store, RGWBucketInfo& bucket_info, rgw_obj& obj, uint64_t min_stripe_size, bool *need_rewrite)
+ {
+   map<string, bufferlist> attrs;
+   uint64_t obj_size;
+
+   RGWObjectCtx obj_ctx(store);
+   RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+   RGWRados::Object::Read read_op(&op_target);
+   read_op.params.attrs = &attrs;
+   read_op.params.obj_size = &obj_size;
+
+   const int ret = read_op.prepare();
+   if (ret < 0) {
+     lderr(store->ctx()) << "ERROR: failed to stat object, returned error: " << cpp_strerror(-ret) << dendl;
+     return ret;
+   }
+
+   auto manifest_attr = attrs.find(RGW_ATTR_MANIFEST);
+   if (manifest_attr == attrs.end()) {
+     // No manifest: judge by the object's total size.
+     *need_rewrite = (obj_size >= min_stripe_size);
+     return 0;
+   }
+
+   RGWObjManifest manifest;
+   try {
+     auto biter = manifest_attr->second.cbegin();
+     decode(manifest, biter);
+   } catch (buffer::error& err) {
+     ldout(store->ctx(), 0) << "ERROR: failed to decode manifest" << dendl;
+     return -EIO;
+   }
+
+   // One oversized explicit part is enough to require a rewrite.
+   *need_rewrite = false;
+   for (auto& part_entry : manifest.get_explicit_objs()) {
+     if (part_entry.second.size >= min_stripe_size) {
+       *need_rewrite = true;
+       break;
+     }
+   }
+
+   return 0;
+ }
+
+
+ /// Check the locator of an object's head and optionally repair it,
+ /// reporting the outcome as an "object" section on @p f.
+ ///
+ /// The head is considered to need fixing when preparing a read on it
+ /// returns -ENOENT. A repair through store->fix_head_obj_locator() is
+ /// attempted only when @p fix is set and either the head needs fixing or
+ /// @p remove_bad is set. The reported status is "fixed" after a successful
+ /// repair, otherwise "needs_fixing" or "ok".
+ ///
+ /// @return always 0; a failed repair is only reported on stderr.
+ int check_obj_locator_underscore(RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_obj_key& key, bool fix, bool remove_bad, Formatter *f) {
+   f->open_object_section("object");
+
+   f->open_object_section("key");
+   f->dump_string("type", "head");
+   f->dump_string("name", key.name);
+   f->dump_string("instance", key.instance);
+   f->close_section();
+
+   string oid;
+   string locator;
+   get_obj_bucket_and_oid_loc(obj, oid, locator);
+   f->dump_string("oid", oid);
+   f->dump_string("locator", locator);
+
+   RGWObjectCtx obj_ctx(store);
+   RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+   RGWRados::Object::Read read_op(&op_target);
+
+   int ret = read_op.prepare();
+   const bool needs_fixing = (ret == -ENOENT);
+   f->dump_bool("needs_fixing", needs_fixing);
+
+   string status = (needs_fixing ? "needs_fixing" : "ok");
+
+   const bool attempt_fix = fix && (needs_fixing || remove_bad);
+   if (attempt_fix) {
+     ret = store->fix_head_obj_locator(bucket_info, needs_fixing, remove_bad, key);
+     if (ret < 0) {
+       cerr << "ERROR: fix_head_object_locator() returned ret=" << ret << std::endl;
+     } else {
+       status = "fixed";
+     }
+   }
+
+   f->dump_string("status", status);
+   f->close_section();
+
+   return 0;
+ }
+
+ /// Check (and with @p fix, repair) the locator of an object's tail through
+ /// store->fix_tail_obj_locator(), reporting the outcome as an "object"
+ /// section on @p f.
+ ///
+ /// The reported status is "failed" if the store call fails, "needs_fixing"
+ /// if a problem was found and @p fix is off, otherwise "ok".
+ ///
+ /// @return always 0; a failure is only reported on stderr and in the
+ ///         dumped status.
+ int check_obj_tail_locator_underscore(RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_obj_key& key, bool fix, Formatter *f) {
+   f->open_object_section("object");
+   f->open_object_section("key");
+   f->dump_string("type", "tail");
+   f->dump_string("name", key.name);
+   f->dump_string("instance", key.instance);
+   f->close_section();
+
+   // Must start out defined: it is dumped below even when the store call
+   // fails and may not have written to it.
+   bool needs_fixing = false;
+   string status;
+
+   int ret = store->fix_tail_obj_locator(bucket_info, key, fix, &needs_fixing);
+   if (ret < 0) {
+     cerr << "ERROR: fix_tail_object_locator_underscore() returned ret=" << ret << std::endl;
+     status = "failed";
+   } else {
+     status = (needs_fixing && !fix ? "needs_fixing" : "ok");
+   }
+
+   f->dump_bool("needs_fixing", needs_fixing);
+   f->dump_string("status", status);
+
+   f->close_section();
+
+   return 0;
+ }
+
+ /// List a bucket's objects (all versions) and run the head and tail
+ /// locator checks on every object whose name starts with '_', writing the
+ /// results to @p f as a "bucket" section holding a "check_objects" array.
+ ///
+ /// @param fix         repair problems that are found
+ /// @param remove_bad  only valid together with @p fix
+ /// @return 0 on success, -EINVAL for remove_bad without fix.
+ ///
+ /// NOTE(review): the error returns are mixed in sign. A bucket init
+ /// failure returns the negative code, while listing and tail-check failures
+ /// return it negated. On those early returns the formatter sections opened
+ /// here are not closed.
+ /// NOTE(review): `count` is cumulative, so listing stops once max_entries
+ /// (1000) objects have been seen in total. Confirm the cap is intended.
+ int do_check_object_locator(const string& tenant_name, const string& bucket_name,
+                             bool fix, bool remove_bad, Formatter *f)
+ {
+   if (remove_bad && !fix) {
+     cerr << "ERROR: can't have remove_bad specified without fix" << std::endl;
+     return -EINVAL;
+   }
+
+   RGWBucketInfo bucket_info;
+   rgw_bucket bucket;
+   string bucket_id;
+
+   f->open_object_section("bucket");
+   f->dump_string("bucket", bucket_name);
+   int ret = init_bucket(tenant_name, bucket_name, bucket_id, bucket_info, bucket);
+   if (ret < 0) {
+     cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+
+   const int max_entries = 1000;
+   int count = 0;
+   bool truncated;
+
+   vector<rgw_bucket_dir_entry> result;
+   map<string, bool> common_prefixes;
+
+   RGWRados::Bucket target(store, bucket_info);
+   RGWRados::Bucket::List list_op(&target);
+
+   // No prefix, delimiter, start marker or namespace: list everything.
+   const string empty;
+   list_op.params.prefix = empty;
+   list_op.params.delim = empty;
+   list_op.params.marker = rgw_obj_key(empty);
+   list_op.params.ns = empty;
+   list_op.params.enforce_ns = true;
+   list_op.params.list_versions = true;
+
+   f->open_array_section("check_objects");
+   do {
+     ret = list_op.list_objects(max_entries - count, &result, &common_prefixes, &truncated);
+     if (ret < 0) {
+       cerr << "ERROR: store->list_objects(): " << cpp_strerror(-ret) << std::endl;
+       return -ret;
+     }
+
+     count += result.size();
+
+     for (const auto& dirent : result) {
+       rgw_obj_key key = dirent.key;
+       rgw_obj obj(bucket, key);
+
+       if (key.name[0] != '_') {
+         continue;
+       }
+
+       ret = check_obj_locator_underscore(bucket_info, obj, key, fix, remove_bad, f);
+       if (ret < 0) {
+         continue;
+       }
+
+       ret = check_obj_tail_locator_underscore(bucket_info, obj, key, fix, f);
+       if (ret < 0) {
+         cerr << "ERROR: check_obj_tail_locator_underscore(): " << cpp_strerror(-ret) << std::endl;
+         return -ret;
+       }
+     }
+     f->flush(cout);
+   } while (truncated && count < max_entries);
+   f->close_section();
+   f->close_section();
+
+   f->flush(cout);
+
+   return 0;
+ }
+
+ /// Turn data sync for a bucket on or off.
+ ///
+ /// Steps, in order:
+ ///  1. load the bucket info and set or clear BUCKET_DATASYNC_DISABLED;
+ ///  2. write the bucket instance info back;
+ ///  3. write a "stop" bilog entry for OPT_BUCKET_SYNC_DISABLE, or a
+ ///     "resync" entry for any other command;
+ ///  4. add a data log entry for every shard (shard id -1 when the bucket
+ ///     has num_shards == 0).
+ ///
+ /// @return 0 on success. Failures in steps 1-2 return the error negated
+ ///         (positive); failures in steps 3-4 return the negative error
+ ///         unchanged.
+ int set_bucket_sync_enabled(RGWRados *store, int opt_cmd, const string& tenant_name, const string& bucket_name)
+ {
+   RGWBucketInfo bucket_info;
+   map<string, bufferlist> attrs;
+   auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+   int r = store->get_bucket_info(obj_ctx, tenant_name, bucket_name, bucket_info, NULL, &attrs);
+   if (r < 0) {
+     cerr << "could not get bucket info for bucket=" << bucket_name << ": " << cpp_strerror(-r) << std::endl;
+     return -r;
+   }
+
+   const bool disabling = (opt_cmd == OPT_BUCKET_SYNC_DISABLE);
+   if (disabling) {
+     bucket_info.flags |= BUCKET_DATASYNC_DISABLED;
+   } else if (opt_cmd == OPT_BUCKET_SYNC_ENABLE) {
+     bucket_info.flags &= ~BUCKET_DATASYNC_DISABLED;
+   }
+
+   r = store->put_bucket_instance_info(bucket_info, false, real_time(), &attrs);
+   if (r < 0) {
+     cerr << "ERROR: failed writing bucket instance info: " << cpp_strerror(-r) << std::endl;
+     return -r;
+   }
+
+   if (!disabling) {
+     r = store->resync_bi_log_entries(bucket_info, -1);
+     if (r < 0) {
+       lderr(store->ctx()) << "ERROR: failed writing resync bilog" << dendl;
+       return r;
+     }
+   } else {
+     r = store->stop_bi_log_entries(bucket_info, -1);
+     if (r < 0) {
+       lderr(store->ctx()) << "ERROR: failed writing stop bilog" << dendl;
+       return r;
+     }
+   }
+
+   // A bucket with num_shards == 0 gets a single entry with shard id -1;
+   // otherwise shard ids run from 0 to num_shards - 1.
+   const int shards_num = bucket_info.num_shards ? bucket_info.num_shards : 1;
+   const int first_shard = bucket_info.num_shards ? 0 : -1;
+   for (int i = 0; i < shards_num; ++i) {
+     r = store->data_log->add_entry(bucket_info.bucket, first_shard + i);
+     if (r < 0) {
+       lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+       return r;
+     }
+   }
+
+   return 0;
+ }
+
+
+ /// Build a REST connection to @p remote if it names this zonegroup or one
+ /// of its zones.
+ ///
+ /// The zonegroup id is checked first and uses the zonegroup's endpoints;
+ /// otherwise the first zone whose id matches supplies the endpoints.
+ /// Returns an empty optional when nothing matches.
+ static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store,
+                                                     const RGWZoneGroup& zonegroup,
+                                                     const std::string& remote)
+ {
+   boost::optional<RGWRESTConn> conn;
+
+   if (remote == zonegroup.get_id()) {
+     conn.emplace(store->ctx(), store->svc.zone, remote, zonegroup.endpoints);
+     return conn;
+   }
+
+   for (const auto& entry : zonegroup.zones) {
+     const auto& zone = entry.second;
+     if (remote != zone.id) {
+       continue;
+     }
+     conn.emplace(store->ctx(), store->svc.zone, remote, zone.endpoints);
+     break;
+   }
+   return conn;
+ }
+
+ /// Try every zonegroup of the period map in turn and return the first
+ /// connection found for @p remote, or an empty optional if no zonegroup
+ /// or zone matches.
+ static boost::optional<RGWRESTConn> get_remote_conn(RGWRados *store,
+                                                     const RGWPeriodMap& period_map,
+                                                     const std::string& remote)
+ {
+   for (const auto& zg : period_map.zonegroups) {
+     auto found = get_remote_conn(store, zg.second, remote);
+     if (found) {
+       return found;
+     }
+   }
+   return boost::none;
+ }
+
+ // we expect a very small response
+ static constexpr size_t MAX_REST_RESPONSE = 128 * 1024;
+
+ /// Forward a request over an existing gateway connection and parse the
+ /// JSON response into @p parser.
+ ///
+ /// @return -EINVAL if @p conn is null; the (negative) error from
+ ///         conn->forward() if the request failed; -EINVAL if the request
+ ///         succeeded but the response is not parseable JSON; otherwise the
+ ///         result of conn->forward().
+ static int send_to_remote_gateway(RGWRESTConn* conn, req_info& info,
+                                   bufferlist& in_data, JSONParser& parser)
+ {
+   if (!conn) {
+     return -EINVAL;
+   }
+
+   ceph::bufferlist response;
+   rgw_user user;
+   int ret = conn->forward(user, info, nullptr, MAX_REST_RESPONSE, &in_data, &response);
+
+   // JSONParser::parse() reports success as a boolean (it is tested with
+   // `!p.parse(...)` elsewhere in this file), so comparing its result with
+   // `< 0` can never detect a failure.
+   const bool parsed = parser.parse(response.c_str(), response.length());
+   if (ret < 0) {
+     // The request error takes precedence. The body was still parsed on a
+     // best-effort basis so the caller can look for an error message in it.
+     return ret;
+   }
+   if (!parsed) {
+     cerr << "failed to parse response" << std::endl;
+     return -EINVAL;
+   }
+   return ret;
+ }
+
+ /// Send a request to an explicit URL, signed with the given access and
+ /// secret key, and parse the JSON response into @p parser.
+ ///
+ /// @return -EINVAL if either key is empty; the (negative) error from
+ ///         forward_request() if the request failed; -EINVAL if the request
+ ///         succeeded but the response is not parseable JSON; otherwise the
+ ///         result of forward_request().
+ static int send_to_url(const string& url, const string& access,
+                        const string& secret, req_info& info,
+                        bufferlist& in_data, JSONParser& parser)
+ {
+   if (access.empty() || secret.empty()) {
+     cerr << "An --access-key and --secret must be provided with --url." << std::endl;
+     return -EINVAL;
+   }
+   RGWAccessKey key;
+   key.id = access;
+   key.key = secret;
+
+   param_vec_t params;
+   RGWRESTSimpleRequest req(g_ceph_context, info.method, url, NULL, &params);
+
+   bufferlist response;
+   int ret = req.forward_request(key, info, MAX_REST_RESPONSE, &in_data, &response);
+
+   // JSONParser::parse() reports success as a boolean (it is tested with
+   // `!p.parse(...)` elsewhere in this file), so comparing its result with
+   // `< 0` can never detect a failure.
+   const bool parsed = parser.parse(response.c_str(), response.length());
+   if (ret < 0) {
+     // The request error takes precedence. The body was still parsed on a
+     // best-effort basis so the caller can look for an error message in it.
+     return ret;
+   }
+   if (!parsed) {
+     // Diagnostics belong on stderr, as in send_to_remote_gateway().
+     cerr << "failed to parse response" << std::endl;
+     return -EINVAL;
+   }
+   return ret;
+ }
+
+ /// Dispatch a request: a non-empty @p url is used (with the given keys)
+ /// in preference to the gateway connection @p conn.
+ static int send_to_remote_or_url(RGWRESTConn *conn, const string& url,
+                                  const string& access, const string& secret,
+                                  req_info& info, bufferlist& in_data,
+                                  JSONParser& parser)
+ {
+   if (!url.empty()) {
+     return send_to_url(url, access, secret, info, in_data, parser);
+   }
+   return send_to_remote_gateway(conn, info, in_data, parser);
+ }
+
+ /// Commit a period.
+ ///
+ /// If the local zone is the period's master zone the period is committed
+ /// locally. Otherwise the period is POSTed (with an empty id) to
+ /// /admin/realm/period on @p remote or @p url, and the period returned in
+ /// the response is stored, made the latest epoch, reflected locally and
+ /// announced through realm.notify_new_period(). When neither @p remote nor
+ /// @p url is given, the period's master zone is used as the remote.
+ ///
+ /// All error diagnostics are written to stderr.
+ ///
+ /// @return a negative error code on failure; on success the result of the
+ ///         last step performed.
+ static int commit_period(RGWRealm& realm, RGWPeriod& period,
+                          string remote, const string& url,
+                          const string& access, const string& secret,
+                          bool force)
+ {
+   const string& master_zone = period.get_master_zone();
+   if (master_zone.empty()) {
+     cerr << "cannot commit period: period does not have a master zone of a master zonegroup" << std::endl;
+     return -EINVAL;
+   }
+   // are we the period's master zone?
+   if (store->svc.zone->get_zone_params().get_id() == master_zone) {
+     // read the current period
+     RGWPeriod current_period;
+     int ret = current_period.init(g_ceph_context, store->svc.sysobj, realm.get_id());
+     if (ret < 0) {
+       cerr << "Error initializing current period: "
+            << cpp_strerror(-ret) << std::endl;
+       return ret;
+     }
+     // the master zone can commit locally
+     ret = period.commit(store, realm, current_period, cerr, force);
+     if (ret < 0) {
+       cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
+     }
+     return ret;
+   }
+
+   if (remote.empty() && url.empty()) {
+     // use the new master zone's connection
+     remote = master_zone;
+     cout << "Sending period to new master zone " << remote << std::endl;
+   }
+   boost::optional<RGWRESTConn> conn;
+   RGWRESTConn *remote_conn = nullptr;
+   if (!remote.empty()) {
+     conn = get_remote_conn(store, period.get_map(), remote);
+     if (!conn) {
+       cerr << "failed to find a zone or zonegroup for remote "
+            << remote << std::endl;
+       return -ENOENT;
+     }
+     remote_conn = &*conn;
+   }
+
+   // push period to the master with an empty period id
+   period.set_id("");
+
+   RGWEnv env;
+   req_info info(g_ceph_context, &env);
+   info.method = "POST";
+   info.request_uri = "/admin/realm/period";
+
+   // json format into a bufferlist
+   JSONFormatter jf(false);
+   encode_json("period", period, &jf);
+   bufferlist bl;
+   jf.flush(bl);
+
+   JSONParser p;
+   int ret = send_to_remote_or_url(remote_conn, url, access, secret, info, bl, p);
+   if (ret < 0) {
+     cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
+
+     // did we parse an error message?
+     auto message = p.find_obj("Message");
+     if (message) {
+       cerr << "Reason: " << message->get_data() << std::endl;
+     }
+     return ret;
+   }
+
+   // decode the response and store it back
+   try {
+     decode_json_obj(period, &p);
+   } catch (JSONDecoder::err& e) {
+     cerr << "failed to decode JSON input: " << e.message << std::endl;
+     return -EINVAL;
+   }
+   if (period.get_id().empty()) {
+     cerr << "Period commit got back an empty period id" << std::endl;
+     return -EINVAL;
+   }
+   // the master zone gave us back the period that it committed, so it's
+   // safe to save it as our latest epoch
+   ret = period.store_info(false);
+   if (ret < 0) {
+     cerr << "Error storing committed period " << period.get_id() << ": "
+          << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+   ret = period.set_latest_epoch(period.get_epoch());
+   if (ret < 0) {
+     cerr << "Error updating period epoch: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+   ret = period.reflect();
+   if (ret < 0) {
+     cerr << "Error updating local objects: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+   realm.notify_new_period(period);
+   return ret;
+ }
+
+ /// Fork the given period, update and store it, optionally commit it, and
+ /// print the resulting period as JSON on stdout.
+ ///
+ /// @param period_epoch  decimal epoch; parsed with atoi(), empty means 0
+ /// @param commit        also run commit_period() after storing
+ /// @return 0 on success, a negative error code from the first failing step
+ static int update_period(const string& realm_id, const string& realm_name,
+                          const string& period_id, const string& period_epoch,
+                          bool commit, const string& remote, const string& url,
+                          const string& access, const string& secret,
+                          Formatter *formatter, bool force)
+ {
+   RGWRealm realm(realm_id, realm_name);
+   int ret = realm.init(g_ceph_context, store->svc.sysobj);
+   if (ret < 0) {
+     cerr << "Error initializing realm " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+
+   const epoch_t epoch = period_epoch.empty() ? 0 : atoi(period_epoch.c_str());
+
+   RGWPeriod period(period_id, epoch);
+   ret = period.init(g_ceph_context, store->svc.sysobj, realm.get_id());
+   if (ret < 0) {
+     cerr << "period init failed: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+
+   period.fork();
+
+   ret = period.update();
+   if (ret < 0) {
+     // Nothing is printed here: period.update() already handles reporting
+     // for its error codes.
+     return ret;
+   }
+
+   ret = period.store_info(false);
+   if (ret < 0) {
+     cerr << "failed to store period: " << cpp_strerror(-ret) << std::endl;
+     return ret;
+   }
+
+   if (commit) {
+     ret = commit_period(realm, period, remote, url, access, secret, force);
+     if (ret < 0) {
+       cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
+       return ret;
+     }
+   }
+
+   encode_json("period", period, formatter);
+   formatter->flush(cout);
+   return 0;
+ }
+
+// Resolve 'bucket' from tenant/name/id via init_bucket(), discarding the
+// RGWBucketInfo that init_bucket() also fills in.
+// Returns 0 on success, or init_bucket()'s negative error after logging it.
+static int init_bucket_for_sync(const string& tenant, const string& bucket_name,
+                                const string& bucket_id, rgw_bucket& bucket)
+{
+  RGWBucketInfo discarded_info;
+
+  const int r = init_bucket(tenant, bucket_name, bucket_id, discarded_info, bucket);
+  if (r >= 0) {
+    return 0;
+  }
+
+  cerr << "ERROR: could not init bucket: " << cpp_strerror(-r) << std::endl;
+  return r;
+}
+
+// Fetch a period from a remote gateway and store it locally.
+//
+// Issues GET /admin/realm/period through send_to_remote_or_url(), passing
+// whichever of realm_id / realm_name / period_id / epoch are non-empty as
+// query parameters. The JSON response is decoded into *period, which is then
+// stored with store_info(false).
+//
+// Returns 0 on success, -EINVAL if the response cannot be decoded, or the
+// negative error from the request, period init, or period store. Failure to
+// update the latest epoch is deliberately ignored.
+static int do_period_pull(RGWRESTConn *remote_conn, const string& url,
+                          const string& access_key, const string& secret_key,
+                          const string& realm_id, const string& realm_name,
+                          const string& period_id, const string& period_epoch,
+                          RGWPeriod *period)
+{
+  RGWEnv env;
+  req_info info(g_ceph_context, &env);
+  info.method = "GET";
+  info.request_uri = "/admin/realm/period";
+
+  // only forward the selectors the caller actually provided
+  map<string, string> &params = info.args.get_params();
+  if (!realm_id.empty())
+    params["realm_id"] = realm_id;
+  if (!realm_name.empty())
+    params["realm_name"] = realm_name;
+  if (!period_id.empty())
+    params["period_id"] = period_id;
+  if (!period_epoch.empty())
+    params["epoch"] = period_epoch;
+
+  bufferlist bl;
+  JSONParser p;
+  int ret = send_to_remote_or_url(remote_conn, url, access_key, secret_key,
+                                  info, bl, p);
+  if (ret < 0) {
+    cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+  ret = period->init(g_ceph_context, store->svc.sysobj, false);
+  if (ret < 0) {
+    cerr << "failed to init period: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+  try {
+    decode_json_obj(*period, &p);
+  } catch (const JSONDecoder::err& e) {
+    // diagnostics belong on stderr so stdout stays clean for JSON output
+    cerr << "failed to decode JSON input: " << e.message << std::endl;
+    return -EINVAL;
+  }
+  ret = period->store_info(false);
+  if (ret < 0) {
+    cerr << "Error storing period " << period->get_id() << ": " << cpp_strerror(-ret) << std::endl;
+    // do not report success for a period that was never persisted
+    return ret;
+  }
+  // store latest epoch (ignore errors)
+  period->update_latest_epoch(period->get_epoch());
+  return 0;
+}
+
+// Look up the realm identified by realm_id / realm_name and copy its current
+// period id into *period_id.
+// Returns 0 on success, or the negative error from realm.init(), in which case
+// *period_id is left untouched.
+static int read_current_period_id(RGWRados* store, const std::string& realm_id,
+                                  const std::string& realm_name,
+                                  std::string* period_id)
+{
+  RGWRealm realm(realm_id, realm_name);
+
+  const int r = realm.init(g_ceph_context, store->svc.sysobj);
+  if (r >= 0) {
+    *period_id = realm.get_current_period();
+    return 0;
+  }
+
+  std::cerr << "failed to read realm: " << cpp_strerror(-r) << std::endl;
+  return r;
+}
+
+// Move whatever text has accumulated in 'ss' onto the end of 'l' as one
+// entry (nothing is appended when the stream is empty), then reset the
+// stream's buffer to the empty string.
+void flush_ss(stringstream& ss, list<string>& l)
+{
+  const string pending = ss.str();
+  if (!pending.empty()) {
+    l.push_back(pending);
+  }
+  ss.str("");
+}
+
+// Start a new output line: flush the pending contents of 'ss' into 'l', then,
+// when 'tab' is positive, pre-fill the stream with 'tab' columns of padding.
+// Returns 'ss' so the caller can keep streaming into it.
+stringstream& push_ss(stringstream& ss, list<string>& l, int tab = 0)
+{
+  flush_ss(ss, l);
+  if (tab <= 0) {
+    return ss;
+  }
+  // an empty string printed at width 'tab' yields the indentation; the
+  // trailing setw(1) resets the width for whatever is streamed next
+  ss << setw(tab) << "" << setw(1);
+  return ss;
+}
+
+// Append a human-readable metadata sync report to 'status', one list entry
+// per output line.
+//
+// Reads the local sync status and the master's per-shard log info, then
+// reports: the overall state, how many shards are in full / incremental sync,
+// which shards are behind the master, and the timestamp of the oldest master
+// log entry that has not been applied yet. Any failure is reported as a line
+// in 'status' (or via derr for non-fatal lookups) and ends the report early.
+static void get_md_sync_status(list<string>& status)
+{
+  RGWMetaSyncStatusManager sync(store, store->get_async_rados());
+
+  int ret = sync.init();
+  if (ret < 0) {
+    status.push_back(string("failed to retrieve sync info: sync.init() failed: ") + cpp_strerror(-ret));
+    return;
+  }
+
+  rgw_meta_sync_status sync_status;
+  ret = sync.read_sync_status(&sync_status);
+  if (ret < 0) {
+    status.push_back(string("failed to read sync status: ") + cpp_strerror(-ret));
+    return;
+  }
+
+  string status_str;
+  switch (sync_status.sync_info.state) {
+    case rgw_meta_sync_info::StateInit:
+      status_str = "init";
+      break;
+    case rgw_meta_sync_info::StateBuildingFullSyncMaps:
+      status_str = "preparing for full sync";
+      break;
+    case rgw_meta_sync_info::StateSync:
+      status_str = "syncing";
+      break;
+    default:
+      status_str = "unknown";
+  }
+
+  status.push_back(status_str);
+
+  uint64_t full_total = 0;
+  uint64_t full_complete = 0;
+
+  int num_full = 0;
+  int num_inc = 0;
+  int total_shards = 0;
+  set<int> shards_behind_set;
+
+  // iterate by const reference: each marker carries strings we need not copy
+  for (const auto& marker_iter : sync_status.sync_markers) {
+    full_total += marker_iter.second.total_entries;
+    total_shards++;
+    if (marker_iter.second.state == rgw_meta_sync_marker::SyncState::FullSync) {
+      num_full++;
+      full_complete += marker_iter.second.pos;
+      int shard_id = marker_iter.first;
+      shards_behind_set.insert(shard_id);
+    } else {
+      // shards past full sync count as fully complete
+      full_complete += marker_iter.second.total_entries;
+    }
+    if (marker_iter.second.state == rgw_meta_sync_marker::SyncState::IncrementalSync) {
+      num_inc++;
+    }
+  }
+
+  stringstream ss;
+  push_ss(ss, status) << "full sync: " << num_full << "/" << total_shards << " shards";
+
+  if (num_full > 0) {
+    push_ss(ss, status) << "full sync: " << full_total - full_complete << " entries to sync";
+  }
+
+  push_ss(ss, status) << "incremental sync: " << num_inc << "/" << total_shards << " shards";
+
+  map<int, RGWMetadataLogInfo> master_shards_info;
+  string master_period = store->svc.zone->get_current_period_id();
+
+  ret = sync.read_master_log_shards_info(master_period, &master_shards_info);
+  if (ret < 0) {
+    status.push_back(string("failed to fetch master sync status: ") + cpp_strerror(-ret));
+    return;
+  }
+
+  map<int, string> shards_behind;
+  if (sync_status.sync_info.period != master_period) {
+    status.push_back(string("master is on a different period: master_period=" +
+                            master_period + " local_period=" + sync_status.sync_info.period));
+  } else {
+    for (const auto& local_iter : sync_status.sync_markers) {
+      int shard_id = local_iter.first;
+      auto iter = master_shards_info.find(shard_id);
+
+      if (iter == master_shards_info.end()) {
+        /* huh? */
+        derr << "ERROR: could not find remote sync shard status for shard_id=" << shard_id << dendl;
+        continue;
+      }
+      const auto& master_marker = iter->second.marker;
+      if (local_iter.second.state == rgw_meta_sync_marker::SyncState::IncrementalSync &&
+          master_marker > local_iter.second.marker) {
+        shards_behind[shard_id] = local_iter.second.marker;
+        shards_behind_set.insert(shard_id);
+      }
+    }
+  }
+
+  // shards not yet in incremental sync are counted as behind as well
+  int total_behind = shards_behind.size() + (sync_status.sync_info.num_shards - num_inc);
+  if (total_behind == 0) {
+    push_ss(ss, status) << "metadata is caught up with master";
+  } else {
+    push_ss(ss, status) << "metadata is behind on " << total_behind << " shards";
+
+    push_ss(ss, status) << "behind shards: " << "[" << shards_behind_set << "]";
+
+    map<int, rgw_mdlog_shard_data> master_pos;
+    ret = sync.read_master_log_shards_next(sync_status.sync_info.period, shards_behind, &master_pos);
+    if (ret < 0) {
+      derr << "ERROR: failed to fetch master next positions (" << cpp_strerror(-ret) << ")" << dendl;
+    } else {
+      std::optional<std::pair<int, ceph::real_time>> oldest;
+
+      // const reference avoids copying every shard's entry list
+      for (const auto& iter : master_pos) {
+        const rgw_mdlog_shard_data& shard_data = iter.second;
+        if (shard_data.entries.empty()) {
+          continue;
+        }
+
+        const rgw_mdlog_entry& entry = shard_data.entries.front();
+        const bool has_time = !ceph::real_clock::is_zero(entry.timestamp);
+        // A zero timestamp may only act as a placeholder: any real timestamp
+        // replaces it, otherwise a zero seen first could never be displaced.
+        if (!oldest ||
+            (has_time && (ceph::real_clock::is_zero(oldest->second) ||
+                          entry.timestamp < oldest->second))) {
+          oldest.emplace(iter.first, entry.timestamp);
+        }
+      }
+
+      if (oldest) {
+        push_ss(ss, status) << "oldest incremental change not applied: "
+          << oldest->second << " [" << oldest->first << ']';
+      }
+    }
+  }
+
+  flush_ss(ss, status);
+}
+
+// Append a human-readable data sync report for one source zone to 'status',
+// one list entry per output line, each indented by 'tab' columns.
+//
+// Reports the overall state, how many shards are in full / incremental sync,
+// which shards are behind the source, the oldest source log entry not yet
+// applied, and which shards are recovering. Every early exit flushes its
+// message into 'status' so the reason is visible to the user.
+static void get_data_sync_status(const string& source_zone, list<string>& status, int tab)
+{
+  stringstream ss;
+
+  RGWZone *sz;
+
+  if (!store->svc.zone->find_zone_by_id(source_zone, &sz)) {
+    push_ss(ss, status, tab) << string("zone not found");
+    flush_ss(ss, status);
+    return;
+  }
+
+  if (!store->svc.zone->zone_syncs_from(store->svc.zone->get_zone(), *sz)) {
+    push_ss(ss, status, tab) << string("not syncing from zone");
+    flush_ss(ss, status);
+    return;
+  }
+  RGWDataSyncStatusManager sync(store, store->get_async_rados(), source_zone, nullptr);
+
+  int ret = sync.init();
+  if (ret < 0) {
+    push_ss(ss, status, tab) << string("failed to retrieve sync info: ") + cpp_strerror(-ret);
+    flush_ss(ss, status);
+    return;
+  }
+
+  rgw_data_sync_status sync_status;
+  ret = sync.read_sync_status(&sync_status);
+  if (ret < 0 && ret != -ENOENT) {
+    push_ss(ss, status, tab) << string("failed read sync status: ") + cpp_strerror(-ret);
+    flush_ss(ss, status);  // without this the message never reaches 'status'
+    return;
+  }
+
+  set<int> recovering_shards;
+  ret = sync.read_recovering_shards(sync_status.sync_info.num_shards, recovering_shards);
+  // error codes are negative, so the tolerated value is -ENOENT
+  if (ret < 0 && ret != -ENOENT) {
+    push_ss(ss, status, tab) << string("failed read recovering shards: ") + cpp_strerror(-ret);
+    flush_ss(ss, status);
+    return;
+  }
+
+  string status_str;
+  switch (sync_status.sync_info.state) {
+    case rgw_data_sync_info::StateInit:
+      status_str = "init";
+      break;
+    case rgw_data_sync_info::StateBuildingFullSyncMaps:
+      status_str = "preparing for full sync";
+      break;
+    case rgw_data_sync_info::StateSync:
+      status_str = "syncing";
+      break;
+    default:
+      status_str = "unknown";
+  }
+
+  push_ss(ss, status, tab) << status_str;
+
+  uint64_t full_total = 0;
+  uint64_t full_complete = 0;
+
+  int num_full = 0;
+  int num_inc = 0;
+  int total_shards = 0;
+  set<int> shards_behind_set;
+
+  // iterate by const reference: each marker carries strings we need not copy
+  for (const auto& marker_iter : sync_status.sync_markers) {
+    full_total += marker_iter.second.total_entries;
+    total_shards++;
+    if (marker_iter.second.state == rgw_data_sync_marker::SyncState::FullSync) {
+      num_full++;
+      full_complete += marker_iter.second.pos;
+      int shard_id = marker_iter.first;
+      shards_behind_set.insert(shard_id);
+    } else {
+      // shards past full sync count as fully complete
+      full_complete += marker_iter.second.total_entries;
+    }
+    if (marker_iter.second.state == rgw_data_sync_marker::SyncState::IncrementalSync) {
+      num_inc++;
+    }
+  }
+
+  push_ss(ss, status, tab) << "full sync: " << num_full << "/" << total_shards << " shards";
+
+  if (num_full > 0) {
+    push_ss(ss, status, tab) << "full sync: " << full_total - full_complete << " buckets to sync";
+  }
+
+  push_ss(ss, status, tab) << "incremental sync: " << num_inc << "/" << total_shards << " shards";
+
+  map<int, RGWDataChangesLogInfo> source_shards_info;
+
+  ret = sync.read_source_log_shards_info(&source_shards_info);
+  if (ret < 0) {
+    push_ss(ss, status, tab) << string("failed to fetch source sync status: ") + cpp_strerror(-ret);
+    flush_ss(ss, status);
+    return;
+  }
+
+  map<int, string> shards_behind;
+
+  for (const auto& local_iter : sync_status.sync_markers) {
+    int shard_id = local_iter.first;
+    auto iter = source_shards_info.find(shard_id);
+
+    if (iter == source_shards_info.end()) {
+      /* huh? */
+      derr << "ERROR: could not find remote sync shard status for shard_id=" << shard_id << dendl;
+      continue;
+    }
+    const auto& master_marker = iter->second.marker;
+    if (local_iter.second.state == rgw_data_sync_marker::SyncState::IncrementalSync &&
+        master_marker > local_iter.second.marker) {
+      shards_behind[shard_id] = local_iter.second.marker;
+      shards_behind_set.insert(shard_id);
+    }
+  }
+
+  // shards not yet in incremental sync are counted as behind as well
+  int total_behind = shards_behind.size() + (sync_status.sync_info.num_shards - num_inc);
+  int total_recovering = recovering_shards.size();
+  if (total_behind == 0 && total_recovering == 0) {
+    push_ss(ss, status, tab) << "data is caught up with source";
+  } else if (total_behind > 0) {
+    push_ss(ss, status, tab) << "data is behind on " << total_behind << " shards";
+
+    push_ss(ss, status, tab) << "behind shards: " << "[" << shards_behind_set << "]" ;
+
+    map<int, rgw_datalog_shard_data> master_pos;
+    ret = sync.read_source_log_shards_next(shards_behind, &master_pos);
+    if (ret < 0) {
+      derr << "ERROR: failed to fetch next positions (" << cpp_strerror(-ret) << ")" << dendl;
+    } else {
+      std::optional<std::pair<int, ceph::real_time>> oldest;
+
+      // const reference avoids copying every shard's entry list
+      for (const auto& iter : master_pos) {
+        const rgw_datalog_shard_data& shard_data = iter.second;
+        if (shard_data.entries.empty()) {
+          continue;
+        }
+
+        const rgw_datalog_entry& entry = shard_data.entries.front();
+        const bool has_time = !ceph::real_clock::is_zero(entry.timestamp);
+        // A zero timestamp may only act as a placeholder: any real timestamp
+        // replaces it, otherwise a zero seen first could never be displaced.
+        if (!oldest ||
+            (has_time && (ceph::real_clock::is_zero(oldest->second) ||
+                          entry.timestamp < oldest->second))) {
+          oldest.emplace(iter.first, entry.timestamp);
+        }
+      }
+
+      if (oldest) {
+        push_ss(ss, status, tab) << "oldest incremental change not applied: "
+          << oldest->second << " [" << oldest->first << ']';
+      }
+    }
+  }
+
+  if (total_recovering > 0) {
+    push_ss(ss, status, tab) << total_recovering << " shards are recovering";
+    push_ss(ss, status, tab) << "recovering shards: " << "[" << recovering_shards << "]";
+  }
+
+  flush_ss(ss, status);
+}
+
+// Print 'entries' to stdout, one per line, behind a column 'width' characters
+// wide. The column shows 'header' on the first line and is blank on the rest.
+static void tab_dump(const string& header, int width, const list<string>& entries)
+{
+  bool first_line = true;
+
+  for (const auto& line : entries) {
+    const string label = first_line ? header : string();
+    cout << std::setw(width) << label << std::setw(1) << " " << line << std::endl;
+    first_line = false;
+  }
+}
+
+
+// Print the zone-wide sync overview to stdout: the local realm, zonegroup and
+// zone, then the metadata sync report, then one data sync report per entry in
+// the zone connection map. 'formatter' is not used by this function.
+static void sync_status(Formatter *formatter)
+{
+  const RGWRealm& realm = store->svc.zone->get_realm();
+  const RGWZoneGroup& zonegroup = store->svc.zone->get_zonegroup();
+  const RGWZone& zone = store->svc.zone->get_zone();
+
+  int width = 15;
+
+  // "<label padded to width> <id> (<name>)"
+  const auto print_entity = [width](const char *label, const string& id, const string& name) {
+    cout << std::setw(width) << label << std::setw(1) << " " << id << " (" << name << ")" << std::endl;
+  };
+  print_entity("realm", realm.get_id(), realm.get_name());
+  print_entity("zonegroup", zonegroup.get_id(), zonegroup.get_name());
+  print_entity("zone", zone.id, zone.name);
+
+  list<string> md_status;
+  if (!store->svc.zone->is_meta_master()) {
+    get_md_sync_status(md_status);
+  } else {
+    md_status.push_back("no sync (zone is master)");
+  }
+  tab_dump("metadata sync", width, md_status);
+
+  list<string> data_status;
+  const string source_str = "source: ";
+
+  auto& zone_conn_map = store->svc.zone->get_zone_conn_map();
+  for (const auto& conn_entry : zone_conn_map) {
+    const string& source_id = conn_entry.first;
+
+    // headline for this source: "source: <id>" plus " (<name>)" when known
+    string headline = source_str + source_id;
+    RGWZone *sz;
+    if (store->svc.zone->find_zone_by_id(source_id, &sz)) {
+      headline += string(" (") + sz->name + ")";
+    }
+    data_status.push_back(headline);
+
+    // detail lines are indented to line up under the id
+    get_data_sync_status(source_id, data_status, source_str.size());
+  }
+
+  tab_dump("data sync", width, data_status);
+}
+
+// Stream manipulator for column-aligned output: streaming an 'indented'
+// writes its header at field width 'w', followed by a single space.
+struct indented {
+  int w; // indent width
+  std::string_view header;
+  indented(int width, std::string_view text = "")
+    : w(width), header(text)
+  {}
+};
+
+// Emits the header at width h.w, then resets the width and adds the
+// separating space.
+std::ostream& operator<<(std::ostream& out, const indented& h) {
+  out << std::setw(h.w) << h.header;
+  out << std::setw(1) << ' ';
+  return out;
+}
+
+// Ask the remote gateway behind 'conn' for the bucket index log info of
+// info.bucket (GET /admin/log/ with type=bucket-index) and decode the
+// returned max_marker into *markers.
+// Returns 0 on success or the negative error from the request or the decode.
+static int remote_bilog_markers(RGWRados *store, const RGWZone& source,
+                                RGWRESTConn *conn, const RGWBucketInfo& info,
+                                BucketIndexShardsManager *markers)
+{
+  const auto instance_key = info.bucket.get_key();
+  const rgw_http_param_pair params[] = {
+    { "type", "bucket-index" },
+    { "bucket-instance", instance_key.c_str() },
+    { "info", nullptr },
+    { nullptr, nullptr }
+  };
+
+  rgw_bucket_index_marker_info marker_info;
+  const int fetch_ret = conn->get_json_resource("/admin/log/", params, marker_info);
+  if (fetch_ret < 0) {
+    lderr(store->ctx()) << "failed to fetch remote log markers: " << cpp_strerror(fetch_ret) << dendl;
+    return fetch_ret;
+  }
+
+  const int decode_ret = markers->from_string(marker_info.max_marker, -1);
+  if (decode_ret < 0) {
+    lderr(store->ctx()) << "failed to decode remote log markers" << dendl;
+    return decode_ret;
+  }
+
+  return 0;
+}
+
+// Write the sync status of one bucket against one source zone to 'out'.
+//
+// Prints the source zone, then (if 'zone' syncs from it) the number of shards
+// in full and incremental sync, and finally which shards are behind the
+// source's bucket index log. A shard counts as behind when the source has a
+// non-empty marker for it and the local shard is either not in incremental
+// sync or sits at a different position.
+//
+// Returns 0 on success or the negative error from reading the local status
+// or the remote log markers.
+static int bucket_source_sync_status(RGWRados *store, const RGWZone& zone,
+                                     const RGWZone& source, RGWRESTConn *conn,
+                                     const RGWBucketInfo& bucket_info,
+                                     int width, std::ostream& out)
+{
+  out << indented{width, "source zone"} << source.id << " (" << source.name << ")\n";
+
+  // syncing from this zone?
+  if (!zone.syncs_from(source.name)) {
+    out << indented{width} << "not in sync_from\n";
+    return 0;
+  }
+  std::vector<rgw_bucket_shard_sync_info> status;
+  int r = rgw_bucket_sync_status(dpp(), store, source.id, bucket_info, &status);
+  if (r < 0) {
+    lderr(store->ctx()) << "failed to read bucket sync status: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  int num_full = 0;
+  int num_inc = 0;
+  uint64_t full_complete = 0;
+  const size_t total_shards = status.size();
+
+  using BucketSyncState = rgw_bucket_shard_sync_info::SyncState;
+  for (const auto& m : status) {
+    if (m.state == BucketSyncState::StateFullSync) {
+      num_full++;
+      full_complete += m.full_marker.count;
+    } else if (m.state == BucketSyncState::StateIncrementalSync) {
+      num_inc++;
+    }
+  }
+
+  out << indented{width} << "full sync: " << num_full << "/" << total_shards << " shards\n";
+  if (num_full > 0) {
+    out << indented{width} << "full sync: " << full_complete << " objects completed\n";
+  }
+  out << indented{width} << "incremental sync: " << num_inc << "/" << total_shards << " shards\n";
+
+  BucketIndexShardsManager remote_markers;
+  r = remote_bilog_markers(store, source, conn, bucket_info, &remote_markers);
+  if (r < 0) {
+    lderr(store->ctx()) << "failed to read remote log: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  std::set<int> shards_behind;
+  for (const auto& remote : remote_markers.get()) {
+    const auto shard_id = remote.first;
+    if (remote.second.empty()) {
+      continue; // empty bucket index shard
+    }
+    // The shard ids come from the remote side; never index 'status' with one
+    // we have no local entry for. NOTE(review): such a shard is reported as
+    // behind, since nothing local tracks it -- confirm this is the wanted
+    // presentation when shard layouts differ.
+    if (shard_id < 0 || static_cast<size_t>(shard_id) >= total_shards) {
+      shards_behind.insert(shard_id);
+      continue;
+    }
+    const auto& m = status[shard_id];
+    auto pos = BucketIndexShardsManager::get_shard_marker(m.inc_marker.position);
+    if (m.state != BucketSyncState::StateIncrementalSync || pos != remote.second) {
+      shards_behind.insert(shard_id);
+    }
+  }
+  if (!shards_behind.empty()) {
+    out << indented{width} << "bucket is behind on " << shards_behind.size() << " shards\n";
+    out << indented{width} << "behind shards: [" << shards_behind << "]\n" ;
+  } else if (!num_full) {
+    out << indented{width} << "bucket is caught up with source\n";
+  }
+  return 0;
+}
+
+// Write a per-bucket sync report to 'out': realm / zonegroup / zone / bucket
+// header, followed by the status against one source zone (when
+// source_zone_id is given) or against every zonegroup zone that has an entry
+// in the zone connection map.
+//
+// Returns -EINVAL when the requested source zone is unknown or has no
+// connection, the result of bucket_source_sync_status() for a single source,
+// and 0 otherwise (per-zone results are not propagated in the all-zones case).
+static int bucket_sync_status(RGWRados *store, const RGWBucketInfo& info,
+                              const std::string& source_zone_id,
+                              std::ostream& out)
+{
+  const RGWRealm& realm = store->svc.zone->get_realm();
+  const RGWZoneGroup& zonegroup = store->svc.zone->get_zonegroup();
+  const RGWZone& zone = store->svc.zone->get_zone();
+  constexpr int width = 15;
+
+  out << indented{width, "realm"} << realm.get_id() << " (" << realm.get_name() << ")\n";
+  out << indented{width, "zonegroup"} << zonegroup.get_id() << " (" << zonegroup.get_name() << ")\n";
+  out << indented{width, "zone"} << zone.id << " (" << zone.name << ")\n";
+  out << indented{width, "bucket"} << info.bucket << "\n\n";
+
+  if (!info.datasync_flag_enabled()) {
+    out << "Sync is disabled for bucket " << info.bucket.name << '\n';
+    return 0;
+  }
+
+  auto& zone_conn_map = store->svc.zone->get_zone_conn_map();
+
+  if (source_zone_id.empty()) {
+    // no source requested: report on every zone we have a connection to
+    for (const auto& zone_entry : zonegroup.zones) {
+      const auto conn = zone_conn_map.find(zone_entry.second.id);
+      if (conn == zone_conn_map.end()) {
+        continue;
+      }
+      bucket_source_sync_status(store, zone, zone_entry.second, conn->second,
+                                info, width, out);
+    }
+    return 0;
+  }
+
+  const auto source = zonegroup.zones.find(source_zone_id);
+  if (source == zonegroup.zones.end()) {
+    lderr(store->ctx()) << "Source zone not found in zonegroup "
+        << zonegroup.get_name() << dendl;
+    return -EINVAL;
+  }
+
+  const auto conn = zone_conn_map.find(source_zone_id);
+  if (conn == zone_conn_map.end()) {
+    lderr(store->ctx()) << "No connection to zone " << source->second.name << dendl;
+    return -EINVAL;
+  }
+
+  return bucket_source_sync_status(store, zone, source->second, conn->second,
+                                   info, width, out);
+}
+
+// Parse a comma-separated list of "key=value" settings into 'out'.
+//
+// Commas nested inside '{' ... '}' do not split, so a value may itself hold
+// a braced, comma-separated list. An item without '=' is stored as a key with
+// an empty value; everything after the first '=' is the value.
+static void parse_tier_config_param(const string& s, map<string, string, ltstr_nocase>& out)
+{
+  int level = 0;  // current '{' nesting depth
+  string cur_conf;
+  list<string> confs;
+  for (const char c : s) {
+    if (c == ',' && level == 0) {
+      // top-level separator: finish the current item
+      confs.push_back(cur_conf);
+      cur_conf.clear();
+      continue;
+    }
+    if (c == '{') {
+      ++level;
+    } else if (c == '}') {
+      --level;
+    }
+    cur_conf += c;
+  }
+  if (!cur_conf.empty()) {
+    confs.push_back(cur_conf);
+  }
+
+  for (const auto& conf : confs) {
+    // compare against npos directly rather than relying on it turning
+    // negative when narrowed to a signed type
+    const auto pos = conf.find('=');
+    if (pos == string::npos) {
+      out[conf] = "";
+    } else {
+      out[conf.substr(0, pos)] = conf.substr(pos + 1);
+    }
+  }
+}
+
+// Probe whether 'pool' supports omap by issuing omap_clear() on an object
+// name that is not expected to exist.
+// Returns -EOPNOTSUPP when the probe reports exactly that, and 0 in every
+// other case, including when the pool cannot be opened.
+static int check_pool_support_omap(const rgw_pool& pool)
+{
+  librados::IoCtx io_ctx;
+  const int open_ret = store->get_rados_handle()->ioctx_create(pool.to_str().c_str(), io_ctx);
+  if (open_ret < 0) {
+    // the pool may not exist at this moment, we have no way to check if it supports omap.
+    return 0;
+  }
+
+  const int probe_ret = io_ctx.omap_clear("__omap_test_not_exist_oid__");
+  io_ctx.close();
+
+  // only "operation not supported" is meaningful; other results are ignored
+  return (probe_ret == -EOPNOTSUPP) ? probe_ret : 0;
+}
+
+// Validate the arguments of a bucket reshard request and load the bucket.
+//
+// Checks, in order: a bucket name was given, --num-shards was given, the
+// shard count is not negative and does not exceed
+// store->get_max_bucket_shards(), the bucket can be initialized, it is not
+// already being resharded, and (unless yes_i_really_mean_it is set) the new
+// shard count is larger than the current one.
+//
+// On success returns 0 with 'bucket', 'bucket_info' and 'attrs' filled in by
+// init_bucket(). Returns -EINVAL for bad arguments, -EBUSY when a reshard is
+// in progress, or init_bucket()'s negative error.
+int check_reshard_bucket_params(RGWRados *store,
+                                const string& bucket_name,
+                                const string& tenant,
+                                const string& bucket_id,
+                                bool num_shards_specified,
+                                int num_shards,
+                                int yes_i_really_mean_it,
+                                rgw_bucket& bucket,
+                                RGWBucketInfo& bucket_info,
+                                map<string, bufferlist>& attrs)
+{
+  if (bucket_name.empty()) {
+    cerr << "ERROR: bucket not specified" << std::endl;
+    return -EINVAL;
+  }
+
+  if (!num_shards_specified) {
+    cerr << "ERROR: --num-shards not specified" << std::endl;
+    return -EINVAL;
+  }
+
+  // a negative count would otherwise slip through when
+  // --yes-i-really-mean-it bypasses the "less or equal" check below
+  if (num_shards < 0) {
+    cerr << "ERROR: num_shards must be non-negative integer" << std::endl;
+    return -EINVAL;
+  }
+
+  if (num_shards > (int)store->get_max_bucket_shards()) {
+    cerr << "ERROR: num_shards too high, max value: " << store->get_max_bucket_shards() << std::endl;
+    return -EINVAL;
+  }
+
+  int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket, &attrs);
+  if (ret < 0) {
+    cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+
+  if (bucket_info.reshard_status != CLS_RGW_RESHARD_NOT_RESHARDING) {
+    // if in_progress or done then we have an old BucketInfo
+    cerr << "ERROR: the bucket is currently undergoing resharding and "
+      "cannot be added to the reshard list at this time" << std::endl;
+    return -EBUSY;
+  }
+
+  // a stored shard count of 0 is treated as a single shard
+  int num_source_shards = (bucket_info.num_shards > 0 ? bucket_info.num_shards : 1);
+
+  if (num_shards <= num_source_shards && !yes_i_really_mean_it) {
+    cerr << "num shards is less or equal to current shards count" << std::endl
+         << "do you really mean it? (requires --yes-i-really-mean-it)" << std::endl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+// Turn 'new_bucket_info' into a fresh bucket instance with 'new_num_shards'
+// index shards: assign a new bucket id, clear the oid and version tracker,
+// initialize the bucket index, and store the instance info with 'attrs'.
+//
+// Returns 0 on success or the negative error code of the failing store call.
+// NOTE(review): 'bucket_info' is not read here; the caller presumably seeds
+// 'new_bucket_info' from it beforehand -- confirm against the call sites.
+int create_new_bucket_instance(RGWRados *store,
+                               int new_num_shards,
+                               const RGWBucketInfo& bucket_info,
+                               map<string, bufferlist>& attrs,
+                               RGWBucketInfo& new_bucket_info)
+{
+
+  store->create_bucket_id(&new_bucket_info.bucket.bucket_id);
+  new_bucket_info.bucket.oid.clear();
+
+  new_bucket_info.num_shards = new_num_shards;
+  new_bucket_info.objv_tracker.clear();
+
+  int ret = store->init_bucket_index(new_bucket_info, new_bucket_info.num_shards);
+  if (ret < 0) {
+    cerr << "ERROR: failed to init new bucket indexes: " << cpp_strerror(-ret) << std::endl;
+    // keep the error negative so callers testing "ret < 0" see the failure
+    return ret;
+  }
+
+  ret = store->put_bucket_instance_info(new_bucket_info, true, real_time(), &attrs);
+  if (ret < 0) {
+    cerr << "ERROR: failed to store new bucket instance info: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+
+  return 0;
+}
+
+// Search for the clock offset at which two consecutive TOTP pins validate.
+//
+// Starting at offset 0, the candidate offset grows by one step per iteration
+// and alternates sign, until its magnitude reaches MAX_TOTP_SKEW_HOURS.
+// pins[0] is validated at the candidate offset and pins[1] one step later;
+// both must validate for a match.
+//
+// On a match, *pofs receives the matched offset adjusted by the token's
+// window and 0 is returned; otherwise -ENOENT. 'pins' must hold exactly two
+// entries. A step size of 0 falls back to OATH_TOTP_DEFAULT_TIME_STEP_SIZE.
+static int scan_totp(CephContext *cct, ceph::real_time& now, rados::cls::otp::otp_info_t& totp, vector<string>& pins,
+                     time_t *pofs)
+{
+#define MAX_TOTP_SKEW_HOURS (24 * 7)
+  ceph_assert(pins.size() == 2);
+
+  time_t start_time = ceph::real_clock::to_time_t(now);
+  time_t time_ofs = 0, time_ofs_abs = 0;
+  time_t step_size = totp.step_size;
+  if (step_size == 0) {
+    step_size = OATH_TOTP_DEFAULT_TIME_STEP_SIZE;
+  }
+  uint32_t count = 0;
+  int sign = 1;
+
+  uint32_t max_skew = MAX_TOTP_SKEW_HOURS * 3600;
+
+  while (time_ofs_abs < max_skew) {
+    // oath_totp_validate2() returns a non-negative window position on a
+    // match and a negative code otherwise (OATH_INVALID_OTP or a real
+    // error), so only rc >= 0 may be treated as success.
+    int rc = oath_totp_validate2(totp.seed_bin.c_str(), totp.seed_bin.length(),
+                             start_time,
+                             step_size,
+                             time_ofs,
+                             1,
+                             nullptr,
+                             pins[0].c_str());
+    if (rc >= 0) {
+      rc = oath_totp_validate2(totp.seed_bin.c_str(), totp.seed_bin.length(),
+                               start_time,
+                               step_size,
+                               time_ofs - step_size, /* smaller time_ofs moves time forward */
+                               1,
+                               nullptr,
+                               pins[1].c_str());
+      if (rc >= 0) {
+        *pofs = time_ofs - step_size + step_size * totp.window / 2;
+        ldout(cct, 20) << "found at time=" << start_time - time_ofs << " time_ofs=" << time_ofs << dendl;
+        return 0;
+      }
+    }
+    // next candidate: one step further out, on the other side of zero
+    sign = -sign;
+    time_ofs_abs = (++count) * step_size;
+    time_ofs = sign * time_ofs_abs;
+  }
+
+  return -ENOENT;
+}
+
+static int trim_sync_error_log(int shard_id, const ceph::real_time& start_time,
+ const ceph::real_time& end_time,
+ const string& start_marker, const string& end_marker,
+ int delay_ms)
+{
+ auto oid = RGWSyncErrorLogger::get_shard_oid(RGW_SYNC_ERROR_LOG_SHARD_PREFIX,
+ shard_id);
+ // call cls_log_trim() until it returns -ENODATA
+ for (;;) {
+ int ret = store->time_log_trim(oid, start_time, end_time,
+ start_marker, end_marker);
+ if (ret == -ENODATA) {
+ return 0;
+ }
+ if (ret < 0) {
+ return ret;
+ }
+ if (delay_ms) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms));
+ }
+ }
+ // unreachable
+}
+
+// Return the tier_type recorded on the zone that 'store' reports via
+// svc.zone->get_zone().
+const string& get_tier_type(RGWRados *store) {
+  const auto& local_zone = store->svc.zone->get_zone();
+  return local_zone.tier_type;
+}
+
+int main(int argc, const char **argv)
+{
+ vector<const char*> args;
+ argv_to_vec(argc, (const char **)argv, args);
+ if (args.empty()) {
+ cerr << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+ if (ceph_argparse_need_usage(args)) {
+ usage();
+ exit(0);
+ }
+
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY, 0);
+
+ // for region -> zonegroup conversion (must happen before common_init_finish())
+ if (!g_conf()->rgw_region.empty() && g_conf()->rgw_zonegroup.empty()) {
+ g_conf().set_val_or_die("rgw_zonegroup", g_conf()->rgw_region.c_str());
+ }
+
+ common_init_finish(g_ceph_context);
+
+ rgw_user user_id;
+ string tenant;
+ std::string access_key, secret_key, user_email, display_name;
+ std::string bucket_name, pool_name, object;
+ rgw_pool pool;
+ std::string date, subuser, access, format;
+ std::string start_date, end_date;
+ std::string key_type_str;
+ std::string period_id, period_epoch, remote, url;
+ std::string master_zone;
+ std::string realm_name, realm_id, realm_new_name;
+ std::string zone_name, zone_id, zone_new_name;
+ std::string zonegroup_name, zonegroup_id, zonegroup_new_name;
+ std::string api_name;
+ std::string role_name, path, assume_role_doc, policy_name, perm_policy_doc, path_prefix;
+ std::string redirect_zone;
+ bool redirect_zone_set = false;
+ list<string> endpoints;
+ int tmp_int;
+ int sync_from_all_specified = false;
+ bool sync_from_all = false;
+ list<string> sync_from;
+ list<string> sync_from_rm;
+ int is_master_int;
+ int set_default = 0;
+ bool is_master = false;
+ bool is_master_set = false;
+ int read_only_int;
+ bool read_only = false;
+ int is_read_only_set = false;
+ int commit = false;
+ int staging = false;
+ int key_type = KEY_TYPE_UNDEFINED;
+ rgw_bucket bucket;
+ uint32_t perm_mask = 0;
+ RGWUserInfo info;
+ int opt_cmd = OPT_NO_CMD;
+ bool need_more;
+ int gen_access_key = 0;
+ int gen_secret_key = 0;
+ bool set_perm = false;
+ bool set_temp_url_key = false;
+ map<int, string> temp_url_keys;
+ string bucket_id;
+ Formatter *formatter = NULL;
+ int purge_data = false;
+ int pretty_format = false;
+ int show_log_entries = true;
+ int show_log_sum = true;
+ int skip_zero_entries = false; // log show
+ int purge_keys = false;
+ int yes_i_really_mean_it = false;
+ int delete_child_objects = false;
+ int fix = false;
+ int remove_bad = false;
+ int check_head_obj_locator = false;
+ int max_buckets = -1;
+ bool max_buckets_specified = false;
+ map<string, bool> categories;
+ string caps;
+ int check_objects = false;
+ RGWUserAdminOpState user_op;
+ RGWBucketAdminOpState bucket_op;
+ string infile;
+ string metadata_key;
+ RGWObjVersionTracker objv_tracker;
+ string marker;
+ string start_marker;
+ string end_marker;
+ int max_entries = -1;
+ bool max_entries_specified = false;
+ int admin = false;
+ bool admin_specified = false;
+ int system = false;
+ bool system_specified = false;
+ int shard_id = -1;
+ bool specified_shard_id = false;
+ string client_id;
+ string op_id;
+ string op_mask_str;
+ string quota_scope;
+ string object_version;
+ string placement_id;
+ string storage_class;
+ list<string> tags;
+ list<string> tags_add;
+ list<string> tags_rm;
+
+ int64_t max_objects = -1;
+ int64_t max_size = -1;
+ bool have_max_objects = false;
+ bool have_max_size = false;
+ int include_all = false;
+ int allow_unordered = false;
+
+ int sync_stats = false;
+ int reset_stats = false;
+ int bypass_gc = false;
+ int warnings_only = false;
+ int inconsistent_index = false;
+
+ int verbose = false;
+
+ int extra_info = false;
+
+ uint64_t min_rewrite_size = 4 * 1024 * 1024;
+ uint64_t max_rewrite_size = ULLONG_MAX;
+ uint64_t min_rewrite_stripe_size = 0;
+
+ BIIndexType bi_index_type = BIIndexType::Plain;
+
+ string job_id;
+ int num_shards = 0;
+ bool num_shards_specified = false;
+ int max_concurrent_ios = 32;
+ uint64_t orphan_stale_secs = (24 * 3600);
+ int detail = false;
+
+ std::string val;
+ std::ostringstream errs;
+ string err;
+
+ string source_zone_name;
+ string source_zone; /* zone id */
+
+ string tier_type;
+ bool tier_type_specified = false;
+
+ map<string, string, ltstr_nocase> tier_config_add;
+ map<string, string, ltstr_nocase> tier_config_rm;
+
+ boost::optional<string> index_pool;
+ boost::optional<string> data_pool;
+ boost::optional<string> data_extra_pool;
+ RGWBucketIndexType placement_index_type = RGWBIType_Normal;
+ bool index_type_specified = false;
+
+ boost::optional<std::string> compression_type;
+
+ string totp_serial;
+ string totp_seed;
+ string totp_seed_type = "hex";
+ vector<string> totp_pin;
+ int totp_seconds = 0;
+ int totp_window = 0;
+ int trim_delay_ms = 0;
+
+ string topic_name;
+ string sub_name;
+ string sub_oid_prefix;
+ string sub_dest_bucket;
+ string sub_push_endpoint;
+ string event_id;
+ rgw::notify::EventTypeList event_types;
+
+ for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
+ if (ceph_argparse_double_dash(args, i)) {
+ break;
+ } else if (ceph_argparse_witharg(args, i, &val, "-i", "--uid", (char*)NULL)) {
+ user_id.from_str(val);
+ if (user_id.empty()) {
+ cerr << "no value for uid" << std::endl;
+ exit(1);
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--tenant", (char*)NULL)) {
+ tenant = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--access-key", (char*)NULL)) {
+ access_key = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--subuser", (char*)NULL)) {
+ subuser = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--secret", "--secret-key", (char*)NULL)) {
+ secret_key = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "-e", "--email", (char*)NULL)) {
+ user_email = val;
+ user_op.user_email_specified=true;
+ } else if (ceph_argparse_witharg(args, i, &val, "-n", "--display-name", (char*)NULL)) {
+ display_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "-b", "--bucket", (char*)NULL)) {
+ bucket_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "-p", "--pool", (char*)NULL)) {
+ pool_name = val;
+ pool = rgw_pool(pool_name);
+ } else if (ceph_argparse_witharg(args, i, &val, "-o", "--object", (char*)NULL)) {
+ object = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--object-version", (char*)NULL)) {
+ object_version = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--client-id", (char*)NULL)) {
+ client_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--op-id", (char*)NULL)) {
+ op_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--op-mask", (char*)NULL)) {
+ op_mask_str = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--key-type", (char*)NULL)) {
+ key_type_str = val;
+ if (key_type_str.compare("swift") == 0) {
+ key_type = KEY_TYPE_SWIFT;
+ } else if (key_type_str.compare("s3") == 0) {
+ key_type = KEY_TYPE_S3;
+ } else {
+ cerr << "bad key type: " << key_type_str << std::endl;
+ exit(1);
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--job-id", (char*)NULL)) {
+ job_id = val;
+ } else if (ceph_argparse_binary_flag(args, i, &gen_access_key, NULL, "--gen-access-key", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &gen_secret_key, NULL, "--gen-secret", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &show_log_entries, NULL, "--show-log-entries", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &show_log_sum, NULL, "--show-log-sum", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &skip_zero_entries, NULL, "--skip-zero-entries", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &admin, NULL, "--admin", (char*)NULL)) {
+ admin_specified = true;
+ } else if (ceph_argparse_binary_flag(args, i, &system, NULL, "--system", (char*)NULL)) {
+ system_specified = true;
+ } else if (ceph_argparse_binary_flag(args, i, &verbose, NULL, "--verbose", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &staging, NULL, "--staging", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &commit, NULL, "--commit", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_witharg(args, i, &val, "--min-rewrite-size", (char*)NULL)) {
+ min_rewrite_size = (uint64_t)atoll(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-rewrite-size", (char*)NULL)) {
+ max_rewrite_size = (uint64_t)atoll(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--min-rewrite-stripe-size", (char*)NULL)) {
+ min_rewrite_stripe_size = (uint64_t)atoll(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-buckets", (char*)NULL)) {
+ max_buckets = (int)strict_strtol(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse max buckets: " << err << std::endl;
+ return EINVAL;
+ }
+ max_buckets_specified = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-entries", (char*)NULL)) {
+ max_entries = (int)strict_strtol(val.c_str(), 10, &err);
+ max_entries_specified = true;
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse max entries: " << err << std::endl;
+ return EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-size", (char*)NULL)) {
+ max_size = strict_iec_cast<long long>(val.c_str(), &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse max size: " << err << std::endl;
+ return EINVAL;
+ }
+ have_max_size = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) {
+ max_objects = (int64_t)strict_strtoll(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse max objects: " << err << std::endl;
+ return EINVAL;
+ }
+ have_max_objects = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--date", "--time", (char*)NULL)) {
+ date = val;
+ if (end_date.empty())
+ end_date = date;
+ } else if (ceph_argparse_witharg(args, i, &val, "--start-date", "--start-time", (char*)NULL)) {
+ start_date = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--end-date", "--end-time", (char*)NULL)) {
+ end_date = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--num-shards", (char*)NULL)) {
+ num_shards = (int)strict_strtol(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse num shards: " << err << std::endl;
+ return EINVAL;
+ }
+ num_shards_specified = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-concurrent-ios", (char*)NULL)) {
+ max_concurrent_ios = (int)strict_strtol(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse max concurrent ios: " << err << std::endl;
+ return EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--orphan-stale-secs", (char*)NULL)) {
+ orphan_stale_secs = (uint64_t)strict_strtoll(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse orphan stale secs: " << err << std::endl;
+ return EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--shard-id", (char*)NULL)) {
+ shard_id = (int)strict_strtol(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse shard id: " << err << std::endl;
+ return EINVAL;
+ }
+ specified_shard_id = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--access", (char*)NULL)) {
+ access = val;
+ perm_mask = rgw_str_to_perm(access.c_str());
+ set_perm = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--temp-url-key", (char*)NULL)) {
+ temp_url_keys[0] = val;
+ set_temp_url_key = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--temp-url-key2", "--temp-url-key-2", (char*)NULL)) {
+ temp_url_keys[1] = val;
+ set_temp_url_key = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--bucket-id", (char*)NULL)) {
+ bucket_id = val;
+ if (bucket_id.empty()) {
+ cerr << "no value for bucket-id" << std::endl;
+ exit(1);
+ }
+ } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) {
+ format = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--categories", (char*)NULL)) {
+ string cat_str = val;
+ list<string> cat_list;
+ list<string>::iterator iter;
+ get_str_list(cat_str, cat_list);
+ for (iter = cat_list.begin(); iter != cat_list.end(); ++iter) {
+ categories[*iter] = true;
+ }
+ } else if (ceph_argparse_binary_flag(args, i, &delete_child_objects, NULL, "--purge-objects", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &pretty_format, NULL, "--pretty-format", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &purge_data, NULL, "--purge-data", (char*)NULL)) {
+ delete_child_objects = purge_data;
+ } else if (ceph_argparse_binary_flag(args, i, &purge_keys, NULL, "--purge-keys", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &yes_i_really_mean_it, NULL, "--yes-i-really-mean-it", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &fix, NULL, "--fix", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &remove_bad, NULL, "--remove-bad", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &check_head_obj_locator, NULL, "--check-head-obj-locator", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &check_objects, NULL, "--check-objects", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &sync_stats, NULL, "--sync-stats", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &reset_stats, NULL, "--reset-stats", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &include_all, NULL, "--include-all", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &allow_unordered, NULL, "--allow-unordered", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &extra_info, NULL, "--extra-info", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &bypass_gc, NULL, "--bypass-gc", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &warnings_only, NULL, "--warnings-only", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_binary_flag(args, i, &inconsistent_index, NULL, "--inconsistent-index", (char*)NULL)) {
+ // do nothing
+ } else if (ceph_argparse_witharg(args, i, &val, "--caps", (char*)NULL)) {
+ caps = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "-i", "--infile", (char*)NULL)) {
+ infile = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--metadata-key", (char*)NULL)) {
+ metadata_key = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--marker", (char*)NULL)) {
+ marker = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--start-marker", (char*)NULL)) {
+ start_marker = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--end-marker", (char*)NULL)) {
+ end_marker = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--quota-scope", (char*)NULL)) {
+ quota_scope = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--index-type", (char*)NULL)) {
+ string index_type_str = val;
+ bi_index_type = get_bi_index_type(index_type_str);
+ if (bi_index_type == BIIndexType::Invalid) {
+ cerr << "ERROR: invalid bucket index entry type" << std::endl;
+ return EINVAL;
+ }
+ } else if (ceph_argparse_binary_flag(args, i, &is_master_int, NULL, "--master", (char*)NULL)) {
+ is_master = (bool)is_master_int;
+ is_master_set = true;
+ } else if (ceph_argparse_binary_flag(args, i, &set_default, NULL, "--default", (char*)NULL)) {
+ /* do nothing */
+ } else if (ceph_argparse_witharg(args, i, &val, "--redirect-zone", (char*)NULL)) {
+ redirect_zone = val;
+ redirect_zone_set = true;
+ } else if (ceph_argparse_binary_flag(args, i, &read_only_int, NULL, "--read-only", (char*)NULL)) {
+ read_only = (bool)read_only_int;
+ is_read_only_set = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--master-zone", (char*)NULL)) {
+ master_zone = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--period", (char*)NULL)) {
+ period_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--epoch", (char*)NULL)) {
+ period_epoch = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--remote", (char*)NULL)) {
+ remote = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--url", (char*)NULL)) {
+ url = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--realm-id", (char*)NULL)) {
+ realm_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--realm-new-name", (char*)NULL)) {
+ realm_new_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--zonegroup-id", (char*)NULL)) {
+ zonegroup_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--zonegroup-new-name", (char*)NULL)) {
+ zonegroup_new_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--placement-id", (char*)NULL)) {
+ placement_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--storage-class", (char*)NULL)) {
+ storage_class = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--tags", (char*)NULL)) {
+ get_str_list(val, tags);
+ } else if (ceph_argparse_witharg(args, i, &val, "--tags-add", (char*)NULL)) {
+ get_str_list(val, tags_add);
+ } else if (ceph_argparse_witharg(args, i, &val, "--tags-rm", (char*)NULL)) {
+ get_str_list(val, tags_rm);
+ } else if (ceph_argparse_witharg(args, i, &val, "--api-name", (char*)NULL)) {
+ api_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--zone-id", (char*)NULL)) {
+ zone_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--zone-new-name", (char*)NULL)) {
+ zone_new_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--endpoints", (char*)NULL)) {
+ get_str_list(val, endpoints);
+ } else if (ceph_argparse_witharg(args, i, &val, "--sync-from", (char*)NULL)) {
+ get_str_list(val, sync_from);
+ } else if (ceph_argparse_witharg(args, i, &val, "--sync-from-rm", (char*)NULL)) {
+ get_str_list(val, sync_from_rm);
+ } else if (ceph_argparse_binary_flag(args, i, &tmp_int, NULL, "--sync-from-all", (char*)NULL)) {
+ sync_from_all = (bool)tmp_int;
+ sync_from_all_specified = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--source-zone", (char*)NULL)) {
+ source_zone_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--tier-type", (char*)NULL)) {
+ tier_type = val;
+ tier_type_specified = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--tier-config", (char*)NULL)) {
+ parse_tier_config_param(val, tier_config_add);
+ } else if (ceph_argparse_witharg(args, i, &val, "--tier-config-rm", (char*)NULL)) {
+ parse_tier_config_param(val, tier_config_rm);
+ } else if (ceph_argparse_witharg(args, i, &val, "--index-pool", (char*)NULL)) {
+ index_pool = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--data-pool", (char*)NULL)) {
+ data_pool = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--data-extra-pool", (char*)NULL)) {
+ data_extra_pool = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--placement-index-type", (char*)NULL)) {
+ if (val == "normal") {
+ placement_index_type = RGWBIType_Normal;
+ } else if (val == "indexless") {
+ placement_index_type = RGWBIType_Indexless;
+ } else {
+ placement_index_type = (RGWBucketIndexType)strict_strtol(val.c_str(), 10, &err);
+ if (!err.empty()) {
+ cerr << "ERROR: failed to parse index type index: " << err << std::endl;
+ return EINVAL;
+ }
+ }
+ index_type_specified = true;
+ } else if (ceph_argparse_witharg(args, i, &val, "--compression", (char*)NULL)) {
+ compression_type = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--role-name", (char*)NULL)) {
+ role_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--path", (char*)NULL)) {
+ path = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--assume-role-policy-doc", (char*)NULL)) {
+ assume_role_doc = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--policy-name", (char*)NULL)) {
+ policy_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--policy-doc", (char*)NULL)) {
+ perm_policy_doc = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--path-prefix", (char*)NULL)) {
+ path_prefix = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-serial", (char*)NULL)) {
+ totp_serial = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-pin", (char*)NULL)) {
+ totp_pin.push_back(val);
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-seed", (char*)NULL)) {
+ totp_seed = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-seed-type", (char*)NULL)) {
+ totp_seed_type = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-seconds", (char*)NULL)) {
+ totp_seconds = atoi(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--totp-window", (char*)NULL)) {
+ totp_window = atoi(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--trim-delay-ms", (char*)NULL)) {
+ trim_delay_ms = atoi(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--topic", (char*)NULL)) {
+ topic_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--sub-name", (char*)NULL)) {
+ sub_name = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--sub-oid-prefix", (char*)NULL)) {
+ sub_oid_prefix = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--sub-dest-bucket", (char*)NULL)) {
+ sub_dest_bucket = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--sub-push-endpoint", (char*)NULL)) {
+ sub_push_endpoint = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--event-id", (char*)NULL)) {
+ event_id = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--event-type", "--event-types", (char*)NULL)) {
+ rgw::notify::from_string_list(val, event_types);
+ } else if (ceph_argparse_binary_flag(args, i, &detail, NULL, "--detail", (char*)NULL)) {
+ // do nothing
+ } else if (strncmp(*i, "-", 1) == 0) {
+ cerr << "ERROR: invalid flag " << *i << std::endl;
+ return EINVAL;
+ } else {
+ ++i;
+ }
+ }
+
+ if (args.empty()) {
+ usage();
+ exit(1);
+ }
+ else {
+ const char *prev_cmd = NULL;
+ const char *prev_prev_cmd = NULL;
+ std::vector<const char*>::iterator i ;
+ for (i = args.begin(); i != args.end(); ++i) {
+ opt_cmd = get_cmd(*i, prev_cmd, prev_prev_cmd, &need_more);
+ if (opt_cmd < 0) {
+ cerr << "unrecognized arg " << *i << std::endl;
+ exit(1);
+ }
+ if (!need_more) {
+ ++i;
+ break;
+ }
+ prev_prev_cmd = prev_cmd;
+ prev_cmd = *i;
+ }
+
+ if (opt_cmd == OPT_NO_CMD) {
+ cerr << "no command" << std::endl;
+ exit(1);
+ }
+
+ /* some commands may have an optional extra param */
+ if (i != args.end()) {
+ switch (opt_cmd) {
+ case OPT_METADATA_GET:
+ case OPT_METADATA_PUT:
+ case OPT_METADATA_RM:
+ case OPT_METADATA_LIST:
+ metadata_key = *i;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (tenant.empty()) {
+ tenant = user_id.tenant;
+ } else {
+ if (user_id.empty() && opt_cmd != OPT_ROLE_CREATE
+ && opt_cmd != OPT_ROLE_DELETE
+ && opt_cmd != OPT_ROLE_GET
+ && opt_cmd != OPT_ROLE_MODIFY
+ && opt_cmd != OPT_ROLE_LIST
+ && opt_cmd != OPT_ROLE_POLICY_PUT
+ && opt_cmd != OPT_ROLE_POLICY_LIST
+ && opt_cmd != OPT_ROLE_POLICY_GET
+ && opt_cmd != OPT_ROLE_POLICY_DELETE
+ && opt_cmd != OPT_RESHARD_ADD
+ && opt_cmd != OPT_RESHARD_CANCEL
+ && opt_cmd != OPT_RESHARD_STATUS) {
+ cerr << "ERROR: --tenant is set, but there's no user ID" << std::endl;
+ return EINVAL;
+ }
+ user_id.tenant = tenant;
+ }
+ /* check key parameter conflict */
+ if ((!access_key.empty()) && gen_access_key) {
+ cerr << "ERROR: key parameter conflict, --access-key & --gen-access-key" << std::endl;
+ return EINVAL;
+ }
+ if ((!secret_key.empty()) && gen_secret_key) {
+ cerr << "ERROR: key parameter conflict, --secret & --gen-secret" << std::endl;
+ return EINVAL;
+ }
+ }
+
+ // default to pretty json
+ if (format.empty()) {
+ format = "json";
+ pretty_format = true;
+ }
+
+ if (format == "xml")
+ formatter = new XMLFormatter(pretty_format);
+ else if (format == "json")
+ formatter = new JSONFormatter(pretty_format);
+ else {
+ cerr << "unrecognized format: " << format << std::endl;
+ exit(1);
+ }
+
+ realm_name = g_conf()->rgw_realm;
+ zone_name = g_conf()->rgw_zone;
+ zonegroup_name = g_conf()->rgw_zonegroup;
+
+ RGWStreamFlusher f(formatter, cout);
+
+ // not a raw op if 'period update' needs to commit to master
+ bool raw_period_update = opt_cmd == OPT_PERIOD_UPDATE && !commit;
+ // not a raw op if 'period pull' needs to read zone/period configuration
+ bool raw_period_pull = opt_cmd == OPT_PERIOD_PULL && !url.empty();
+
+ std::set<int> raw_storage_ops_list = {OPT_ZONEGROUP_ADD, OPT_ZONEGROUP_CREATE, OPT_ZONEGROUP_DELETE,
+ OPT_ZONEGROUP_GET, OPT_ZONEGROUP_LIST,
+ OPT_ZONEGROUP_SET, OPT_ZONEGROUP_DEFAULT,
+ OPT_ZONEGROUP_RENAME, OPT_ZONEGROUP_MODIFY,
+ OPT_ZONEGROUP_REMOVE,
+ OPT_ZONEGROUP_PLACEMENT_ADD, OPT_ZONEGROUP_PLACEMENT_RM,
+ OPT_ZONEGROUP_PLACEMENT_MODIFY, OPT_ZONEGROUP_PLACEMENT_LIST,
+ OPT_ZONEGROUP_PLACEMENT_GET,
+ OPT_ZONEGROUP_PLACEMENT_DEFAULT,
+ OPT_ZONE_CREATE, OPT_ZONE_DELETE,
+ OPT_ZONE_GET, OPT_ZONE_SET, OPT_ZONE_RENAME,
+ OPT_ZONE_LIST, OPT_ZONE_MODIFY, OPT_ZONE_DEFAULT,
+ OPT_ZONE_PLACEMENT_ADD, OPT_ZONE_PLACEMENT_RM,
+ OPT_ZONE_PLACEMENT_MODIFY, OPT_ZONE_PLACEMENT_LIST,
+ OPT_ZONE_PLACEMENT_GET,
+ OPT_REALM_CREATE,
+ OPT_PERIOD_DELETE, OPT_PERIOD_GET,
+ OPT_PERIOD_GET_CURRENT, OPT_PERIOD_LIST,
+ OPT_GLOBAL_QUOTA_GET, OPT_GLOBAL_QUOTA_SET,
+ OPT_GLOBAL_QUOTA_ENABLE, OPT_GLOBAL_QUOTA_DISABLE,
+ OPT_REALM_DELETE, OPT_REALM_GET, OPT_REALM_LIST,
+ OPT_REALM_LIST_PERIODS,
+ OPT_REALM_GET_DEFAULT,
+ OPT_REALM_RENAME, OPT_REALM_SET,
+ OPT_REALM_DEFAULT, OPT_REALM_PULL};
+
+ std::set<int> readonly_ops_list = {
+ OPT_USER_INFO,
+ OPT_USER_STATS,
+ OPT_BUCKETS_LIST,
+ OPT_BUCKET_LIMIT_CHECK,
+ OPT_BUCKET_STATS,
+ OPT_BUCKET_SYNC_STATUS,
+ OPT_BUCKET_SYNC_MARKERS,
+ OPT_LOG_LIST,
+ OPT_LOG_SHOW,
+ OPT_USAGE_SHOW,
+ OPT_OBJECT_STAT,
+ OPT_BI_GET,
+ OPT_BI_LIST,
+ OPT_OLH_GET,
+ OPT_OLH_READLOG,
+ OPT_GC_LIST,
+ OPT_LC_LIST,
+ OPT_ORPHANS_LIST_JOBS,
+ OPT_ZONEGROUP_GET,
+ OPT_ZONEGROUP_LIST,
+ OPT_ZONEGROUP_PLACEMENT_LIST,
+ OPT_ZONEGROUP_PLACEMENT_GET,
+ OPT_ZONE_GET,
+ OPT_ZONE_LIST,
+ OPT_ZONE_PLACEMENT_LIST,
+ OPT_ZONE_PLACEMENT_GET,
+ OPT_METADATA_GET,
+ OPT_METADATA_LIST,
+ OPT_METADATA_SYNC_STATUS,
+ OPT_MDLOG_LIST,
+ OPT_MDLOG_STATUS,
+ OPT_SYNC_ERROR_LIST,
+ OPT_BILOG_LIST,
+ OPT_BILOG_STATUS,
+ OPT_DATA_SYNC_STATUS,
+ OPT_DATALOG_LIST,
+ OPT_DATALOG_STATUS,
+ OPT_REALM_GET,
+ OPT_REALM_GET_DEFAULT,
+ OPT_REALM_LIST,
+ OPT_REALM_LIST_PERIODS,
+ OPT_PERIOD_GET,
+ OPT_PERIOD_GET_CURRENT,
+ OPT_PERIOD_LIST,
+ OPT_GLOBAL_QUOTA_GET,
+ OPT_SYNC_STATUS,
+ OPT_ROLE_GET,
+ OPT_ROLE_LIST,
+ OPT_ROLE_POLICY_LIST,
+ OPT_ROLE_POLICY_GET,
+ OPT_RESHARD_LIST,
+ OPT_RESHARD_STATUS,
+ };
+
+ bool raw_storage_op = (raw_storage_ops_list.find(opt_cmd) != raw_storage_ops_list.end() ||
+ raw_period_update || raw_period_pull);
+ bool need_cache = readonly_ops_list.find(opt_cmd) == readonly_ops_list.end();
+
+ if (raw_storage_op) {
+ store = RGWStoreManager::get_raw_storage(g_ceph_context);
+ } else {
+ store = RGWStoreManager::get_storage(g_ceph_context, false, false, false, false, false,
+ need_cache && g_conf()->rgw_cache_enabled);
+ }
+ if (!store) {
+ cerr << "couldn't init storage provider" << std::endl;
+ return 5; //EIO
+ }
+
+ if (!source_zone_name.empty()) {
+ if (!store->svc.zone->find_zone_id_by_name(source_zone_name, &source_zone)) {
+ cerr << "WARNING: cannot find source zone id for name=" << source_zone_name << std::endl;
+ source_zone = source_zone_name;
+ }
+ }
+
+ rgw_user_init(store);
+ rgw_bucket_init(store->meta_mgr);
+ rgw_otp_init(store);
+
+ rgw_http_client_init(g_ceph_context);
+
+ // Local guard type that pairs rgw::curl::setup_curl() with
+ // rgw::curl::cleanup_curl(): the constructor performs the setup call and
+ // the destructor performs the matching cleanup call.
+ struct rgw_curl_setup {
+ rgw_curl_setup() {
+ // boost::none supplies no value for setup_curl's argument.
+ // NOTE(review): what that argument selects is defined in rgw::curl and
+ // is not visible here -- confirm that "none" is the intended choice.
+ rgw::curl::setup_curl(boost::none);
+ }
+ ~rgw_curl_setup() {
+ rgw::curl::cleanup_curl();
+ }
+ // The single instance is declared together with the type, so setup_curl()
+ // runs at this point and cleanup_curl() runs when `curl_cleanup` is
+ // destroyed, including on the early `return` paths that follow. Paths that
+ // call exit() do not run destructors of locals, so cleanup is skipped there.
+ } curl_cleanup;
+
+ oath_init();
+
+ StoreDestructor store_destructor(store);
+
+ if (raw_storage_op) {
+ switch (opt_cmd) {
+ case OPT_PERIOD_DELETE:
+ {
+ if (period_id.empty()) {
+ cerr << "missing period id" << std::endl;
+ return EINVAL;
+ }
+ RGWPeriod period(period_id);
+ int ret = period.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "period.init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = period.delete_obj();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't delete period: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ }
+ break;
+ case OPT_PERIOD_GET:
+ {
+ epoch_t epoch = 0;
+ if (!period_epoch.empty()) {
+ epoch = atoi(period_epoch.c_str());
+ }
+ if (staging) {
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0 ) {
+ cerr << "Error initializing realm " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ realm_id = realm.get_id();
+ realm_name = realm.get_name();
+ period_id = RGWPeriod::get_staging_id(realm_id);
+ epoch = 1;
+ }
+ RGWPeriod period(period_id, epoch);
+ int ret = period.init(g_ceph_context, store->svc.sysobj, realm_id, realm_name);
+ if (ret < 0) {
+ cerr << "period init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("period", period, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_PERIOD_GET_CURRENT:
+ {
+ int ret = read_current_period_id(store, realm_id, realm_name, &period_id);
+ if (ret < 0) {
+ return -ret;
+ }
+ formatter->open_object_section("period_get_current");
+ encode_json("current_period", period_id, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_PERIOD_LIST:
+ {
+ list<string> periods;
+ int ret = store->svc.zone->list_periods(periods);
+ if (ret < 0) {
+ cerr << "failed to list periods: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("periods_list");
+ encode_json("periods", periods, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_PERIOD_UPDATE:
+ {
+ int ret = update_period(realm_id, realm_name, period_id, period_epoch,
+ commit, remote, url, access_key, secret_key,
+ formatter, yes_i_really_mean_it);
+ if (ret < 0) {
+ return -ret;
+ }
+ }
+ break;
+ case OPT_PERIOD_PULL:
+ {
+ boost::optional<RGWRESTConn> conn;
+ RGWRESTConn *remote_conn = nullptr;
+ if (url.empty()) {
+ // load current period for endpoints
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ RGWPeriod current_period(realm.get_current_period());
+ ret = current_period.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init current period: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ if (remote.empty()) {
+ // use realm master zone as remote
+ remote = current_period.get_master_zone();
+ }
+ conn = get_remote_conn(store, current_period.get_map(), remote);
+ if (!conn) {
+ cerr << "failed to find a zone or zonegroup for remote "
+ << remote << std::endl;
+ return -ENOENT;
+ }
+ remote_conn = &*conn;
+ }
+
+ RGWPeriod period;
+ int ret = do_period_pull(remote_conn, url, access_key, secret_key,
+ realm_id, realm_name, period_id, period_epoch,
+ &period);
+ if (ret < 0) {
+ cerr << "period pull failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("period", period, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_GLOBAL_QUOTA_GET:
+ case OPT_GLOBAL_QUOTA_SET:
+ case OPT_GLOBAL_QUOTA_ENABLE:
+ case OPT_GLOBAL_QUOTA_DISABLE:
+ {
+ if (realm_id.empty()) {
+ RGWRealm realm(g_ceph_context, store->svc.sysobj);
+ if (!realm_name.empty()) {
+ // look up realm_id for the given realm_name
+ int ret = realm.read_id(realm_name, realm_id);
+ if (ret < 0) {
+ cerr << "ERROR: failed to read realm for " << realm_name
+ << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ } else {
+ // use default realm_id when none is given
+ int ret = realm.read_default_id(realm_id);
+ if (ret < 0 && ret != -ENOENT) { // on ENOENT, use empty realm_id
+ cerr << "ERROR: failed to read default realm: "
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ }
+
+ RGWPeriodConfig period_config;
+ int ret = period_config.read(store->svc.sysobj, realm_id);
+ if (ret < 0 && ret != -ENOENT) {
+ cerr << "ERROR: failed to read period config: "
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ formatter->open_object_section("period_config");
+ if (quota_scope == "bucket") {
+ set_quota_info(period_config.bucket_quota, opt_cmd,
+ max_size, max_objects,
+ have_max_size, have_max_objects);
+ encode_json("bucket quota", period_config.bucket_quota, formatter);
+ } else if (quota_scope == "user") {
+ set_quota_info(period_config.user_quota, opt_cmd,
+ max_size, max_objects,
+ have_max_size, have_max_objects);
+ encode_json("user quota", period_config.user_quota, formatter);
+ } else if (quota_scope.empty() && opt_cmd == OPT_GLOBAL_QUOTA_GET) {
+ // if no scope is given for GET, print both
+ encode_json("bucket quota", period_config.bucket_quota, formatter);
+ encode_json("user quota", period_config.user_quota, formatter);
+ } else {
+ cerr << "ERROR: invalid quota scope specification. Please specify "
+ "either --quota-scope=bucket, or --quota-scope=user" << std::endl;
+ return EINVAL;
+ }
+ formatter->close_section();
+
+ if (opt_cmd != OPT_GLOBAL_QUOTA_GET) {
+ // write the modified period config
+ ret = period_config.write(store->svc.sysobj, realm_id);
+ if (ret < 0) {
+ cerr << "ERROR: failed to write period config: "
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ if (!realm_id.empty()) {
+ cout << "Global quota changes saved. Use 'period update' to apply "
+ "them to the staging period, and 'period commit' to commit the "
+ "new period." << std::endl;
+ } else {
+ cout << "Global quota changes saved. They will take effect as "
+ "the gateways are restarted." << std::endl;
+ }
+ }
+
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_REALM_CREATE:
+ {
+ if (realm_name.empty()) {
+ cerr << "missing realm name" << std::endl;
+ return EINVAL;
+ }
+
+ RGWRealm realm(realm_name, g_ceph_context, store->svc.sysobj);
+ int ret = realm.create();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't create realm " << realm_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (set_default) {
+ ret = realm.set_as_default();
+ if (ret < 0) {
+ cerr << "failed to set realm " << realm_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("realm", realm, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_REALM_DELETE:
+ {
+ RGWRealm realm(realm_id, realm_name);
+ if (realm_name.empty() && realm_id.empty()) {
+ cerr << "missing realm name or id" << std::endl;
+ return EINVAL;
+ }
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "realm.init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = realm.delete_obj();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't : " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ }
+ break;
+ case OPT_REALM_GET:
+ {
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ if (ret == -ENOENT && realm_name.empty() && realm_id.empty()) {
+ cerr << "missing realm name or id, or default realm not found" << std::endl;
+ } else {
+ cerr << "realm.init failed: " << cpp_strerror(-ret) << std::endl;
+ }
+ return -ret;
+ }
+ encode_json("realm", realm, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_REALM_GET_DEFAULT:
+ {
+ RGWRealm realm(g_ceph_context, store->svc.sysobj);
+ string default_id;
+ int ret = realm.read_default_id(default_id);
+ if (ret == -ENOENT) {
+ cout << "No default realm is set" << std::endl;
+ return -ret;
+ } else if (ret < 0) {
+ cerr << "Error reading default realm:" << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ cout << "default realm: " << default_id << std::endl;
+ }
+ break;
+ case OPT_REALM_LIST:
+ {
+ RGWRealm realm(g_ceph_context, store->svc.sysobj);
+ string default_id;
+ int ret = realm.read_default_id(default_id);
+ if (ret < 0 && ret != -ENOENT) {
+ cerr << "could not determine default realm: " << cpp_strerror(-ret) << std::endl;
+ }
+ list<string> realms;
+ ret = store->svc.zone->list_realms(realms);
+ if (ret < 0) {
+ cerr << "failed to list realms: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("realms_list");
+ encode_json("default_info", default_id, formatter);
+ encode_json("realms", realms, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_REALM_LIST_PERIODS:
+ {
+ int ret = read_current_period_id(store, realm_id, realm_name, &period_id);
+ if (ret < 0) {
+ return -ret;
+ }
+ list<string> periods;
+ ret = store->svc.zone->list_periods(period_id, periods);
+ if (ret < 0) {
+ cerr << "list periods failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("realm_periods_list");
+ encode_json("current_period", period_id, formatter);
+ encode_json("periods", periods, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+
+ case OPT_REALM_RENAME:
+ {
+ RGWRealm realm(realm_id, realm_name);
+ if (realm_new_name.empty()) {
+ cerr << "missing realm new name" << std::endl;
+ return EINVAL;
+ }
+ if (realm_name.empty() && realm_id.empty()) {
+ cerr << "missing realm name or id" << std::endl;
+ return EINVAL;
+ }
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "realm.init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = realm.rename(realm_new_name);
+ if (ret < 0) {
+ cerr << "realm.rename failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ cout << "Realm name updated. Note that this change only applies to "
+ "the current cluster, so this command must be run separately "
+ "on each of the realm's other clusters." << std::endl;
+ }
+ break;
+ case OPT_REALM_SET: // create or overwrite a realm from the JSON read via read_decode_json(infile, ...)
+ {
+ if (realm_id.empty() && realm_name.empty()) {
+ cerr << "no realm name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWRealm realm(realm_id, realm_name);
+ bool new_realm = false;
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0 && ret != -ENOENT) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ } else if (ret == -ENOENT) { // init reported ENOENT: treat as a realm to be created
+ new_realm = true;
+ }
+ ret = read_decode_json(infile, realm);
+ if (ret < 0) {
+ return 1; // no message is printed here
+ }
+ if (!realm_name.empty() && realm.get_name() != realm_name) { // the name in the JSON must match --rgw-realm when given
+ cerr << "mismatch between --rgw-realm " << realm_name << " and json input file name " <<
+ realm.get_name() << std::endl;
+ return EINVAL;
+ }
+ /* new realm */
+ if (new_realm) {
+ cout << "clearing period and epoch for new realm" << std::endl;
+ realm.clear_current_period_and_epoch();
+ ret = realm.create();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't create new realm: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+ } else {
+ ret = realm.update();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't store realm info: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+ }
+
+ if (set_default) {
+ ret = realm.set_as_default();
+ if (ret < 0) { // failure is reported but does not abort the command
+ cerr << "failed to set realm " << realm_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+ encode_json("realm", realm, formatter);
+ formatter->flush(cout);
+ }
+ break;
+
+ case OPT_REALM_DEFAULT: // mark the realm given by id/name as the default realm
+ {
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+ ret = realm.set_as_default();
+ if (ret < 0) { // here, unlike the --default option of other commands, failure is fatal
+ cerr << "failed to set realm as default: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_REALM_PULL:
+ {
+ // Fetch a realm description from the gateway at --url (GET /admin/realm),
+ // pull its current period if it has one, then store the realm locally
+ // (create, or update when it already exists). Exit code is 0 on success
+ // or a positive errno on failure.
+ if (url.empty()) {
+ cerr << "A --url must be provided." << std::endl;
+ return EINVAL;
+ }
+ RGWEnv env;
+ req_info info(g_ceph_context, &env);
+ info.method = "GET";
+ info.request_uri = "/admin/realm";
+
+ // optional selectors forwarded as query parameters
+ map<string, string> &params = info.args.get_params();
+ if (!realm_id.empty())
+ params["id"] = realm_id;
+ if (!realm_name.empty())
+ params["name"] = realm_name;
+
+ bufferlist bl;
+ JSONParser p;
+ int ret = send_to_url(url, access_key, secret_key, info, bl, p);
+ if (ret < 0) {
+ cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
+ if (ret == -EACCES) {
+ cerr << "If the realm has been changed on the master zone, the "
+ "master zone's gateway may need to be restarted to recognize "
+ "this user." << std::endl;
+ }
+ return -ret;
+ }
+ RGWRealm realm;
+ // the init result was previously ignored; check it like the other commands do
+ ret = realm.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ try {
+ decode_json_obj(realm, &p);
+ } catch (JSONDecoder::err& e) {
+ cerr << "failed to decode JSON response: " << e.message << std::endl;
+ return EINVAL;
+ }
+ RGWPeriod period;
+ auto& current_period = realm.get_current_period();
+ if (!current_period.empty()) {
+ // pull the latest epoch of the realm's current period
+ ret = do_period_pull(nullptr, url, access_key, secret_key,
+ realm_id, realm_name, current_period, "",
+ &period);
+ if (ret < 0) {
+ cerr << "could not fetch period " << current_period << std::endl;
+ return -ret;
+ }
+ }
+ ret = realm.create(false);
+ if (ret < 0 && ret != -EEXIST) {
+ cerr << "Error storing realm " << realm.get_id() << ": "
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ } else if (ret == -EEXIST) {
+ // the realm already exists locally: overwrite it with the pulled copy
+ ret = realm.update();
+ if (ret < 0) {
+ cerr << "Error storing realm " << realm.get_id() << ": "
+ << cpp_strerror(-ret) << std::endl;
+ // do not report success (and print the realm) when nothing was stored
+ return -ret;
+ }
+ }
+
+ if (set_default) {
+ ret = realm.set_as_default();
+ if (ret < 0) {
+ // best effort: reported but not fatal, as in the other commands
+ cerr << "failed to set realm " << realm_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("realm", realm, formatter);
+ formatter->flush(cout);
+ }
+ break;
+
+ case OPT_ZONEGROUP_ADD:
+ {
+ // Add the zone given by zone id/name to the zonegroup given by
+ // zonegroup id/name and print the resulting zonegroup as JSON.
+ if (zonegroup_id.empty() && zonegroup_name.empty()) {
+ cerr << "no zonegroup name or id provided" << std::endl;
+ return EINVAL;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id,zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to initialize zonegroup " << zonegroup_name << " id " << zonegroup_id << " :"
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ RGWZoneParams zone(zone_id, zone_name);
+ ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ // keep the zone's realm in line with the zonegroup it is joining
+ if (zone.realm_id != zonegroup.realm_id) {
+ zone.realm_id = zonegroup.realm_id;
+ ret = zone.update();
+ if (ret < 0) {
+ cerr << "failed to save zone info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ // optional arguments are passed as pointers; nullptr means "not specified"
+ string *ptier_type = (tier_type_specified ? &tier_type : nullptr);
+
+ // iterate by const reference: no need to copy every key/value pair
+ for (const auto& a : tier_config_add) {
+ int r = zone.tier_config.set(a.first, a.second);
+ if (r < 0) {
+ cerr << "ERROR: failed to set configurable: " << a << std::endl;
+ return EINVAL;
+ }
+ }
+
+ bool *psync_from_all = (sync_from_all_specified ? &sync_from_all : nullptr);
+ string *predirect_zone = (redirect_zone_set ? &redirect_zone : nullptr);
+
+ ret = zonegroup.add_zone(zone,
+ (is_master_set ? &is_master : nullptr),
+ (is_read_only_set ? &read_only : nullptr),
+ endpoints, ptier_type,
+ psync_from_all, sync_from, sync_from_rm,
+ predirect_zone,
+ store->svc.sync_modules->get_manager());
+ if (ret < 0) {
+ cerr << "failed to add zone " << zone_name << " to zonegroup " << zonegroup.get_name() << ": "
+ << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_CREATE: // create a new zonegroup inside the realm given by id/name
+ {
+ if (zonegroup_name.empty()) {
+ cerr << "Missing zonegroup name" << std::endl;
+ return EINVAL;
+ }
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_name, is_master, g_ceph_context, store->svc.sysobj, realm.get_id(), endpoints);
+ zonegroup.api_name = (api_name.empty() ? zonegroup_name : api_name); // api name falls back to the zonegroup name
+ ret = zonegroup.create();
+ if (ret < 0) {
+ cerr << "failed to create zonegroup " << zonegroup_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (set_default) {
+ ret = zonegroup.set_as_default();
+ if (ret < 0) { // reported but not fatal
+ cerr << "failed to set zonegroup " << zonegroup_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_DEFAULT: // mark the zonegroup given by id/name as the default
+ {
+ if (zonegroup_id.empty() && zonegroup_name.empty()) {
+ cerr << "no zonegroup name or id provided" << std::endl;
+ return EINVAL;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+
+ ret = zonegroup.set_as_default();
+ if (ret < 0) {
+ cerr << "failed to set zonegroup as default: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_ZONEGROUP_DELETE: // delete the zonegroup given by id/name
+ {
+ if (zonegroup_id.empty() && zonegroup_name.empty()) {
+ cerr << "no zonegroup name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+ ret = zonegroup.delete_obj();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't delete zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_ZONEGROUP_GET: // print the zonegroup given by id/name as JSON
+ {
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_LIST: // print the default zonegroup id and the list of zonegroups
+ {
+ RGWZoneGroup zonegroup;
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ list<string> zonegroups;
+ ret = store->svc.zone->list_zonegroups(zonegroups);
+ if (ret < 0) {
+ cerr << "failed to list zonegroups: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ string default_zonegroup;
+ ret = zonegroup.read_default_id(default_zonegroup);
+ if (ret < 0 && ret != -ENOENT) { // reported only; the listing is still printed
+ cerr << "could not determine default zonegroup: " << cpp_strerror(-ret) << std::endl;
+ }
+ formatter->open_object_section("zonegroups_list");
+ encode_json("default_info", default_zonegroup, formatter); // stays empty when no default id was read
+ encode_json("zonegroups", zonegroups, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_MODIFY: // apply the command-line overrides to an existing zonegroup
+ {
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ bool need_update = false; // set when any field below changes
+
+ if (!master_zone.empty()) {
+ zonegroup.master_zone = master_zone;
+ need_update = true;
+ }
+
+ if (is_master_set) {
+ zonegroup.update_master(is_master);
+ need_update = true;
+ }
+
+ if (!endpoints.empty()) {
+ zonegroup.endpoints = endpoints;
+ need_update = true;
+ }
+
+ if (!api_name.empty()) {
+ zonegroup.api_name = api_name;
+ need_update = true;
+ }
+
+ if (!realm_id.empty()) { // an explicit realm id wins over a realm name
+ zonegroup.realm_id = realm_id;
+ need_update = true;
+ } else if (!realm_name.empty()) {
+ // get realm id from name
+ RGWRealm realm{g_ceph_context, store->svc.sysobj}; // NOTE(review): shadows the realm declared at the top of this case
+ ret = realm.read_id(realm_name, zonegroup.realm_id);
+ if (ret < 0) {
+ cerr << "failed to find realm by name " << realm_name << std::endl;
+ return -ret;
+ }
+ need_update = true;
+ }
+
+ if (need_update) {
+ ret = zonegroup.update();
+ if (ret < 0) {
+ cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (set_default) {
+ ret = zonegroup.set_as_default();
+ if (ret < 0) { // reported but not fatal
+ cerr << "failed to set zonegroup " << zonegroup_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_SET: // create or overwrite a zonegroup from the JSON read via read_decode_json(infile, ...)
+ {
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ bool default_realm_not_exist = (ret == -ENOENT && realm_id.empty() && realm_name.empty()); // ENOENT is tolerated only when no realm was named
+
+ if (ret < 0 && !default_realm_not_exist ) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWZoneGroup zonegroup;
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = read_decode_json(infile, zonegroup);
+ if (ret < 0) {
+ return 1; // no message is printed here
+ }
+ if (zonegroup.realm_id.empty() && !default_realm_not_exist) { // fill in the realm id when the JSON did not carry one
+ zonegroup.realm_id = realm.get_id();
+ }
+ ret = zonegroup.create();
+ if (ret < 0 && ret != -EEXIST) {
+ cerr << "ERROR: couldn't create zonegroup info: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ } else if (ret == -EEXIST) { // already present: overwrite instead
+ ret = zonegroup.update();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't store zonegroup info: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+ }
+
+ if (set_default) {
+ ret = zonegroup.set_as_default();
+ if (ret < 0) { // reported but not fatal
+ cerr << "failed to set zonegroup " << zonegroup_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_REMOVE: // remove a zone (by id, or by name lookup) from a zonegroup
+ {
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (zone_id.empty()) {
+ if (zone_name.empty()) {
+ cerr << "no --zone-id or --rgw-zone name provided" << std::endl;
+ return EINVAL;
+ }
+ // look up zone id by name
+ for (auto& z : zonegroup.zones) {
+ if (zone_name == z.second.name) {
+ zone_id = z.second.id; // first zone with a matching name wins
+ break;
+ }
+ }
+ if (zone_id.empty()) {
+ cerr << "zone name " << zone_name << " not found in zonegroup "
+ << zonegroup.get_name() << std::endl;
+ return ENOENT;
+ }
+ }
+
+ ret = zonegroup.remove_zone(zone_id);
+ if (ret < 0) {
+ cerr << "failed to remove zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("zonegroup", zonegroup, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_RENAME: // rename the zonegroup given by id/name
+ {
+ if (zonegroup_new_name.empty()) { // the new name is mandatory
+ cerr << " missing zonegroup new name" << std::endl;
+ return EINVAL;
+ }
+ if (zonegroup_id.empty() && zonegroup_name.empty()) {
+ cerr << "no zonegroup name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+ ret = zonegroup.rename(zonegroup_new_name);
+ if (ret < 0) {
+ cerr << "failed to rename zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_ZONEGROUP_PLACEMENT_LIST: // print all placement targets of a zonegroup as JSON
+ {
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+
+ encode_json("placement_targets", zonegroup.placement_targets, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_PLACEMENT_GET:
+ {
+ // Print a single zonegroup placement target, selected by --placement-id,
+ // as JSON. Exit code is 0 on success or a positive errno on failure.
+ if (placement_id.empty()) {
+ cerr << "ERROR: --placement-id not specified" << std::endl;
+ return EINVAL;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ auto p = zonegroup.placement_targets.find(placement_id);
+ if (p == zonegroup.placement_targets.end()) {
+ cerr << "failed to find a zonegroup placement target named '" << placement_id << "'" << std::endl;
+ // positive errno, like every other error exit of this command set
+ return ENOENT;
+ }
+ encode_json("placement_targets", p->second, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONEGROUP_PLACEMENT_ADD:
+ case OPT_ZONEGROUP_PLACEMENT_MODIFY:
+ case OPT_ZONEGROUP_PLACEMENT_RM:
+ case OPT_ZONEGROUP_PLACEMENT_DEFAULT:
+ {
+ // Add, modify, remove or make default the zonegroup placement target
+ // named by --placement-id, store the zonegroup and print its placement
+ // targets. Exit code is 0 on success or a positive errno on failure.
+ if (placement_id.empty()) {
+ cerr << "ERROR: --placement-id not specified" << std::endl;
+ return EINVAL;
+ }
+
+ rgw_placement_rule rule;
+ rule.from_str(placement_id);
+
+ // a storage class parsed from the placement id must agree with --storage-class
+ if (!rule.storage_class.empty() && !storage_class.empty() &&
+ rule.storage_class != storage_class) {
+ cerr << "ERROR: provided contradicting storage class configuration" << std::endl;
+ return EINVAL;
+ } else if (rule.storage_class.empty()) {
+ rule.storage_class = storage_class;
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (opt_cmd == OPT_ZONEGROUP_PLACEMENT_ADD ||
+ opt_cmd == OPT_ZONEGROUP_PLACEMENT_MODIFY) {
+ // operator[] creates the target when it does not exist yet
+ RGWZoneGroupPlacementTarget& target = zonegroup.placement_targets[placement_id];
+ // a non-empty tags list replaces the existing tags wholesale
+ if (!tags.empty()) {
+ target.tags.clear();
+ for (auto& t : tags) {
+ target.tags.insert(t);
+ }
+ }
+ target.name = placement_id;
+ // removals are applied before additions
+ for (auto& t : tags_rm) {
+ target.tags.erase(t);
+ }
+ for (auto& t : tags_add) {
+ target.tags.insert(t);
+ }
+ target.storage_classes.insert(rule.get_storage_class());
+ } else if (opt_cmd == OPT_ZONEGROUP_PLACEMENT_RM) {
+ zonegroup.placement_targets.erase(placement_id);
+ } else if (opt_cmd == OPT_ZONEGROUP_PLACEMENT_DEFAULT) {
+ if (!zonegroup.placement_targets.count(placement_id)) {
+ cerr << "failed to find a zonegroup placement target named '"
+ << placement_id << "'" << std::endl;
+ // positive errno, like every other error exit of this command set
+ return ENOENT;
+ }
+ zonegroup.default_placement = rule;
+ }
+
+ zonegroup.post_process_params();
+ ret = zonegroup.update();
+ if (ret < 0) {
+ cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("placement_targets", zonegroup.placement_targets, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_CREATE:
+ {
+ // Create a new zone named --rgw-zone. When a zonegroup is given the zone
+ // is also added to it; otherwise a stand-alone zone is created.
+ if (zone_name.empty()) {
+ cerr << "zone name not provided" << std::endl;
+ return EINVAL;
+ }
+ int ret;
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ /* if the user didn't provide zonegroup info , create stand alone zone */
+ if (!zonegroup_id.empty() || !zonegroup_name.empty()) {
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zonegroup " << zonegroup_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ // without an explicit realm, inherit the zonegroup's realm
+ if (realm_id.empty() && realm_name.empty()) {
+ realm_id = zonegroup.realm_id;
+ }
+ }
+
+ RGWZoneParams zone(zone_id, zone_name);
+ ret = zone.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ zone.system_key.id = access_key;
+ zone.system_key.key = secret_key;
+ zone.realm_id = realm_id;
+ // iterate by const reference: no need to copy every key/value pair
+ for (const auto& a : tier_config_add) {
+ int r = zone.tier_config.set(a.first, a.second);
+ if (r < 0) {
+ cerr << "ERROR: failed to set configurable: " << a << std::endl;
+ return EINVAL;
+ }
+ }
+
+ ret = zone.create();
+ if (ret < 0) {
+ cerr << "failed to create zone " << zone_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (!zonegroup_id.empty() || !zonegroup_name.empty()) {
+ // optional arguments are passed as pointers; nullptr means "not specified"
+ string *ptier_type = (tier_type_specified ? &tier_type : nullptr);
+ bool *psync_from_all = (sync_from_all_specified ? &sync_from_all : nullptr);
+ string *predirect_zone = (redirect_zone_set ? &redirect_zone : nullptr);
+ ret = zonegroup.add_zone(zone,
+ (is_master_set ? &is_master : nullptr),
+ (is_read_only_set ? &read_only : nullptr),
+ endpoints,
+ ptier_type,
+ psync_from_all,
+ sync_from, sync_from_rm,
+ predirect_zone,
+ store->svc.sync_modules->get_manager());
+ if (ret < 0) {
+ cerr << "failed to add zone " << zone_name << " to zonegroup " << zonegroup.get_name()
+ << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (set_default) {
+ ret = zone.set_as_default();
+ if (ret < 0) {
+ // best effort: reported but not fatal
+ cerr << "failed to set zone " << zone_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zone", zone, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_DEFAULT: // mark the zone given by id/name as the default zone
+ {
+ RGWZoneGroup zonegroup(zonegroup_id,zonegroup_name);
+ int ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) { // only a warning: the zonegroup is not used further in this case
+ cerr << "WARNING: failed to initialize zonegroup " << zonegroup_name << std::endl;
+ }
+ if (zone_id.empty() && zone_name.empty()) {
+ cerr << "no zone name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWZoneParams zone(zone_id, zone_name);
+ ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = zone.set_as_default();
+ if (ret < 0) {
+ cerr << "failed to set zone as default: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_ZONE_DELETE:
+ {
+ // Delete the zone given by id/name: first remove it from every zonegroup
+ // that can be loaded (best effort), then delete the zone object itself.
+ if (zone_id.empty() && zone_name.empty()) {
+ cerr << "no zone name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ list<string> zonegroups;
+ ret = store->svc.zone->list_zonegroups(zonegroups);
+ if (ret < 0) {
+ cerr << "failed to list zonegroups: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ for (const auto& zg_name : zonegroups) {
+ RGWZoneGroup zonegroup(string(), zg_name);
+ // loop-local status, so the outer ret is not shadowed
+ int r = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (r < 0) {
+ // name the zonegroup that actually failed, not the (possibly empty)
+ // --rgw-zonegroup command-line argument
+ cerr << "WARNING: failed to initialize zonegroup " << zg_name << std::endl;
+ continue;
+ }
+ r = zonegroup.remove_zone(zone.get_id());
+ // ENOENT from remove_zone is not reported
+ if (r < 0 && r != -ENOENT) {
+ cerr << "failed to remove zone " << zone_name << " from zonegroup " << zonegroup.get_name() << ": "
+ << cpp_strerror(-r) << std::endl;
+ }
+ }
+
+ ret = zone.delete_obj();
+ if (ret < 0) {
+ cerr << "failed to delete zone " << zone_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_ZONE_GET: // print the zone given by id/name as JSON
+ {
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+ encode_json("zone", zone, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_SET:
+ {
+ // Create or overwrite a zone from the JSON document read from infile.
+ // Missing pieces (realm id, name, id) are filled in from the command
+ // line or from the zone previously stored under the same name.
+ RGWZoneParams zone(zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ // previously this path exited without any diagnostic
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ // a zone that does not exist yet (ENOENT) is fine: it will be created
+ ret = zone.read();
+ if (ret < 0 && ret != -ENOENT) {
+ cerr << "zone.read() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+
+ // remember the stored id in case the JSON does not carry one
+ string orig_id = zone.get_id();
+
+ ret = read_decode_json(infile, zone);
+ if (ret < 0) {
+ return 1;
+ }
+
+ if (zone.realm_id.empty()) {
+ RGWRealm realm(realm_id, realm_name);
+ // reuse the outer status variable instead of shadowing it
+ ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0 && ret != -ENOENT) {
+ cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ zone.realm_id = realm.get_id();
+ }
+
+ if (!zone_name.empty() && !zone.get_name().empty() && zone.get_name() != zone_name) {
+ cerr << "Error: zone name " << zone_name << " is different than the zone name " << zone.get_name() << " in the provided json " << std::endl;
+ return EINVAL;
+ }
+
+ if (zone.get_name().empty()) {
+ zone.set_name(zone_name);
+ if (zone.get_name().empty()) {
+ cerr << "no zone name specified" << std::endl;
+ return EINVAL;
+ }
+ }
+
+ zone_name = zone.get_name();
+
+ if (zone.get_id().empty()) {
+ zone.set_id(orig_id);
+ }
+
+ if (zone.get_id().empty()) {
+ // neither the JSON nor the stored zone supplied an id: use the name as id
+ cerr << "no zone id in the json provided, assuming old format" << std::endl;
+ if (zone_name.empty()) {
+ cerr << "missing zone name" << std::endl;
+ return EINVAL;
+ }
+ zone.set_name(zone_name);
+ zone.set_id(zone_name);
+ }
+
+ // terminate the line so later diagnostics do not run into it
+ cerr << "zone id " << zone.get_id() << std::endl;
+ ret = zone.fix_pool_names();
+ if (ret < 0) {
+ cerr << "ERROR: couldn't fix zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = zone.write(false);
+ if (ret < 0) {
+ cerr << "ERROR: couldn't create zone: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+
+ if (set_default) {
+ ret = zone.set_as_default();
+ if (ret < 0) {
+ // best effort: reported but not fatal
+ cerr << "failed to set zone " << zone_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zone", zone, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_LIST: // print the default zone id and the list of zones
+ {
+ list<string> zones;
+ int ret = store->svc.zone->list_zones(zones);
+ if (ret < 0) {
+ cerr << "failed to list zones: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWZoneParams zone;
+ ret = zone.init(g_ceph_context, store->svc.sysobj, false);
+ if (ret < 0) {
+ cerr << "failed to init zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ string default_zone;
+ ret = zone.read_default_id(default_zone);
+ if (ret < 0 && ret != -ENOENT) { // reported only; the listing is still printed
+ cerr << "could not determine default zone: " << cpp_strerror(-ret) << std::endl;
+ }
+ formatter->open_object_section("zones_list");
+ encode_json("default_info", default_zone, formatter); // stays empty when no default id was read
+ encode_json("zones", zones, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_MODIFY:
+ {
+ // Apply command-line overrides to an existing zone, save it when anything
+ // changed, then refresh the zone's entry in its zonegroup.
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ // set when any zone field below changes
+ bool need_zone_update = false;
+ if (!access_key.empty()) {
+ zone.system_key.id = access_key;
+ need_zone_update = true;
+ }
+
+ if (!secret_key.empty()) {
+ zone.system_key.key = secret_key;
+ need_zone_update = true;
+ }
+
+ // an explicit realm id wins over a realm name
+ if (!realm_id.empty()) {
+ zone.realm_id = realm_id;
+ need_zone_update = true;
+ } else if (!realm_name.empty()) {
+ // get realm id from name
+ RGWRealm realm{g_ceph_context, store->svc.sysobj};
+ ret = realm.read_id(realm_name, zone.realm_id);
+ if (ret < 0) {
+ cerr << "failed to find realm by name " << realm_name << std::endl;
+ return -ret;
+ }
+ need_zone_update = true;
+ }
+
+ // iterate by const reference: no need to copy every key/value pair
+ for (const auto& add : tier_config_add) {
+ int r = zone.tier_config.set(add.first, add.second);
+ if (r < 0) {
+ cerr << "ERROR: failed to set configurable: " << add << std::endl;
+ return EINVAL;
+ }
+ need_zone_update = true;
+ }
+
+ for (const auto& rm : tier_config_rm) {
+ if (!rm.first.empty()) { /* otherwise will remove the entire config */
+ zone.tier_config.erase(rm.first);
+ need_zone_update = true;
+ }
+ }
+
+ if (need_zone_update) {
+ ret = zone.update();
+ if (ret < 0) {
+ cerr << "failed to save zone info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ // optional arguments are passed as pointers; nullptr means "not specified"
+ string *ptier_type = (tier_type_specified ? &tier_type : nullptr);
+
+ bool *psync_from_all = (sync_from_all_specified ? &sync_from_all : nullptr);
+ string *predirect_zone = (redirect_zone_set ? &redirect_zone : nullptr);
+
+ ret = zonegroup.add_zone(zone,
+ (is_master_set ? &is_master : nullptr),
+ (is_read_only_set ? &read_only : nullptr),
+ endpoints, ptier_type,
+ psync_from_all, sync_from, sync_from_rm,
+ predirect_zone,
+ store->svc.sync_modules->get_manager());
+ if (ret < 0) {
+ cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ ret = zonegroup.update();
+ if (ret < 0) {
+ cerr << "failed to update zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (set_default) {
+ ret = zone.set_as_default();
+ if (ret < 0) {
+ // best effort: reported but not fatal
+ cerr << "failed to set zone " << zone_name << " as default: " << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ encode_json("zone", zone, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_RENAME:
+ {
+ // Rename the zone given by id/name, then propagate the new name to the
+ // zonegroup when that zonegroup can be loaded.
+ if (zone_new_name.empty()) {
+ cerr << " missing zone new name" << std::endl;
+ return EINVAL;
+ }
+ if (zone_id.empty() && zone_name.empty()) {
+ // the missing argument is the zone, not the zonegroup
+ cerr << "no zone name or id provided" << std::endl;
+ return EINVAL;
+ }
+ RGWZoneParams zone(zone_id,zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = zone.rename(zone_new_name);
+ if (ret < 0) {
+ cerr << "failed to rename zone " << zone_name << " to " << zone_new_name << ": " << cpp_strerror(-ret)
+ << std::endl;
+ return -ret;
+ }
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ // not fatal: the zone itself has already been renamed
+ cerr << "WARNING: failed to initialize zonegroup " << zonegroup_name << std::endl;
+ } else {
+ ret = zonegroup.rename_zone(zone);
+ if (ret < 0) {
+ cerr << "Error in zonegroup rename for " << zone_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ }
+ break;
+ case OPT_ZONE_PLACEMENT_ADD:
+ case OPT_ZONE_PLACEMENT_MODIFY:
+ case OPT_ZONE_PLACEMENT_RM:
+ {
+ // Add, modify or remove the zone placement entry named by --placement-id,
+ // store the zone and print it as JSON. Exit code is 0 on success or a
+ // positive errno on failure.
+ if (placement_id.empty()) {
+ cerr << "ERROR: --placement-id not specified" << std::endl;
+ return EINVAL;
+ }
+ // validate compression type
+ if (compression_type && *compression_type != "random"
+ && !Compressor::get_comp_alg_type(*compression_type)) {
+ std::cerr << "Unrecognized compression type" << std::endl;
+ return EINVAL;
+ }
+
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (opt_cmd == OPT_ZONE_PLACEMENT_ADD ||
+ opt_cmd == OPT_ZONE_PLACEMENT_MODIFY) {
+ RGWZoneGroup zonegroup(zonegroup_id, zonegroup_name);
+ ret = zonegroup.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "failed to init zonegroup: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ // the placement id must already be a target of the zonegroup
+ auto ptiter = zonegroup.placement_targets.find(placement_id);
+ if (ptiter == zonegroup.placement_targets.end()) {
+ cerr << "ERROR: placement id '" << placement_id << "' is not configured in zonegroup placement targets" << std::endl;
+ return EINVAL;
+ }
+
+ // ... and the storage class must be defined on that target
+ storage_class = rgw_placement_rule::get_canonical_storage_class(storage_class);
+ if (ptiter->second.storage_classes.find(storage_class) == ptiter->second.storage_classes.end()) {
+ cerr << "ERROR: storage class '" << storage_class << "' is not defined in zonegroup '" << placement_id << "' placement target" << std::endl;
+ return EINVAL;
+ }
+
+ // operator[] creates the placement entry when it does not exist yet
+ RGWZonePlacementInfo& info = zone.placement_pools[placement_id];
+
+ string opt_index_pool = index_pool.value_or(string());
+ string opt_data_pool = data_pool.value_or(string());
+
+ if (!opt_index_pool.empty()) {
+ info.index_pool = opt_index_pool;
+ }
+
+ if (info.index_pool.empty()) {
+ cerr << "ERROR: index pool not configured, need to specify --index-pool" << std::endl;
+ return EINVAL;
+ }
+
+ // without --data-pool, fall back to the pool already configured for
+ // this storage class
+ if (opt_data_pool.empty()) {
+ const RGWZoneStorageClass *porig_sc{nullptr};
+ if (info.storage_classes.find(storage_class, &porig_sc)) {
+ if (porig_sc->data_pool) {
+ opt_data_pool = porig_sc->data_pool->to_str();
+ }
+ }
+ if (opt_data_pool.empty()) {
+ cerr << "ERROR: data pool not configured, need to specify --data-pool" << std::endl;
+ return EINVAL;
+ }
+ }
+
+ rgw_pool dp = opt_data_pool;
+ info.storage_classes.set_storage_class(storage_class, &dp, compression_type.get_ptr());
+
+ if (data_extra_pool) {
+ info.data_extra_pool = *data_extra_pool;
+ }
+ if (index_type_specified) {
+ info.index_type = placement_index_type;
+ }
+
+ ret = check_pool_support_omap(info.get_data_extra_pool());
+ if (ret < 0) {
+ cerr << "ERROR: the data extra (non-ec) pool '" << info.get_data_extra_pool()
+ << "' does not support omap" << std::endl;
+ // positive errno, like every other error exit of this command set
+ return -ret;
+ }
+ } else if (opt_cmd == OPT_ZONE_PLACEMENT_RM) {
+ zone.placement_pools.erase(placement_id);
+ }
+
+ ret = zone.update();
+ if (ret < 0) {
+ cerr << "failed to save zone info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("zone", zone, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_PLACEMENT_LIST: // print all placement pools of a zone as JSON
+ {
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret; // exit code is the positive errno
+ }
+ encode_json("placement_pools", zone.placement_pools, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ case OPT_ZONE_PLACEMENT_GET:
+ {
+ // Print a single zone placement entry, selected by --placement-id, as
+ // JSON. Exit code is 0 on success or a positive errno on failure.
+ if (placement_id.empty()) {
+ cerr << "ERROR: --placement-id not specified" << std::endl;
+ return EINVAL;
+ }
+
+ RGWZoneParams zone(zone_id, zone_name);
+ int ret = zone.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ auto p = zone.placement_pools.find(placement_id);
+ if (p == zone.placement_pools.end()) {
+ cerr << "ERROR: zone placement target '" << placement_id << "' not found" << std::endl;
+ // positive errno, like every other error exit of this command set
+ return ENOENT;
+ }
+ encode_json("placement_pools", p->second, formatter);
+ formatter->flush(cout);
+ }
+ break;
+ }
+ return 0;
+ }
+
+ // Refuse the metadata-changing commands listed below when this zone is not
+ // the metadata master, unless yes_i_really_mean_it was set.
+ bool non_master_cmd = !store->svc.zone->is_meta_master() && !yes_i_really_mean_it;
+ std::set<int> non_master_ops_list = {
+   OPT_USER_CREATE,
+   OPT_USER_RM,
+   OPT_USER_MODIFY,
+   OPT_USER_ENABLE,
+   OPT_USER_SUSPEND,
+   OPT_SUBUSER_CREATE,
+   OPT_SUBUSER_MODIFY,
+   OPT_SUBUSER_RM,
+   OPT_BUCKET_LINK,
+   OPT_BUCKET_UNLINK,
+   OPT_BUCKET_RESHARD,
+   OPT_BUCKET_RM,
+   OPT_METADATA_PUT,
+   OPT_METADATA_RM,
+   OPT_RESHARD_CANCEL,
+   OPT_RESHARD_ADD,
+   OPT_MFA_CREATE,
+   OPT_MFA_REMOVE,
+   OPT_MFA_RESYNC,
+   OPT_CAPS_ADD,
+   OPT_CAPS_RM
+ };
+
+ // Warn only when both hold: the command is in the list and we are on a
+ // non-master zone without the override.
+ bool print_warning_message = non_master_cmd && non_master_ops_list.count(opt_cmd) != 0;
+
+ if (print_warning_message) {
+   cerr << "Please run the command on master zone. Performing this operation on non-master zone leads to inconsistent metadata between zones" << std::endl;
+   cerr << "Are you sure you want to go ahead? (requires --yes-i-really-mean-it)" << std::endl;
+   return EINVAL;
+ }
+
+ if (!user_id.empty()) {
+ user_op.set_user_id(user_id);
+ bucket_op.set_user_id(user_id);
+ }
+
+ if (!display_name.empty())
+ user_op.set_display_name(display_name);
+
+ if (!user_email.empty())
+ user_op.set_user_email(user_email);
+
+ if (!access_key.empty())
+ user_op.set_access_key(access_key);
+
+ if (!secret_key.empty())
+ user_op.set_secret_key(secret_key);
+
+ if (!subuser.empty())
+ user_op.set_subuser(subuser);
+
+ if (!caps.empty())
+ user_op.set_caps(caps);
+
+ user_op.set_purge_data(purge_data);
+
+ if (purge_keys)
+ user_op.set_purge_keys();
+
+ if (gen_access_key)
+ user_op.set_generate_key();
+
+ if (gen_secret_key)
+ user_op.set_gen_secret(); // assume that a key pair should be created
+
+ if (max_buckets_specified)
+ user_op.set_max_buckets(max_buckets);
+
+ if (admin_specified)
+ user_op.set_admin(admin);
+
+ if (system_specified)
+ user_op.set_system(system);
+
+ if (set_perm)
+ user_op.set_perm(perm_mask);
+
+ if (set_temp_url_key) {
+   // Forward every (int, string) entry of temp_url_keys to user_op,
+   // passing the string first and the int second.
+   for (auto& indexed_key : temp_url_keys) {
+     user_op.set_temp_url_key(indexed_key.second, indexed_key.first);
+   }
+ }
+
+ if (!op_mask_str.empty()) {
+ uint32_t op_mask;
+ int ret = rgw_parse_op_type_list(op_mask_str, &op_mask);
+ if (ret < 0) {
+ cerr << "failed to parse op_mask: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ user_op.set_op_mask(op_mask);
+ }
+
+ if (key_type != KEY_TYPE_UNDEFINED)
+ user_op.set_key_type(key_type);
+
+ // set suspension operation parameters
+ if (opt_cmd == OPT_USER_ENABLE)
+ user_op.set_suspension(false);
+ else if (opt_cmd == OPT_USER_SUSPEND)
+ user_op.set_suspension(true);
+
+ // RGWUser instance shared by the user-related commands below
+ RGWUser user;
+ int ret = 0;
+ // Initialise it only when a uid, an access key or a subuser was supplied.
+ if (!user_id.empty() || !access_key.empty() || !subuser.empty()) {
+   ret = user.init(store, user_op);
+   if (ret < 0) {
+     cerr << "user.init failed: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+ }
+
+ /* populate bucket operation */
+ bucket_op.set_bucket_name(bucket_name);
+ bucket_op.set_object(object);
+ bucket_op.set_check_objects(check_objects);
+ bucket_op.set_delete_children(delete_child_objects);
+ bucket_op.set_fix_index(fix);
+ bucket_op.set_max_aio(max_concurrent_ios);
+
+ // required to gather errors from operations
+ std::string err_msg;
+
+ bool output_user_info = true;
+
+ switch (opt_cmd) {
+ case OPT_USER_INFO:
+ if (user_id.empty() && access_key.empty()) {
+ cerr << "ERROR: --uid or --access-key required" << std::endl;
+ return EINVAL;
+ }
+ break;
+ case OPT_USER_CREATE:
+ if (!user_op.has_existing_user()) {
+ user_op.set_generate_key(); // generate a new key by default
+ }
+ ret = user.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not create user: " << err_msg << std::endl;
+ if (ret == -ERR_INVALID_TENANT_NAME)
+ ret = -EINVAL;
+
+ return -ret;
+ }
+ if (!subuser.empty()) {
+ ret = user.subusers.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not create subuser: " << err_msg << std::endl;
+ return -ret;
+ }
+ }
+ break;
+ case OPT_USER_RM:
+ ret = user.remove(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not remove user: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ output_user_info = false;
+ break;
+ case OPT_USER_ENABLE:
+ case OPT_USER_SUSPEND:
+ case OPT_USER_MODIFY:
+ ret = user.modify(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not modify user: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_SUBUSER_CREATE:
+ ret = user.subusers.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not create subuser: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_SUBUSER_MODIFY:
+ ret = user.subusers.modify(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not modify subuser: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_SUBUSER_RM:
+ ret = user.subusers.remove(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not remove subuser: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_CAPS_ADD:
+ ret = user.caps.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not add caps: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_CAPS_RM:
+ ret = user.caps.remove(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not remove caps: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_KEY_CREATE:
+ ret = user.keys.add(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not create key: " << err_msg << std::endl;
+ return -ret;
+ }
+
+ break;
+ case OPT_KEY_RM:
+ ret = user.keys.remove(user_op, &err_msg);
+ if (ret < 0) {
+ cerr << "could not remove key: " << err_msg << std::endl;
+ return -ret;
+ }
+ break;
+ case OPT_PERIOD_PUSH:
+ {
+ RGWEnv env;
+ req_info info(g_ceph_context, &env);
+ info.method = "POST";
+ info.request_uri = "/admin/realm/period";
+
+ map<string, string> &params = info.args.get_params();
+ if (!realm_id.empty())
+ params["realm_id"] = realm_id;
+ if (!realm_name.empty())
+ params["realm_name"] = realm_name;
+ if (!period_id.empty())
+ params["period_id"] = period_id;
+ if (!period_epoch.empty())
+ params["epoch"] = period_epoch;
+
+ // load the period
+ RGWPeriod period(period_id);
+ int ret = period.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "period init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ // json format into a bufferlist
+ JSONFormatter jf(false);
+ encode_json("period", period, &jf);
+ bufferlist bl;
+ jf.flush(bl);
+
+ JSONParser p;
+ ret = send_to_remote_or_url(nullptr, url, access_key, secret_key,
+ info, bl, p);
+ if (ret < 0) {
+ cerr << "request failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ return 0;
+ case OPT_PERIOD_UPDATE:
+ {
+ int ret = update_period(realm_id, realm_name, period_id, period_epoch,
+ commit, remote, url, access_key, secret_key,
+ formatter, yes_i_really_mean_it);
+ if (ret < 0) {
+ return -ret;
+ }
+ }
+ return 0;
+ case OPT_PERIOD_COMMIT:
+ {
+ // read realm and staging period
+ RGWRealm realm(realm_id, realm_name);
+ int ret = realm.init(g_ceph_context, store->svc.sysobj);
+ if (ret < 0) {
+ cerr << "Error initializing realm: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ RGWPeriod period(RGWPeriod::get_staging_id(realm.get_id()), 1);
+ ret = period.init(g_ceph_context, store->svc.sysobj, realm.get_id());
+ if (ret < 0) {
+ cerr << "period init failed: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = commit_period(realm, period, remote, url, access_key, secret_key,
+ yes_i_really_mean_it);
+ if (ret < 0) {
+ cerr << "failed to commit period: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("period", period, formatter);
+ formatter->flush(cout);
+ }
+ return 0;
+ case OPT_ROLE_CREATE:
+   {
+     // Create a role; a role name and an assume-role policy document are
+     // both required. Validation failures return a positive EINVAL, matching
+     // the -ret (ret < 0) returns used for operation failures.
+     if (role_name.empty()) {
+       cerr << "ERROR: role name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (assume_role_doc.empty()) {
+       cerr << "ERROR: assume role policy document is empty" << std::endl;
+       return EINVAL;
+     }
+     // The Policy object is constructed only to validate the document; it is
+     // discarded at the end of the try block.
+     bufferlist bl = bufferlist::static_from_string(assume_role_doc);
+     try {
+       const rgw::IAM::Policy p(g_ceph_context, tenant, bl);
+     } catch (rgw::IAM::PolicyParseException& e) {
+       cerr << "failed to parse policy: " << e.what() << std::endl;
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, path, assume_role_doc, tenant);
+     ret = role.create(true);
+     if (ret < 0) {
+       return -ret;
+     }
+     show_role_info(role, formatter);
+     return 0;
+   }
+ case OPT_ROLE_DELETE:
+   {
+     // Delete the named role.
+     if (role_name.empty()) {
+       cerr << "ERROR: empty role name" << std::endl;
+       // positive errno, consistent with the -ret (ret < 0) return below
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.delete_obj();
+     if (ret < 0) {
+       return -ret;
+     }
+     cout << "role: " << role_name << " successfully deleted" << std::endl;
+     return 0;
+   }
+ case OPT_ROLE_GET:
+   {
+     // Load the named role and print it through the formatter.
+     if (role_name.empty()) {
+       cerr << "ERROR: empty role name" << std::endl;
+       // positive errno, consistent with the -ret (ret < 0) return below
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     show_role_info(role, formatter);
+     return 0;
+   }
+ case OPT_ROLE_MODIFY:
+   {
+     // Replace the trust (assume-role) policy of an existing role.
+     // Validation failures return a positive EINVAL, matching the
+     // -ret (ret < 0) returns used for operation failures.
+     if (role_name.empty()) {
+       cerr << "ERROR: role name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (assume_role_doc.empty()) {
+       cerr << "ERROR: assume role policy document is empty" << std::endl;
+       return EINVAL;
+     }
+
+     // Constructed only to validate the document before touching the role.
+     bufferlist bl = bufferlist::static_from_string(assume_role_doc);
+     try {
+       const rgw::IAM::Policy p(g_ceph_context, tenant, bl);
+     } catch (rgw::IAM::PolicyParseException& e) {
+       cerr << "failed to parse policy: " << e.what() << std::endl;
+       return EINVAL;
+     }
+
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     role.update_trust_policy(assume_role_doc);
+     ret = role.update();
+     if (ret < 0) {
+       return -ret;
+     }
+     cout << "Assume role policy document updated successfully for role: " << role_name << std::endl;
+     return 0;
+   }
+ case OPT_ROLE_LIST:
+   {
+     // Collect the roles matching path_prefix for this tenant and print them.
+     vector<RGWRole> roles;
+     ret = RGWRole::get_roles_by_path_prefix(store, g_ceph_context, path_prefix, tenant, roles);
+     if (ret >= 0) {
+       show_roles_info(roles, formatter);
+       return 0;
+     }
+     return -ret;
+   }
+ case OPT_ROLE_POLICY_PUT:
+   {
+     // Attach (or replace) a named permission policy on an existing role.
+     // Validation failures return a positive EINVAL, matching the
+     // -ret (ret < 0) returns used for operation failures.
+     if (role_name.empty()) {
+       cerr << "role name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (policy_name.empty()) {
+       cerr << "policy name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (perm_policy_doc.empty()) {
+       cerr << "permission policy document is empty" << std::endl;
+       return EINVAL;
+     }
+
+     // Constructed only to validate the document before touching the role.
+     bufferlist bl = bufferlist::static_from_string(perm_policy_doc);
+     try {
+       const rgw::IAM::Policy p(g_ceph_context, tenant, bl);
+     } catch (rgw::IAM::PolicyParseException& e) {
+       cerr << "failed to parse perm policy: " << e.what() << std::endl;
+       return EINVAL;
+     }
+
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     role.set_perm_policy(policy_name, perm_policy_doc);
+     ret = role.update();
+     if (ret < 0) {
+       return -ret;
+     }
+     cout << "Permission policy attached successfully" << std::endl;
+     return 0;
+   }
+ case OPT_ROLE_POLICY_LIST:
+   {
+     // Print the names of the permission policies attached to a role.
+     if (role_name.empty()) {
+       cerr << "ERROR: Role name is empty" << std::endl;
+       // positive errno, consistent with the -ret (ret < 0) return below
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     std::vector<string> policy_names = role.get_role_policy_names();
+     show_policy_names(policy_names, formatter);
+     return 0;
+   }
+ case OPT_ROLE_POLICY_GET:
+   {
+     // Print one named permission policy of a role.
+     // Validation failures return a positive EINVAL, matching the
+     // -ret (ret < 0) returns used for operation failures.
+     if (role_name.empty()) {
+       cerr << "ERROR: role name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (policy_name.empty()) {
+       cerr << "ERROR: policy name is empty" << std::endl;
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     // NOTE: this declares a case-local ret that shadows the outer one.
+     int ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     string perm_policy;
+     ret = role.get_role_policy(policy_name, perm_policy);
+     if (ret < 0) {
+       return -ret;
+     }
+     show_perm_policy(perm_policy, formatter);
+     return 0;
+   }
+ case OPT_ROLE_POLICY_DELETE:
+   {
+     // Remove one named permission policy from a role and persist the role.
+     // Validation failures return a positive EINVAL, matching the
+     // -ret (ret < 0) returns used for operation failures.
+     if (role_name.empty()) {
+       cerr << "ERROR: role name is empty" << std::endl;
+       return EINVAL;
+     }
+
+     if (policy_name.empty()) {
+       cerr << "ERROR: policy name is empty" << std::endl;
+       return EINVAL;
+     }
+     RGWRole role(g_ceph_context, store, role_name, tenant);
+     ret = role.get();
+     if (ret < 0) {
+       return -ret;
+     }
+     ret = role.delete_policy(policy_name);
+     if (ret < 0) {
+       return -ret;
+     }
+     ret = role.update();
+     if (ret < 0) {
+       return -ret;
+     }
+     cout << "Policy: " << policy_name << " successfully deleted for role: "
+          << role_name << std::endl;
+     return 0;
+   }
+ default:
+ output_user_info = false;
+ }
+
+ // output the result of a user operation
+ if (output_user_info) {
+ ret = user.info(info, &err_msg);
+ if (ret < 0) {
+ cerr << "could not fetch user info: " << err_msg << std::endl;
+ return -ret;
+ }
+ show_user_info(info, formatter);
+ }
+
+ if (opt_cmd == OPT_POLICY) {
+   // "xml" output goes through dump_s3_policy(); every other format goes
+   // through get_policy(). Both report failure the same way.
+   int policy_rc;
+   if (format == "xml") {
+     policy_rc = RGWBucketAdminOp::dump_s3_policy(store, bucket_op, cout);
+   } else {
+     policy_rc = RGWBucketAdminOp::get_policy(store, bucket_op, f);
+   }
+   if (policy_rc < 0) {
+     cerr << "ERROR: failed to get policy: " << cpp_strerror(-policy_rc) << std::endl;
+     return -policy_rc;
+   }
+ }
+
+ if (opt_cmd == OPT_BUCKET_LIMIT_CHECK) {
+   // Run the per-user bucket limit check, either for the single user given
+   // in user_id or for every user found under the "user" metadata section.
+   void *handle = nullptr;
+   std::list<std::string> user_ids;
+   metadata_key = "user";
+   int max = 1000;
+
+   // Initialised so the loop condition below never reads an indeterminate
+   // value, should list_keys_next() return -ENOENT without writing to it.
+   bool truncated = false;
+
+   if (! user_id.empty()) {
+     user_ids.push_back(user_id.id);
+     ret =
+       RGWBucketAdminOp::limit_check(store, bucket_op, user_ids, f,
+                                     warnings_only);
+   } else {
+     /* list users in groups of max-keys, then perform user-bucket
+      * limit-check on each group */
+     ret = store->meta_mgr->list_keys_init(metadata_key, &handle);
+     if (ret < 0) {
+       cerr << "ERROR: buckets limit check can't get user metadata_key: "
+            << cpp_strerror(-ret) << std::endl;
+       return -ret;
+     }
+
+     do {
+       ret = store->meta_mgr->list_keys_next(handle, max, user_ids,
+                                             &truncated);
+       if (ret < 0 && ret != -ENOENT) {
+         cerr << "ERROR: buckets limit check lists_keys_next(): "
+              << cpp_strerror(-ret) << std::endl;
+         break;
+       } else {
+         /* ok, do the limit checks for this group */
+         ret =
+           RGWBucketAdminOp::limit_check(store, bucket_op, user_ids, f,
+                                         warnings_only);
+         if (ret < 0)
+           break;
+       }
+       user_ids.clear();
+     } while (truncated);
+     // Always release the listing handle, including after a break on error.
+     store->meta_mgr->list_keys_complete(handle);
+   }
+   return -ret;
+ } /* OPT_BUCKET_LIMIT_CHECK */
+
+ if (opt_cmd == OPT_BUCKETS_LIST) {
+   // Without a bucket name: print bucket info via RGWBucketAdminOp::info().
+   // With a bucket name: list that bucket's index entries (all versions),
+   // paginated, up to max_entries.
+   if (bucket_name.empty()) {
+     if (!user_id.empty()) {
+       if (!user_op.has_existing_user()) {
+         cerr << "ERROR: could not find user: " << user_id << std::endl;
+         // positive errno, consistent with the -ret (ret < 0) returns below
+         return ENOENT;
+       }
+     }
+     // Report a failed listing instead of silently dropping the result.
+     int r = RGWBucketAdminOp::info(store, bucket_op, f);
+     if (r < 0) {
+       cerr << "ERROR: could not list buckets: " << cpp_strerror(-r) << std::endl;
+       return -r;
+     }
+   } else {
+     RGWBucketInfo bucket_info;
+     int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+     if (ret < 0) {
+       cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+       return -ret;
+     }
+     formatter->open_array_section("entries");
+
+     bool truncated = false;
+     int count = 0;
+
+     static constexpr int MAX_PAGINATE_SIZE = 10000;
+     static constexpr int DEFAULT_MAX_ENTRIES = 1000;
+
+     // A negative max_entries selects the default.
+     if (max_entries < 0) {
+       max_entries = DEFAULT_MAX_ENTRIES;
+     }
+     const int paginate_size = std::min(max_entries, MAX_PAGINATE_SIZE);
+
+     string prefix;
+     string delim;
+     vector<rgw_bucket_dir_entry> result;
+     map<string, bool> common_prefixes;
+     string ns;
+
+     RGWRados::Bucket target(store, bucket_info);
+     RGWRados::Bucket::List list_op(&target);
+
+     list_op.params.prefix = prefix;
+     list_op.params.delim = delim;
+     list_op.params.marker = rgw_obj_key(marker);
+     list_op.params.ns = ns;
+     list_op.params.enforce_ns = false;
+     list_op.params.list_versions = true;
+     list_op.params.allow_unordered = bool(allow_unordered);
+
+     do {
+       // Never request more than is still needed to reach max_entries.
+       const int remaining = max_entries - count;
+       ret = list_op.list_objects(std::min(remaining, paginate_size),
+                                  &result, &common_prefixes, &truncated);
+       if (ret < 0) {
+         cerr << "ERROR: store->list_objects(): " << cpp_strerror(-ret) << std::endl;
+         return -ret;
+       }
+
+       count += result.size();
+
+       for (const auto& entry : result) {
+         encode_json("entry", entry, formatter);
+       }
+       formatter->flush(cout);
+     } while (truncated && count < max_entries);
+
+     formatter->close_section();
+     formatter->flush(cout);
+   } /* have bucket_name */
+ } /* OPT_BUCKETS_LIST */
+
+ if (opt_cmd == OPT_BUCKET_RADOS_LIST) {
+ RGWRadosList lister(store,
+ max_concurrent_ios, orphan_stale_secs, tenant);
+ if (bucket_name.empty()) {
+ ret = lister.run();
+ } else {
+ ret = lister.run(bucket_name);
+ }
+
+ if (ret < 0) {
+ std::cerr <<
+ "ERROR: bucket radoslist failed to finish before " <<
+ "encountering error: " << cpp_strerror(-ret) << std::endl;
+ std::cerr << "************************************"
+ "************************************" << std::endl;
+ std::cerr << "WARNING: THE RESULTS ARE NOT RELIABLE AND SHOULD NOT " <<
+ "BE USED IN DELETING ORPHANS" << std::endl;
+ std::cerr << "************************************"
+ "************************************" << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_BUCKET_STATS) {
+   // Print bucket info including stats. When only a bucket id was given,
+   // resolve it to tenant/name first.
+   if (bucket_name.empty() && !bucket_id.empty()) {
+     rgw_bucket bucket;
+     if (!rgw_find_bucket_by_id(store->ctx(), store->meta_mgr, marker, bucket_id, &bucket)) {
+       cerr << "failure: no such bucket id" << std::endl;
+       // positive errno, consistent with the -r (r < 0) return below
+       return ENOENT;
+     }
+     bucket_op.set_tenant(bucket.tenant);
+     bucket_op.set_bucket_name(bucket.name);
+   }
+   bucket_op.set_fetch_stats(true);
+
+   int r = RGWBucketAdminOp::info(store, bucket_op, f);
+   if (r < 0) {
+     // NOTE(review): `err` is not written anywhere in this block and is not
+     // passed to info(); it presumably holds unrelated text from an outer
+     // scope. Confirm and consider dropping it from the message.
+     cerr << "failure: " << cpp_strerror(-r) << ": " << err << std::endl;
+     return -r;
+   }
+ }
+
+ if (opt_cmd == OPT_BUCKET_LINK) {
+ bucket_op.set_bucket_id(bucket_id);
+ string err;
+ int r = RGWBucketAdminOp::link(store, bucket_op, &err);
+ if (r < 0) {
+ cerr << "failure: " << cpp_strerror(-r) << ": " << err << std::endl;
+ return -r;
+ }
+ }
+
+ if (opt_cmd == OPT_BUCKET_UNLINK) {
+ int r = RGWBucketAdminOp::unlink(store, bucket_op);
+ if (r < 0) {
+ cerr << "failure: " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+ }
+
+ if (opt_cmd == OPT_LOG_LIST) {
+   // An optional date filter must be exactly 10 characters (YYYY-MM-DD).
+   if (!date.empty() && date.size() != 10) {
+     cerr << "bad date format for '" << date << "', expect YYYY-MM-DD" << std::endl;
+     return EINVAL;
+   }
+
+   formatter->reset();
+   formatter->open_array_section("logs");
+   RGWAccessHandle h;
+   const int init_rc = store->log_list_init(date, &h);
+   // -ENOENT from init means there are no logs: emit an empty array.
+   if (init_rc != -ENOENT) {
+     if (init_rc < 0) {
+       cerr << "log list: error " << init_rc << std::endl;
+       return -init_rc;
+     }
+     for (;;) {
+       string name;
+       const int next_rc = store->log_list_next(h, &name);
+       if (next_rc == -ENOENT) {
+         break;  // end of the listing
+       }
+       if (next_rc < 0) {
+         cerr << "log list: error " << next_rc << std::endl;
+         return -next_rc;
+       }
+       formatter->dump_string("object", name);
+     }
+   }
+   formatter->close_section();
+   formatter->flush(cout);
+   cout << std::endl;
+ }
+
+ if (opt_cmd == OPT_LOG_SHOW || opt_cmd == OPT_LOG_RM) {
+ if (object.empty() && (date.empty() || bucket_name.empty() || bucket_id.empty())) {
+ cerr << "specify an object or a date, bucket and bucket-id" << std::endl;
+ exit(1);
+ }
+
+ string oid;
+ if (!object.empty()) {
+ oid = object;
+ } else {
+ oid = date;
+ oid += "-";
+ oid += bucket_id;
+ oid += "-";
+ oid += bucket_name;
+ }
+
+ if (opt_cmd == OPT_LOG_SHOW) {
+ RGWAccessHandle h;
+
+ int r = store->log_show_init(oid, &h);
+ if (r < 0) {
+ cerr << "error opening log " << oid << ": " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+
+ formatter->reset();
+ formatter->open_object_section("log");
+
+ struct rgw_log_entry entry;
+
+ // peek at first entry to get bucket metadata
+ r = store->log_show_next(h, &entry);
+ if (r < 0) {
+ cerr << "error reading log " << oid << ": " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+ formatter->dump_string("bucket_id", entry.bucket_id);
+ formatter->dump_string("bucket_owner", entry.bucket_owner.to_str());
+ formatter->dump_string("bucket", entry.bucket);
+
+ uint64_t agg_time = 0;
+ uint64_t agg_bytes_sent = 0;
+ uint64_t agg_bytes_received = 0;
+ uint64_t total_entries = 0;
+
+ if (show_log_entries)
+ formatter->open_array_section("log_entries");
+
+ do {
+ using namespace std::chrono;
+ uint64_t total_time = duration_cast<milliseconds>(entry.total_time).count();
+
+ agg_time += total_time;
+ agg_bytes_sent += entry.bytes_sent;
+ agg_bytes_received += entry.bytes_received;
+ total_entries++;
+
+ if (skip_zero_entries && entry.bytes_sent == 0 &&
+ entry.bytes_received == 0)
+ goto next;
+
+ if (show_log_entries) {
+
+ rgw_format_ops_log_entry(entry, formatter);
+ formatter->flush(cout);
+ }
+next:
+ r = store->log_show_next(h, &entry);
+ } while (r > 0);
+
+ if (r < 0) {
+ cerr << "error reading log " << oid << ": " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+ if (show_log_entries)
+ formatter->close_section();
+
+ if (show_log_sum) {
+ formatter->open_object_section("log_sum");
+ formatter->dump_int("bytes_sent", agg_bytes_sent);
+ formatter->dump_int("bytes_received", agg_bytes_received);
+ formatter->dump_int("total_time", agg_time);
+ formatter->dump_int("total_entries", total_entries);
+ formatter->close_section();
+ }
+ formatter->close_section();
+ formatter->flush(cout);
+ cout << std::endl;
+ }
+ if (opt_cmd == OPT_LOG_RM) {
+ int r = store->log_remove(oid);
+ if (r < 0) {
+ cerr << "error removing log " << oid << ": " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+ }
+ }
+
+ if (opt_cmd == OPT_POOL_ADD) {
+   // Add `pool` to the zone's bucket placement set.
+   if (pool_name.empty()) {
+     cerr << "need to specify pool to add!" << std::endl;
+     exit(1);
+   }
+
+   int ret = store->svc.zone->add_bucket_placement(pool);
+   if (ret < 0) {
+     cerr << "failed to add bucket placement: " << cpp_strerror(-ret) << std::endl;
+     // Propagate the failure like every other command here, instead of
+     // falling through to the remaining command checks.
+     return -ret;
+   }
+ }
+
+ if (opt_cmd == OPT_POOL_RM) {
+   // Remove `pool` from the zone's bucket placement set.
+   if (pool_name.empty()) {
+     cerr << "need to specify pool to remove!" << std::endl;
+     exit(1);
+   }
+
+   int ret = store->svc.zone->remove_bucket_placement(pool);
+   if (ret < 0) {
+     cerr << "failed to remove bucket placement: " << cpp_strerror(-ret) << std::endl;
+     // Propagate the failure like every other command here, instead of
+     // falling through to the remaining command checks.
+     return -ret;
+   }
+ }
+
+ if (opt_cmd == OPT_POOLS_LIST) {
+   // Fetch the placement set and print it as an array of {"name": ...}.
+   set<rgw_pool> pools;
+   int ret = store->svc.zone->list_placement_set(pools);
+   if (ret < 0) {
+     cerr << "could not list placement set: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+   formatter->reset();
+   formatter->open_array_section("pools");
+   for (const auto& placement_pool : pools) {
+     formatter->open_object_section("pool");
+     formatter->dump_string("name", placement_pool.to_str());
+     formatter->close_section();
+   }
+   formatter->close_section();
+   formatter->flush(cout);
+   cout << std::endl;
+ }
+
+ if (opt_cmd == OPT_USAGE_SHOW) {
+   // Default window: from epoch 0 to the largest uint64_t value; either
+   // bound is overridden when the matching date string is non-empty.
+   uint64_t start_epoch = 0;
+   uint64_t end_epoch = (uint64_t)-1;
+
+   if (!start_date.empty() &&
+       utime_t::parse_date(start_date, &start_epoch, NULL) < 0) {
+     cerr << "ERROR: failed to parse start date" << std::endl;
+     return 1;
+   }
+   if (!end_date.empty() &&
+       utime_t::parse_date(end_date, &end_epoch, NULL) < 0) {
+     cerr << "ERROR: failed to parse end date" << std::endl;
+     return 1;
+   }
+
+   const int show_rc = RGWUsage::show(store, user_id, bucket_name,
+                                      start_epoch, end_epoch,
+                                      show_log_entries, show_log_sum,
+                                      &categories, f);
+   if (show_rc < 0) {
+     cerr << "ERROR: failed to show usage" << std::endl;
+     return 1;
+   }
+ }
+
+ if (opt_cmd == OPT_USAGE_TRIM) {
+   // Trim usage records. With no user, bucket or date filter at all, the
+   // trim is unrestricted and therefore needs explicit confirmation.
+   if (user_id.empty() && bucket_name.empty() &&
+       start_date.empty() && end_date.empty() && !yes_i_really_mean_it) {
+     cerr << "usage trim without user/date/bucket specified will remove *all* users data" << std::endl;
+     cerr << "do you really mean it? (requires --yes-i-really-mean-it)" << std::endl;
+     return 1;
+   }
+   int ret;
+   // Default window: from epoch 0 to the largest uint64_t value.
+   uint64_t start_epoch = 0;
+   uint64_t end_epoch = (uint64_t)-1;
+
+   if (!start_date.empty()) {
+     ret = utime_t::parse_date(start_date, &start_epoch, nullptr);
+     if (ret < 0) {
+       cerr << "ERROR: failed to parse start date" << std::endl;
+       return 1;
+     }
+   }
+
+   if (!end_date.empty()) {
+     ret = utime_t::parse_date(end_date, &end_epoch, nullptr);
+     if (ret < 0) {
+       cerr << "ERROR: failed to parse end date" << std::endl;
+       return 1;
+     }
+   }
+
+   ret = RGWUsage::trim(store, user_id, bucket_name, start_epoch, end_epoch);
+   if (ret < 0) {
+     // The failing call is RGWUsage::trim(); the old text named read_usage().
+     cerr << "ERROR: failed to trim usage: " << cpp_strerror(-ret) << std::endl;
+     return 1;
+   }
+ }
+
+ if (opt_cmd == OPT_USAGE_CLEAR) {
+   // Clearing usage is all-or-nothing, so it always needs confirmation.
+   if (!yes_i_really_mean_it) {
+     cerr << "usage clear would remove *all* users usage data for all time" << std::endl;
+     cerr << "do you really mean it? (requires --yes-i-really-mean-it)" << std::endl;
+     return 1;
+   }
+
+   ret = RGWUsage::clear(store);
+   if (ret < 0) {
+     // Say what failed and return a positive errno like the other commands;
+     // the old code returned the negative value with no message.
+     cerr << "ERROR: failed to clear usage: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+ }
+
+
+ if (opt_cmd == OPT_OLH_GET || opt_cmd == OPT_OLH_READLOG) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ if (object.empty()) {
+ cerr << "ERROR: object not specified" << std::endl;
+ return EINVAL;
+ }
+ }
+
+ if (opt_cmd == OPT_OLH_GET) {
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ RGWOLHInfo olh;
+ rgw_obj obj(bucket, object);
+ ret = store->get_olh(bucket_info, obj, &olh);
+ if (ret < 0) {
+ cerr << "ERROR: failed reading olh: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("olh", olh, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_OLH_READLOG) {
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ map<uint64_t, vector<rgw_bucket_olh_log_entry> > log;
+ bool is_truncated;
+
+ RGWObjectCtx rctx(store);
+ rgw_obj obj(bucket, object);
+
+ RGWObjState *state;
+
+ ret = store->get_obj_state(&rctx, bucket_info, obj, &state, false); /* don't follow olh */
+ if (ret < 0) {
+ return -ret;
+ }
+
+ ret = store->bucket_index_read_olh_log(bucket_info, *state, obj, 0, &log, &is_truncated);
+ if (ret < 0) {
+ cerr << "ERROR: failed reading olh: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("result");
+ encode_json("is_truncated", is_truncated, formatter);
+ encode_json("log", log, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BI_GET) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name not specified" << std::endl;
+ return EINVAL;
+ }
+ if (object.empty()) {
+ cerr << "ERROR: object not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ rgw_obj obj(bucket, object);
+ if (!object_version.empty()) {
+ obj.key.set_instance(object_version);
+ }
+
+ rgw_cls_bi_entry entry;
+
+ ret = store->bi_get(bucket_info, obj, bi_index_type, &entry);
+ if (ret < 0) {
+ cerr << "ERROR: bi_get(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ encode_json("entry", entry, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BI_PUT) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ rgw_cls_bi_entry entry;
+ cls_rgw_obj_key key;
+ ret = read_decode_json(infile, entry, &key);
+ if (ret < 0) {
+ return 1;
+ }
+
+ rgw_obj obj(bucket, key);
+
+ ret = store->bi_put(bucket, obj, entry);
+ if (ret < 0) {
+ cerr << "ERROR: bi_put(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_BI_LIST) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ list<rgw_cls_bi_entry> entries;
+ bool is_truncated;
+ if (max_entries < 0) {
+ max_entries = 1000;
+ }
+
+ int max_shards = (bucket_info.num_shards > 0 ? bucket_info.num_shards : 1);
+
+ formatter->open_array_section("entries");
+
+ for (int i = 0; i < max_shards; i++) {
+ RGWRados::BucketShard bs(store);
+ int shard_id = (bucket_info.num_shards > 0 ? i : -1);
+ int ret = bs.init(bucket, shard_id, nullptr /* no RGWBucketInfo */);
+ marker.clear();
+
+ if (ret < 0) {
+ cerr << "ERROR: bs.init(bucket=" << bucket << ", shard=" << shard_id << "): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ do {
+ entries.clear();
+ ret = store->bi_list(bs, object, marker, max_entries, &entries, &is_truncated);
+ if (ret < 0) {
+ cerr << "ERROR: bi_list(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ list<rgw_cls_bi_entry>::iterator iter;
+ for (iter = entries.begin(); iter != entries.end(); ++iter) {
+ rgw_cls_bi_entry& entry = *iter;
+ encode_json("entry", entry, formatter);
+ marker = entry.idx;
+ }
+ formatter->flush(cout);
+ } while (is_truncated);
+ formatter->flush(cout);
+ }
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BI_PURGE) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWBucketInfo cur_bucket_info;
+ rgw_bucket cur_bucket;
+ ret = init_bucket(tenant, bucket_name, string(), cur_bucket_info, cur_bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init current bucket info for bucket_name=" << bucket_name << ": " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ if (cur_bucket_info.bucket.bucket_id == bucket_info.bucket.bucket_id && !yes_i_really_mean_it) {
+ cerr << "specified bucket instance points to a current bucket instance" << std::endl;
+ cerr << "do you really mean it? (requires --yes-i-really-mean-it)" << std::endl;
+ return EINVAL;
+ }
+
+ int max_shards = (bucket_info.num_shards > 0 ? bucket_info.num_shards : 1);
+
+ for (int i = 0; i < max_shards; i++) {
+ RGWRados::BucketShard bs(store);
+ int shard_id = (bucket_info.num_shards > 0 ? i : -1);
+ int ret = bs.init(bucket, shard_id, nullptr /* no RGWBucketInfo */);
+ if (ret < 0) {
+ cerr << "ERROR: bs.init(bucket=" << bucket << ", shard=" << shard_id << "): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ ret = store->bi_remove(bs);
+ if (ret < 0) {
+ cerr << "ERROR: failed to remove bucket index object: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ }
+
+ if (opt_cmd == OPT_OBJECT_PUT) {
+   // Write the contents read from `infile` as an object in the given bucket.
+   if (bucket_name.empty()) {
+     cerr << "ERROR: bucket not specified" << std::endl;
+     return EINVAL;
+   }
+   if (object.empty()) {
+     cerr << "ERROR: object not specified" << std::endl;
+     return EINVAL;
+   }
+
+   RGWDataAccess data_access(store);
+   rgw_obj_key key(object, object_version);
+
+   RGWDataAccess::BucketRef b;
+   RGWDataAccess::ObjectRef obj;
+
+   int ret = data_access.get_bucket(tenant, bucket_name, bucket_id, &b);
+   if (ret < 0) {
+     cerr << "ERROR: failed to init bucket: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   ret = b->get_object(key, &obj);
+   if (ret < 0) {
+     cerr << "ERROR: failed to get object: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   bufferlist bl;
+   ret = read_input(infile, bl);
+   if (ret < 0) {
+     cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl;
+     // Stop here: continuing would put whatever is in `bl` (possibly
+     // nothing) as the object's contents.
+     return -ret;
+   }
+
+   map<string, bufferlist> attrs;
+   ret = obj->put(bl, attrs);
+   if (ret < 0) {
+     cerr << "ERROR: put object returned error: " << cpp_strerror(-ret) << std::endl;
+     // Propagate the failure instead of falling through.
+     return -ret;
+   }
+ }
+
+ if (opt_cmd == OPT_OBJECT_RM) {
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ rgw_obj_key key(object, object_version);
+ ret = rgw_remove_object(store, bucket_info, bucket, key);
+
+ if (ret < 0) {
+ cerr << "ERROR: object remove returned: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_OBJECT_REWRITE) {
+   // Rewrite a single object. When min_rewrite_stripe_size is set, the
+   // stripe-size check decides whether the rewrite is needed at all.
+   if (bucket_name.empty()) {
+     cerr << "ERROR: bucket not specified" << std::endl;
+     return EINVAL;
+   }
+   if (object.empty()) {
+     cerr << "ERROR: object not specified" << std::endl;
+     return EINVAL;
+   }
+
+   RGWBucketInfo bucket_info;
+   int rc = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+   if (rc < 0) {
+     cerr << "ERROR: could not init bucket: " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+
+   rgw_obj obj(bucket, object);
+   obj.key.set_instance(object_version);
+
+   bool need_rewrite = true;
+   if (min_rewrite_stripe_size > 0) {
+     rc = check_min_obj_stripe_size(store, bucket_info, obj, min_rewrite_stripe_size, &need_rewrite);
+     if (rc < 0) {
+       // a failed check is only logged; need_rewrite keeps whatever value it has
+       ldout(store->ctx(), 0) << "WARNING: check_min_obj_stripe_size failed, r=" << rc << dendl;
+     }
+   }
+
+   if (!need_rewrite) {
+     ldout(store->ctx(), 20) << "skipped object" << dendl;
+   } else {
+     rc = store->rewrite_obj(bucket_info, obj);
+     if (rc < 0) {
+       cerr << "ERROR: object rewrite returned: " << cpp_strerror(-rc) << std::endl;
+       return -rc;
+     }
+   }
+ }
+
+ if (opt_cmd == OPT_OBJECTS_EXPIRE) { // run the store's object-expiry pass once
+ if (!store->process_expire_objects()) { // a false/zero result is treated as failure
+ cerr << "ERROR: process_expire_objects() processing returned error." << std::endl;
+ return 1; // generic failure exit status (no errno available here)
+ }
+ }
+
+ if (opt_cmd == OPT_OBJECTS_EXPIRE_STALE_LIST) { // "list" flavour of the stale-expiry fixup
+ ret = RGWBucketAdminOp::fix_obj_expiry(store, bucket_op, f, true); // last arg true: presumably list-only/dry-run — confirm against fix_obj_expiry()
+ if (ret < 0) {
+ cerr << "ERROR: listing returned " << cpp_strerror(-ret) << std::endl;
+ return -ret; // positive errno as exit status
+ }
+ }
+
+ if (opt_cmd == OPT_OBJECTS_EXPIRE_STALE_RM) { // "rm" flavour of the stale-expiry fixup
+ ret = RGWBucketAdminOp::fix_obj_expiry(store, bucket_op, f, false); // last arg false: presumably performs the removal — confirm against fix_obj_expiry()
+ if (ret < 0) {
+ cerr << "ERROR: removing returned " << cpp_strerror(-ret) << std::endl;
+ return -ret; // positive errno as exit status
+ }
+ }
+
+ if (opt_cmd == OPT_BUCKET_REWRITE) {
+   // Walk the bucket index and rewrite every object that passes the
+   // size / mtime / stripe-size filters. One JSON record is emitted per
+   // index entry with a "status" of "Skipped", "Success" or an error string.
+   if (bucket_name.empty()) {
+     cerr << "ERROR: bucket not specified" << std::endl;
+     return EINVAL;
+   }
+
+   RGWBucketInfo bucket_info;
+   int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+   if (ret < 0) {
+     cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   // Optional mtime window in epoch seconds; 0 means "no bound".
+   uint64_t start_epoch = 0;
+   uint64_t end_epoch = 0;
+
+   if (!end_date.empty()) {
+     ret = utime_t::parse_date(end_date, &end_epoch, NULL);
+     if (ret < 0) {
+       cerr << "ERROR: failed to parse end date" << std::endl;
+       return EINVAL;
+     }
+   }
+   if (!start_date.empty()) {
+     ret = utime_t::parse_date(start_date, &start_epoch, NULL);
+     if (ret < 0) {
+       cerr << "ERROR: failed to parse start date" << std::endl;
+       return EINVAL;
+     }
+   }
+
+   bool is_truncated = true;
+   int list_err = 0;  // first fatal listing error, reported after the output is closed
+
+   rgw_obj_index_key marker;
+   string prefix;
+
+   formatter->open_object_section("result");
+   formatter->dump_string("bucket", bucket_name);
+   formatter->open_array_section("objects");
+
+   constexpr uint32_t NUM_ENTRIES = 1000;
+   uint16_t expansion_factor = 1;
+   while (is_truncated) {
+     map<string, rgw_bucket_dir_entry> result;
+     int r =
+       store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker,
+                                      prefix, NUM_ENTRIES, true, expansion_factor,
+                                      result, &is_truncated, &marker,
+                                      bucket_object_check_filter);
+     if (r == -ENOENT) {
+       break;
+     }
+     if (r < 0) {
+       // Stop here: a failed listing may leave is_truncated and marker
+       // unchanged, so retrying the same call could loop forever.
+       cerr << "ERROR: failed operation r=" << r << std::endl;
+       list_err = r;
+       break;
+     }
+
+     // Adapt the listing expansion factor to how full the last batch was.
+     if (result.size() < NUM_ENTRIES / 8) {
+       ++expansion_factor;
+     } else if (result.size() > NUM_ENTRIES * 7 / 8 &&
+                expansion_factor > 1) {
+       --expansion_factor;
+     }
+
+     map<string, rgw_bucket_dir_entry>::iterator iter;
+     for (iter = result.begin(); iter != result.end(); ++iter) {
+       rgw_obj_key key = iter->second.key;
+       rgw_bucket_dir_entry& entry = iter->second;
+
+       formatter->open_object_section("object");
+       formatter->dump_string("name", key.name);
+       formatter->dump_string("instance", key.instance);
+       formatter->dump_int("size", entry.meta.size);
+       utime_t ut(entry.meta.mtime);
+       ut.gmtime(formatter->dump_stream("mtime"));
+
+       if ((entry.meta.size < min_rewrite_size) ||
+           (entry.meta.size > max_rewrite_size) ||
+           (start_epoch > 0 && start_epoch > (uint64_t)ut.sec()) ||
+           (end_epoch > 0 && end_epoch < (uint64_t)ut.sec())) {
+         formatter->dump_string("status", "Skipped");
+       } else {
+         rgw_obj obj(bucket, key);
+
+         bool need_rewrite = true;
+         if (min_rewrite_stripe_size > 0) {
+           r = check_min_obj_stripe_size(store, bucket_info, obj, min_rewrite_stripe_size, &need_rewrite);
+           if (r < 0) {
+             ldout(store->ctx(), 0) << "WARNING: check_min_obj_stripe_size failed, r=" << r << dendl;
+           }
+         }
+         if (!need_rewrite) {
+           formatter->dump_string("status", "Skipped");
+         } else {
+           // A per-object failure is reported in the record; the walk continues.
+           r = store->rewrite_obj(bucket_info, obj);
+           if (r == 0) {
+             formatter->dump_string("status", "Success");
+           } else {
+             formatter->dump_string("status", cpp_strerror(-r));
+           }
+         }
+       }
+       formatter->dump_int("flags", entry.flags);
+
+       formatter->close_section();
+       formatter->flush(cout);
+     }
+   }
+   formatter->close_section();
+   formatter->close_section();
+   formatter->flush(cout);
+
+   if (list_err < 0) {
+     return -list_err;
+   }
+ }
+
+ if (opt_cmd == OPT_BUCKET_RESHARD) { // reshard a bucket now: validate parameters, then run RGWBucketReshard::execute()
+ rgw_bucket bucket; // local; filled in by check_reshard_bucket_params()
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+
+ int ret = check_reshard_bucket_params(store,
+ bucket_name,
+ tenant,
+ bucket_id,
+ num_shards_specified,
+ num_shards,
+ yes_i_really_mean_it,
+ bucket,
+ bucket_info,
+ attrs);
+ if (ret < 0) {
+ return ret; // NOTE(review): returned as-is (negative), unlike the `return -ret` used by most arms — confirm intent
+ }
+
+ RGWBucketReshard br(store, bucket_info, attrs, nullptr /* no callback */);
+
+#define DEFAULT_RESHARD_MAX_ENTRIES 1000
+ if (max_entries < 1) { // zero or negative falls back to the default above
+ max_entries = DEFAULT_RESHARD_MAX_ENTRIES;
+ }
+
+ return br.execute(num_shards, max_entries, // execute()'s result becomes the return value
+ verbose, &cout, formatter);
+ }
+
+ if (opt_cmd == OPT_RESHARD_ADD) { // queue a reshard request: validate parameters, then add an entry via RGWReshard::add()
+ rgw_bucket bucket; // local; filled in by check_reshard_bucket_params()
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+
+ int ret = check_reshard_bucket_params(store,
+ bucket_name,
+ tenant,
+ bucket_id,
+ num_shards_specified,
+ num_shards,
+ yes_i_really_mean_it,
+ bucket,
+ bucket_info,
+ attrs);
+ if (ret < 0) {
+ return ret; // NOTE(review): returned as-is (negative), unlike the `return -ret` used by most arms — confirm intent
+ }
+
+ int num_source_shards = (bucket_info.num_shards > 0 ? bucket_info.num_shards : 1); // a non-positive shard count is recorded as 1
+
+ RGWReshard reshard(store);
+ cls_rgw_reshard_entry entry;
+ entry.time = real_clock::now();
+ entry.tenant = tenant;
+ entry.bucket_name = bucket_name;
+ entry.bucket_id = bucket_info.bucket.bucket_id; // id comes from the resolved bucket info, not from the command line
+ entry.old_num_shards = num_source_shards;
+ entry.new_num_shards = num_shards;
+
+ return reshard.add(entry); // add()'s result becomes the return value
+ }
+
+ if (opt_cmd == OPT_RESHARD_LIST) {
+   // Dump the queued reshard entries of every reshard log shard as a JSON
+   // array, stopping once max_entries entries have been emitted.
+   list<cls_rgw_reshard_entry> entries;
+   int emitted = 0;
+   if (max_entries < 0) {
+     max_entries = 1000;
+   }
+
+   int num_logshards =
+     store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_num_logs");
+
+   RGWReshard reshard(store);
+
+   formatter->open_array_section("reshard");
+   for (int shard = 0; shard < num_logshards; shard++) {
+     bool is_truncated = true;
+     string marker;
+     do {
+       entries.clear();
+       int ret = reshard.list(shard, marker, max_entries, entries, &is_truncated);
+       if (ret < 0) {
+         cerr << "Error listing resharding buckets: " << cpp_strerror(-ret) << std::endl;
+         return ret;
+       }
+       for (auto& entry : entries) {
+         encode_json("entry", entry, formatter);
+         // the last entry's key becomes the marker for the next page
+         entry.get_key(&marker);
+       }
+       emitted += entries.size();
+       formatter->flush(cout);
+     } while (is_truncated && emitted < max_entries);
+
+     if (emitted >= max_entries) {
+       break;
+     }
+   }
+
+   formatter->close_section();
+   formatter->flush(cout);
+   return 0;
+ }
+
+ if (opt_cmd == OPT_RESHARD_STATUS) { // print the resharding status of one bucket
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+
+ rgw_bucket bucket; // local bucket handle for this arm
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+ ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket, &attrs); // also fetches the bucket attrs
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWBucketReshard br(store, bucket_info, attrs, nullptr /* no callback */);
+ list<cls_rgw_bucket_instance_entry> status;
+ int r = br.get_status(&status);
+ if (r < 0) {
+ cerr << "ERROR: could not get resharding status for bucket " <<
+ bucket_name << std::endl;
+ return -r;
+ }
+
+ show_reshard_status(status, formatter); // status entries are rendered through the formatter
+ }
+
+ if (opt_cmd == OPT_RESHARD_PROCESS) {
+   // Process every reshard log shard, reporting to stdout.
+   RGWReshard reshard(store, true, &cout);
+
+   const int rc = reshard.process_all_logshards();
+   if (rc < 0) {
+     cerr << "ERROR: failed to process reshard logs, error=" << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+ }
+
+ if (opt_cmd == OPT_RESHARD_CANCEL) { // cancel a bucket's resharding and drop its entry from the reshard log
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+
+ rgw_bucket bucket;
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+ bool bucket_initable = true; // cleared when init_bucket() fails but the user forces the cancel
+ ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket,
+ &attrs);
+ if (ret < 0) {
+ if (yes_i_really_mean_it) { // forced: skip the per-bucket cancel, still remove the log entry below
+ bucket_initable = false;
+ } else {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) <<
+ "; if you want to cancel the reshard request nonetheless, please "
+ "use the --yes-i-really-mean-it option" << std::endl;
+ return -ret;
+ }
+ }
+
+ if (bucket_initable) {
+ // we did not encounter an error, so let's work with the bucket
+ RGWBucketReshard br(store, bucket_info, attrs,
+ nullptr /* no callback */);
+ int ret = br.cancel(); // shadows the outer ret within this scope
+ if (ret < 0) {
+ if (ret == -EBUSY) {
+ cerr << "There is ongoing resharding, please retry after " <<
+ store->ctx()->_conf.get_val<uint64_t>(
+ "rgw_reshard_bucket_lock_duration") <<
+ " seconds " << std::endl;
+ } else {
+ cerr << "Error canceling bucket " << bucket_name <<
+ " resharding: " << cpp_strerror(-ret) << std::endl;
+ }
+ return ret; // NOTE(review): negative value returned here, while the init failure above returns -ret — confirm intent
+ }
+ }
+
+ RGWReshard reshard(store);
+
+ cls_rgw_reshard_entry entry; // identified by tenant + bucket name only; bucket_id is left unset
+ entry.tenant = tenant;
+ entry.bucket_name = bucket_name;
+ //entry.bucket_id = bucket_id;
+
+ ret = reshard.remove(entry);
+ if (ret < 0 && ret != -ENOENT) { // a missing log entry is not treated as an error
+ cerr << "Error in updating reshard log with bucket " <<
+ bucket_name << ": " << cpp_strerror(-ret) << std::endl;
+ return ret; // NOTE(review): negative value returned — see note above
+ }
+ } // OPT_RESHARD_CANCEL
+
+ if (opt_cmd == OPT_OBJECT_UNLINK) {
+   // Remove a single object's entry from the bucket index.
+   RGWBucketInfo bucket_info;
+   int rc = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+   if (rc < 0) {
+     cerr << "ERROR: could not init bucket: " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+
+   // Build a one-element list holding the index key of the target object.
+   rgw_obj_key key(object, object_version);
+   rgw_obj_index_key index_key;
+   key.get_index_key(&index_key);
+   list<rgw_obj_index_key> oid_list;
+   oid_list.push_back(index_key);
+
+   rc = store->remove_objs_from_index(bucket_info, oid_list);
+   if (rc < 0) {
+     cerr << "ERROR: remove_obj_from_index() returned error: " << cpp_strerror(-rc) << std::endl;
+     return 1;
+   }
+ }
+
+ if (opt_cmd == OPT_OBJECT_STAT) { // dump an object's size and decoded attributes as "object_metadata"
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ rgw_obj obj(bucket, object);
+ obj.key.set_instance(object_version);
+
+ uint64_t obj_size;
+ map<string, bufferlist> attrs;
+ RGWObjectCtx obj_ctx(store);
+ RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ read_op.params.attrs = &attrs; // output slots handed to the read op
+ read_op.params.obj_size = &obj_size;
+
+ ret = read_op.prepare(); // obj_size and attrs are only used after this succeeds
+ if (ret < 0) {
+ cerr << "ERROR: failed to stat object, returned error: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+ formatter->open_object_section("object_metadata");
+ formatter->dump_string("name", object);
+ formatter->dump_unsigned("size", obj_size);
+
+ map<string, bufferlist>::iterator iter;
+ map<string, bufferlist> other_attrs; // attrs with no dedicated dump below, or whose dump reported false
+ for (iter = attrs.begin(); iter != attrs.end(); ++iter) {
+ bufferlist& bl = iter->second;
+ bool handled = false;
+ if (iter->first == RGW_ATTR_MANIFEST) {
+ handled = decode_dump<RGWObjManifest>("manifest", bl, formatter);
+ } else if (iter->first == RGW_ATTR_ACL) {
+ handled = decode_dump<RGWAccessControlPolicy>("policy", bl, formatter);
+ } else if (iter->first == RGW_ATTR_ID_TAG) {
+ handled = dump_string("tag", bl, formatter);
+ } else if (iter->first == RGW_ATTR_ETAG) {
+ handled = dump_string("etag", bl, formatter);
+ } else if (iter->first == RGW_ATTR_COMPRESSION) {
+ handled = decode_dump<RGWCompressionInfo>("compression", bl, formatter);
+ } else if (iter->first == RGW_ATTR_DELETE_AT) {
+ handled = decode_dump<utime_t>("delete_at", bl, formatter);
+ }
+
+ if (!handled)
+ other_attrs[iter->first] = bl;
+ }
+
+ formatter->open_object_section("attrs"); // remaining attrs are dumped as strings under "attrs"
+ for (iter = other_attrs.begin(); iter != other_attrs.end(); ++iter) {
+ dump_string(iter->first.c_str(), iter->second, formatter);
+ }
+ formatter->close_section(); // attrs
+ formatter->close_section(); // object_metadata
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BUCKET_CHECK) { // either check head-object locators or check the bucket index
+ if (check_head_obj_locator) {
+ if (bucket_name.empty()) { // only the locator check enforces a bucket name here
+ cerr << "ERROR: need to specify bucket name" << std::endl;
+ return EINVAL;
+ }
+ do_check_object_locator(tenant, bucket_name, fix, remove_bad, formatter); // NOTE(review): any result of this call is discarded — confirm failures should not affect the exit status
+ } else {
+ RGWBucketAdminOp::check_index(store, bucket_op, f); // NOTE(review): result discarded as well
+ }
+ }
+
+ if (opt_cmd == OPT_BUCKET_RM) { // remove a bucket; the two branches differ only in the last remove_bucket() argument
+ if (!inconsistent_index) {
+ RGWBucketAdminOp::remove_bucket(store, bucket_op, bypass_gc, true); // NOTE(review): any result of this call is discarded — confirm failures should not affect the exit status
+ } else {
+ if (!yes_i_really_mean_it) { // the inconsistent-index path requires explicit confirmation
+ cerr << "using --inconsistent_index can corrupt the bucket index " << std::endl
+ << "do you really mean it? (requires --yes-i-really-mean-it)" << std::endl;
+ return 1;
+ }
+ RGWBucketAdminOp::remove_bucket(store, bucket_op, bypass_gc, false); // NOTE(review): result discarded as well
+ }
+ }
+
+ if (opt_cmd == OPT_GC_LIST) {
+   // Page through the garbage-collection queue (1000 entries per call) and
+   // dump every chain with its tag, time and member objects.
+   int index = 0;
+   bool truncated;
+   formatter->open_array_section("entries");
+
+   do {
+     list<cls_rgw_gc_obj_info> result;
+     int rc = store->list_gc_objs(&index, marker, 1000, !include_all, result, &truncated);
+     if (rc < 0) {
+       cerr << "ERROR: failed to list objs: " << cpp_strerror(-rc) << std::endl;
+       return 1;
+     }
+
+     for (auto& info : result) {
+       formatter->open_object_section("chain_info");
+       formatter->dump_string("tag", info.tag);
+       formatter->dump_stream("time") << info.time;
+       formatter->open_array_section("objs");
+       cls_rgw_obj_chain& chain = info.chain;
+       for (auto& obj : chain.objs) {
+         encode_json("obj", obj, formatter);
+       }
+       formatter->close_section(); // objs
+       formatter->close_section(); // chain_info
+       formatter->flush(cout);
+     }
+   } while (truncated);
+   formatter->close_section();
+   formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_GC_PROCESS) {
+   // Run one garbage-collection pass; the flag passed is the inverse of include_all.
+   const int rc = store->process_gc(!include_all);
+   if (rc < 0) {
+     cerr << "ERROR: gc processing returned error: " << cpp_strerror(-rc) << std::endl;
+     return 1;
+   }
+ }
+
+ if (opt_cmd == OPT_LC_LIST) { // dump per-bucket lifecycle progress as the "lifecycle_list" array
+ formatter->open_array_section("lifecycle_list");
+ map<string, int> bucket_lc_map; // bucket name -> numeric status used to index LC_STATUS
+ string marker; // local marker (shadows any outer one); advanced to the last bucket emitted
+#define MAX_LC_LIST_ENTRIES 100
+ if (max_entries < 0) { // only negative values get the default; 0 is passed through
+ max_entries = MAX_LC_LIST_ENTRIES;
+ }
+ do {
+ int ret = store->list_lc_progress(marker, max_entries, &bucket_lc_map);
+ if (ret < 0) {
+ cerr << "ERROR: failed to list objs: " << cpp_strerror(-ret) << std::endl;
+ return 1;
+ }
+ map<string, int>::iterator iter;
+ for (iter = bucket_lc_map.begin(); iter != bucket_lc_map.end(); ++iter) {
+ formatter->open_object_section("bucket_lc_info");
+ formatter->dump_string("bucket", iter->first);
+ string lc_status = LC_STATUS[iter->second]; // NOTE(review): index is not range-checked here — confirm stored values are always valid
+ formatter->dump_string("status", lc_status);
+ formatter->close_section(); // objs
+ formatter->flush(cout);
+ marker = iter->first;
+ }
+ } while (!bucket_lc_map.empty()); // NOTE(review): terminates only if list_lc_progress() leaves the map empty on the final page — confirm it resets the map
+
+ formatter->close_section(); //lifecycle list
+ formatter->flush(cout);
+ }
+
+
+ if (opt_cmd == OPT_LC_GET) { // decode and print the lifecycle configuration stored in a bucket's RGW_ATTR_LC attr
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+
+ rgw_bucket bucket;
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+ RGWLifecycleConfiguration config;
+ ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket, &attrs);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ auto aiter = attrs.find(RGW_ATTR_LC);
+ if (aiter == attrs.end()) { // bucket has no lifecycle attr
+ return -ENOENT; // NOTE(review): negative value and no message, unlike the other error paths in this arm — confirm intent
+ }
+
+ bufferlist::const_iterator iter{&aiter->second};
+ try {
+ config.decode(iter);
+ } catch (const buffer::error& e) {
+ cerr << "ERROR: decode life cycle config failed" << std::endl;
+ return -EIO; // NOTE(review): negative value — see note above
+ }
+
+ encode_json("result", config, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_LC_PROCESS) {
+   // Run one lifecycle-processing pass.
+   const int rc = store->process_lc();
+   if (rc < 0) {
+     cerr << "ERROR: lc processing returned error: " << cpp_strerror(-rc) << std::endl;
+     return 1;
+   }
+ }
+
+
+ if (opt_cmd == OPT_LC_RESHARD_FIX) {
+   // Run the lifecycle-shard fixup via RGWBucketAdminOp::fix_lc_shards().
+   ret = RGWBucketAdminOp::fix_lc_shards(store, bucket_op, f);
+   if (ret < 0) {
+     cerr << "ERROR: failed to fix lc shards: " << cpp_strerror(-ret) << std::endl;
+     // Report the failure through the exit status like the other commands.
+     return -ret;
+   }
+ }
+
+ if (opt_cmd == OPT_ORPHANS_FIND) { // initialise and run an orphan search job
+ if (!yes_i_really_mean_it) { // explicit confirmation is required before anything else happens
+ cerr << "accidental removal of active objects can not be reversed; "
+ << "do you really mean it? (requires --yes-i-really-mean-it)"
+ << std::endl;
+ return EINVAL;
+ }
+
+ RGWOrphanSearch search(store, max_concurrent_ios, orphan_stale_secs);
+
+ if (job_id.empty()) {
+ cerr << "ERROR: --job-id not specified" << std::endl;
+ return EINVAL;
+ }
+ if (pool_name.empty()) { // the check is on pool_name, while `pool` is what gets stored in info below
+ cerr << "ERROR: --pool not specified" << std::endl;
+ return EINVAL;
+ }
+
+ RGWOrphanSearchInfo info;
+
+ info.pool = pool;
+ info.job_name = job_id;
+ info.num_shards = num_shards;
+
+ int ret = search.init(job_id, &info, detail);
+ if (ret < 0) {
+ cerr << "could not init search, ret=" << ret << std::endl;
+ return -ret;
+ }
+ ret = search.run();
+ if (ret < 0) { // no message is printed here on failure
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_ORPHANS_FINISH) {
+   // Look up an existing orphan search job by id and finish it.
+   RGWOrphanSearch search(store, max_concurrent_ios, orphan_stale_secs);
+
+   if (job_id.empty()) {
+     cerr << "ERROR: --job-id not specified" << std::endl;
+     return EINVAL;
+   }
+
+   const int init_rc = search.init(job_id, NULL);
+   if (init_rc < 0) {
+     // only the "unknown job" case gets a message
+     if (init_rc == -ENOENT) {
+       cerr << "job not found" << std::endl;
+     }
+     return -init_rc;
+   }
+
+   const int finish_rc = search.finish();
+   if (finish_rc < 0) {
+     return -finish_rc;
+   }
+ }
+
+ if (opt_cmd == OPT_ORPHANS_LIST_JOBS) {
+   // List known orphan search jobs: only the job id by default, or the full
+   // search state when extra_info is set.
+   RGWOrphanStore orphan_store(store);
+   int rc = orphan_store.init();
+   if (rc < 0) {
+     cerr << "connection to cluster failed!" << std::endl;
+     return -rc;
+   }
+
+   map<string, RGWOrphanSearchState> jobs;
+   rc = orphan_store.list_jobs(jobs);
+   if (rc < 0) {
+     cerr << "job list failed" << std::endl;
+     return -rc;
+   }
+
+   formatter->open_array_section("entries");
+   for (const auto& job : jobs) {
+     if (extra_info) {
+       encode_json("orphan_search_state", job.second, formatter);
+     } else {
+       formatter->dump_string("job-id", job.first);
+     }
+   }
+   formatter->close_section();
+   formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_USER_CHECK) { // delegate entirely to check_bad_user_bucket_mapping()
+ check_bad_user_bucket_mapping(store, user_id, fix); // any result of the call is not inspected here
+ }
+
+ if (opt_cmd == OPT_USER_STATS) { // optionally reset or sync a user's stats, then print the user's stats header
+ if (user_id.empty()) {
+ cerr << "ERROR: uid not specified" << std::endl;
+ return EINVAL;
+ }
+
+ string user_str = user_id.to_str();
+ if (reset_stats) {
+ if (!bucket_name.empty()) {
+ cerr << "ERROR: --reset-stats does not work on buckets and "
+ "bucket specified" << std::endl;
+ return EINVAL;
+ }
+ if (sync_stats) { // the two options are mutually exclusive
+ cerr << "ERROR: sync-stats includes the reset-stats functionality, "
+ "so at most one of the two should be specified" << std::endl;
+ return EINVAL;
+ }
+ ret = store->cls_user_reset_stats(user_str);
+ if (ret < 0) {
+ cerr << "ERROR: could not reset user stats: " << cpp_strerror(-ret) <<
+ std::endl;
+ return -ret;
+ }
+ }
+
+ if (sync_stats) {
+ if (!bucket_name.empty()) { // with a bucket given, sync just that bucket
+ int ret = rgw_bucket_sync_user_stats(store, tenant, bucket_name);
+ if (ret < 0) {
+ cerr << "ERROR: could not sync bucket stats: " <<
+ cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ } else { // otherwise sync everything for the user
+ int ret = rgw_user_sync_all_stats(store, user_id);
+ if (ret < 0) {
+ cerr << "ERROR: could not sync user stats: " <<
+ cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+ }
+
+ cls_user_header header;
+ int ret = store->cls_user_get_header(user_str, &header); // declared here, after the outer ret was used above
+ if (ret < 0) {
+ if (ret == -ENOENT) { /* in case of ENOENT */
+ cerr << "User has not been initialized or user does not exist" << std::endl;
+ } else {
+ cerr << "ERROR: can't read user: " << cpp_strerror(ret) << std::endl; // NOTE(review): passes ret, not -ret as elsewhere — confirm cpp_strerror accepts a negative value
+ }
+ return -ret;
+ }
+
+ encode_json("header", header, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_METADATA_GET) {
+   // Fetch the metadata entry named by metadata_key into the formatter and print it.
+   const int rc = store->meta_mgr->get(metadata_key, formatter);
+   if (rc < 0) {
+     cerr << "ERROR: can't get key: " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+
+   formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_METADATA_PUT) {
+   // Read the payload from infile and store it under metadata_key,
+   // using the APPLY_ALWAYS sync type.
+   bufferlist payload;
+   const int read_rc = read_input(infile, payload);
+   if (read_rc < 0) {
+     cerr << "ERROR: failed to read input: " << cpp_strerror(-read_rc) << std::endl;
+     return -read_rc;
+   }
+
+   const int put_rc = store->meta_mgr->put(metadata_key, payload, RGWMetadataHandler::APPLY_ALWAYS);
+   if (put_rc < 0) {
+     cerr << "ERROR: can't put key: " << cpp_strerror(-put_rc) << std::endl;
+     return -put_rc;
+   }
+ }
+
+ if (opt_cmd == OPT_METADATA_RM) {
+   // Remove the metadata entry named by metadata_key.
+   const int rc = store->meta_mgr->remove(metadata_key);
+   if (rc < 0) {
+     cerr << "ERROR: can't remove key: " << cpp_strerror(-rc) << std::endl;
+     return -rc;
+   }
+ }
+
+ if (opt_cmd == OPT_METADATA_LIST || opt_cmd == OPT_USER_LIST) {
+   // List the metadata keys of the section named by metadata_key
+   // (forced to "user" for OPT_USER_LIST). Without --max-entries the keys are
+   // printed as a bare array; with it, the array is wrapped in a "result"
+   // object that also carries "truncated", "count" and, if truncated, "marker".
+   if (opt_cmd == OPT_USER_LIST) {
+     metadata_key = "user";
+   }
+   void *handle;
+   int max = 1000;
+   int ret = store->meta_mgr->list_keys_init(metadata_key, marker, &handle);
+   if (ret < 0) {
+     cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   // Initialised so that neither the loop condition nor the summary below can
+   // read an indeterminate value if list_keys_next() does not set it.
+   bool truncated = false;
+   uint64_t count = 0;
+
+   if (max_entries_specified) {
+     formatter->open_object_section("result");
+   }
+   formatter->open_array_section("keys");
+
+   uint64_t left;
+   do {
+     list<string> keys;
+     left = (max_entries_specified ? max_entries - count : max);
+     ret = store->meta_mgr->list_keys_next(handle, left, keys, &truncated);
+     if (ret == -ENOENT) {
+       // Nothing (more) to list: stop instead of retrying on a stale flag.
+       truncated = false;
+       break;
+     }
+     if (ret < 0) {
+       cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
+       // Release the listing handle before bailing out.
+       store->meta_mgr->list_keys_complete(handle);
+       return -ret;
+     }
+     for (const auto& key : keys) {
+       formatter->dump_string("key", key);
+       ++count;
+     }
+     formatter->flush(cout);
+   } while (truncated && left > 0);
+
+   formatter->close_section();
+
+   if (max_entries_specified) {
+     encode_json("truncated", truncated, formatter);
+     encode_json("count", count, formatter);
+     if (truncated) {
+       encode_json("marker", store->meta_mgr->get_marker(handle), formatter);
+     }
+     formatter->close_section();
+   }
+   formatter->flush(cout);
+
+   store->meta_mgr->list_keys_complete(handle);
+ }
+
+ if (opt_cmd == OPT_MDLOG_LIST) { // dump metadata log entries for one shard (if --shard-id given) or for all shards
+ utime_t start_time, end_time;
+
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ int i = (specified_shard_id ? shard_id : 0); // first shard to list
+
+ if (period_id.empty()) { // fall back to the current period when none was given
+ int ret = read_current_period_id(store, realm_id, realm_name, &period_id);
+ if (ret < 0) {
+ return -ret;
+ }
+ std::cerr << "No --period given, using current period="
+ << period_id << std::endl;
+ }
+ RGWMetadataLog *meta_log = store->meta_mgr->get_log(period_id); // NOTE(review): used below without a null check — confirm get_log() cannot return null
+
+ formatter->open_array_section("entries");
+ for (; i < g_ceph_context->_conf->rgw_md_log_max_shards; i++) {
+ void *handle;
+ list<cls_log_entry> entries;
+
+
+ meta_log->init_list_entries(i, start_time.to_real_time(), end_time.to_real_time(), marker, &handle);
+ bool truncated;
+ do {
+ int ret = meta_log->list_entries(handle, 1000, entries, NULL, &truncated); // up to 1000 entries per call
+ if (ret < 0) {
+ cerr << "ERROR: meta_log->list_entries(): " << cpp_strerror(-ret) << std::endl;
+ return -ret; // returns without complete_list_entries(handle)
+ }
+
+ for (list<cls_log_entry>::iterator iter = entries.begin(); iter != entries.end(); ++iter) {
+ cls_log_entry& entry = *iter;
+ store->meta_mgr->dump_log_entry(entry, formatter);
+ }
+ formatter->flush(cout);
+ } while (truncated);
+
+ meta_log->complete_list_entries(handle);
+
+ if (specified_shard_id) // an explicit shard means exactly one iteration
+ break;
+ }
+
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_MDLOG_STATUS) { // dump RGWMetadataLogInfo for one shard (if --shard-id given) or for all shards
+ int i = (specified_shard_id ? shard_id : 0); // first shard to report
+
+ if (period_id.empty()) { // fall back to the current period when none was given
+ int ret = read_current_period_id(store, realm_id, realm_name, &period_id);
+ if (ret < 0) {
+ return -ret;
+ }
+ std::cerr << "No --period given, using current period="
+ << period_id << std::endl;
+ }
+ RGWMetadataLog *meta_log = store->meta_mgr->get_log(period_id);
+
+ formatter->open_array_section("entries");
+
+ for (; i < g_ceph_context->_conf->rgw_md_log_max_shards; i++) {
+ RGWMetadataLogInfo info;
+ meta_log->get_info(i, &info); // any result of get_info() is not checked; info is dumped regardless
+
+ ::encode_json("info", info, formatter);
+
+ if (specified_shard_id) // an explicit shard means exactly one iteration
+ break;
+ }
+
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_MDLOG_AUTOTRIM) { // run the admin metadata-log trim coroutine to completion
+ // need a full history for purging old mdlog periods
+ store->meta_mgr->init_oldest_log_period();
+
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http(store->ctx(), crs.get_completion_mgr()); // HTTP manager handed to the trim coroutine below
+ int ret = http.start();
+ if (ret < 0) {
+ cerr << "failed to initialize http client with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+
+ auto num_shards = g_conf()->rgw_md_log_max_shards; // local; taken from config, not from the command line
+ ret = crs.run(create_admin_meta_log_trim_cr(dpp(), store, &http, num_shards));
+ if (ret < 0) {
+ cerr << "automated mdlog trim failed with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_MDLOG_TRIM) { // trim one metadata log shard of a given period by time range and markers
+ utime_t start_time, end_time;
+
+ if (!specified_shard_id) {
+ cerr << "ERROR: shard-id must be specified for trim operation" << std::endl;
+ return EINVAL;
+ }
+
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ if (period_id.empty()) { // unlike mdlog list/status, there is no fallback to the current period
+ std::cerr << "missing --period argument" << std::endl;
+ return EINVAL;
+ }
+ RGWMetadataLog *meta_log = store->meta_mgr->get_log(period_id);
+
+ // trim until -ENODATA
+ do {
+ ret = meta_log->trim(shard_id, start_time.to_real_time(),
+ end_time.to_real_time(), start_marker, end_marker);
+ } while (ret == 0); // repeat while trim() keeps reporting success
+ if (ret < 0 && ret != -ENODATA) { // -ENODATA is the normal end-of-trim signal
+ cerr << "ERROR: meta_log->trim(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_SYNC_STATUS) { // delegate entirely to sync_status()
+ sync_status(formatter); // output goes through the shared formatter
+ }
+
+ if (opt_cmd == OPT_METADATA_SYNC_STATUS) {
+   // Print the metadata sync status plus a "full_sync" summary with the
+   // total and completed entry counts accumulated over all shard markers.
+   RGWMetaSyncStatusManager sync(store, store->get_async_rados());
+
+   int ret = sync.init();
+   if (ret < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << ret << std::endl;
+     return -ret;
+   }
+
+   rgw_meta_sync_status sync_status;
+   ret = sync.read_sync_status(&sync_status);
+   if (ret < 0) {
+     cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl;
+     return -ret;
+   }
+
+   formatter->open_object_section("summary");
+   encode_json("sync_status", sync_status, formatter);
+
+   uint64_t full_total = 0;
+   uint64_t full_complete = 0;
+
+   // Iterate by const reference: the markers are only read, so there is no
+   // reason to copy each map element.
+   for (const auto& marker_iter : sync_status.sync_markers) {
+     full_total += marker_iter.second.total_entries;
+     if (marker_iter.second.state == rgw_meta_sync_marker::SyncState::FullSync) {
+       // still in full sync: only `pos` entries are done so far
+       full_complete += marker_iter.second.pos;
+     } else {
+       // past full sync: count the shard as fully complete
+       full_complete += marker_iter.second.total_entries;
+     }
+   }
+
+   formatter->open_object_section("full_sync");
+   encode_json("total", full_total, formatter);
+   encode_json("complete", full_complete, formatter);
+   formatter->close_section();
+   formatter->close_section();
+
+   formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_METADATA_SYNC_INIT) {
+   // Initialise the metadata sync manager, then its sync status.
+   RGWMetaSyncStatusManager sync(store, store->get_async_rados());
+
+   const int init_rc = sync.init();
+   if (init_rc < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << init_rc << std::endl;
+     return -init_rc;
+   }
+
+   const int status_rc = sync.init_sync_status();
+   if (status_rc < 0) {
+     cerr << "ERROR: sync.init_sync_status() returned ret=" << status_rc << std::endl;
+     return -status_rc;
+   }
+ }
+
+
+ if (opt_cmd == OPT_METADATA_SYNC_RUN) {
+   // Initialise the metadata sync manager and run it.
+   RGWMetaSyncStatusManager sync(store, store->get_async_rados());
+
+   const int init_rc = sync.init();
+   if (init_rc < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << init_rc << std::endl;
+     return -init_rc;
+   }
+
+   const int run_rc = sync.run();
+   if (run_rc < 0) {
+     cerr << "ERROR: sync.run() returned ret=" << run_rc << std::endl;
+     return -run_rc;
+   }
+ }
+
+ if (opt_cmd == OPT_DATA_SYNC_STATUS) {
+   // Print data sync status against source_zone: either the detailed state of
+   // one shard (when --shard-id is given) or the overall status plus a
+   // "full_sync" summary accumulated over all shard markers.
+   if (source_zone.empty()) {
+     cerr << "ERROR: source zone not specified" << std::endl;
+     return EINVAL;
+   }
+   RGWDataSyncStatusManager sync(store, store->get_async_rados(), source_zone, nullptr);
+
+   int ret = sync.init();
+   if (ret < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << ret << std::endl;
+     return -ret;
+   }
+
+   rgw_data_sync_status sync_status;
+   if (specified_shard_id) {
+     set<string> pending_buckets;
+     set<string> recovering_buckets;
+     rgw_data_sync_marker sync_marker;
+     // the last argument is the entry limit: --max-entries if given, else 20
+     ret = sync.read_shard_status(shard_id, pending_buckets, recovering_buckets, &sync_marker,
+                                  max_entries_specified ? max_entries : 20);
+     if (ret < 0 && ret != -ENOENT) {
+       cerr << "ERROR: sync.read_shard_status() returned ret=" << ret << std::endl;
+       return -ret;
+     }
+     formatter->open_object_section("summary");
+     encode_json("shard_id", shard_id, formatter);
+     encode_json("marker", sync_marker, formatter);
+     encode_json("pending_buckets", pending_buckets, formatter);
+     encode_json("recovering_buckets", recovering_buckets, formatter);
+     formatter->close_section();
+     formatter->flush(cout);
+   } else {
+     ret = sync.read_sync_status(&sync_status);
+     if (ret < 0 && ret != -ENOENT) {
+       cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl;
+       return -ret;
+     }
+
+     formatter->open_object_section("summary");
+     encode_json("sync_status", sync_status, formatter);
+
+     uint64_t full_total = 0;
+     uint64_t full_complete = 0;
+
+     // Iterate by const reference (read-only), and compare against the
+     // data-sync marker's own state enum rather than the metadata-sync one.
+     for (const auto& marker_iter : sync_status.sync_markers) {
+       full_total += marker_iter.second.total_entries;
+       if (marker_iter.second.state == rgw_data_sync_marker::SyncState::FullSync) {
+         // still in full sync: only `pos` entries are done so far
+         full_complete += marker_iter.second.pos;
+       } else {
+         // past full sync: count the shard as fully complete
+         full_complete += marker_iter.second.total_entries;
+       }
+     }
+
+     formatter->open_object_section("full_sync");
+     encode_json("total", full_total, formatter);
+     encode_json("complete", full_complete, formatter);
+     formatter->close_section();
+     formatter->close_section();
+
+     formatter->flush(cout);
+   }
+ }
+
+ if (opt_cmd == OPT_DATA_SYNC_INIT) {
+   // Initialise the data sync manager for source_zone, then its sync status.
+   if (source_zone.empty()) {
+     cerr << "ERROR: source zone not specified" << std::endl;
+     return EINVAL;
+   }
+
+   RGWDataSyncStatusManager sync(store, store->get_async_rados(), source_zone, nullptr);
+
+   const int init_rc = sync.init();
+   if (init_rc < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << init_rc << std::endl;
+     return -init_rc;
+   }
+
+   const int status_rc = sync.init_sync_status();
+   if (status_rc < 0) {
+     cerr << "ERROR: sync.init_sync_status() returned ret=" << status_rc << std::endl;
+     return -status_rc;
+   }
+ }
+
+ if (opt_cmd == OPT_DATA_SYNC_RUN) { // create the zone's sync module instance, then init and run data sync from source_zone
+ if (source_zone.empty()) {
+ cerr << "ERROR: source zone not specified" << std::endl;
+ return EINVAL;
+ }
+
+ RGWSyncModuleInstanceRef sync_module;
+ int ret = store->svc.sync_modules->get_manager()->create_instance(g_ceph_context, store->svc.zone->get_zone().tier_type, // module is selected by the zone's tier_type
+ store->svc.zone->get_zone_params().tier_config, &sync_module);
+ if (ret < 0) {
+ lderr(cct) << "ERROR: failed to init sync module instance, ret=" << ret << dendl; // logged via lderr, not cerr like the other errors here
+ return ret; // NOTE(review): negative value returned, unlike the `return -ret` below — confirm intent
+ }
+
+ RGWDataSyncStatusManager sync(store, store->get_async_rados(), source_zone, nullptr, sync_module);
+
+ ret = sync.init();
+ if (ret < 0) {
+ cerr << "ERROR: sync.init() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+
+ ret = sync.run();
+ if (ret < 0) {
+ cerr << "ERROR: sync.run() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_BUCKET_SYNC_INIT) {
+   // Initialise the bucket sync status for one bucket against source_zone.
+   if (source_zone.empty()) {
+     cerr << "ERROR: source zone not specified" << std::endl;
+     return EINVAL;
+   }
+   if (bucket_name.empty()) {
+     cerr << "ERROR: bucket not specified" << std::endl;
+     return EINVAL;
+   }
+
+   rgw_bucket bucket;
+   int rc = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
+   if (rc < 0) {
+     return -rc;
+   }
+
+   RGWBucketSyncStatusManager sync(store, source_zone, bucket);
+
+   rc = sync.init();
+   if (rc < 0) {
+     cerr << "ERROR: sync.init() returned ret=" << rc << std::endl;
+     return -rc;
+   }
+
+   rc = sync.init_sync_status();
+   if (rc < 0) {
+     cerr << "ERROR: sync.init_sync_status() returned ret=" << rc << std::endl;
+     return -rc;
+   }
+ }
+
+ if ((opt_cmd == OPT_BUCKET_SYNC_DISABLE) || (opt_cmd == OPT_BUCKET_SYNC_ENABLE)) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+
+ if (ret < 0) {
+ cerr << "could not init realm " << ": " << cpp_strerror(-ret) << std::endl;
+ return ret;
+ }
+ RGWPeriod period;
+ ret = period.init(g_ceph_context, store->svc.sysobj, realm_id, realm_name, true);
+ if (ret < 0) {
+ cerr << "failed to init period " << ": " << cpp_strerror(-ret) << std::endl;
+ return ret;
+ }
+
+ if (!store->svc.zone->is_meta_master()) {
+ cerr << "failed to update bucket sync: only allowed on meta master zone " << std::endl;
+ cerr << period.get_master_zone() << " | " << period.get_realm() << std::endl;
+ return EINVAL;
+ }
+
+ rgw_obj obj(bucket, object);
+ ret = set_bucket_sync_enabled(store, opt_cmd, tenant, bucket_name);
+ if (ret < 0)
+ return -ret;
+ }
+
+ if (opt_cmd == OPT_BUCKET_SYNC_STATUS) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ rgw_bucket bucket;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ return -ret;
+ }
+ bucket_sync_status(store, bucket_info, source_zone, std::cout);
+ }
+
+ if (opt_cmd == OPT_BUCKET_SYNC_MARKERS) {
+ if (source_zone.empty()) {
+ cerr << "ERROR: source zone not specified" << std::endl;
+ return EINVAL;
+ }
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ rgw_bucket bucket;
+ int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
+ if (ret < 0) {
+ return -ret;
+ }
+ RGWBucketSyncStatusManager sync(store, source_zone, bucket);
+
+ ret = sync.init();
+ if (ret < 0) {
+ cerr << "ERROR: sync.init() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+ ret = sync.read_sync_status();
+ if (ret < 0) {
+ cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+
+ map<int, rgw_bucket_shard_sync_info>& sync_status = sync.get_sync_status();
+
+ encode_json("sync_status", sync_status, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BUCKET_SYNC_RUN) {
+ if (source_zone.empty()) {
+ cerr << "ERROR: source zone not specified" << std::endl;
+ return EINVAL;
+ }
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ rgw_bucket bucket;
+ int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
+ if (ret < 0) {
+ return -ret;
+ }
+ RGWBucketSyncStatusManager sync(store, source_zone, bucket);
+
+ ret = sync.init();
+ if (ret < 0) {
+ cerr << "ERROR: sync.init() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+
+ ret = sync.run();
+ if (ret < 0) {
+ cerr << "ERROR: sync.run() returned ret=" << ret << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_BILOG_LIST) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_array_section("entries");
+ bool truncated;
+ int count = 0;
+ if (max_entries < 0)
+ max_entries = 1000;
+
+ do {
+ list<rgw_bi_log_entry> entries;
+ ret = store->list_bi_log_entries(bucket_info, shard_id, marker, max_entries - count, entries, &truncated);
+ if (ret < 0) {
+ cerr << "ERROR: list_bi_log_entries(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ count += entries.size();
+
+ for (list<rgw_bi_log_entry>::iterator iter = entries.begin(); iter != entries.end(); ++iter) {
+ rgw_bi_log_entry& entry = *iter;
+ encode_json("entry", entry, formatter);
+
+ marker = entry.id;
+ }
+ formatter->flush(cout);
+ } while (truncated && count < max_entries);
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_SYNC_ERROR_LIST) {
+ if (max_entries < 0) {
+ max_entries = 1000;
+ }
+
+ bool truncated;
+ utime_t start_time, end_time;
+
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ if (shard_id < 0) {
+ shard_id = 0;
+ }
+
+ formatter->open_array_section("entries");
+
+ for (; shard_id < ERROR_LOGGER_SHARDS; ++shard_id) {
+ formatter->open_object_section("shard");
+ encode_json("shard_id", shard_id, formatter);
+ formatter->open_array_section("entries");
+
+ int count = 0;
+ string oid = RGWSyncErrorLogger::get_shard_oid(RGW_SYNC_ERROR_LOG_SHARD_PREFIX, shard_id);
+
+ do {
+ list<cls_log_entry> entries;
+ ret = store->time_log_list(oid, start_time.to_real_time(), end_time.to_real_time(),
+ max_entries - count, entries, marker, &marker, &truncated);
+ if (ret == -ENOENT) {
+ break;
+ }
+ if (ret < 0) {
+ cerr << "ERROR: store->time_log_list(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ count += entries.size();
+
+ for (auto& cls_entry : entries) {
+ rgw_sync_error_info log_entry;
+
+ auto iter = cls_entry.data.cbegin();
+ try {
+ decode(log_entry, iter);
+ } catch (buffer::error& err) {
+ cerr << "ERROR: failed to decode log entry" << std::endl;
+ continue;
+ }
+ formatter->open_object_section("entry");
+ encode_json("id", cls_entry.id, formatter);
+ encode_json("section", cls_entry.section, formatter);
+ encode_json("name", cls_entry.name, formatter);
+ encode_json("timestamp", cls_entry.timestamp, formatter);
+ encode_json("info", log_entry, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+ } while (truncated && count < max_entries);
+
+ formatter->close_section();
+ formatter->close_section();
+
+ if (specified_shard_id) {
+ break;
+ }
+ }
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_SYNC_ERROR_TRIM) {
+ utime_t start_time, end_time;
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ if (shard_id < 0) {
+ shard_id = 0;
+ }
+
+ for (; shard_id < ERROR_LOGGER_SHARDS; ++shard_id) {
+ ret = trim_sync_error_log(shard_id, start_time.to_real_time(),
+ end_time.to_real_time(), start_marker,
+ end_marker, trim_delay_ms);
+ if (ret < 0) {
+ cerr << "ERROR: sync error trim: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ if (specified_shard_id) {
+ break;
+ }
+ }
+ }
+
+ if (opt_cmd == OPT_BILOG_TRIM) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ ret = store->trim_bi_log_entries(bucket_info, shard_id, start_marker, end_marker);
+ if (ret < 0) {
+ cerr << "ERROR: trim_bi_log_entries(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_BILOG_STATUS) {
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket not specified" << std::endl;
+ return EINVAL;
+ }
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ map<int, string> markers;
+ ret = store->get_bi_log_status(bucket_info, shard_id, markers);
+ if (ret < 0) {
+ cerr << "ERROR: get_bi_log_status(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("entries");
+ encode_json("markers", markers, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_BILOG_AUTOTRIM) {
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http(store->ctx(), crs.get_completion_mgr());
+ int ret = http.start();
+ if (ret < 0) {
+ cerr << "failed to initialize http client with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+
+ rgw::BucketTrimConfig config;
+ configure_bucket_trim(store->ctx(), config);
+
+ rgw::BucketTrimManager trim(store, config);
+ ret = trim.init();
+ if (ret < 0) {
+ cerr << "trim manager init failed with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+ ret = crs.run(trim.create_admin_bucket_trim_cr(&http));
+ if (ret < 0) {
+ cerr << "automated bilog trim failed with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_DATALOG_LIST) {
+ formatter->open_array_section("entries");
+ bool truncated;
+ int count = 0;
+ if (max_entries < 0)
+ max_entries = 1000;
+
+ utime_t start_time, end_time;
+
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ RGWDataChangesLog *log = store->data_log;
+ RGWDataChangesLog::LogMarker log_marker;
+
+ do {
+ list<rgw_data_change_log_entry> entries;
+ if (specified_shard_id) {
+ ret = log->list_entries(shard_id, start_time.to_real_time(), end_time.to_real_time(), max_entries - count, entries, marker, &marker, &truncated);
+ } else {
+ ret = log->list_entries(start_time.to_real_time(), end_time.to_real_time(), max_entries - count, entries, log_marker, &truncated);
+ }
+ if (ret < 0) {
+ cerr << "ERROR: list_bi_log_entries(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ count += entries.size();
+
+ for (list<rgw_data_change_log_entry>::iterator iter = entries.begin(); iter != entries.end(); ++iter) {
+ rgw_data_change_log_entry& entry = *iter;
+ if (!extra_info) {
+ encode_json("entry", entry.entry, formatter);
+ } else {
+ encode_json("entry", entry, formatter);
+ }
+ }
+ formatter->flush(cout);
+ } while (truncated && count < max_entries);
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_DATALOG_STATUS) {
+ RGWDataChangesLog *log = store->data_log;
+ int i = (specified_shard_id ? shard_id : 0);
+
+ formatter->open_array_section("entries");
+ for (; i < g_ceph_context->_conf->rgw_data_log_num_shards; i++) {
+ list<cls_log_entry> entries;
+
+ RGWDataChangesLogInfo info;
+ log->get_info(i, &info);
+
+ ::encode_json("info", info, formatter);
+
+ if (specified_shard_id)
+ break;
+ }
+
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_DATALOG_AUTOTRIM) {
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http(store->ctx(), crs.get_completion_mgr());
+ int ret = http.start();
+ if (ret < 0) {
+ cerr << "failed to initialize http client with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+
+ auto num_shards = g_conf()->rgw_data_log_num_shards;
+ std::vector<std::string> markers(num_shards);
+ ret = crs.run(create_admin_data_log_trim_cr(store, &http, num_shards, markers));
+ if (ret < 0) {
+ cerr << "automated datalog trim failed with " << cpp_strerror(ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_DATALOG_TRIM) {
+ utime_t start_time, end_time;
+
+ int ret = parse_date_str(start_date, start_time);
+ if (ret < 0)
+ return -ret;
+
+ ret = parse_date_str(end_date, end_time);
+ if (ret < 0)
+ return -ret;
+
+ if (!specified_shard_id) {
+ cerr << "ERROR: requires a --shard-id" << std::endl;
+ return EINVAL;
+ }
+
+ // loop until -ENODATA
+ do {
+ auto datalog = store->data_log;
+ ret = datalog->trim_entries(shard_id, start_time.to_real_time(),
+ end_time.to_real_time(),
+ start_marker, end_marker);
+ } while (ret == 0);
+
+ if (ret < 0 && ret != -ENODATA) {
+ cerr << "ERROR: trim_entries(): " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ bool quota_op = (opt_cmd == OPT_QUOTA_SET || opt_cmd == OPT_QUOTA_ENABLE || opt_cmd == OPT_QUOTA_DISABLE);
+
+ if (quota_op) {
+ if (bucket_name.empty() && user_id.empty()) {
+ cerr << "ERROR: bucket name or uid is required for quota operation" << std::endl;
+ return EINVAL;
+ }
+
+ if (!bucket_name.empty()) {
+ if (!quota_scope.empty() && quota_scope != "bucket") {
+ cerr << "ERROR: invalid quota scope specification." << std::endl;
+ return EINVAL;
+ }
+ set_bucket_quota(store, opt_cmd, tenant, bucket_name,
+ max_size, max_objects, have_max_size, have_max_objects);
+ } else if (!user_id.empty()) {
+ if (quota_scope == "bucket") {
+ return set_user_bucket_quota(opt_cmd, user, user_op, max_size, max_objects, have_max_size, have_max_objects);
+ } else if (quota_scope == "user") {
+ return set_user_quota(opt_cmd, user, user_op, max_size, max_objects, have_max_size, have_max_objects);
+ } else {
+ cerr << "ERROR: invalid quota scope specification. Please specify either --quota-scope=bucket, or --quota-scope=user" << std::endl;
+ return EINVAL;
+ }
+ }
+ }
+
+ if (opt_cmd == OPT_MFA_CREATE) {
+ rados::cls::otp::otp_info_t config;
+
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_serial.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-serial)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_seed.empty()) {
+ cerr << "ERROR: TOTP device seed was not provided (via --totp-seed)" << std::endl;
+ return EINVAL;
+ }
+
+
+ rados::cls::otp::SeedType seed_type;
+ if (totp_seed_type == "hex") {
+ seed_type = rados::cls::otp::OTP_SEED_HEX;
+ } else if (totp_seed_type == "base32") {
+ seed_type = rados::cls::otp::OTP_SEED_BASE32;
+ } else {
+ cerr << "ERROR: invalid seed type: " << totp_seed_type << std::endl;
+ return EINVAL;
+ }
+
+ config.id = totp_serial;
+ config.seed = totp_seed;
+ config.seed_type = seed_type;
+
+ if (totp_seconds > 0) {
+ config.step_size = totp_seconds;
+ }
+
+ if (totp_window > 0) {
+ config.window = totp_window;
+ }
+
+ real_time mtime = real_clock::now();
+ string oid = store->get_mfa_oid(user_id);
+
+ int ret = store->meta_mgr->mutate(rgw_otp_get_handler(), oid, mtime, &objv_tracker,
+ MDLOG_STATUS_WRITE, RGWMetadataHandler::APPLY_ALWAYS,
+ [&] {
+ return store->create_mfa(user_id, config, &objv_tracker, mtime);
+ });
+ if (ret < 0) {
+ cerr << "MFA creation failed, error: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWUserInfo& user_info = user_op.get_user_info();
+ user_info.mfa_ids.insert(totp_serial);
+ user_op.set_mfa_ids(user_info.mfa_ids);
+ string err;
+ ret = user.modify(user_op, &err);
+ if (ret < 0) {
+ cerr << "ERROR: failed storing user info, error: " << err << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_MFA_REMOVE) {
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_serial.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-serial)" << std::endl;
+ return EINVAL;
+ }
+
+ real_time mtime = real_clock::now();
+ string oid = store->get_mfa_oid(user_id);
+
+ int ret = store->meta_mgr->mutate(rgw_otp_get_handler(), oid, mtime, &objv_tracker,
+ MDLOG_STATUS_WRITE, RGWMetadataHandler::APPLY_ALWAYS,
+ [&] {
+ return store->remove_mfa(user_id, totp_serial, &objv_tracker, mtime);
+ });
+ if (ret < 0) {
+ cerr << "MFA removal failed, error: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ RGWUserInfo& user_info = user_op.get_user_info();
+ user_info.mfa_ids.erase(totp_serial);
+ user_op.set_mfa_ids(user_info.mfa_ids);
+ string err;
+ ret = user.modify(user_op, &err);
+ if (ret < 0) {
+ cerr << "ERROR: failed storing user info, error: " << err << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_MFA_GET) {
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_serial.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-serial)" << std::endl;
+ return EINVAL;
+ }
+
+ rados::cls::otp::otp_info_t result;
+ int ret = store->get_mfa(user_id, totp_serial, &result);
+ if (ret < 0) {
+ if (ret == -ENOENT || ret == -ENODATA) {
+ cerr << "MFA serial id not found" << std::endl;
+ } else {
+ cerr << "MFA retrieval failed, error: " << cpp_strerror(-ret) << std::endl;
+ }
+ return -ret;
+ }
+ formatter->open_object_section("result");
+ encode_json("entry", result, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_MFA_LIST) {
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ list<rados::cls::otp::otp_info_t> result;
+ int ret = store->list_mfa(user_id, &result);
+ if (ret < 0) {
+ cerr << "MFA listing failed, error: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ formatter->open_object_section("result");
+ encode_json("entries", result, formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_MFA_CHECK) {
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_serial.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-serial)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_pin.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-pin)" << std::endl;
+ return EINVAL;
+ }
+
+ list<rados::cls::otp::otp_info_t> result;
+ int ret = store->check_mfa(user_id, totp_serial, totp_pin.front());
+ if (ret < 0) {
+ cerr << "MFA check failed, error: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ cout << "ok" << std::endl;
+ }
+
+ if (opt_cmd == OPT_MFA_RESYNC) {
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_serial.empty()) {
+ cerr << "ERROR: TOTP device serial number was not provided (via --totp-serial)" << std::endl;
+ return EINVAL;
+ }
+
+ if (totp_pin.size() != 2) {
+ cerr << "ERROR: missing two --totp-pin params (--totp-pin=<first> --totp-pin=<second>)" << std::endl;
+ }
+
+ rados::cls::otp::otp_info_t config;
+ int ret = store->get_mfa(user_id, totp_serial, &config);
+ if (ret < 0) {
+ if (ret == -ENOENT || ret == -ENODATA) {
+ cerr << "MFA serial id not found" << std::endl;
+ } else {
+ cerr << "MFA retrieval failed, error: " << cpp_strerror(-ret) << std::endl;
+ }
+ return -ret;
+ }
+
+ ceph::real_time now;
+
+ ret = store->otp_get_current_time(user_id, &now);
+ if (ret < 0) {
+ cerr << "ERROR: failed to fetch current time from osd: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ time_t time_ofs;
+
+ ret = scan_totp(store->ctx(), now, config, totp_pin, &time_ofs);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ cerr << "failed to resync, TOTP values not found in range" << std::endl;
+ } else {
+ cerr << "ERROR: failed to scan for TOTP values: " << cpp_strerror(-ret) << std::endl;
+ }
+ return -ret;
+ }
+
+ config.time_ofs = time_ofs;
+
+ /* now update the backend */
+ real_time mtime = real_clock::now();
+ string oid = store->get_mfa_oid(user_id);
+
+ ret = store->meta_mgr->mutate(rgw_otp_get_handler(), oid, mtime, &objv_tracker,
+ MDLOG_STATUS_WRITE, RGWMetadataHandler::APPLY_ALWAYS,
+ [&] {
+ return store->create_mfa(user_id, config, &objv_tracker, mtime);
+ });
+ if (ret < 0) {
+ cerr << "MFA update failed, error: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ }
+
+ if (opt_cmd == OPT_RESHARD_STALE_INSTANCES_LIST) {
+ if (!store->svc.zone->can_reshard() && !yes_i_really_mean_it) {
+ cerr << "Resharding disabled in a multisite env, stale instances unlikely from resharding" << std::endl;
+ cerr << "These instances may not be safe to delete." << std::endl;
+ cerr << "Use --yes-i-really-mean-it to force displaying these instances." << std::endl;
+ return EINVAL;
+ }
+
+ ret = RGWBucketAdminOp::list_stale_instances(store, bucket_op,f);
+ if (ret < 0) {
+ cerr << "ERROR: listing stale instances" << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ if (opt_cmd == OPT_RESHARD_STALE_INSTANCES_DELETE) {
+ if (!store->svc.zone->can_reshard()) {
+ cerr << "Resharding disabled in a multisite env. Stale instances are not safe to be deleted." << std::endl;
+ return EINVAL;
+ }
+
+ ret = RGWBucketAdminOp::clear_stale_instances(store, bucket_op,f);
+ if (ret < 0) {
+ cerr << "ERROR: deleting stale instances" << cpp_strerror(-ret) << std::endl;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_TOPICS_LIST) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_bucket bucket;
+
+ if (!bucket_name.empty()) {
+ rgw_pubsub_bucket_topics result;
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ auto b = ups.get_bucket(bucket_info.bucket);
+ ret = b->get_topics(&result);
+ if (ret < 0) {
+ cerr << "ERROR: could not get topics: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("result", result, formatter);
+ } else {
+ rgw_pubsub_user_topics result;
+ int ret = ups.get_user_topics(&result);
+ if (ret < 0) {
+ cerr << "ERROR: could not get topics: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("result", result, formatter);
+ }
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_PUBSUB_TOPIC_CREATE) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ ret = ups.create_topic(topic_name);
+ if (ret < 0) {
+ cerr << "ERROR: could not create topic: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_TOPIC_GET) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_pubsub_topic_subs topic;
+ ret = ups.get_topic(topic_name, &topic);
+ if (ret < 0) {
+ cerr << "ERROR: could not create topic: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("topic", topic, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_PUBSUB_NOTIFICATION_CREATE) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name was not provided (via --bucket)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_bucket bucket;
+
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ auto b = ups.get_bucket(bucket_info.bucket);
+ ret = b->create_notification(topic_name, event_types);
+ if (ret < 0) {
+ cerr << "ERROR: could not publish bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_NOTIFICATION_RM) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (bucket_name.empty()) {
+ cerr << "ERROR: bucket name was not provided (via --bucket)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_bucket bucket;
+
+ RGWBucketInfo bucket_info;
+ int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
+ if (ret < 0) {
+ cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+
+ auto b = ups.get_bucket(bucket_info.bucket);
+ ret = b->remove_notification(topic_name);
+ if (ret < 0) {
+ cerr << "ERROR: could not publish bucket: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_TOPIC_RM) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ ret = ups.remove_topic(topic_name);
+ if (ret < 0) {
+ cerr << "ERROR: could not remove topic: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_SUB_GET) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (sub_name.empty()) {
+ cerr << "ERROR: subscription name was not provided (via --sub-name)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_pubsub_sub_config sub_conf;
+
+ auto sub = ups.get_sub(sub_name);
+ ret = sub->get_conf(&sub_conf);
+ if (ret < 0) {
+ cerr << "ERROR: could not get subscription info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("sub", sub_conf, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_PUBSUB_SUB_CREATE) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (sub_name.empty()) {
+ cerr << "ERROR: subscription name was not provided (via --sub-name)" << std::endl;
+ return EINVAL;
+ }
+ if (topic_name.empty()) {
+ cerr << "ERROR: topic name was not provided (via --topic)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ rgw_pubsub_topic_subs topic;
+ int ret = ups.get_topic(topic_name, &topic);
+ if (ret < 0) {
+ cerr << "ERROR: topic not found" << std::endl;
+ return EINVAL;
+ }
+
+ rgw_pubsub_sub_dest dest_config;
+ dest_config.bucket_name = sub_dest_bucket;
+ dest_config.oid_prefix = sub_oid_prefix;
+ dest_config.push_endpoint = sub_push_endpoint;
+
+ auto psmodule = static_cast<RGWPSSyncModuleInstance *>(store->get_sync_module().get());
+ auto conf = psmodule->get_effective_conf();
+
+ if (dest_config.bucket_name.empty()) {
+ dest_config.bucket_name = string(conf["data_bucket_prefix"]) + user_info.user_id.to_str() + "-" + topic.topic.name;
+ }
+ if (dest_config.oid_prefix.empty()) {
+ dest_config.oid_prefix = conf["data_oid_prefix"];
+ }
+ auto sub = ups.get_sub(sub_name);
+ ret = sub->subscribe(topic_name, dest_config);
+ if (ret < 0) {
+ cerr << "ERROR: could not store subscription info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_SUB_RM) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (sub_name.empty()) {
+ cerr << "ERROR: subscription name was not provided (via --sub-name)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ auto sub = ups.get_sub(sub_name);
+ ret = sub->unsubscribe(topic_name);
+ if (ret < 0) {
+ cerr << "ERROR: could not get subscription info: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ if (opt_cmd == OPT_PUBSUB_SUB_PULL) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (sub_name.empty()) {
+ cerr << "ERROR: subscription name was not provided (via --sub-name)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ if (!max_entries_specified) {
+ max_entries = RGWUserPubSub::Sub::DEFAULT_MAX_EVENTS;
+ }
+ auto sub = ups.get_sub(sub_name);
+ ret = sub->list_events(marker, max_entries);
+ if (ret < 0) {
+ cerr << "ERROR: could not list events: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ encode_json("result", *sub, formatter);
+ formatter->flush(cout);
+ }
+
+ if (opt_cmd == OPT_PUBSUB_EVENT_RM) {
+ if (get_tier_type(store) != "pubsub") {
+ cerr << "ERROR: only pubsub tier type supports this command" << std::endl;
+ return EINVAL;
+ }
+ if (user_id.empty()) {
+ cerr << "ERROR: user id was not provided (via --uid)" << std::endl;
+ return EINVAL;
+ }
+ if (sub_name.empty()) {
+ cerr << "ERROR: subscription name was not provided (via --sub-name)" << std::endl;
+ return EINVAL;
+ }
+ if (event_id.empty()) {
+ cerr << "ERROR: event id was not provided (via --event-id)" << std::endl;
+ return EINVAL;
+ }
+ RGWUserInfo& user_info = user_op.get_user_info();
+ RGWUserPubSub ups(store, user_info.user_id);
+
+ auto sub = ups.get_sub(sub_name);
+ ret = sub->remove_event(event_id);
+ if (ret < 0) {
+ cerr << "ERROR: could not remove event: " << cpp_strerror(-ret) << std::endl;
+ return -ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/rgw/rgw_admin_user.cc b/src/rgw/rgw_admin_user.cc
new file mode 100644
index 00000000..615c6b31
--- /dev/null
+++ b/src/rgw/rgw_admin_user.cc
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/rgw/librgw_admin_user.h"
+#include "rgw_admin_user.h"
+#include "rgw_user.h"
+#include "common/errno.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw;
+
+namespace rgw {
+
+ extern RGWLibAdmin rgw_lib_admin;
+
+}
+
+extern "C" {
+
+ // Create an RGW user from C-string arguments, using the store held by
+ // rgw_lib_admin.
+ //
+ // uid is required. display_name, access_key, secret_key, email, caps and
+ // access are each applied only when non-null, so callers may pass NULL for
+ // anything they do not want to set. The librgw_admin_user handle is not
+ // consulted by this function.
+ //
+ // Returns 0 on success, or a positive errno-style code on failure (EINVAL
+ // for a missing uid or an invalid tenant name).
+ int rgw_admin_create_user(librgw_admin_user_t librgw_admin_user, const char *uid,
+                           const char *display_name, const char *access_key, const char* secret_key,
+                           const char *email, const char *caps,
+                           const char *access, bool admin, bool system)
+ {
+   // a null uid cannot be turned into a user id; reject it up front instead
+   // of handing a null pointer to from_str()
+   if (!uid) {
+     cerr << "user id was not provided" << std::endl;
+     return EINVAL;
+   }
+
+   RGWUserAdminOpState user_op;
+   rgw_user user_id;
+   user_id.from_str(uid);
+   user_op.set_user_id(user_id);
+
+   // the remaining strings are optional: a null pointer must never reach the
+   // setters below (presumably they take std::string, for which a null
+   // const char* is undefined behaviour)
+   if (display_name) {
+     user_op.set_display_name(display_name);
+   }
+   if (email) {
+     user_op.user_email = email;
+     user_op.user_email_specified=true;
+   }
+   if (access_key) {
+     user_op.set_access_key(access_key);
+   }
+   if (secret_key) {
+     user_op.set_secret_key(secret_key);
+   }
+   if (caps) {
+     user_op.set_caps(caps);
+   }
+   if (access) {
+     uint32_t perm_mask = rgw_str_to_perm(access);
+     user_op.set_perm(perm_mask);
+   }
+   user_op.set_admin(admin);
+   user_op.set_system(system);
+
+   RGWUser user;
+   int ret = user.init(rgw_lib_admin.get_store(), user_op);
+   if (ret < 0) {
+     cerr << "user.init failed: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   std::string err_msg;
+   ret = user.add(user_op, &err_msg);
+   if (ret < 0) {
+     cerr << "could not create user: " << err_msg << std::endl;
+     // translate the RGW-specific tenant error into a plain errno value
+     if (ret == -ERR_INVALID_TENANT_NAME)
+       ret = -EINVAL;
+
+     return -ret;
+   }
+
+   return 0;
+ }
+
+ // Look up the user identified by uid in the store held by rgw_lib_admin.
+ //
+ // Returns 0 when the user could be initialised and its info fetched, or a
+ // positive errno-style code otherwise (EINVAL for a missing uid).
+ //
+ // NOTE(review): the fetched RGWUserInfo is kept in a local and is never
+ // copied into *user_info, so the caller's struct is left untouched. Filling
+ // it needs the rgw_user_info layout, which is declared outside this file.
+ // The librgw_admin_user handle is not consulted either.
+ int rgw_admin_user_info(librgw_admin_user_t librgw_admin_user, const char *uid, rgw_user_info* user_info)
+ {
+   // a null uid cannot be turned into a user id; reject it instead of
+   // handing a null pointer to from_str()
+   if (!uid) {
+     cerr << "user id was not provided" << std::endl;
+     return EINVAL;
+   }
+
+   RGWUserAdminOpState user_op;
+   rgw_user user_id;
+   user_id.from_str(uid);
+   user_op.set_user_id(user_id);
+
+   RGWUser user;
+   int ret = user.init(rgw_lib_admin.get_store(), user_op);
+   if (ret < 0) {
+     cerr << "user.init failed: " << cpp_strerror(-ret) << std::endl;
+     return -ret;
+   }
+
+   std::string err_msg;
+   RGWUserInfo info;
+   ret = user.info(info, &err_msg);
+   if (ret < 0) {
+     cerr << "could not fetch user info: " << err_msg << std::endl;
+     return -ret;
+   }
+
+   return 0;
+ }
+
+}
diff --git a/src/rgw/rgw_admin_user.h b/src/rgw/rgw_admin_user.h
new file mode 100644
index 00000000..68f8167f
--- /dev/null
+++ b/src/rgw/rgw_admin_user.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * create rgw admin user
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_ADMIN_USER_H
+#define RGW_ADMIN_USER_H
+
+#include <string>
+#include "common/config.h"
+
+#include "rgw_rados.h"
+
+namespace rgw {
+
+ // Holder for the RGWRados store and CephContext used by the librgw
+ // admin-user C entry points (they obtain the store through get_store()).
+ class RGWLibAdmin
+ {
+   // null until a store has been assigned; init() is only declared here, its
+   // definition lives in another translation unit
+   RGWRados *store = nullptr;
+   boost::intrusive_ptr<CephContext> cct;
+
+ public:
+   // returns the store pointer as-is; may be null if no store was set up
+   RGWRados* get_store()
+   {
+     return store;
+   }
+
+   int init();
+   // qualified so the header does not depend on a using-directive for std
+   int init(std::vector<const char *>& args);
+   int stop();
+ };
+}
+
+#endif /*RGW_ADMIN_USER_H */
diff --git a/src/rgw/rgw_aio.h b/src/rgw/rgw_aio.h
new file mode 100644
index 00000000..0ca401da
--- /dev/null
+++ b/src/rgw/rgw_aio.h
@@ -0,0 +1,80 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "include/rados/librados_fwd.hpp"
+#include <boost/intrusive/list.hpp>
+#include "rgw_common.h"
+#include "services/svc_rados.h" // cant forward declare RGWSI_RADOS::Obj
+
+namespace rgw {
+
+ struct AioResult { // outcome of a single aio operation, handed back to the submitter
+ RGWSI_RADOS::Obj obj; // object the operation was issued against
+ uint64_t id = 0; // id allows caller to associate a result with its request
+ bufferlist data; // result buffer for reads
+ int result = 0; // return code; check_for_errors() treats negative values as failures
+ };
+ struct AioResultEntry : AioResult, boost::intrusive::list_base_hook<> { // an AioResult that can be linked into a boost intrusive list
+ virtual ~AioResultEntry() {} // virtual so derived entries can be deleted through this base type
+ };
+ /* a list of polymorphic entries that frees them on destruction.
+  * the destructor unlinks every element and disposes of it with
+  * std::default_delete<T>. the list can be moved but not copied. */
+ template <typename T, typename ...Args>
+ struct OwningList : boost::intrusive::list<T, Args...> {
+ OwningList() = default;
+ ~OwningList() { this->clear_and_dispose(std::default_delete<T>{}); }
+ OwningList(OwningList&&) = default;
+ OwningList& operator=(OwningList&&) = default;
+ OwningList(const OwningList&) = delete;
+ OwningList& operator=(const OwningList&) = delete;
+ };
+using AioResultList = OwningList<AioResultEntry>;
+
+ // scans the results in list order and reports the first negative result
+ // code; yields 0 when every entry succeeded (or the list is empty)
+ inline int check_for_errors(const AioResultList& results) {
+   auto entry = results.begin();
+   const auto last = results.end();
+   // skip over every entry that did not fail
+   while (entry != last && !(entry->result < 0)) {
+     ++entry;
+   }
+   return entry == last ? 0 : entry->result;
+ }
+
+ // interface to submit async librados operations and wait on their completions.
+ // each call returns a list of results from prior completions
+ class Aio {
+ public:
+ virtual ~Aio() {}
+
+ virtual AioResultList submit(RGWSI_RADOS::Obj& obj, // submit a read operation against obj
+ librados::ObjectReadOperation *op,
+ uint64_t cost, uint64_t id) = 0; // id is echoed back in the matching AioResult
+
+ virtual AioResultList submit(RGWSI_RADOS::Obj& obj, // submit a write operation against obj
+ librados::ObjectWriteOperation *op,
+ uint64_t cost, uint64_t id) = 0; // id is echoed back in the matching AioResult
+
+ // poll for any ready completions without waiting
+ virtual AioResultList poll() = 0;
+
+ // return any ready completions. if there are none, wait for the next
+ virtual AioResultList wait() = 0;
+
+ // wait for all outstanding completions and return their results
+ virtual AioResultList drain() = 0;
+ };
+
+} // namespace rgw
diff --git a/src/rgw/rgw_aio_throttle.cc b/src/rgw/rgw_aio_throttle.cc
new file mode 100644
index 00000000..79d095d2
--- /dev/null
+++ b/src/rgw/rgw_aio_throttle.cc
@@ -0,0 +1,157 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/rados/librados.hpp"
+
+#include "rgw_aio_throttle.h"
+#include "rgw_rados.h"
+
+namespace rgw {
+
+ // completion callback registered by get(): 'arg' is the Pending entry the
+ // completion was created for. stores the operation's return value on the
+ // entry and hands it back to the throttle that owns it
+ void AioThrottle::aio_cb(void *cb, void *arg)
+ {
+   auto *entry = static_cast<Pending*>(arg);
+   entry->result = entry->completion->get_return_value();
+   entry->parent->put(*entry);
+ }
+
+ // tells whether the condition the current waiter is blocked on now holds.
+ // always false when nobody is waiting
+ bool AioThrottle::waiter_ready() const
+ {
+   if (waiter == Wait::Available) {
+     return is_available();
+   }
+   if (waiter == Wait::Completion) {
+     return has_completion();
+   }
+   if (waiter == Wait::Drained) {
+     return is_drained();
+   }
+   return false;
+ }
+
+ // submit an asynchronous write of the given cost against obj.
+ //
+ // a request whose cost exceeds the whole window can never be admitted, so it
+ // is completed immediately with -EDEADLK. otherwise the call blocks in get()
+ // until the cost fits in the window and then issues the operation.
+ //
+ // returns (and empties) the list of results completed so far; the result of
+ // this request shows up there once it finishes, tagged with 'id'.
+ AioResultList AioThrottle::submit(RGWSI_RADOS::Obj& obj,
+                                   librados::ObjectWriteOperation *op,
+                                   uint64_t cost, uint64_t id)
+ {
+   auto p = std::make_unique<Pending>();
+   p->obj = obj;
+   p->id = id;
+   p->cost = cost;
+
+   if (cost > window) {
+     p->result = -EDEADLK; // would never succeed
+     std::unique_lock lock{mutex};
+     completed.push_back(*p);
+   } else {
+     get(*p);
+     // once aio_operate() has been called, aio_cb() may already be running and
+     // writing p->result. keep the submission status in a local and only
+     // touch the entry on the failure path, so a finished operation's result
+     // is never overwritten with the submission's return value
+     const int r = obj.aio_operate(p->completion, op);
+     if (r < 0) {
+       p->result = r;
+       put(*p);
+     }
+   }
+   // the entry is now linked into pending/completed, which own it
+   p.release();
+   std::unique_lock lock{mutex};
+   return std::move(completed);
+ }
+
+ // submit an asynchronous read of the given cost against obj; the data read
+ // is stored in the result entry's 'data' buffer.
+ //
+ // a request whose cost exceeds the whole window can never be admitted, so it
+ // is completed immediately with -EDEADLK. otherwise the call blocks in get()
+ // until the cost fits in the window and then issues the operation.
+ //
+ // returns (and empties) the list of results completed so far; the result of
+ // this request shows up there once it finishes, tagged with 'id'.
+ AioResultList AioThrottle::submit(RGWSI_RADOS::Obj& obj,
+                                   librados::ObjectReadOperation *op,
+                                   uint64_t cost, uint64_t id)
+ {
+   auto p = std::make_unique<Pending>();
+   p->obj = obj;
+   p->id = id;
+   p->cost = cost;
+
+   if (cost > window) {
+     p->result = -EDEADLK; // would never succeed
+     std::unique_lock lock{mutex};
+     completed.push_back(*p);
+   } else {
+     get(*p);
+     // once aio_operate() has been called, aio_cb() may already be running and
+     // writing p->result. keep the submission status in a local and only
+     // touch the entry on the failure path, so a finished operation's result
+     // is never overwritten with the submission's return value
+     const int r = obj.aio_operate(p->completion, op, &p->data);
+     if (r < 0) {
+       p->result = r;
+       put(*p);
+     }
+   }
+   // the entry is now linked into pending/completed, which own it
+   p.release();
+   std::unique_lock lock{mutex};
+   return std::move(completed);
+ }
+
+ void AioThrottle::get(Pending& p) // charge p.cost to the window, block until it fits, then link p into 'pending'
+ {
+ std::unique_lock lock{mutex};
+
+ /* wait for the write size to become available. the cost is added before
+  * the check, so is_available() already accounts for this request */
+ pending_size += p.cost;
+ if (!is_available()) {
+ ceph_assert(waiter == Wait::None); // only one waiter at a time is supported
+ waiter = Wait::Available;
+ cond.wait(lock, [this] { return is_available(); });
+ waiter = Wait::None;
+ }
+
+ // register the pending write and attach a completion
+ p.parent = this;
+ p.completion = librados::Rados::aio_create_completion(&p, nullptr, aio_cb); // &p comes back to aio_cb() as its 'arg'
+ pending.push_back(p);
+ }
+
+ void AioThrottle::put(Pending& p) // retire p: drop its completion, move it to 'completed' and give its cost back
+ {
+ p.completion->release(); // done outside the lock
+ p.completion = nullptr;
+
+ std::scoped_lock lock{mutex};
+
+ // move from pending to completed
+ pending.erase(pending.iterator_to(p));
+ completed.push_back(p);
+
+ pending_size -= p.cost;
+
+ if (waiter_ready()) { // wake the waiter only if what it is waiting for now holds
+ cond.notify_one();
+ }
+ }
+
+ // hand over whatever has completed so far without blocking; the internal
+ // completed list is moved from
+ AioResultList AioThrottle::poll()
+ {
+   std::unique_lock guard{mutex};
+   AioResultList ready = std::move(completed);
+   return ready;
+ }
+
+ // return the completed results; when there are none yet but operations are
+ // still pending, block until at least one of them completes
+ AioResultList AioThrottle::wait()
+ {
+   std::unique_lock guard{mutex};
+   const bool nothing_ready = completed.empty();
+   const bool work_outstanding = !pending.empty();
+   if (nothing_ready && work_outstanding) {
+     ceph_assert(waiter == Wait::None);
+     waiter = Wait::Completion;
+     cond.wait(guard, [this] { return has_completion(); });
+     waiter = Wait::None;
+   }
+   return std::move(completed);
+ }
+
+ // block until no operation is pending any more, then return every
+ // completed result
+ AioResultList AioThrottle::drain()
+ {
+   std::unique_lock guard{mutex};
+   const bool work_outstanding = !pending.empty();
+   if (work_outstanding) {
+     ceph_assert(waiter == Wait::None);
+     waiter = Wait::Drained;
+     cond.wait(guard, [this] { return is_drained(); });
+     waiter = Wait::None;
+   }
+   return std::move(completed);
+ }
+
+} // namespace rgw
diff --git a/src/rgw/rgw_aio_throttle.h b/src/rgw/rgw_aio_throttle.h
new file mode 100644
index 00000000..751d7f98
--- /dev/null
+++ b/src/rgw/rgw_aio_throttle.h
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "include/rados/librados_fwd.hpp"
+#include <memory>
+#include "common/ceph_mutex.h"
+#include "services/svc_rados.h"
+#include "rgw_aio.h"
+
+namespace rgw {
+
+ // a throttle for aio operations that enforces a maximum window on outstanding
+ // bytes. only supports a single waiter, so all public functions must be called
+ // from the same thread
+ class AioThrottle : public Aio {
+ protected:
+ const uint64_t window; // upper bound on the summed cost of outstanding operations
+ uint64_t pending_size = 0; // summed cost currently charged against the window
+
+ bool is_available() const { return pending_size <= window; }
+ bool has_completion() const { return !completed.empty(); }
+ bool is_drained() const { return pending.empty(); }
+
+ struct Pending : AioResultEntry { // bookkeeping for one submitted operation
+ AioThrottle *parent = nullptr; // throttle to report back to from aio_cb()
+ uint64_t cost = 0; // amount charged against the window
+ librados::AioCompletion *completion = nullptr; // set in get(), released in put()
+ };
+ OwningList<Pending> pending; // operations submitted but not yet retired by put()
+ AioResultList completed; // results waiting to be handed to the caller
+
+ enum class Wait { None, Available, Completion, Drained }; // what the single waiter is blocked on
+ Wait waiter = Wait::None;
+
+ bool waiter_ready() const;
+
+ ceph::mutex mutex = ceph::make_mutex("AioThrottle");
+ ceph::condition_variable cond;
+
+ void get(Pending& p);
+ void put(Pending& p);
+
+ static void aio_cb(void *cb, void *arg);
+
+ public:
+ AioThrottle(uint64_t window) : window(window) {}
+
+ virtual ~AioThrottle() {
+ // must drain before destructing
+ ceph_assert(pending.empty());
+ ceph_assert(completed.empty());
+ }
+
+ AioResultList submit(RGWSI_RADOS::Obj& obj,
+ librados::ObjectReadOperation *op,
+ uint64_t cost, uint64_t id) override;
+
+ AioResultList submit(RGWSI_RADOS::Obj& obj,
+ librados::ObjectWriteOperation *op,
+ uint64_t cost, uint64_t id) override;
+
+ AioResultList poll() override;
+
+ AioResultList wait() override;
+
+ AioResultList drain() override;
+ };
+
+} // namespace rgw
diff --git a/src/rgw/rgw_amqp.cc b/src/rgw/rgw_amqp.cc
new file mode 100644
index 00000000..45167a8e
--- /dev/null
+++ b/src/rgw/rgw_amqp.cc
@@ -0,0 +1,1035 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_amqp.h"
+#include <amqp.h>
+#include <amqp_tcp_socket.h>
+#include <amqp_framing.h>
+#include "include/ceph_assert.h"
+#include <sstream>
+#include <cstring>
+#include <unordered_map>
+#include <string>
+#include <vector>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <boost/lockfree/queue.hpp>
+#include "common/dout.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// TODO investigation, not necessarily issues:
+// (1) in case of single threaded writer context use spsc_queue
+// (2) support multiple channels
+// (3) check performance of emptying queue to local list, and go over the list and publish
+// (4) use std::shared_mutex (c++17) or equivalent for the connections lock
+
+namespace rgw::amqp {
+
+// RGW AMQP status codes for publishing
+static const int RGW_AMQP_STATUS_BROKER_NACK = -0x1001;
+static const int RGW_AMQP_STATUS_CONNECTION_CLOSED = -0x1002;
+static const int RGW_AMQP_STATUS_QUEUE_FULL = -0x1003;
+static const int RGW_AMQP_STATUS_MAX_INFLIGHT = -0x1004;
+static const int RGW_AMQP_STATUS_MANAGER_STOPPED = -0x1005;
+// RGW AMQP status code for connection opening
+static const int RGW_AMQP_STATUS_CONN_ALLOC_FAILED = -0x2001;
+static const int RGW_AMQP_STATUS_SOCKET_ALLOC_FAILED = -0x2002;
+static const int RGW_AMQP_STATUS_SOCKET_OPEN_FAILED = -0x2003;
+static const int RGW_AMQP_STATUS_LOGIN_FAILED = -0x2004;
+static const int RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED = -0x2005;
+static const int RGW_AMQP_STATUS_VERIFY_EXCHANGE_FAILED = -0x2006;
+static const int RGW_AMQP_STATUS_Q_DECLARE_FAILED = -0x2007;
+static const int RGW_AMQP_STATUS_CONFIRM_DECLARE_FAILED = -0x2008;
+static const int RGW_AMQP_STATUS_CONSUME_DECLARE_FAILED = -0x2009;
+
+static const int RGW_AMQP_RESPONSE_SOCKET_ERROR = -0x3008;
+static const int RGW_AMQP_NO_REPLY_CODE = 0x0;
+
+ // key class for the connection list
+ struct connection_id_t {
+ const std::string host;
+ const int port;
+ const std::string vhost;
+ // constructed from amqp_connection_info struct
+ connection_id_t(const amqp_connection_info& info)
+ : host(info.host), port(info.port), vhost(info.vhost) {} // the C strings are copied, so the key does not alias 'info'
+
+ // equality operator and hasher functor are needed
+ // so that connection_id_t could be used as key in unordered_map
+ bool operator==(const connection_id_t& other) const {
+ return host == other.host && port == other.port && vhost == other.vhost;
+ }
+
+ struct hasher {
+ std::size_t operator()(const connection_id_t& k) const { // combines the hashes of host, port and vhost
+ return ((std::hash<std::string>()(k.host)
+ ^ (std::hash<int>()(k.port) << 1)) >> 1)
+ ^ (std::hash<std::string>()(k.vhost) << 1);
+ }
+ };
+ };
+
+ // render a connection id as "<host>:<port>/<vhost>"
+ std::string to_string(const connection_id_t& id) {
+   std::string text(id.host);
+   text += ":";
+   text += std::to_string(id.port);
+   text += "/";
+   text += id.vhost;
+   return text;
+ }
+
+ // connection_t state cleaner
+ // could be used for automatic cleanup when getting out of scope:
+ // destroys the wrapped amqp connection state on destruction unless reset()
+ // was called first
+ class ConnectionCleaner {
+   private:
+     amqp_connection_state_t conn;
+   public:
+     ConnectionCleaner(amqp_connection_state_t _conn) : conn(_conn) {}
+     // a copy would destroy the same connection state a second time
+     ConnectionCleaner(const ConnectionCleaner&) = delete;
+     ConnectionCleaner& operator=(const ConnectionCleaner&) = delete;
+     ~ConnectionCleaner() {
+       if (conn) {
+         amqp_destroy_connection(conn);
+       }
+     }
+     // call reset() if cleanup is not needed anymore
+     void reset() {
+       conn = nullptr;
+     }
+ };
+
+ // struct for holding the callback and its tag in the callback list
+ struct reply_callback_with_tag_t {
+   uint64_t tag;         // delivery tag the callback is waiting on
+   reply_callback_t cb;  // invoked with the final status of the publish
+
+   reply_callback_with_tag_t(uint64_t _tag, reply_callback_t _cb) : tag(_tag), cb(_cb) {}
+
+   // lets an entry be compared directly against a delivery tag (e.g. when
+   // searching the callback list); const so it also works on const entries
+   bool operator==(uint64_t rhs) const {
+     return tag == rhs;
+   }
+ };
+
+typedef std::vector<reply_callback_with_tag_t> CallbackList;
+
+ // struct for holding the connection state object as well as the exchange
+ // it is used inside an intrusive ref counted pointer (boost::intrusive_ptr)
+ // since references to deleted objects may still exist in the calling code
+ struct connection_t {
+ amqp_connection_state_t state; // null while the connection is not established
+ std::string exchange;
+ std::string user;
+ std::string password;
+ amqp_bytes_t reply_to_queue; // name of the queue used for confirmations, freed in destroy()
+ bool marked_for_deletion;
+ uint64_t delivery_tag; // tag given to the next publish that carries a callback; starts at 1
+ int status;
+ int reply_type;
+ int reply_code;
+ mutable std::atomic<int> ref_count; // mutable so the intrusive_ptr hooks can update it through a const pointer
+ CephContext* cct;
+ CallbackList callbacks; // callbacks still waiting for their publish to be confirmed
+
+ // default ctor
+ connection_t() :
+ state(nullptr),
+ reply_to_queue(amqp_empty_bytes),
+ marked_for_deletion(false),
+ delivery_tag(1),
+ status(AMQP_STATUS_OK),
+ reply_type(AMQP_RESPONSE_NORMAL),
+ reply_code(RGW_AMQP_NO_REPLY_CODE),
+ ref_count(0),
+ cct(nullptr) {}
+
+ // cleanup of all internal connection resource
+ // the object can still remain, and internal connection
+ // resources created again on successful reconnection
+ void destroy(int s) {
+ status = s;
+ ConnectionCleaner clean_state(state); // destroys the amqp state when this function returns
+ state = nullptr;
+ amqp_bytes_free(reply_to_queue);
+ reply_to_queue = amqp_empty_bytes;
+ // fire all remaining callbacks
+ std::for_each(callbacks.begin(), callbacks.end(), [this](auto& cb_tag) {
+ cb_tag.cb(status);
+ ldout(cct, 20) << "AMQP destroy: invoking callback with tag=" << cb_tag.tag << dendl;
+ });
+ callbacks.clear();
+ delivery_tag = 1; // tags start over on the next connection
+ }
+
+ bool is_ok() const {
+ return (state != nullptr && !marked_for_deletion);
+ }
+
+ // dtor also destroys the internals
+ ~connection_t() {
+ destroy(RGW_AMQP_STATUS_CONNECTION_CLOSED);
+ }
+
+ friend void intrusive_ptr_add_ref(const connection_t* p);
+ friend void intrusive_ptr_release(const connection_t* p);
+ };
+
+ // these are required interfaces so that connection_t could be used inside boost::intrusive_ptr
+ void intrusive_ptr_add_ref(const connection_t* p) {
+ ++p->ref_count; // atomic increment of the embedded reference count
+ }
+ void intrusive_ptr_release(const connection_t* p) { // drops one reference to p
+ if (--p->ref_count == 0) { // the last reference is gone
+ delete p;
+ }
+ }
+
+ // convert connection info to string
+ // the result is a multi-line, human readable dump meant for diagnostics, so
+ // the password itself is never written out - only a placeholder
+ std::string to_string(const amqp_connection_info& info) {
+   std::stringstream ss;
+   ss << "connection info:" <<
+         "\nHost: " << info.host <<
+         "\nPort: " << info.port <<
+         "\nUser: " << info.user <<
+         "\nPassword: " << "****" <<
+         "\nvhost: " << info.vhost <<
+         "\nSSL support: " << info.ssl << '\n';
+   return ss.str();
+ }
+
+ // map an RPC reply to a numeric code:
+ //  - library exception: the library error value
+ //  - server exception: the reply code of the decoded close method, or the
+ //    method id when nothing was decoded
+ //  - anything else: RGW_AMQP_NO_REPLY_CODE
+ int reply_to_code(const amqp_rpc_reply_t& reply) {
+   const auto type = reply.reply_type;
+   if (type == AMQP_RESPONSE_LIBRARY_EXCEPTION) {
+     return reply.library_error;
+   }
+   if (type == AMQP_RESPONSE_SERVER_EXCEPTION) {
+     if (!reply.reply.decoded) {
+       return reply.reply.id;
+     }
+     const auto* close_method = static_cast<const amqp_connection_close_t*>(reply.reply.decoded);
+     return close_method->reply_code;
+   }
+   // AMQP_RESPONSE_NONE, AMQP_RESPONSE_NORMAL and any unrecognized type
+   return RGW_AMQP_NO_REPLY_CODE;
+ }
+
+ /* convert reply to string.
+  * a normal reply yields an empty string; a library exception yields the
+  * library's own error text; a server exception yields a prefix naming the
+  * kind of close method followed by the reply code and text, when decoded */
+ std::string to_string(const amqp_rpc_reply_t& reply) {
+ std::stringstream ss;
+ switch (reply.reply_type) {
+ case AMQP_RESPONSE_NORMAL:
+ return "";
+ case AMQP_RESPONSE_NONE:
+ return "missing RPC reply type";
+ case AMQP_RESPONSE_LIBRARY_EXCEPTION:
+ return amqp_error_string2(reply.library_error);
+ case AMQP_RESPONSE_SERVER_EXCEPTION:
+ {
+ switch (reply.reply.id) {
+ case AMQP_CONNECTION_CLOSE_METHOD:
+ ss << "server connection error: ";
+ break;
+ case AMQP_CHANNEL_CLOSE_METHOD:
+ ss << "server channel error: ";
+ break;
+ default:
+ ss << "server unknown error: ";
+ break;
+ }
+ if (reply.reply.decoded) {
+ amqp_connection_close_t* m = (amqp_connection_close_t*)reply.reply.decoded; // NOTE(review): used for every method id, assumes channel close decodes to a compatible layout - confirm
+ ss << m->reply_code << " text: " << std::string((char*)m->reply_text.bytes, m->reply_text.len);
+ }
+ return ss.str();
+ }
+ default:
+ ss << "unknown error, method id: " << reply.reply.id;
+ return ss.str();
+ }
+ }
+
+ /* convert status enum to string.
+  * returns the enumerator's name; the library's internal *_NEXT_VALUE markers
+  * map to "AMQP_STATUS_INTERNAL" and values not listed here map to
+  * "AMQP_STATUS_UNKNOWN" */
+ std::string to_string(amqp_status_enum s) {
+ switch (s) {
+ case AMQP_STATUS_OK:
+ return "AMQP_STATUS_OK";
+ case AMQP_STATUS_NO_MEMORY:
+ return "AMQP_STATUS_NO_MEMORY";
+ case AMQP_STATUS_BAD_AMQP_DATA:
+ return "AMQP_STATUS_BAD_AMQP_DATA";
+ case AMQP_STATUS_UNKNOWN_CLASS:
+ return "AMQP_STATUS_UNKNOWN_CLASS";
+ case AMQP_STATUS_UNKNOWN_METHOD:
+ return "AMQP_STATUS_UNKNOWN_METHOD";
+ case AMQP_STATUS_HOSTNAME_RESOLUTION_FAILED:
+ return "AMQP_STATUS_HOSTNAME_RESOLUTION_FAILED";
+ case AMQP_STATUS_INCOMPATIBLE_AMQP_VERSION:
+ return "AMQP_STATUS_INCOMPATIBLE_AMQP_VERSION";
+ case AMQP_STATUS_CONNECTION_CLOSED:
+ return "AMQP_STATUS_CONNECTION_CLOSED";
+ case AMQP_STATUS_BAD_URL:
+ return "AMQP_STATUS_BAD_URL";
+ case AMQP_STATUS_SOCKET_ERROR:
+ return "AMQP_STATUS_SOCKET_ERROR";
+ case AMQP_STATUS_INVALID_PARAMETER:
+ return "AMQP_STATUS_INVALID_PARAMETER";
+ case AMQP_STATUS_TABLE_TOO_BIG:
+ return "AMQP_STATUS_TABLE_TOO_BIG";
+ case AMQP_STATUS_WRONG_METHOD:
+ return "AMQP_STATUS_WRONG_METHOD";
+ case AMQP_STATUS_TIMEOUT:
+ return "AMQP_STATUS_TIMEOUT";
+ case AMQP_STATUS_TIMER_FAILURE:
+ return "AMQP_STATUS_TIMER_FAILURE";
+ case AMQP_STATUS_HEARTBEAT_TIMEOUT:
+ return "AMQP_STATUS_HEARTBEAT_TIMEOUT";
+ case AMQP_STATUS_UNEXPECTED_STATE:
+ return "AMQP_STATUS_UNEXPECTED_STATE";
+ case AMQP_STATUS_SOCKET_CLOSED:
+ return "AMQP_STATUS_SOCKET_CLOSED";
+ case AMQP_STATUS_SOCKET_INUSE:
+ return "AMQP_STATUS_SOCKET_INUSE";
+ case AMQP_STATUS_BROKER_UNSUPPORTED_SASL_METHOD:
+ return "AMQP_STATUS_BROKER_UNSUPPORTED_SASL_METHOD";
+ #if AMQP_VERSION >= AMQP_VERSION_CODE(0, 8, 0, 0)
+ case AMQP_STATUS_UNSUPPORTED:
+ return "AMQP_STATUS_UNSUPPORTED";
+ #endif
+ case _AMQP_STATUS_NEXT_VALUE:
+ return "AMQP_STATUS_INTERNAL";
+ case AMQP_STATUS_TCP_ERROR:
+ return "AMQP_STATUS_TCP_ERROR";
+ case AMQP_STATUS_TCP_SOCKETLIB_INIT_ERROR:
+ return "AMQP_STATUS_TCP_SOCKETLIB_INIT_ERROR";
+ case _AMQP_STATUS_TCP_NEXT_VALUE:
+ return "AMQP_STATUS_INTERNAL";
+ case AMQP_STATUS_SSL_ERROR:
+ return "AMQP_STATUS_SSL_ERROR";
+ case AMQP_STATUS_SSL_HOSTNAME_VERIFY_FAILED:
+ return "AMQP_STATUS_SSL_HOSTNAME_VERIFY_FAILED";
+ case AMQP_STATUS_SSL_PEER_VERIFY_FAILED:
+ return "AMQP_STATUS_SSL_PEER_VERIFY_FAILED";
+ case AMQP_STATUS_SSL_CONNECTION_FAILED:
+ return "AMQP_STATUS_SSL_CONNECTION_FAILED";
+ case _AMQP_STATUS_SSL_NEXT_VALUE:
+ return "AMQP_STATUS_INTERNAL";
+ }
+ return "AMQP_STATUS_UNKNOWN";
+ }
+
+ // TODO: add status_to_string on the connection object to print full status
+
+ /* convert int status to string - including RGW specific values.
+  * the RGW_AMQP_STATUS_* codes defined in this file are handled here; any
+  * other value is cast to amqp_status_enum and passed to to_string() */
+ std::string status_to_string(int s) {
+ switch (s) {
+ case RGW_AMQP_STATUS_BROKER_NACK:
+ return "RGW_AMQP_STATUS_BROKER_NACK";
+ case RGW_AMQP_STATUS_CONNECTION_CLOSED:
+ return "RGW_AMQP_STATUS_CONNECTION_CLOSED";
+ case RGW_AMQP_STATUS_QUEUE_FULL:
+ return "RGW_AMQP_STATUS_QUEUE_FULL";
+ case RGW_AMQP_STATUS_MAX_INFLIGHT:
+ return "RGW_AMQP_STATUS_MAX_INFLIGHT";
+ case RGW_AMQP_STATUS_MANAGER_STOPPED:
+ return "RGW_AMQP_STATUS_MANAGER_STOPPED";
+ case RGW_AMQP_STATUS_CONN_ALLOC_FAILED:
+ return "RGW_AMQP_STATUS_CONN_ALLOC_FAILED";
+ case RGW_AMQP_STATUS_SOCKET_ALLOC_FAILED:
+ return "RGW_AMQP_STATUS_SOCKET_ALLOC_FAILED";
+ case RGW_AMQP_STATUS_SOCKET_OPEN_FAILED:
+ return "RGW_AMQP_STATUS_SOCKET_OPEN_FAILED";
+ case RGW_AMQP_STATUS_LOGIN_FAILED:
+ return "RGW_AMQP_STATUS_LOGIN_FAILED";
+ case RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED:
+ return "RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED";
+ case RGW_AMQP_STATUS_VERIFY_EXCHANGE_FAILED:
+ return "RGW_AMQP_STATUS_VERIFY_EXCHANGE_FAILED";
+ case RGW_AMQP_STATUS_Q_DECLARE_FAILED:
+ return "RGW_AMQP_STATUS_Q_DECLARE_FAILED";
+ case RGW_AMQP_STATUS_CONFIRM_DECLARE_FAILED:
+ return "RGW_AMQP_STATUS_CONFIRM_DECLARE_FAILED";
+ case RGW_AMQP_STATUS_CONSUME_DECLARE_FAILED:
+ return "RGW_AMQP_STATUS_CONSUME_DECLARE_FAILED";
+ }
+ return to_string((amqp_status_enum)s); // not an RGW code: fall back to the library's status names
+ }
+
+// check the result from calls and return if error (=null)
+#define RETURN_ON_ERROR(C, S, OK) \
+ if (!OK) { \
+ C->status = S; \
+ return C; \
+ }
+
+// in case of RPC calls, getting the RPC reply and return if an error is detected
+#define RETURN_ON_REPLY_ERROR(C, ST, S) { \
+ const auto reply = amqp_get_rpc_reply(ST); \
+ if (reply.reply_type != AMQP_RESPONSE_NORMAL) { \
+ C->status = S; \
+ C->reply_type = reply.reply_type; \
+ C->reply_code = reply_to_code(reply); \
+ return C; \
+ } \
+ }
+
+static const amqp_channel_t CHANNEL_ID = 1;
+static const amqp_channel_t CONFIRMING_CHANNEL_ID = 2;
+
+ /* utility function to create a connection, when the connection object already exists.
+  * opens a socket to info.host:info.port, logs in, opens both channels,
+  * enables confirmations, verifies the exchange and declares the queue used
+  * for confirmations. always returns 'conn': on success conn->state is set,
+  * on failure conn->state is left untouched and conn->status (plus reply_type
+  * and reply_code where available) describes the step that failed */
+ connection_ptr_t& create_connection(connection_ptr_t& conn, const amqp_connection_info& info) {
+ // pointer must be valid and not marked for deletion
+ ceph_assert(conn && !conn->marked_for_deletion);
+
+ // reset all status codes
+ conn->status = AMQP_STATUS_OK;
+ conn->reply_type = AMQP_RESPONSE_NORMAL;
+ conn->reply_code = RGW_AMQP_NO_REPLY_CODE;
+
+ auto state = amqp_new_connection();
+ if (!state) {
+ conn->status = RGW_AMQP_STATUS_CONN_ALLOC_FAILED;
+ return conn;
+ }
+ // make sure that the connection state is cleaned up in case of error
+ ConnectionCleaner state_guard(state);
+
+ // create and open socket
+ auto socket = amqp_tcp_socket_new(state);
+ if (!socket) {
+ conn->status = RGW_AMQP_STATUS_SOCKET_ALLOC_FAILED;
+ return conn;
+ }
+ const auto s = amqp_socket_open(socket, info.host, info.port);
+ if (s < 0) {
+ conn->status = RGW_AMQP_STATUS_SOCKET_OPEN_FAILED;
+ conn->reply_type = RGW_AMQP_RESPONSE_SOCKET_ERROR;
+ conn->reply_code = s;
+ return conn;
+ }
+
+ // login to broker
+ const auto reply = amqp_login(state,
+ info.vhost,
+ AMQP_DEFAULT_MAX_CHANNELS,
+ AMQP_DEFAULT_FRAME_SIZE,
+ 0, // no heartbeat TODO: add conf
+ AMQP_SASL_METHOD_PLAIN, // TODO: add other types of security
+ info.user,
+ info.password);
+ if (reply.reply_type != AMQP_RESPONSE_NORMAL) {
+ conn->status = RGW_AMQP_STATUS_LOGIN_FAILED;
+ conn->reply_type = reply.reply_type;
+ conn->reply_code = reply_to_code(reply);
+ return conn;
+ }
+
+ // open channels
+ {
+ const auto ok = amqp_channel_open(state, CHANNEL_ID);
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED, ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED);
+ }
+ {
+ const auto ok = amqp_channel_open(state, CONFIRMING_CHANNEL_ID);
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED, ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_CHANNEL_OPEN_FAILED);
+ }
+ {
+ const auto ok = amqp_confirm_select(state, CONFIRMING_CHANNEL_ID); // only the second channel is put in confirm mode
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_CONFIRM_DECLARE_FAILED, ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_CONFIRM_DECLARE_FAILED);
+ }
+
+ // verify that the topic exchange is there
+ // TODO: make this step optional
+ {
+ const auto ok = amqp_exchange_declare(state,
+ CHANNEL_ID,
+ amqp_cstring_bytes(conn->exchange.c_str()),
+ amqp_cstring_bytes("topic"),
+ 1, // passive - exchange must already exist on broker
+ 1, // durable
+ 0, // dont auto-delete
+ 0, // not internal
+ amqp_empty_table);
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_VERIFY_EXCHANGE_FAILED, ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_VERIFY_EXCHANGE_FAILED);
+ }
+ {
+ // create queue for confirmations
+ const auto queue_ok = amqp_queue_declare(state,
+ CHANNEL_ID, // use the regular channel for this call
+ amqp_empty_bytes, // let broker allocate queue name
+ 0, // not passive - create the queue
+ 0, // not durable
+ 1, // exclusive
+ 1, // auto-delete
+ amqp_empty_table // no args TODO add args from conf: TTL, max length etc.
+ );
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_Q_DECLARE_FAILED, queue_ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_Q_DECLARE_FAILED);
+
+ // define consumption for connection
+ const auto consume_ok = amqp_basic_consume(state,
+ CONFIRMING_CHANNEL_ID,
+ queue_ok->queue,
+ amqp_empty_bytes, // broker will generate consumer tag
+ 1, // messages sent from client are never routed back
+ 1, // client does not ack the acks
+ 1, // exclusive access to queue
+ amqp_empty_table // no parameters
+ );
+
+ RETURN_ON_ERROR(conn, RGW_AMQP_STATUS_CONSUME_DECLARE_FAILED, consume_ok);
+ RETURN_ON_REPLY_ERROR(conn, state, RGW_AMQP_STATUS_CONSUME_DECLARE_FAILED);
+ // broker generated consumer_tag could be used to cancel sending of n/acks from broker - not needed
+
+ state_guard.reset(); // success: the connection object takes over the state
+ conn->state = state;
+ conn->reply_to_queue = amqp_bytes_malloc_dup(queue_ok->queue); // private copy, released in connection_t::destroy()
+ return conn;
+ }
+ }
+
+ // allocate a fresh connection object, fill in the exchange, the credentials
+ // taken from 'info' and the ceph context, then try to connect it.
+ // the returned object reflects the outcome of create_connection()
+ connection_ptr_t create_new_connection(const amqp_connection_info& info,
+                                        const std::string& exchange, CephContext* cct) {
+   connection_ptr_t fresh(new connection_t);
+   fresh->cct = cct;
+   fresh->exchange = exchange;
+   fresh->user.assign(info.user);
+   fresh->password.assign(info.password);
+   return create_connection(fresh, info);
+ }
+
+ /** struct used for holding messages in the message queue.
+  * bundles everything publish_internal() needs for one publish: the target
+  * connection, the topic, the payload and an optional completion callback */
+ struct message_wrapper_t {
+ connection_ptr_t conn;
+ std::string topic;
+ std::string message;
+ reply_callback_t cb; // may be empty: publish_internal() then publishes without tracking a confirmation
+
+ message_wrapper_t(connection_ptr_t& _conn,
+ const std::string& _topic,
+ const std::string& _message,
+ reply_callback_t _cb) : conn(_conn), topic(_topic), message(_message), cb(_cb) {}
+ };
+
+
+typedef std::unordered_map<connection_id_t, connection_ptr_t, connection_id_t::hasher> ConnectionList;
+typedef boost::lockfree::queue<message_wrapper_t*, boost::lockfree::fixed_sized<true>> MessageQueue;
+
+ /* macros used inside a loop where an iterator is either incremented or erased.
+  * each one expands to several statements ending in 'continue', so it must be
+  * used inside a braced block and cannot be wrapped in do/while.
+  * ERASE_AND_CONTINUE also decrements 'connection_count', which therefore has
+  * to be in scope wherever the macro is expanded */
+ #define INCREMENT_AND_CONTINUE(IT) \
+ ++IT; \
+ continue;
+
+ #define ERASE_AND_CONTINUE(IT,CONTAINER) \
+ IT=CONTAINER.erase(IT); \
+ --connection_count; \
+ continue;
+
+class Manager {
+public:
+ const size_t max_connections;
+ const size_t max_inflight;
+ const size_t max_queue;
+private:
+ std::atomic<size_t> connection_count;
+ bool stopped;
+ struct timeval read_timeout;
+ ConnectionList connections;
+ MessageQueue messages;
+ std::atomic<size_t> queued;
+ std::atomic<size_t> dequeued;
+ CephContext* const cct;
+ mutable std::mutex connections_lock;
+ std::thread runner;
+
+ // publish a single queued message on its connection.
+ // takes ownership of 'message': it is freed when this function returns.
+ //  - if the connection is not usable, the callback (if any) is invoked with
+ //    RGW_AMQP_STATUS_CONNECTION_CLOSED and nothing is published
+ //  - without a callback the message is published on the regular channel
+ //  - with a callback it is published on the confirming channel and the
+ //    callback is stored under the delivery tag of this publish, unless
+ //    max_inflight callbacks are already stored, in which case the callback
+ //    is invoked right away with RGW_AMQP_STATUS_MAX_INFLIGHT
+ // a failed publish destroys the connection's internal state
+ void publish_internal(message_wrapper_t* message) {
+   const std::unique_ptr<message_wrapper_t> msg_owner(message);
+   auto& conn = message->conn;
+
+   if (!conn->is_ok()) {
+     // connection had an issue while message was in the queue
+     // TODO add error stats
+     ldout(conn->cct, 1) << "AMQP publish: connection had an issue while message was in the queue" << dendl;
+     if (message->cb) {
+       message->cb(RGW_AMQP_STATUS_CONNECTION_CLOSED);
+     }
+     return;
+   }
+
+   if (message->cb == nullptr) {
+     // TODO add error stats
+     const auto rc = amqp_basic_publish(conn->state,
+         CHANNEL_ID,
+         amqp_cstring_bytes(conn->exchange.c_str()),
+         amqp_cstring_bytes(message->topic.c_str()),
+         1, // mandatory, TODO: take from conf
+         0, // not immediate
+         nullptr,
+         amqp_cstring_bytes(message->message.c_str()));
+     if (rc == AMQP_STATUS_OK) {
+       ldout(conn->cct, 20) << "AMQP publish (no callback): OK" << dendl;
+       return;
+     }
+     ldout(conn->cct, 1) << "AMQP publish (no callback): failed with error " << status_to_string(rc) << dendl;
+     // an error occurred, close connection
+     // it will be retried by the main loop
+     conn->destroy(rc);
+     return;
+   }
+
+   // only the fields named in _flags are set
+   amqp_basic_properties_t props;
+   props._flags =
+     AMQP_BASIC_DELIVERY_MODE_FLAG |
+     AMQP_BASIC_REPLY_TO_FLAG;
+   props.delivery_mode = 2; // persistent delivery TODO take from conf
+   props.reply_to = conn->reply_to_queue;
+
+   const auto rc = amqp_basic_publish(conn->state,
+       CONFIRMING_CHANNEL_ID,
+       amqp_cstring_bytes(conn->exchange.c_str()),
+       amqp_cstring_bytes(message->topic.c_str()),
+       1, // mandatory, TODO: take from conf
+       0, // not immediate
+       &props,
+       amqp_cstring_bytes(message->message.c_str()));
+
+   if (rc == AMQP_STATUS_OK) {
+     auto const q_len = conn->callbacks.size();
+     if (q_len < max_inflight) {
+       ldout(conn->cct, 20) << "AMQP publish (with callback, tag=" << conn->delivery_tag << "): OK. Queue has: " << q_len << " callbacks" << dendl;
+       conn->callbacks.emplace_back(conn->delivery_tag++, message->cb);
+     } else {
+       // the message has already been handed to the broker on the confirming
+       // channel, so it used up a delivery tag even though no callback is
+       // stored for it. advance the counter, otherwise the callbacks stored
+       // later would be registered under tags that belong to earlier messages
+       ++conn->delivery_tag;
+       // immediately invoke callback with error
+       ldout(conn->cct, 1) << "AMQP publish (with callback): failed with error: callback queue full" << dendl;
+       message->cb(RGW_AMQP_STATUS_MAX_INFLIGHT);
+     }
+   } else {
+     // an error occurred, close connection
+     // it will be retried by the main loop
+     ldout(conn->cct, 1) << "AMQP publish (with callback): failed with error: " << status_to_string(rc) << dendl;
+     conn->destroy(rc);
+     // immediately invoke callback with error
+     message->cb(rc);
+   }
+ }
+
+  // The manager's worker thread. Each pass of the main loop:
+  // (1) empties the queue of messages to be published
+  // (2) loops over all connections and reads acks/nacks
+  // (3) erases connections that were marked for deletion
+  // (4) retries connections that are not ok
+  // (5) TODO cleanup timedout callbacks
+  // The loop ends once "stopped" is set.
+  void run() {
+    amqp_frame_t frame;
+    while (!stopped) {
+
+      // publish all messages in the queue
+      const auto count = messages.consume_all(std::bind(&Manager::publish_internal, this, std::placeholders::_1));
+      dequeued += count;
+      ConnectionList::iterator conn_it;
+      ConnectionList::const_iterator end_it;
+      {
+        // thread safe access to the connection list
+        // once the iterators are fetched they are guaranteed to remain valid
+        std::lock_guard lock(connections_lock);
+        conn_it = connections.begin();
+        end_it = connections.end();
+      }
+      auto incoming_message = false;
+      // loop over all connections to read acks
+      for (;conn_it != end_it;) {
+
+        auto& conn = conn_it->second;
+        // delete the connection if marked for deletion
+        if (conn->marked_for_deletion) {
+          ldout(conn->cct, 10) << "AMQP run: connection is deleted" << dendl;
+          conn->destroy(RGW_AMQP_STATUS_CONNECTION_CLOSED);
+          std::lock_guard lock(connections_lock);
+          // erase is safe - does not invalidate any other iterator
+          // lock so no insertion happens at the same time
+          ERASE_AND_CONTINUE(conn_it, connections);
+        }
+
+        // try to reconnect the connection if it has an error
+        if (!conn->is_ok()) {
+          // pointers are used temporarily inside the amqp_connection_info object
+          // as read-only values, hence the assignment, and const_cast are safe here
+          amqp_connection_info info;
+          info.host = const_cast<char*>(conn_it->first.host.c_str());
+          info.port = conn_it->first.port;
+          info.vhost = const_cast<char*>(conn_it->first.vhost.c_str());
+          info.user = const_cast<char*>(conn->user.c_str());
+          info.password = const_cast<char*>(conn->password.c_str());
+          ldout(conn->cct, 20) << "AMQP run: retry connection" << dendl;
+          if (create_connection(conn, info)->is_ok() == false) {
+            ldout(conn->cct, 10) << "AMQP run: connection (" << to_string(conn_it->first) << ") retry failed" << dendl;
+            // TODO: add error counter for failed retries
+            // TODO: add exponential backoff for retries
+          } else {
+            ldout(conn->cct, 10) << "AMQP run: connection (" << to_string(conn_it->first) << ") retry successfull" << dendl;
+          }
+          INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        const auto rc = amqp_simple_wait_frame_noblock(conn->state, &frame, &read_timeout);
+
+        if (rc == AMQP_STATUS_TIMEOUT) {
+          // TODO mark connection as idle
+          INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        // this is just to prevent spinning idle, does not indicate that a message
+        // was successfully processed or not
+        incoming_message = true;
+
+        // check if error occurred that require reopening the connection
+        if (rc != AMQP_STATUS_OK) {
+          // an error occurred, close connection
+          // it will be retried by the main loop
+          ldout(conn->cct, 1) << "AMQP run: connection read error: " << status_to_string(rc) << dendl;
+          conn->destroy(rc);
+          INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        if (frame.frame_type != AMQP_FRAME_METHOD) {
+          ldout(conn->cct, 10) << "AMQP run: ignoring non n/ack messages" << dendl;
+          // handler is for publish confirmation only - handle only method frames
+          // TODO: add a counter
+          INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        // filled by the ack/nack cases below, every other case moves on to the next connection
+        uint64_t tag;
+        bool multiple;
+        int result;
+
+        switch (frame.payload.method.id) {
+          case AMQP_BASIC_ACK_METHOD:
+            {
+              result = AMQP_STATUS_OK;
+              const auto ack = (amqp_basic_ack_t*)frame.payload.method.decoded;
+              ceph_assert(ack);
+              tag = ack->delivery_tag;
+              multiple = ack->multiple;
+              break;
+            }
+          case AMQP_BASIC_NACK_METHOD:
+            {
+              result = RGW_AMQP_STATUS_BROKER_NACK;
+              const auto nack = (amqp_basic_nack_t*)frame.payload.method.decoded;
+              ceph_assert(nack);
+              tag = nack->delivery_tag;
+              multiple = nack->multiple;
+              break;
+            }
+          case AMQP_CONNECTION_CLOSE_METHOD:
+            // TODO on channel close, no need to reopen the connection
+            // (intentional fall through: both close methods are handled the same way)
+          case AMQP_CHANNEL_CLOSE_METHOD:
+            {
+              // other side closed the connection, no need to continue
+              ldout(conn->cct, 10) << "AMQP run: connection was closed by broker" << dendl;
+              // NOTE(review): rc is AMQP_STATUS_OK on this path (timeouts and errors were
+              // handled above), so destroy() is given an OK status - confirm this is intended
+              conn->destroy(rc);
+              INCREMENT_AND_CONTINUE(conn_it);
+            }
+          case AMQP_BASIC_RETURN_METHOD:
+            // message was not delivered, returned to sender
+            // TODO: add a counter
+            ldout(conn->cct, 10) << "AMQP run: message delivery error" << dendl;
+            INCREMENT_AND_CONTINUE(conn_it);
+            break;
+          default:
+            // unexpected method
+            // TODO: add a counter
+            ldout(conn->cct, 10) << "AMQP run: unexpected message" << dendl;
+            INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        const auto& callbacks_end = conn->callbacks.end();
+        const auto& callbacks_begin = conn->callbacks.begin();
+        const auto tag_it = std::find(callbacks_begin, callbacks_end, tag);
+        if (tag_it != callbacks_end) {
+          if (multiple) {
+            // n/ack all up to (and including) the tag
+            ldout(conn->cct, 20) << "AMQP run: multiple n/acks received with tag=" << tag << " and result=" << result << dendl;
+            auto it = callbacks_begin;
+            // the end check must come first: erase() may return end(),
+            // and end() must never be dereferenced
+            while (it != conn->callbacks.end() && it->tag <= tag) {
+              ldout(conn->cct, 20) << "AMQP run: invoking callback with tag=" << it->tag << dendl;
+              it->cb(result);
+              it = conn->callbacks.erase(it);
+            }
+          } else {
+            // n/ack a specific tag
+            ldout(conn->cct, 20) << "AMQP run: n/ack received, invoking callback with tag=" << tag << " and result=" << result << dendl;
+            tag_it->cb(result);
+            conn->callbacks.erase(tag_it);
+          }
+        } else {
+          // TODO add counter for acks with no callback
+          ldout(conn->cct, 10) << "AMQP run: unsolicited n/ack received with tag=" << tag << dendl;
+        }
+        // just increment the iterator
+        ++conn_it;
+      }
+      // if no messages were received or published, sleep for 100ms
+      if (count == 0 && !incoming_message) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      }
+    }
+  }
+
+ // used in the dtor for message cleanup
+ // passed to messages.consume_all() there, so it is invoked once for every
+ // message that is still in the queue and frees it
+ static void delete_message(const message_wrapper_t* message) {
+ delete message;
+ }
+
+public:
+ // Build the manager and start its worker thread.
+ // _max_connections: limit on the number of connections, also the initial bucket count of the connection map
+ // _max_inflight: limit on the number of callbacks waiting for a broker reply, per connection
+ // _max_queue: capacity handed to the message queue
+ // _usec_timeout: microseconds part of the timeout used when reading frames in run()
+ // _cct: context used for logging
+ Manager(size_t _max_connections,
+ size_t _max_inflight,
+ size_t _max_queue,
+ long _usec_timeout,
+ CephContext* _cct) :
+ max_connections(_max_connections),
+ max_inflight(_max_inflight),
+ max_queue(_max_queue),
+ connection_count(0),
+ stopped(false),
+ read_timeout{0, _usec_timeout},
+ connections(_max_connections),
+ // note: sized from the max_queue member, not from the _max_queue argument
+ messages(max_queue),
+ queued(0),
+ dequeued(0),
+ cct(_cct),
+ // NOTE(review): the thread running run() is created here, i.e. before the ctor body
+ // below is executed - confirm that "runner" is declared after every member run() uses
+ runner(&Manager::run, this) {
+ // The hashmap has "max connections" as the initial number of buckets,
+ // and allows for 10 collisions per bucket before rehash.
+ // This is to prevent rehashing so that iterators are not invalidated
+ // when a new connection is added.
+ connections.max_load_factor(10.0);
+ // give the runner thread a name for easier debugging
+ const auto rc = ceph_pthread_setname(runner.native_handle(), "amqp_manager");
+ ceph_assert(rc==0);
+ }
+
+ // non copyable
+ Manager(const Manager&) = delete;
+ const Manager& operator=(const Manager&) = delete;
+
+ // stop the main thread
+ // only raises the "stopped" flag that run() checks on every pass;
+ // the thread itself is joined in the dtor
+ void stop() {
+ stopped = true;
+ }
+
+  // disconnect from a broker
+  // The connection is only marked for deletion here, the cleanup itself is done in run().
+  // Returns false (and marks nothing) for an empty connection or when the manager is stopped.
+  bool disconnect(connection_ptr_t& conn) {
+    const bool can_disconnect = conn && !stopped;
+    if (can_disconnect) {
+      conn->marked_for_deletion = true;
+    }
+    return can_disconnect;
+  }
+
+  // connect to a broker, or reuse an existing connection if already connected
+  // Returns nullptr when: the manager is stopped, the URL cannot be parsed, the
+  // matching connection is marked for deletion or uses another exchange, or the
+  // connection limit was reached.
+  connection_ptr_t connect(const std::string& url, const std::string& exchange) {
+    if (stopped) {
+      // TODO: increment counter
+      ldout(cct, 1) << "AMQP connect: manager is stopped" << dendl;
+      return nullptr;
+    }
+
+    // the parser gets its own mutable, null terminated copy of the URL,
+    // so that parsing could happen in-place
+    std::vector<char> url_cache(url.c_str(), url.c_str()+url.size()+1);
+    struct amqp_connection_info info;
+    const auto parse_rc = amqp_parse_url(url_cache.data(), &info);
+    if (parse_rc != AMQP_STATUS_OK) {
+      // TODO: increment counter
+      ldout(cct, 1) << "AMQP connect: URL parsing failed" << dendl;
+      return nullptr;
+    }
+
+    const connection_id_t id(info);
+    // the lock is held from the lookup until the insertion at the end
+    std::lock_guard lock(connections_lock);
+    const auto existing = connections.find(id);
+    if (existing != connections.end()) {
+      const auto& found = existing->second;
+      if (found->marked_for_deletion) {
+        // TODO: increment counter
+        ldout(cct, 1) << "AMQP connect: endpoint marked for deletion" << dendl;
+        return nullptr;
+      }
+      if (found->exchange != exchange) {
+        // TODO: increment counter
+        ldout(cct, 1) << "AMQP connect: exchange mismatch" << dendl;
+        return nullptr;
+      }
+      // connection found - return even if non-ok
+      ldout(cct, 20) << "AMQP connect: connection found" << dendl;
+      return found;
+    }
+
+    // connection not found, creating a new one
+    if (connection_count >= max_connections) {
+      // TODO: increment counter
+      ldout(cct, 1) << "AMQP connect: max connections exceeded" << dendl;
+      return nullptr;
+    }
+    const auto conn = create_new_connection(info, exchange, cct);
+    // create_new_connection must always return a connection object
+    // even if error occurred during creation.
+    // in such a case the creation will be retried in the main thread
+    ceph_assert(conn);
+    ++connection_count;
+    ldout(cct, 10) << "AMQP connect: new connection is created. Total connections: " << connection_count << dendl;
+    ldout(cct, 10) << "AMQP connect: new connection status is: " << status_to_string(conn->status) << dendl;
+    const auto inserted = connections.emplace(id, conn);
+    return inserted.first->second;
+  }
+
+  // Queue a message for publishing over "conn", without a reply callback.
+  // Returns AMQP_STATUS_OK when the message was queued, otherwise:
+  // RGW_AMQP_STATUS_MANAGER_STOPPED, RGW_AMQP_STATUS_CONNECTION_CLOSED (empty or
+  // non-ok connection) or RGW_AMQP_STATUS_QUEUE_FULL (the queue rejected the message).
+  // TODO publish with confirm is needed in "none" case as well, cb should be invoked publish is ok (no ack)
+  int publish(connection_ptr_t& conn,
+    const std::string& topic,
+    const std::string& message) {
+    if (stopped) {
+      return RGW_AMQP_STATUS_MANAGER_STOPPED;
+    }
+    if (!conn || !conn->is_ok()) {
+      return RGW_AMQP_STATUS_CONNECTION_CLOSED;
+    }
+    auto* const wrapper = new message_wrapper_t(conn, topic, message, nullptr);
+    if (messages.push(wrapper)) {
+      // the queue holds the pointer from now on
+      ++queued;
+      return AMQP_STATUS_OK;
+    }
+    // the queue did not take the message: free it here, otherwise it is leaked
+    delete wrapper;
+    return RGW_AMQP_STATUS_QUEUE_FULL;
+  }
+
+  // Queue a message for publishing over "conn" together with a reply callback.
+  // Returns AMQP_STATUS_OK when the message was queued, otherwise:
+  // RGW_AMQP_STATUS_MANAGER_STOPPED, RGW_AMQP_STATUS_CONNECTION_CLOSED (empty or
+  // non-ok connection) or RGW_AMQP_STATUS_QUEUE_FULL (the queue rejected the message).
+  // The callback is not invoked from here.
+  int publish_with_confirm(connection_ptr_t& conn,
+    const std::string& topic,
+    const std::string& message,
+    reply_callback_t cb) {
+    if (stopped) {
+      return RGW_AMQP_STATUS_MANAGER_STOPPED;
+    }
+    if (!conn || !conn->is_ok()) {
+      return RGW_AMQP_STATUS_CONNECTION_CLOSED;
+    }
+    auto* const wrapper = new message_wrapper_t(conn, topic, message, cb);
+    if (messages.push(wrapper)) {
+      // the queue holds the pointer from now on
+      ++queued;
+      return AMQP_STATUS_OK;
+    }
+    // the queue did not take the message: free it here, otherwise it is leaked
+    delete wrapper;
+    return RGW_AMQP_STATUS_QUEUE_FULL;
+  }
+
+ // dtor waits for the thread to stop,
+ // then the messages that are still queued are freed
+ ~Manager() {
+ // make run() leave its loop, and wait for it to do so
+ stopped = true;
+ runner.join();
+ // the thread is gone, so whatever is left in the queue will never be published
+ messages.consume_all(delete_message);
+ }
+
+ // get the number of connections
+ // (the counter is incremented in connect() whenever a new connection is created)
+ size_t get_connection_count() const {
+ return connection_count;
+ }
+
+  // get the number of in-flight messages:
+  // the sum of the pending callbacks over all connections
+  size_t get_inflight() const {
+    std::lock_guard lock(connections_lock);
+    size_t total = 0;
+    for (auto& conn_pair : connections) {
+      total += conn_pair.second->callbacks.size();
+    }
+    return total;
+  }
+
+ // running counter of the queued messages
+ // (incremented by publish() and publish_with_confirm() on every successful push)
+ size_t get_queued() const {
+ return queued;
+ }
+
+ // running counter of the dequeued messages
+ // (run() adds the number of messages consumed from the queue on every pass)
+ size_t get_dequeued() const {
+ return dequeued;
+ }
+};
+
+// singleton manager
+// note that the manager itself is not a singleton, and multiple instances may co-exist
+// TODO make the pointer atomic in allocation and deallocation to avoid race conditions
+static Manager* s_manager = nullptr;
+
+static const size_t MAX_CONNECTIONS_DEFAULT = 256;
+static const size_t MAX_INFLIGHT_DEFAULT = 8192;
+static const size_t MAX_QUEUE_DEFAULT = 8192;
+
+// Create the singleton manager with the default limits.
+// Returns false, and changes nothing, when a manager already exists.
+bool init(CephContext* cct) {
+  const bool first_init = (s_manager == nullptr);
+  if (first_init) {
+    // TODO: take conf from CephContext
+    s_manager = new Manager(MAX_CONNECTIONS_DEFAULT, MAX_INFLIGHT_DEFAULT, MAX_QUEUE_DEFAULT, 100, cct);
+  }
+  return first_init;
+}
+
+// Destroy the singleton manager (a no-op if there is none) and reset the pointer,
+// so that init() may be called again.
+void shutdown() {
+ delete s_manager;
+ s_manager = nullptr;
+}
+
+// forward to the singleton manager, nullptr if there is no manager
+connection_ptr_t connect(const std::string& url, const std::string& exchange) {
+  if (s_manager) {
+    return s_manager->connect(url, exchange);
+  }
+  return nullptr;
+}
+
+// forward to the singleton manager, RGW_AMQP_STATUS_MANAGER_STOPPED if there is no manager
+int publish(connection_ptr_t& conn,
+  const std::string& topic,
+  const std::string& message) {
+  if (s_manager) {
+    return s_manager->publish(conn, topic, message);
+  }
+  return RGW_AMQP_STATUS_MANAGER_STOPPED;
+}
+
+// forward to the singleton manager, RGW_AMQP_STATUS_MANAGER_STOPPED if there is no manager
+int publish_with_confirm(connection_ptr_t& conn,
+  const std::string& topic,
+  const std::string& message,
+  reply_callback_t cb) {
+  if (s_manager) {
+    return s_manager->publish_with_confirm(conn, topic, message, cb);
+  }
+  return RGW_AMQP_STATUS_MANAGER_STOPPED;
+}
+
+// number of connections of the singleton manager, 0 if there is no manager
+size_t get_connection_count() {
+  return s_manager ? s_manager->get_connection_count() : 0;
+}
+
+// number of in-flight messages of the singleton manager, 0 if there is no manager
+size_t get_inflight() {
+  return s_manager ? s_manager->get_inflight() : 0;
+}
+
+// queued messages counter of the singleton manager, 0 if there is no manager
+size_t get_queued() {
+  return s_manager ? s_manager->get_queued() : 0;
+}
+
+// dequeued messages counter of the singleton manager, 0 if there is no manager
+size_t get_dequeued() {
+  return s_manager ? s_manager->get_dequeued() : 0;
+}
+
+// connection limit of the singleton manager, the default limit if there is no manager
+size_t get_max_connections() {
+  if (s_manager) {
+    return s_manager->max_connections;
+  }
+  return MAX_CONNECTIONS_DEFAULT;
+}
+
+// in-flight limit of the singleton manager, the default limit if there is no manager
+size_t get_max_inflight() {
+  if (s_manager) {
+    return s_manager->max_inflight;
+  }
+  return MAX_INFLIGHT_DEFAULT;
+}
+
+// queue size limit of the singleton manager, the default limit if there is no manager
+size_t get_max_queue() {
+  if (s_manager) {
+    return s_manager->max_queue;
+  }
+  return MAX_QUEUE_DEFAULT;
+}
+
+// forward to the singleton manager, false if there is no manager
+bool disconnect(connection_ptr_t& conn) {
+  return s_manager && s_manager->disconnect(conn);
+}
+
+} // namespace amqp
+
diff --git a/src/rgw/rgw_amqp.h b/src/rgw/rgw_amqp.h
new file mode 100644
index 00000000..938bdade
--- /dev/null
+++ b/src/rgw/rgw_amqp.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include <functional>
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+
+class CephContext;
+
+namespace rgw::amqp {
+// forward declaration of connection object
+struct connection_t;
+
+typedef boost::intrusive_ptr<connection_t> connection_ptr_t;
+
+// required interfaces needed so that connection_t could be used inside boost::intrusive_ptr
+void intrusive_ptr_add_ref(const connection_t* p);
+void intrusive_ptr_release(const connection_t* p);
+
+// the reply callback is expected to get an integer parameter
+// indicating the result, and not to return anything
+typedef std::function<void(int)> reply_callback_t;
+
+// initialize the amqp manager
+bool init(CephContext* cct);
+
+// shutdown the amqp manager
+void shutdown();
+
+// connect to an amqp endpoint
+connection_ptr_t connect(const std::string& url, const std::string& exchange);
+
+// publish a message over a connection that was already created
+int publish(connection_ptr_t& conn,
+ const std::string& topic,
+ const std::string& message);
+
+// publish a message over a connection that was already created
+// and pass a callback that will be invoked (async) when broker confirms
+// receiving the message
+int publish_with_confirm(connection_ptr_t& conn,
+ const std::string& topic,
+ const std::string& message,
+ reply_callback_t cb);
+
+// convert the integer status returned from the "publish" function to a string
+std::string status_to_string(int s);
+
+// number of connections
+size_t get_connection_count();
+
+// return the number of messages that were sent
+// to broker, but were not yet acked/nacked/timedout
+size_t get_inflight();
+
+// running counter of successfully queued messages
+size_t get_queued();
+
+// running counter of dequeued messages
+size_t get_dequeued();
+
+// number of maximum allowed connections
+size_t get_max_connections();
+
+// number of maximum allowed inflight messages
+size_t get_max_inflight();
+
+// maximum number of messages in the queue
+size_t get_max_queue();
+
+// disconnect from an amqp broker
+bool disconnect(connection_ptr_t& conn);
+
+}
+
diff --git a/src/rgw/rgw_arn.cc b/src/rgw/rgw_arn.cc
new file mode 100644
index 00000000..d8b4ed39
--- /dev/null
+++ b/src/rgw/rgw_arn.cc
@@ -0,0 +1,385 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_arn.h"
+#include "rgw_common.h"
+#include <regex>
+
+namespace rgw {
+
+namespace {
+// Translate the partition field of an ARN into a Partition value.
+// "*" is accepted only when wildcards are allowed.
+// Returns boost::none for any other string.
+boost::optional<Partition> to_partition(const smatch::value_type& p,
+                                         bool wildcards) {
+  if (p == "aws") {
+    return Partition::aws;
+  }
+  if (p == "aws-cn") {
+    return Partition::aws_cn;
+  }
+  if (p == "aws-us-gov") {
+    return Partition::aws_us_gov;
+  }
+  if (p == "*" && wildcards) {
+    return Partition::wildcard;
+  }
+  // every path above returns, so no trailing abort is needed
+  return boost::none;
+}
+
+// Translate the service field of an ARN into a Service value.
+// "*" is accepted only when wildcards are allowed.
+// Returns boost::none for a name that is not in the table below.
+boost::optional<Service> to_service(const smatch::value_type& s,
+                                     bool wildcards) {
+  // service name (as written in an ARN) -> enum value
+  static const unordered_map<string, Service> services = {
+    { "acm", Service::acm },
+    { "apigateway", Service::apigateway },
+    { "appstream", Service::appstream },
+    { "artifact", Service::artifact },
+    { "autoscaling", Service::autoscaling },
+    { "aws-marketplace", Service::aws_marketplace },
+    { "aws-marketplace-management",
+      Service::aws_marketplace_management },
+    { "aws-portal", Service::aws_portal },
+    { "cloudformation", Service::cloudformation },
+    { "cloudfront", Service::cloudfront },
+    { "cloudhsm", Service::cloudhsm },
+    { "cloudsearch", Service::cloudsearch },
+    { "cloudtrail", Service::cloudtrail },
+    { "cloudwatch", Service::cloudwatch },
+    { "codebuild", Service::codebuild },
+    { "codecommit", Service::codecommit },
+    { "codedeploy", Service::codedeploy },
+    { "codepipeline", Service::codepipeline },
+    { "cognito-identity", Service::cognito_identity },
+    { "cognito-idp", Service::cognito_idp },
+    { "cognito-sync", Service::cognito_sync },
+    { "config", Service::config },
+    { "datapipeline", Service::datapipeline },
+    { "devicefarm", Service::devicefarm },
+    { "directconnect", Service::directconnect },
+    { "dms", Service::dms },
+    { "ds", Service::ds },
+    { "dynamodb", Service::dynamodb },
+    { "ec2", Service::ec2 },
+    { "ecr", Service::ecr },
+    { "ecs", Service::ecs },
+    { "elasticache", Service::elasticache },
+    { "elasticbeanstalk", Service::elasticbeanstalk },
+    { "elasticfilesystem", Service::elasticfilesystem },
+    { "elasticloadbalancing", Service::elasticloadbalancing },
+    { "elasticmapreduce", Service::elasticmapreduce },
+    { "elastictranscoder", Service::elastictranscoder },
+    { "es", Service::es },
+    { "events", Service::events },
+    { "firehose", Service::firehose },
+    { "gamelift", Service::gamelift },
+    { "glacier", Service::glacier },
+    { "health", Service::health },
+    { "iam", Service::iam },
+    { "importexport", Service::importexport },
+    { "inspector", Service::inspector },
+    { "iot", Service::iot },
+    { "kinesis", Service::kinesis },
+    { "kinesisanalytics", Service::kinesisanalytics },
+    { "kms", Service::kms },
+    { "lambda", Service::lambda },
+    { "lightsail", Service::lightsail },
+    { "logs", Service::logs },
+    { "machinelearning", Service::machinelearning },
+    { "mobileanalytics", Service::mobileanalytics },
+    { "mobilehub", Service::mobilehub },
+    { "opsworks", Service::opsworks },
+    { "opsworks-cm", Service::opsworks_cm },
+    { "polly", Service::polly },
+    { "rds", Service::rds },
+    { "redshift", Service::redshift },
+    { "route53", Service::route53 },
+    { "route53domains", Service::route53domains },
+    { "s3", Service::s3 },
+    { "sdb", Service::sdb },
+    { "servicecatalog", Service::servicecatalog },
+    { "ses", Service::ses },
+    { "sns", Service::sns },
+    { "sqs", Service::sqs },
+    { "ssm", Service::ssm },
+    { "states", Service::states },
+    { "storagegateway", Service::storagegateway },
+    { "sts", Service::sts },
+    { "support", Service::support },
+    { "swf", Service::swf },
+    { "trustedadvisor", Service::trustedadvisor },
+    { "waf", Service::waf },
+    { "workmail", Service::workmail },
+    { "workspaces", Service::workspaces }};
+
+  // the wildcard is not part of the table
+  if (wildcards && s == "*") {
+    return Service::wildcard;
+  }
+
+  const auto found = services.find(s);
+  if (found != services.end()) {
+    return found->second;
+  }
+  return boost::none;
+}
+}
+// S3 ARN of an object: account is the bucket's tenant,
+// resource is "<bucket name>/<object key name>"
+ARN::ARN(const rgw_obj& o)
+  : partition(Partition::aws),
+    service(Service::s3),
+    region(),
+    account(o.bucket.tenant),
+    resource(o.bucket.name)
+{
+  resource.append(1, '/').append(o.key.name);
+}
+
+// S3 ARN of a bucket: account is the bucket's tenant, resource is the bucket name.
+// The region is left empty.
+ARN::ARN(const rgw_bucket& b)
+ : partition(Partition::aws),
+ service(Service::s3),
+ region(),
+ account(b.tenant),
+ resource(b.name) { }
+
+// S3 ARN of the object named "o" inside bucket "b":
+// account is the bucket's tenant, resource is "<bucket name>/<o>"
+ARN::ARN(const rgw_bucket& b, const std::string& o)
+  : partition(Partition::aws),
+    service(Service::s3),
+    region(),
+    account(b.tenant),
+    resource(b.name) {
+  resource.append(1, '/').append(o);
+}
+
+// IAM ARN: account is the tenant, resource is "<type>/<resource_name>".
+// With has_path set no '/' is inserted between the type and the name.
+ARN::ARN(const std::string& resource_name, const std::string& type, const std::string& tenant, bool has_path)
+  : partition(Partition::aws),
+    service(Service::iam),
+    region(),
+    account(tenant),
+    resource(type) {
+  const char* const separator = has_path ? "" : "/";
+  resource.append(separator).append(resource_name);
+}
+
+// Parse "arn:partition:service:region:account-id:resource".
+// With wildcards enabled a bare "*" is accepted as well, and so is "*" inside the fields.
+// Returns boost::none when the string does not match, or when the partition or
+// the service is not a known one.
+boost::optional<ARN> ARN::parse(const std::string& s, bool wildcards) {
+  static const std::regex rx_wild("arn:([^:]*):([^:]*):([^:]*):([^:]*):([^:]*)",
+                                  std::regex_constants::ECMAScript |
+                                  std::regex_constants::optimize);
+  static const std::regex rx_no_wild(
+    "arn:([^:*]*):([^:*]*):([^:*]*):([^:*]*):(.*)",
+    std::regex_constants::ECMAScript |
+    std::regex_constants::optimize);
+
+  if (wildcards && (s == "*")) {
+    return ARN(Partition::wildcard, Service::wildcard, "*", "*", "*");
+  }
+
+  smatch match;
+  const std::regex& rx = wildcards ? rx_wild : rx_no_wild;
+  // the whole match plus the five captured fields
+  if (!regex_match(s, match, rx) || match.size() != 6) {
+    return boost::none;
+  }
+
+  const auto parsed_partition = to_partition(match[1], wildcards);
+  if (!parsed_partition) {
+    return boost::none;
+  }
+  const auto parsed_service = to_service(match[2], wildcards);
+  if (!parsed_service) {
+    return boost::none;
+  }
+  return ARN(*parsed_partition, *parsed_service, match[3], match[4], match[5]);
+}
+
+// Format as "arn:partition:service:region:account:resource".
+// A partition or service without a textual name is written as "*".
+std::string ARN::to_string() const {
+  std::string out{"arn:"};
+
+  switch (partition) {
+    case Partition::aws:
+      out.append("aws:");
+      break;
+    case Partition::aws_cn:
+      out.append("aws-cn:");
+      break;
+    case Partition::aws_us_gov:
+      out.append("aws-us-gov:");
+      break;
+    default:
+      out.append("*:");
+      break;
+  }
+
+  // enum value -> service name (as written in an ARN)
+  static const std::unordered_map<Service, string> services = {
+    { Service::acm, "acm" },
+    { Service::apigateway, "apigateway" },
+    { Service::appstream, "appstream" },
+    { Service::artifact, "artifact" },
+    { Service::autoscaling, "autoscaling" },
+    { Service::aws_marketplace, "aws-marketplace" },
+    { Service::aws_marketplace_management, "aws-marketplace-management" },
+    { Service::aws_portal, "aws-portal" },
+    { Service::cloudformation, "cloudformation" },
+    { Service::cloudfront, "cloudfront" },
+    { Service::cloudhsm, "cloudhsm" },
+    { Service::cloudsearch, "cloudsearch" },
+    { Service::cloudtrail, "cloudtrail" },
+    { Service::cloudwatch, "cloudwatch" },
+    { Service::codebuild, "codebuild" },
+    { Service::codecommit, "codecommit" },
+    { Service::codedeploy, "codedeploy" },
+    { Service::codepipeline, "codepipeline" },
+    { Service::cognito_identity, "cognito-identity" },
+    { Service::cognito_idp, "cognito-idp" },
+    { Service::cognito_sync, "cognito-sync" },
+    { Service::config, "config" },
+    { Service::datapipeline, "datapipeline" },
+    { Service::devicefarm, "devicefarm" },
+    { Service::directconnect, "directconnect" },
+    { Service::dms, "dms" },
+    { Service::ds, "ds" },
+    { Service::dynamodb, "dynamodb" },
+    { Service::ec2, "ec2" },
+    { Service::ecr, "ecr" },
+    { Service::ecs, "ecs" },
+    { Service::elasticache, "elasticache" },
+    { Service::elasticbeanstalk, "elasticbeanstalk" },
+    { Service::elasticfilesystem, "elasticfilesystem" },
+    { Service::elasticloadbalancing, "elasticloadbalancing" },
+    { Service::elasticmapreduce, "elasticmapreduce" },
+    { Service::elastictranscoder, "elastictranscoder" },
+    { Service::es, "es" },
+    { Service::events, "events" },
+    { Service::firehose, "firehose" },
+    { Service::gamelift, "gamelift" },
+    { Service::glacier, "glacier" },
+    { Service::health, "health" },
+    { Service::iam, "iam" },
+    { Service::importexport, "importexport" },
+    { Service::inspector, "inspector" },
+    { Service::iot, "iot" },
+    { Service::kinesis, "kinesis" },
+    { Service::kinesisanalytics, "kinesisanalytics" },
+    { Service::kms, "kms" },
+    { Service::lambda, "lambda" },
+    { Service::lightsail, "lightsail" },
+    { Service::logs, "logs" },
+    { Service::machinelearning, "machinelearning" },
+    { Service::mobileanalytics, "mobileanalytics" },
+    { Service::mobilehub, "mobilehub" },
+    { Service::opsworks, "opsworks" },
+    { Service::opsworks_cm, "opsworks-cm" },
+    { Service::polly, "polly" },
+    { Service::rds, "rds" },
+    { Service::redshift, "redshift" },
+    { Service::route53, "route53" },
+    { Service::route53domains, "route53domains" },
+    { Service::s3, "s3" },
+    { Service::sdb, "sdb" },
+    { Service::servicecatalog, "servicecatalog" },
+    { Service::ses, "ses" },
+    { Service::sns, "sns" },
+    { Service::sqs, "sqs" },
+    { Service::ssm, "ssm" },
+    { Service::states, "states" },
+    { Service::storagegateway, "storagegateway" },
+    { Service::sts, "sts" },
+    { Service::support, "support" },
+    { Service::swf, "swf" },
+    { Service::trustedadvisor, "trustedadvisor" },
+    { Service::waf, "waf" },
+    { Service::workmail, "workmail" },
+    { Service::workspaces, "workspaces" }};
+
+  // Service::wildcard has no entry in the table and ends up as "*"
+  const auto found = services.find(service);
+  if (found == services.end()) {
+    out.push_back('*');
+  } else {
+    out.append(found->second);
+  }
+  out.push_back(':');
+
+  out.append(region).push_back(':');
+  out.append(account).push_back(':');
+  out.append(resource);
+
+  return out;
+}
+
+// two ARNs are equal when all five fields are equal
+bool operator ==(const ARN& l, const ARN& r) {
+  if (l.partition != r.partition) return false;
+  if (l.service != r.service) return false;
+  if (l.region != r.region) return false;
+  if (l.account != r.account) return false;
+  return l.resource == r.resource;
+}
+// Lexicographic order over (partition, service, region, account, resource).
+// A field is consulted only when all the fields before it are equal; an "or" of
+// the per-field comparisons could hold in both directions for the same pair,
+// which is not a valid ordering for sorted containers and algorithms.
+bool operator <(const ARN& l, const ARN& r) {
+  if (l.partition != r.partition) return l.partition < r.partition;
+  if (l.service != r.service) return l.service < r.service;
+  if (l.region != r.region) return l.region < r.region;
+  if (l.account != r.account) return l.account < r.account;
+  return l.resource < r.resource;
+}
+
+// `this` is the pattern, and may hold wildcards.
+// The candidate is not allowed to have wildcards. The only way to
+// do that sanely would be to use unification rather than matching.
+bool ARN::match(const ARN& candidate) const {
+  // partition: the candidate's must be concrete, and equal to ours unless ours is a wildcard
+  const bool partition_ok =
+    (candidate.partition != Partition::wildcard) &&
+    (partition == candidate.partition || partition == Partition::wildcard);
+  if (!partition_ok) {
+    return false;
+  }
+
+  // service: same rule as for the partition
+  const bool service_ok =
+    (candidate.service != Service::wildcard) &&
+    (service == candidate.service || service == Service::wildcard);
+  if (!service_ok) {
+    return false;
+  }
+
+  // the string fields are matched by match_policy, in the same order as before
+  return match_policy(region, candidate.region, MATCH_POLICY_ARN) &&
+         match_policy(account, candidate.account, MATCH_POLICY_ARN) &&
+         match_policy(resource, candidate.resource, MATCH_POLICY_RESOURCE);
+}
+
+// Split the resource part of an ARN into up to three fields separated by ':' or '/'.
+// A single field is taken as the resource itself; otherwise the fields are
+// (resource type, resource, qualifier) and the type must not be a wildcard.
+boost::optional<ARNResource> ARNResource::parse(const std::string& s) {
+  static const std::regex rx("^([^:/]*)[:/]?([^:/]*)?[:/]?(.*)$",
+                             std::regex_constants::ECMAScript |
+                             std::regex_constants::optimize);
+  std::smatch match;
+  if (!regex_match(s, match, rx)) {
+    return boost::none;
+  }
+
+  const bool resource_only = match[2].str().empty() && match[3].str().empty();
+  if (resource_only) {
+    // only resource exist
+    return rgw::ARNResource("", match[1], "");
+  }
+
+  // resource type also exist, and cannot be wildcard
+  if (match[1] == std::string(wildcard)) {
+    return boost::none;
+  }
+  return rgw::ARNResource(match[1], match[2], match[3]);
+}
+
+// "resource" when there is no resource type,
+// "type:resource:qualifier" otherwise (':' is used for both separators)
+std::string ARNResource::to_string() const {
+  if (resource_type.empty()) {
+    return resource;
+  }
+
+  std::string out;
+  out.append(resource_type).push_back(':');
+  out.append(resource).push_back(':');
+  out.append(qualifier);
+  return out;
+}
+
+}
+
diff --git a/src/rgw/rgw_arn.h b/src/rgw/rgw_arn.h
new file mode 100644
index 00000000..406a9f42
--- /dev/null
+++ b/src/rgw/rgw_arn.h
@@ -0,0 +1,121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+#include <string>
+#include <boost/optional.hpp>
+
+class rgw_obj;
+class rgw_bucket;
+
+namespace rgw {
+
+// The partition field of an ARN. "wildcard" stands for "*".
+enum struct Partition {
+ aws, aws_cn, aws_us_gov, wildcard
+ // If we wanted our own ARNs for principal type unique to us
+ // (maybe to integrate better with Swift) or for anything else we
+ // provide that doesn't map onto S3, we could add an 'rgw'
+ // partition type.
+};
+
+// The service field of an ARN. "wildcard" stands for "*".
+// The enumerators have no explicit values, so their numeric values follow the order below.
+enum struct Service {
+ apigateway, appstream, artifact, autoscaling, aws_portal, acm,
+ cloudformation, cloudfront, cloudhsm, cloudsearch, cloudtrail,
+ cloudwatch, events, logs, codebuild, codecommit, codedeploy,
+ codepipeline, cognito_idp, cognito_identity, cognito_sync,
+ config, datapipeline, dms, devicefarm, directconnect,
+ ds, dynamodb, ec2, ecr, ecs, ssm, elasticbeanstalk, elasticfilesystem,
+ elasticloadbalancing, elasticmapreduce, elastictranscoder, elasticache,
+ es, gamelift, glacier, health, iam, importexport, inspector, iot,
+ kms, kinesisanalytics, firehose, kinesis, lambda, lightsail,
+ machinelearning, aws_marketplace, aws_marketplace_management,
+ mobileanalytics, mobilehub, opsworks, opsworks_cm, polly,
+ redshift, rds, route53, route53domains, sts, servicecatalog,
+ ses, sns, sqs, s3, swf, sdb, states, storagegateway, support,
+ trustedadvisor, waf, workmail, workspaces, wildcard
+};
+
+/* valid format:
+ * 'arn:partition:service:region:account-id:resource'
+ * The 'resource' part can be further broken down via ARNResource
+*/
+struct ARN {
+ Partition partition;
+ Service service;
+ std::string region;
+ // Once we refit tenant, we should probably use that instead of a
+ // string.
+ std::string account;
+ std::string resource;
+
+ // default: wildcard partition and service, empty strings for the rest
+ ARN()
+ : partition(Partition::wildcard), service(Service::wildcard) {}
+ // build from the five fields; the strings are taken by value and moved into place
+ ARN(Partition partition, Service service, std::string region,
+ std::string account, std::string resource)
+ : partition(partition), service(service), region(std::move(region)),
+ account(std::move(account)), resource(std::move(resource)) {}
+ // note: the single argument ctors below are not explicit, so they also act as implicit conversions
+ ARN(const rgw_obj& o);
+ ARN(const rgw_bucket& b);
+ ARN(const rgw_bucket& b, const std::string& o);
+ ARN(const std::string& resource_name, const std::string& type, const std::string& tenant, bool has_path=false);
+
+ // parse the textual form; boost::none is returned when parsing fails
+ // "wildcard" tells whether '*' is accepted
+ static boost::optional<ARN> parse(const std::string& s,
+ bool wildcard = false);
+ std::string to_string() const;
+
+ // `this` is the pattern
+ bool match(const ARN& candidate) const;
+};
+
+// free function form of ARN::to_string()
+inline std::string to_string(const ARN& a) {
+ return a.to_string();
+}
+
+// write the textual form of the ARN to the stream
+inline std::ostream& operator <<(std::ostream& m, const ARN& a) {
+  m << a.to_string();
+  return m;
+}
+
+bool operator ==(const ARN& l, const ARN& r);
+bool operator <(const ARN& l, const ARN& r);
+
+/* valid formats (only resource part):
+ * 'resource'
+ * 'resourcetype/resource'
+ * 'resourcetype/resource/qualifier'
+ * 'resourcetype/resource:qualifier'
+ * 'resourcetype:resource'
+ * 'resourcetype:resource:qualifier'
+ * Note that 'resourceType' cannot be wildcard
+*/
+struct ARNResource {
+  constexpr static const char* const wildcard = "*";
+  std::string resource_type;
+  std::string resource;
+  std::string qualifier;
+
+  // default: no type, wildcard resource, no qualifier
+  ARNResource() : resource_type(""), resource(wildcard), qualifier("") {}
+
+  // The arguments are taken by value so that std::move() really moves them into
+  // the members (std::move() on a const reference silently falls back to a copy).
+  // Existing callers passing lvalues, literals or temporaries are not affected.
+  ARNResource(std::string _resource_type, std::string _resource, std::string _qualifier) :
+    resource_type(std::move(_resource_type)), resource(std::move(_resource)), qualifier(std::move(_qualifier)) {}
+
+  // split the resource part of an ARN; boost::none is returned when parsing fails
+  static boost::optional<ARNResource> parse(const std::string& s);
+
+  std::string to_string() const;
+};
+
+// free function form of ARNResource::to_string()
+inline std::string to_string(const ARNResource& r) {
+ return r.to_string();
+}
+
+} // namespace rgw
+
+namespace std {
+// allows rgw::Service to be the key of the unordered containers
+template<>
+struct hash<::rgw::Service> {
+  size_t operator()(const ::rgw::Service& s) const noexcept {
+    // hash the enumerator through its int value
+    const auto as_int = static_cast<int>(s);
+    return hash<int>{}(as_int);
+  }
+};
+} // namespace std
+
diff --git a/src/rgw/rgw_asio_client.cc b/src/rgw/rgw_asio_client.cc
new file mode 100644
index 00000000..bea985a7
--- /dev/null
+++ b/src/rgw/rgw_asio_client.cc
@@ -0,0 +1,188 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/asio/write.hpp>
+
+#include "rgw_asio_client.h"
+#include "rgw_perf_counters.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw::asio;
+
+// Construct a client bound to an existing request parser.
+//
+// parser           reference to the request parser; stored, not copied
+// is_ssl           whether the connection is an ssl connection
+// local_endpoint   local tcp endpoint (copied)
+// remote_endpoint  remote tcp endpoint (copied)
+//
+// txbuf is constructed with *this as its argument, so it must be initialized
+// after the base sub-objects exist (it is the last member in the list).
+ClientIO::ClientIO(parser_type& parser, bool is_ssl,
+ const endpoint_type& local_endpoint,
+ const endpoint_type& remote_endpoint)
+ : parser(parser), is_ssl(is_ssl),
+ local_endpoint(local_endpoint),
+ remote_endpoint(remote_endpoint),
+ txbuf(*this)
+{
+}
+
+// Defaulted destructor, defined out of line in this translation unit.
+ClientIO::~ClientIO() = default;
+
+// Populate the CGI-style request environment (env) from the parsed HTTP
+// request held by the parser.
+//
+// CONTENT_LENGTH and CONTENT_TYPE are taken from the matching headers; every
+// other header is exported as HTTP_<NAME>, where <NAME> is the header name
+// upper-cased with '-' replaced by '_'. Afterwards HTTP_VERSION,
+// REQUEST_METHOD, QUERY_STRING (only when the target contains '?'),
+// REQUEST_URI, SCRIPT_URI, SERVER_PORT, SERVER_PORT_SECURE (only for ssl
+// connections) and REMOTE_ADDR are set.
+//
+// Also increments the l_rgw_qlen and l_rgw_qactive perf counters.
+//
+// cct: context handed to env.init().
+// Returns 0 in all cases.
+int ClientIO::init_env(CephContext *cct)
+{
+  env.init(cct);
+
+  perfcounter->inc(l_rgw_qlen);
+  perfcounter->inc(l_rgw_qactive);
+
+  const auto& request = parser.get();
+  const auto& headers = request;
+  for (auto header = headers.begin(); header != headers.end(); ++header) {
+    const auto& field = header->name(); // enum type for known headers
+    const auto& name = header->name_string();
+    const auto& value = header->value();
+
+    if (field == beast::http::field::content_length) {
+      env.set("CONTENT_LENGTH", value.to_string());
+      continue;
+    }
+    if (field == beast::http::field::content_type) {
+      env.set("CONTENT_TYPE", value.to_string());
+      continue;
+    }
+
+    static const boost::string_ref HTTP_{"HTTP_"};
+
+    // Build the variable name in a std::string. The previous implementation
+    // used a variable-length array on the stack, which is a compiler
+    // extension rather than standard C++.
+    std::string env_name;
+    env_name.reserve(HTTP_.size() + name.size());
+    env_name.append(HTTP_.data(), HTTP_.size());
+    for (auto src = name.begin(); src != name.end(); ++src) {
+      if (*src == '-') {
+        env_name.push_back('_');
+      } else {
+        // std::toupper() is only defined for values representable as
+        // unsigned char (or EOF), so convert before calling it
+        env_name.push_back(static_cast<char>(
+            std::toupper(static_cast<unsigned char>(*src))));
+      }
+    }
+
+    env.set(env_name.c_str(), value.to_string());
+  }
+
+  // the version is encoded as major * 10 + minor
+  int major = request.version() / 10;
+  int minor = request.version() % 10;
+  env.set("HTTP_VERSION", std::to_string(major) + '.' + std::to_string(minor));
+
+  env.set("REQUEST_METHOD", request.method_string().to_string());
+
+  // split uri from query
+  auto url = request.target();
+  auto pos = url.find('?');
+  if (pos != url.npos) {
+    auto query = url.substr(pos + 1);
+    env.set("QUERY_STRING", query.to_string());
+    url = url.substr(0, pos);
+  }
+  env.set("REQUEST_URI", url.to_string());
+  env.set("SCRIPT_URI", url.to_string()); /* FIXME */
+
+  char port_buf[16];
+  snprintf(port_buf, sizeof(port_buf), "%d", local_endpoint.port());
+  env.set("SERVER_PORT", port_buf);
+  if (is_ssl) {
+    env.set("SERVER_PORT_SECURE", port_buf);
+  }
+  env.set("REMOTE_ADDR", remote_endpoint.address().to_string());
+  // TODO: set REMOTE_USER if authenticated
+  return 0;
+}
+
+// Called when the request is finished: applies inc(..., -1) to the same two
+// perf counters (l_rgw_qlen, l_rgw_qactive) that init_env() incremented.
+// Returns 0.
+size_t ClientIO::complete_request()
+{
+ perfcounter->inc(l_rgw_qlen, -1);
+ perfcounter->inc(l_rgw_qactive, -1);
+ return 0;
+}
+
+// Synchronize the output buffer (txbuf) by calling its pubsync().
+void ClientIO::flush()
+{
+ txbuf.pubsync();
+}
+
+// Write the HTTP/1.1 status line "HTTP/1.1 <status> <status_name>\r\n" to the
+// output buffer.
+//
+// status       numeric status code
+// status_name  reason phrase (NUL-terminated)
+//
+// Returns the value reported by txbuf.sputn(), or 0 if formatting failed.
+size_t ClientIO::send_status(int status, const char* status_name)
+{
+  static constexpr size_t STATUS_BUF_SIZE = 128;
+
+  char statusbuf[STATUS_BUF_SIZE];
+  const int statuslen = snprintf(statusbuf, sizeof(statusbuf),
+                                 "HTTP/1.1 %d %s\r\n", status, status_name);
+  if (statuslen < 0) {
+    // encoding error: nothing usable was produced
+    return 0;
+  }
+
+  // snprintf() returns the length the full output would have had. If that
+  // does not fit in statusbuf the buffer holds a truncated line, and passing
+  // statuslen to sputn() would read past the end of the array. Format again
+  // into a buffer of exactly the required size instead.
+  if (static_cast<size_t>(statuslen) >= sizeof(statusbuf)) {
+    std::string line(static_cast<size_t>(statuslen) + 1, '\0');
+    snprintf(&line[0], line.size(),
+             "HTTP/1.1 %d %s\r\n", status, status_name);
+    return txbuf.sputn(line.data(), statuslen);
+  }
+
+  return txbuf.sputn(statusbuf, statuslen);
+}
+
+// Write an interim "100 CONTINUE" response followed by the blank line that
+// ends it, then flush the output buffer. Returns the value reported by
+// txbuf.sputn() for the response text.
+size_t ClientIO::send_100_continue()
+{
+  static constexpr char CONTINUE_RESPONSE[] = "HTTP/1.1 100 CONTINUE\r\n\r\n";
+  // sizeof counts the terminating NUL, which must not be sent
+  constexpr size_t response_len = sizeof(CONTINUE_RESPONSE) - 1;
+
+  const size_t sent = txbuf.sputn(CONTINUE_RESPONSE, response_len);
+  flush();
+  return sent;
+}
+
+static constexpr size_t TIME_BUF_SIZE = 128;
+// Format the current time, broken down with gmtime_r(), as a
+// "Date: ...\r\n" header line into timestr.
+// Returns the number of characters strftime() wrote (not counting the
+// terminating NUL), or 0 when gmtime_r() fails or strftime() reports 0.
+static size_t dump_date_header(char (&timestr)[TIME_BUF_SIZE])
+{
+  const time_t now = time(nullptr);
+  struct tm broken_down;
+  if (gmtime_r(&now, &broken_down) == nullptr) {
+    return 0;
+  }
+  return strftime(timestr, sizeof(timestr),
+                  "Date: %a, %d %b %Y %H:%M:%S %Z\r\n", &broken_down);
+}
+
+// Finish the response header section: emit the Date header (when it could be
+// formatted), a Connection header chosen from parser.keep_alive(), and the
+// blank line that terminates the headers; then flush.
+// Returns the sum of the values reported by txbuf.sputn().
+size_t ClientIO::complete_header()
+{
+  static constexpr char CONN_KEEP_ALIVE[] = "Connection: Keep-Alive\r\n";
+  static constexpr char CONN_CLOSE[] = "Connection: close\r\n";
+  static constexpr char HEADER_END[] = "\r\n";
+
+  size_t sent = 0;
+
+  char timestr[TIME_BUF_SIZE];
+  const size_t date_len = dump_date_header(timestr);
+  if (date_len != 0) {
+    // a non-zero strftime() result is the length of the string it produced
+    sent += txbuf.sputn(timestr, date_len);
+  }
+
+  if (!parser.keep_alive()) {
+    sent += txbuf.sputn(CONN_CLOSE, sizeof(CONN_CLOSE) - 1);
+  } else {
+    sent += txbuf.sputn(CONN_KEEP_ALIVE, sizeof(CONN_KEEP_ALIVE) - 1);
+  }
+
+  sent += txbuf.sputn(HEADER_END, sizeof(HEADER_END) - 1);
+
+  flush();
+  return sent;
+}
+
+// Write one header line, "<name>: <value>\r\n", to the output buffer.
+// Returns the sum of the values reported by the four txbuf.sputn() calls.
+size_t ClientIO::send_header(const boost::string_ref& name,
+                             const boost::string_ref& value)
+{
+  static constexpr char HEADER_SEP[] = ": ";
+  static constexpr char HEADER_END[] = "\r\n";
+
+  // separate statements keep the writes in name, separator, value, terminator order
+  const size_t name_sent = txbuf.sputn(name.data(), name.length());
+  const size_t sep_sent = txbuf.sputn(HEADER_SEP, sizeof(HEADER_SEP) - 1);
+  const size_t value_sent = txbuf.sputn(value.data(), value.length());
+  const size_t end_sent = txbuf.sputn(HEADER_END, sizeof(HEADER_END) - 1);
+
+  return name_sent + sep_sent + value_sent + end_sent;
+}
+
+// Write a "Content-Length: <len>\r\n" header line to the output buffer and
+// return the value reported by txbuf.sputn().
+size_t ClientIO::send_content_length(uint64_t len)
+{
+  static constexpr size_t CONLEN_BUF_SIZE = 128;
+
+  // the fixed text plus at most 20 digits of a uint64_t fits easily
+  char line[CONLEN_BUF_SIZE];
+  const auto line_len = snprintf(line, sizeof(line),
+                                 "Content-Length: %" PRIu64 "\r\n", len);
+
+  return txbuf.sputn(line, line_len);
+}
diff --git a/src/rgw/rgw_asio_client.h b/src/rgw/rgw_asio_client.h
new file mode 100644
index 00000000..e99c3f7c
--- /dev/null
+++ b/src/rgw/rgw_asio_client.h
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_ASIO_CLIENT_H
+#define RGW_ASIO_CLIENT_H
+
+#include <boost/asio/ip/tcp.hpp>
+#include <boost/beast/core.hpp>
+#include <boost/beast/http.hpp>
+#include "include/ceph_assert.h"
+
+#include "rgw_client_io.h"
+
+namespace rgw {
+namespace asio {
+
+namespace beast = boost::beast;
+using parser_type = beast::http::request_parser<beast::http::buffer_body>;
+
+// Base class for the asio frontend's per-request client. It implements the
+// io::RestfulClient response/environment methods on top of a beast request
+// parser and a buffered output stream (txbuf). It does not declare
+// write_data() itself; send_body() calls it, and StreamIO in
+// rgw_asio_frontend.cc provides an override.
+class ClientIO : public io::RestfulClient,
+ public io::BuffererSink {
+ protected:
+ // request parser owned by the caller; accessible to derived classes
+ parser_type& parser;
+ private:
+ // true for ssl connections; makes init_env() set SERVER_PORT_SECURE
+ const bool is_ssl;
+ using endpoint_type = boost::asio::ip::tcp::endpoint;
+ // copies of the connection's endpoints, used by init_env() for
+ // SERVER_PORT and REMOTE_ADDR
+ endpoint_type local_endpoint;
+ endpoint_type remote_endpoint;
+
+ // request environment filled in by init_env()
+ RGWEnv env;
+
+ // output buffer; constructed from *this in the constructor, so it is
+ // declared last
+ rgw::io::StaticOutputBufferer<> txbuf;
+
+ public:
+ ClientIO(parser_type& parser, bool is_ssl,
+ const endpoint_type& local_endpoint,
+ const endpoint_type& remote_endpoint);
+ ~ClientIO() override;
+
+ // fill env from the parsed request; returns 0
+ int init_env(CephContext *cct) override;
+ // decrement the request perf counters; returns 0
+ size_t complete_request() override;
+ // sync txbuf
+ void flush() override;
+ // write the "HTTP/1.1 <status> <status_name>" line
+ size_t send_status(int status, const char *status_name) override;
+ // write and flush a "100 CONTINUE" interim response
+ size_t send_100_continue() override;
+ // write one "<name>: <value>" header line
+ size_t send_header(const boost::string_ref& name,
+ const boost::string_ref& value) override;
+ // write a Content-Length header line
+ size_t send_content_length(uint64_t len) override;
+ // write Date/Connection headers plus the terminating blank line, then flush
+ size_t complete_header() override;
+
+ // body bytes bypass txbuf and go straight to write_data()
+ size_t send_body(const char* buf, size_t len) override {
+ return write_data(buf, len);
+ }
+
+ // access to the environment populated by init_env()
+ RGWEnv& get_env() noexcept override {
+ return env;
+ }
+};
+
+} // namespace asio
+} // namespace rgw
+
+#endif // RGW_ASIO_CLIENT_H
diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc
new file mode 100644
index 00000000..10e8d35a
--- /dev/null
+++ b/src/rgw/rgw_asio_frontend.cc
@@ -0,0 +1,834 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <atomic>
+#include <thread>
+#include <vector>
+
+#include <boost/asio.hpp>
+#define BOOST_COROUTINES_NO_DEPRECATION_WARNING
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+#include <boost/asio/spawn.hpp>
+#include <boost/intrusive/list.hpp>
+
+#include "common/async/shared_mutex.h"
+#include "common/errno.h"
+#include "common/strtol.h"
+
+#include "rgw_asio_client.h"
+#include "rgw_asio_frontend.h"
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+#include <boost/asio/ssl.hpp>
+#endif
+
+#include "rgw_dmclock_async_scheduler.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace {
+
+using tcp = boost::asio::ip::tcp;
+namespace http = boost::beast::http;
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+namespace ssl = boost::asio::ssl;
+#endif
+
+using parse_buffer = boost::beast::flat_static_buffer<65536>;
+
+// ClientIO bound to a concrete stream type (a plain tcp socket or an ssl
+// stream wrapping one). Supplies the raw write and body-read operations.
+template <typename Stream>
+class StreamIO : public rgw::asio::ClientIO {
+  CephContext* const cct;
+  Stream& stream;
+  parse_buffer& buffer;
+ public:
+  StreamIO(CephContext *cct, Stream& stream, rgw::asio::parser_type& parser,
+           parse_buffer& buffer, bool is_ssl,
+           const tcp::endpoint& local_endpoint,
+           const tcp::endpoint& remote_endpoint)
+    : ClientIO(parser, is_ssl, local_endpoint, remote_endpoint),
+      cct(cct), stream(stream), buffer(buffer)
+  {}
+
+  // Write len bytes from buf to the stream and return the number written.
+  // On failure the error is logged, the socket is shut down if the error is
+  // broken_pipe, and rgw::io::Exception is thrown.
+  size_t write_data(const char* buf, size_t len) override {
+    boost::system::error_code ec;
+    const auto written = boost::asio::write(stream, boost::asio::buffer(buf, len), ec);
+    if (!ec) {
+      return written;
+    }
+
+    ldout(cct, 4) << "write_data failed: " << ec.message() << dendl;
+    if (ec == boost::asio::error::broken_pipe) {
+      boost::system::error_code ec_ignored;
+      stream.lowest_layer().shutdown(tcp::socket::shutdown_both, ec_ignored);
+    }
+    throw rgw::io::Exception(ec.value(), std::system_category());
+  }
+
+  // Read up to max bytes of request body into buf and return how many bytes
+  // were stored. Reading stops when buf is full, the parser is done, or the
+  // parser asks for a new buffer; any other error is logged and thrown as
+  // rgw::io::Exception.
+  size_t recv_body(char* buf, size_t max) override {
+    auto& body_remaining = parser.get().body();
+    body_remaining.data = buf;
+    body_remaining.size = max;
+
+    while (body_remaining.size != 0 && !parser.is_done()) {
+      boost::system::error_code ec;
+      http::read_some(stream, buffer, parser, ec);
+      if (ec == http::error::need_buffer) {
+        break;
+      }
+      if (!ec) {
+        continue;
+      }
+      ldout(cct, 4) << "failed to read body: " << ec.message() << dendl;
+      throw rgw::io::Exception(ec.value(), std::system_category());
+    }
+    // the parser decrements size as it fills the buffer
+    return max - body_remaining.size;
+  }
+};
+
+// Stream helper that prints an http version as a string, ie 'HTTP/1.1'.
+// The input is the packed form major * 10 + minor (11 for 1.1).
+struct http_version {
+  unsigned major_ver;
+  unsigned minor_ver;
+  explicit http_version(unsigned version)
+    : major_ver(version / 10), minor_ver(version % 10) {}
+};
+std::ostream& operator<<(std::ostream& out, const http_version& v) {
+  out << "HTTP/" << v.major_ver;
+  out << '.' << v.minor_ver;
+  return out;
+}
+
+// Stream helper that prints the value of one http header, wrapped in the
+// given quote string, or '-' if the header is missing.
+struct log_header {
+  const http::fields& fields;
+  http::field field;
+  std::string_view quote;
+  log_header(const http::fields& fields, http::field field,
+             std::string_view quote = "")
+    : fields(fields), field(field), quote(quote) {}
+};
+std::ostream& operator<<(std::ostream& out, const log_header& h) {
+  const auto found = h.fields.find(h.field);
+  if (found != h.fields.end()) {
+    return out << h.quote << found->value() << h.quote;
+  }
+  return out << '-';
+}
+
+using SharedMutex = ceph::async::SharedMutex<boost::asio::io_context::executor_type>;
+
+// Serve http requests on one connection until the peer goes away, an error
+// occurs, or a request does not ask for keep-alive.
+//
+// Called from a coroutine spawned in AsioFrontend::accept(); asynchronous
+// operations suspend through `yield` and report into `ec`, which the caller
+// inspects after this function returns.
+//
+// context      io_context used to build the optional_yield for the request
+// env          process environment (store, rest, auth registry, ...)
+// stream       tcp socket or ssl stream to read from / write to
+// buffer       parse buffer shared by all requests on this connection
+// is_ssl       forwarded to StreamIO
+// pause_mutex  held shared while a request is processed (see pause())
+// scheduler    forwarded to process_request()
+// ec           in/out: last error from an asynchronous operation
+template <typename Stream>
+void handle_connection(boost::asio::io_context& context,
+ RGWProcessEnv& env, Stream& stream,
+ parse_buffer& buffer, bool is_ssl,
+ SharedMutex& pause_mutex,
+ rgw::dmclock::Scheduler *scheduler,
+ boost::system::error_code& ec,
+ boost::asio::yield_context yield)
+{
+ // limit header to 4k, since we read it all into a single flat_buffer
+ static constexpr size_t header_limit = 4096;
+ // don't impose a limit on the body, since we read it in pieces
+ static constexpr size_t body_limit = std::numeric_limits<size_t>::max();
+
+ auto cct = env.store->ctx();
+
+ // read messages from the stream until eof
+ for (;;) {
+ // configure the parser
+ // (a fresh parser is created for every request on the connection)
+ rgw::asio::parser_type parser;
+ parser.header_limit(header_limit);
+ parser.body_limit(body_limit);
+
+ // parse the header
+ http::async_read_header(stream, buffer, parser, yield[ec]);
+ // these errors mean the connection is gone: log at level 20 and return
+ // without trying to send a response
+ if (ec == boost::asio::error::connection_reset ||
+ ec == boost::asio::error::bad_descriptor ||
+ ec == boost::asio::error::operation_aborted ||
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+ ec == ssl::error::stream_truncated ||
+#endif
+ ec == http::error::end_of_stream) {
+ ldout(cct, 20) << "failed to read header: " << ec.message() << dendl;
+ return;
+ }
+ auto& message = parser.get();
+ if (ec) {
+ // any other header error is answered with 400 Bad Request
+ ldout(cct, 1) << "failed to read header: " << ec.message() << dendl;
+ http::response<http::empty_body> response;
+ response.result(http::status::bad_request);
+ response.version(message.version() == 10 ? 10 : 11);
+ response.prepare_payload();
+ http::async_write(stream, response, yield[ec]);
+ if (ec) {
+ ldout(cct, 5) << "failed to write response: " << ec.message() << dendl;
+ }
+ ldout(cct, 1) << "====== req done http_status=400 ======" << dendl;
+ return;
+ }
+
+ {
+ // take the pause mutex in shared mode; `lock` lives until the end of
+ // this scope, i.e. for the whole time the request is processed
+ auto lock = pause_mutex.async_lock_shared(yield[ec]);
+ if (ec == boost::asio::error::operation_aborted) {
+ return;
+ } else if (ec) {
+ ldout(cct, 1) << "failed to lock: " << ec.message() << dendl;
+ return;
+ }
+
+ // process the request
+ RGWRequest req{env.store->get_new_req_id()};
+
+ auto& socket = stream.lowest_layer();
+ const auto& remote_endpoint = socket.remote_endpoint(ec);
+ if (ec) {
+ ldout(cct, 1) << "failed to connect client: " << ec.message() << dendl;
+ return;
+ }
+
+ StreamIO real_client{cct, stream, parser, buffer, is_ssl,
+ socket.local_endpoint(),
+ remote_endpoint};
+
+ // stack the io filters on top of the raw client
+ auto real_client_io = rgw::io::add_reordering(
+ rgw::io::add_buffering(cct,
+ rgw::io::add_chunking(
+ rgw::io::add_conlen_controlling(
+ &real_client))));
+ RGWRestfulIO client(cct, &real_client_io);
+ auto y = optional_yield{context, yield};
+ int http_ret = 0;
+ process_request(env.store, env.rest, &req, env.uri_prefix,
+ *env.auth_registry, &client, env.olog, y,
+ scheduler, &http_ret);
+
+ // only build the access log line when level 1 output is gathered
+ if (cct->_conf->subsys.should_gather(dout_subsys, 1)) {
+ // access log line elements begin per Apache Combined Log Format with additions following
+ const auto now = ceph::coarse_real_clock::now();
+ using ceph::operator<<; // for coarse_real_time
+ ldout(cct, 1) << "beast: " << hex << &req << dec << ": "
+ << remote_endpoint.address() << " - - [" << now << "] \""
+ << message.method_string() << ' ' << message.target() << ' '
+ << http_version{message.version()} << "\" " << http_ret << ' '
+ << client.get_bytes_sent() + client.get_bytes_received() << ' '
+ << log_header{message, http::field::referer, "\""} << ' '
+ << log_header{message, http::field::user_agent, "\""} << ' '
+ << log_header{message, http::field::range} << dendl;
+ }
+ }
+
+ if (!parser.keep_alive()) {
+ return;
+ }
+
+ // if we failed before reading the entire message, discard any remaining
+ // bytes before reading the next
+ while (!parser.is_done()) {
+ // NOTE(review): this buffer is function-static, so it is shared by every
+ // connection served by this instantiation; its contents are never read
+ static std::array<char, 1024> discard_buffer;
+
+ auto& body = parser.get().body();
+ body.size = discard_buffer.size();
+ body.data = discard_buffer.data();
+
+ http::async_read_some(stream, buffer, parser, yield[ec]);
+ // need_buffer only means discard_buffer filled up: reset it and go on
+ if (ec == http::error::need_buffer) {
+ continue;
+ }
+ if (ec == boost::asio::error::connection_reset) {
+ return;
+ }
+ if (ec) {
+ ldout(cct, 5) << "failed to discard unread message: "
+ << ec.message() << dendl;
+ return;
+ }
+ }
+ }
+}
+
+// Intrusive list node that refers to the socket of one open connection, so
+// that ConnectionList::close() can close it. The socket is referenced, not
+// owned.
+struct Connection : boost::intrusive::list_base_hook<> {
+  tcp::socket& socket;
+  // explicit: a socket should never convert to a Connection implicitly
+  explicit Connection(tcp::socket& socket) : socket(socket) {}
+};
+
+// Mutex-protected intrusive list of the currently open connections. It lets
+// close() close every tracked socket at once.
+class ConnectionList {
+  using List = boost::intrusive::list<Connection>;
+  List connections;
+  std::mutex mutex;
+
+  // Unlink c if it is still in the list (close() may already have cleared it).
+  void remove(Connection& c) {
+    std::lock_guard lock{mutex};
+    if (c.is_linked()) {
+      connections.erase(List::s_iterator_to(c));
+    }
+  }
+ public:
+  // Scope guard returned by add(); unlinks the connection on destruction.
+  class Guard {
+    ConnectionList *list;
+    Connection *conn;
+   public:
+    Guard(ConnectionList *list, Connection *conn) : list(list), conn(conn) {}
+    // Non-copyable: a copy would call remove() a second time, possibly after
+    // the Connection it points to has been destroyed. Returning a Guard from
+    // add() still works because the prvalue is never copied (C++17).
+    Guard(const Guard&) = delete;
+    Guard& operator=(const Guard&) = delete;
+    ~Guard() { list->remove(*conn); }
+  };
+  // Track conn and return the guard that removes it again. The guard must
+  // be kept alive for as long as the connection should stay tracked.
+  [[nodiscard]] Guard add(Connection& conn) {
+    std::lock_guard lock{mutex};
+    connections.push_back(conn);
+    return Guard{this, &conn};
+  }
+  // Close the socket of every tracked connection and empty the list. ec
+  // receives the result of the last socket close() call.
+  void close(boost::system::error_code& ec) {
+    std::lock_guard lock{mutex};
+    for (auto& conn : connections) {
+      conn.socket.close(ec);
+    }
+    connections.clear();
+  }
+};
+
+namespace dmc = rgw::dmclock;
+// Implementation of the asio/beast based frontend: owns the io_context, the
+// listening sockets, the worker threads and the request scheduler.
+class AsioFrontend {
+ RGWProcessEnv env;
+ RGWFrontendConfig* conf;
+ boost::asio::io_context context;
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+ // only initialized by init_ssl() when an ssl_certificate is configured
+ boost::optional<ssl::context> ssl_context;
+ int init_ssl();
+#endif
+ // held shared by each request in flight; pause() calls lock() on it
+ SharedMutex pause_mutex;
+ std::unique_ptr<rgw::dmclock::Scheduler> scheduler;
+
+ // one listening endpoint with its acceptor and the socket that the next
+ // async_accept() will fill in
+ struct Listener {
+ tcp::endpoint endpoint;
+ tcp::acceptor acceptor;
+ tcp::socket socket;
+ bool use_ssl = false;
+ bool use_nodelay = false;
+
+ explicit Listener(boost::asio::io_context& context)
+ : acceptor(context), socket(context) {}
+ };
+ // NOTE: accept handlers capture references to the elements of this vector,
+ // so it must not grow once async_accept() has been started
+ std::vector<Listener> listeners;
+
+ ConnectionList connections;
+
+ // work guard to keep run() threads busy while listeners are paused
+ using Executor = boost::asio::io_context::executor_type;
+ std::optional<boost::asio::executor_work_guard<Executor>> work;
+
+ std::vector<std::thread> threads;
+ // set by stop(); join() uses it to decide whether stop() is still needed
+ std::atomic<bool> going_down{false};
+
+ CephContext* ctx() const { return env.store->ctx(); }
+ std::optional<dmc::ClientCounters> client_counters;
+ std::unique_ptr<dmc::ClientConfig> client_config;
+ // completion handler for async_accept() on a listener
+ void accept(Listener& listener, boost::system::error_code ec);
+
+ public:
+ // Stores env and conf and creates the scheduler selected by
+ // dmc::get_scheduler_t(): an AsyncScheduler for dmclock, otherwise a
+ // SimpleThrottler (scheduler_t::none logs an error and falls through).
+ AsioFrontend(const RGWProcessEnv& env, RGWFrontendConfig* conf,
+ dmc::SchedulerCtx& sched_ctx)
+ : env(env), conf(conf), pause_mutex(context.get_executor())
+ {
+ auto sched_t = dmc::get_scheduler_t(ctx());
+ switch(sched_t){
+ case dmc::scheduler_t::dmclock:
+ scheduler.reset(new dmc::AsyncScheduler(ctx(),
+ context,
+ std::ref(sched_ctx.get_dmc_client_counters()),
+ sched_ctx.get_dmc_client_config(),
+ *sched_ctx.get_dmc_client_config(),
+ dmc::AtLimit::Reject));
+ break;
+ case dmc::scheduler_t::none:
+ lderr(ctx()) << "Got invalid scheduler type for beast, defaulting to throttler" << dendl;
+ [[fallthrough]];
+ case dmc::scheduler_t::throttler:
+ scheduler.reset(new dmc::SimpleThrottler(ctx()));
+
+ }
+ }
+
+ // parse the configuration, open the listeners and start accepting
+ int init();
+ // start the worker threads that run the io_context
+ int run();
+ // close listeners and connections
+ void stop();
+ // stop if necessary, then wait for the worker threads
+ void join();
+ // stop accepting and wait for requests in flight
+ void pause();
+ // install a new store/auth registry and resume accepting
+ void unpause(RGWRados* store, rgw_auth_registry_ptr_t);
+};
+
+// Parse a decimal port number from the NUL-terminated string input.
+//
+// ec is assigned ERANGE when the value exceeds the range of unsigned short
+// and EINVAL when no digits could be converted; it is left untouched on
+// success. The converted value is returned in either case (narrowed to
+// unsigned short).
+unsigned short parse_port(const char *input, boost::system::error_code& ec)
+{
+  char *parse_end = nullptr;
+  const auto value = std::strtoul(input, &parse_end, 10);
+  const bool out_of_range = value > std::numeric_limits<unsigned short>::max();
+  const bool nothing_parsed = (parse_end == input) && (value == 0);
+
+  if (out_of_range) {
+    ec.assign(ERANGE, boost::system::system_category());
+  } else if (nothing_parsed) {
+    ec.assign(EINVAL, boost::system::system_category());
+  }
+  return value;
+}
+
+// Parse an endpoint of the form "addr", "addr:port", "[ipv6]" or
+// "[ipv6]:port".
+//
+// input         text to parse; parse_port() is handed a pointer into it, so
+//               the underlying buffer is expected to be NUL-terminated
+// default_port  port to use when the text does not name one
+// ec            set when the text is empty, malformed, or the port or
+//               address cannot be parsed
+//
+// Returns the parsed endpoint; its contents are only meaningful when ec is
+// not set.
+tcp::endpoint parse_endpoint(boost::asio::string_view input,
+                             unsigned short default_port,
+                             boost::system::error_code& ec)
+{
+  tcp::endpoint endpoint;
+
+  if (input.empty()) {
+    ec = boost::asio::error::invalid_argument;
+    return endpoint;
+  }
+
+  if (input[0] == '[') { // ipv6
+    const size_t addr_begin = 1;
+    const size_t addr_end = input.find(']');
+    if (addr_end == input.npos) { // no matching ]
+      ec = boost::asio::error::invalid_argument;
+      return endpoint;
+    }
+    if (addr_end + 1 < input.size()) {
+      // :port must follow [ipv6]
+      if (input[addr_end + 1] != ':') {
+        ec = boost::asio::error::invalid_argument;
+        return endpoint;
+      } else {
+        auto port_str = input.substr(addr_end + 2);
+        endpoint.port(parse_port(port_str.data(), ec));
+        if (ec) {
+          // report the bad port now: the address parse below reuses ec and
+          // would otherwise mask this error (the ipv4 branch does the same)
+          return endpoint;
+        }
+      }
+    } else {
+      endpoint.port(default_port);
+    }
+    auto addr = input.substr(addr_begin, addr_end - addr_begin);
+    endpoint.address(boost::asio::ip::make_address_v6(addr, ec));
+  } else { // ipv4
+    auto colon = input.find(':');
+    if (colon != input.npos) {
+      auto port_str = input.substr(colon + 1);
+      endpoint.port(parse_port(port_str.data(), ec));
+      if (ec) {
+        return endpoint;
+      }
+    } else {
+      endpoint.port(default_port);
+    }
+    // when there is no colon, substr(0, npos) is the whole input
+    auto addr = input.substr(0, colon);
+    endpoint.address(boost::asio::ip::make_address_v4(addr, ec));
+  }
+  return endpoint;
+}
+
+// Switch the process to the gid and uid configured in ctx. A value of 0
+// means "leave unchanged". The group is switched first, then the user.
+//
+// Returns 0 on success or -errno of the failing setgid()/setuid() call.
+static int drop_privileges(CephContext *ctx)
+{
+  const uid_t uid = ctx->get_set_uid();
+  const gid_t gid = ctx->get_set_gid();
+  const std::string uid_string = ctx->get_set_uid_string();
+  const std::string gid_string = ctx->get_set_gid_string();
+
+  if (gid != 0 && setgid(gid) != 0) {
+    const int saved_errno = errno; // capture before logging can change it
+    ldout(ctx, -1) << "unable to setgid " << gid << ": " << cpp_strerror(saved_errno) << dendl;
+    return -saved_errno;
+  }
+
+  if (uid != 0 && setuid(uid) != 0) {
+    const int saved_errno = errno;
+    ldout(ctx, -1) << "unable to setuid " << uid << ": " << cpp_strerror(saved_errno) << dendl;
+    return -saved_errno;
+  }
+
+  // only announce the change when both ids were configured
+  if (uid != 0 && gid != 0) {
+    ldout(ctx, 0) << "set uid:gid to " << uid << ":" << gid
+                  << " (" << uid_string << ":" << gid_string << ")" << dendl;
+  }
+  return 0;
+}
+
+// Build the listeners from the frontend configuration, open/bind/listen on
+// each of them, start the first async_accept() and finally drop privileges.
+//
+// Configuration keys read here: "port" (one ipv4 and one ipv6 listener per
+// value), "endpoint", "tcp_nodelay" and "max_connection_backlog"; the ssl
+// keys are handled by init_ssl() when ssl support is compiled in.
+//
+// Returns 0 on success, a negative error value on failure, or -EINVAL when
+// no listener could be opened.
+int AsioFrontend::init()
+{
+  boost::system::error_code ec;
+  auto& config = conf->get_config_map();
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+  int r = init_ssl();
+  if (r < 0) {
+    return r;
+  }
+#endif
+
+  // parse endpoints
+  auto ports = config.equal_range("port");
+  for (auto i = ports.first; i != ports.second; ++i) {
+    auto port = parse_port(i->second.c_str(), ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse port=" << i->second << dendl;
+      return -ec.value();
+    }
+    // a bare port listens on both address families
+    listeners.emplace_back(context);
+    listeners.back().endpoint.port(port);
+
+    listeners.emplace_back(context);
+    listeners.back().endpoint = tcp::endpoint(tcp::v6(), port);
+  }
+
+  auto endpoints = config.equal_range("endpoint");
+  for (auto i = endpoints.first; i != endpoints.second; ++i) {
+    auto endpoint = parse_endpoint(i->second, 80, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse endpoint=" << i->second << dendl;
+      return -ec.value();
+    }
+    listeners.emplace_back(context);
+    listeners.back().endpoint = endpoint;
+  }
+  // parse tcp nodelay
+  auto nodelay = config.find("tcp_nodelay");
+  if (nodelay != config.end()) {
+    for (auto& l : listeners) {
+      l.use_nodelay = (nodelay->second == "1");
+    }
+  }
+
+
+  bool socket_bound = false;
+  // start listeners
+  // (no listener is added past this point: the accept handlers below keep
+  // references to the vector's elements)
+  for (auto& l : listeners) {
+    l.acceptor.open(l.endpoint.protocol(), ec);
+    if (ec) {
+      // an unsupported address family only disables this listener
+      if (ec == boost::asio::error::address_family_not_supported) {
+        ldout(ctx(), 0) << "WARNING: cannot open socket for endpoint=" << l.endpoint
+                        << ", " << ec.message() << dendl;
+        continue;
+      }
+
+      lderr(ctx()) << "failed to open socket: " << ec.message() << dendl;
+      return -ec.value();
+    }
+
+    if (l.endpoint.protocol() == tcp::v6()) {
+      l.acceptor.set_option(boost::asio::ip::v6_only(true), ec);
+      if (ec) {
+        lderr(ctx()) << "failed to set v6_only socket option: "
+                     << ec.message() << dendl;
+        return -ec.value();
+      }
+    }
+
+    // use the error_code overload like every other call here; the overload
+    // without ec throws, which would bypass init()'s error reporting
+    l.acceptor.set_option(tcp::acceptor::reuse_address(true), ec);
+    if (ec) {
+      lderr(ctx()) << "failed to set reuse_address socket option: "
+                   << ec.message() << dendl;
+      return -ec.value();
+    }
+    l.acceptor.bind(l.endpoint, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to bind address " << l.endpoint
+                   << ": " << ec.message() << dendl;
+      return -ec.value();
+    }
+
+    auto it = config.find("max_connection_backlog");
+    auto max_connection_backlog = boost::asio::socket_base::max_listen_connections;
+    if (it != config.end()) {
+      string err;
+      max_connection_backlog = strict_strtol(it->second.c_str(), 10, &err);
+      if (!err.empty()) {
+        // fall back to the default on an unparsable value
+        ldout(ctx(), 0) << "WARNING: invalid value for max_connection_backlog=" << it->second << dendl;
+        max_connection_backlog = boost::asio::socket_base::max_listen_connections;
+      }
+    }
+    l.acceptor.listen(max_connection_backlog);
+    l.acceptor.async_accept(l.socket,
+                            [this, &l] (boost::system::error_code ec) {
+                              accept(l, ec);
+                            });
+
+    ldout(ctx(), 4) << "frontend listening on " << l.endpoint << dendl;
+    socket_bound = true;
+  }
+  if (!socket_bound) {
+    lderr(ctx()) << "Unable to listen at any endpoints" << dendl;
+    return -EINVAL;
+  }
+
+  return drop_privileges(ctx());
+}
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+// Read the ssl part of the frontend configuration: load the certificate and
+// private key into ssl_context and create the ssl listeners.
+//
+// Configuration keys read here: "ssl_certificate", "ssl_private_key",
+// "ssl_port" (one ipv4 and one ipv6 listener per value) and "ssl_endpoint".
+// Every ssl key other than the certificate requires "ssl_certificate".
+//
+// Returns 0 on success, -EINVAL for an inconsistent configuration, or the
+// negated error value of the failing operation.
+int AsioFrontend::init_ssl()
+{
+ boost::system::error_code ec;
+ auto& config = conf->get_config_map();
+
+ // ssl configuration
+ auto cert = config.find("ssl_certificate");
+ const bool have_cert = cert != config.end();
+ if (have_cert) {
+ // only initialize the ssl context if it's going to be used
+ ssl_context = boost::in_place(ssl::context::tls);
+ }
+
+ auto key = config.find("ssl_private_key");
+ const bool have_private_key = key != config.end();
+ if (have_private_key) {
+ // without a certificate ssl_context was never created, so bail out
+ // before dereferencing it
+ if (!have_cert) {
+ lderr(ctx()) << "no ssl_certificate configured for ssl_private_key" << dendl;
+ return -EINVAL;
+ }
+ ssl_context->use_private_key_file(key->second, ssl::context::pem, ec);
+ if (ec) {
+ lderr(ctx()) << "failed to add ssl_private_key=" << key->second
+ << ": " << ec.message() << dendl;
+ return -ec.value();
+ }
+ }
+ if (have_cert) {
+ ssl_context->use_certificate_chain_file(cert->second, ec);
+ if (ec) {
+ lderr(ctx()) << "failed to use ssl_certificate=" << cert->second
+ << ": " << ec.message() << dendl;
+ return -ec.value();
+ }
+ if (!have_private_key) {
+ // attempt to use it as a private key if a separate one wasn't provided
+ ssl_context->use_private_key_file(cert->second, ssl::context::pem, ec);
+ if (ec) {
+ lderr(ctx()) << "failed to use ssl_certificate=" << cert->second
+ << " as a private key: " << ec.message() << dendl;
+ return -ec.value();
+ }
+ }
+ }
+
+ // parse ssl endpoints
+ auto ports = config.equal_range("ssl_port");
+ for (auto i = ports.first; i != ports.second; ++i) {
+ if (!have_cert) {
+ lderr(ctx()) << "no ssl_certificate configured for ssl_port" << dendl;
+ return -EINVAL;
+ }
+ auto port = parse_port(i->second.c_str(), ec);
+ if (ec) {
+ lderr(ctx()) << "failed to parse ssl_port=" << i->second << dendl;
+ return -ec.value();
+ }
+ // a bare port listens on both address families
+ listeners.emplace_back(context);
+ listeners.back().endpoint.port(port);
+ listeners.back().use_ssl = true;
+
+ listeners.emplace_back(context);
+ listeners.back().endpoint = tcp::endpoint(tcp::v6(), port);
+ listeners.back().use_ssl = true;
+ }
+
+ auto endpoints = config.equal_range("ssl_endpoint");
+ for (auto i = endpoints.first; i != endpoints.second; ++i) {
+ if (!have_cert) {
+ lderr(ctx()) << "no ssl_certificate configured for ssl_endpoint" << dendl;
+ return -EINVAL;
+ }
+ // 443 is the port used when the endpoint text does not name one
+ auto endpoint = parse_endpoint(i->second, 443, ec);
+ if (ec) {
+ lderr(ctx()) << "failed to parse ssl_endpoint=" << i->second << dendl;
+ return -ec.value();
+ }
+ listeners.emplace_back(context);
+ listeners.back().endpoint = endpoint;
+ listeners.back().use_ssl = true;
+ }
+ return 0;
+}
+#endif // WITH_RADOSGW_BEAST_OPENSSL
+
+// Completion handler for async_accept() on listener l.
+//
+// On success it takes over the accepted socket, immediately re-arms the
+// acceptor, and spawns a coroutine that serves the connection through
+// handle_connection() (after an ssl handshake for ssl listeners). On
+// failure, or when the acceptor is closed, it returns without re-arming.
+void AsioFrontend::accept(Listener& l, boost::system::error_code ec)
+{
+ if (!l.acceptor.is_open()) {
+ return;
+ } else if (ec == boost::asio::error::operation_aborted) {
+ return;
+ } else if (ec) {
+ ldout(ctx(), 1) << "accept failed: " << ec.message() << dendl;
+ return;
+ }
+ // move the accepted socket out so that l.socket can be reused for the
+ // next accept
+ auto socket = std::move(l.socket);
+ tcp::no_delay options(l.use_nodelay);
+ // NOTE(review): the result of set_option() in ec is not checked
+ socket.set_option(options,ec);
+ l.acceptor.async_accept(l.socket,
+ [this, &l] (boost::system::error_code ec) {
+ accept(l, ec);
+ });
+
+ // spawn a coroutine to handle the connection
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+ if (l.use_ssl) {
+ boost::asio::spawn(context,
+ [this, s=std::move(socket)] (boost::asio::yield_context yield) mutable {
+ // the coroutine owns the socket (s); conn/c register it in
+ // `connections` for the lifetime of the coroutine
+ Connection conn{s};
+ auto c = connections.add(conn);
+ // wrap the socket in an ssl stream
+ ssl::stream<tcp::socket&> stream{s, *ssl_context};
+ auto buffer = std::make_unique<parse_buffer>();
+ // do ssl handshake
+ boost::system::error_code ec;
+ auto bytes = stream.async_handshake(ssl::stream_base::server,
+ buffer->data(), yield[ec]);
+ if (ec) {
+ ldout(ctx(), 1) << "ssl handshake failed: " << ec.message() << dendl;
+ return;
+ }
+ buffer->consume(bytes);
+ handle_connection(context, env, stream, *buffer, true, pause_mutex,
+ scheduler.get(), ec, yield);
+ if (!ec) {
+ // ssl shutdown (ignoring errors)
+ stream.async_shutdown(yield[ec]);
+ }
+ s.shutdown(tcp::socket::shutdown_both, ec);
+ });
+ } else {
+#else
+ {
+#endif // WITH_RADOSGW_BEAST_OPENSSL
+ // plain tcp: same as above without the ssl stream and handshake
+ boost::asio::spawn(context,
+ [this, s=std::move(socket)] (boost::asio::yield_context yield) mutable {
+ Connection conn{s};
+ auto c = connections.add(conn);
+ auto buffer = std::make_unique<parse_buffer>();
+ boost::system::error_code ec;
+ handle_connection(context, env, s, *buffer, false, pause_mutex,
+ scheduler.get(), ec, yield);
+ s.shutdown(tcp::socket::shutdown_both, ec);
+ });
+ }
+}
+
+// Start rgw_thread_pool_size worker threads, each of which runs the
+// io_context. Returns 0.
+int AsioFrontend::run()
+{
+  auto cct = ctx();
+  const int thread_count = cct->_conf->rgw_thread_pool_size;
+  threads.reserve(thread_count);
+
+  ldout(cct, 4) << "frontend spawning " << thread_count << " threads" << dendl;
+
+  // the worker threads call io_context::run(), which will return when there's
+  // no work left. hold a work guard to keep these threads going until join()
+  work.emplace(boost::asio::make_work_guard(context));
+
+  int started = 0;
+  while (started < thread_count) {
+    threads.emplace_back([this] {
+      // request warnings on synchronous librados calls in this thread
+      is_asio_thread = true;
+      boost::system::error_code run_ec;
+      context.run(run_ec);
+    });
+    ++started;
+  }
+  return 0;
+}
+
+// Begin shutdown: mark the frontend as going down, close every acceptor and
+// every tracked connection, and cancel waiters on the pause mutex.
+void AsioFrontend::stop()
+{
+  ldout(ctx(), 4) << "frontend initiating shutdown..." << dendl;
+
+  going_down.store(true);
+
+  // errors from close() are collected in close_ec but not acted upon
+  boost::system::error_code close_ec;
+  // close all listeners
+  for (auto& l : listeners) {
+    l.acceptor.close(close_ec);
+  }
+  // close all connections
+  connections.close(close_ec);
+  pause_mutex.cancel();
+}
+
+// Wait for the worker threads to finish. Calls stop() first if it has not
+// been called yet, and releases the work guard so that io_context::run()
+// can return.
+void AsioFrontend::join()
+{
+  const bool already_stopping = going_down.load();
+  if (!already_stopping) {
+    stop();
+  }
+  work.reset();
+
+  ldout(ctx(), 4) << "frontend joining threads..." << dendl;
+  for (auto& worker : threads) {
+    worker.join();
+  }
+  ldout(ctx(), 4) << "frontend done" << dendl;
+}
+
+// Stop accepting new connections and lock the pause mutex. Requests in
+// flight hold the same mutex in shared mode (see handle_connection()).
+void AsioFrontend::pause()
+{
+  ldout(ctx(), 4) << "frontend pausing connections..." << dendl;
+
+  boost::system::error_code ec;
+
+  // cancel pending calls to accept(), but don't close the sockets
+  for (auto& listener : listeners) {
+    listener.acceptor.cancel(ec);
+  }
+
+  // pause and wait for outstanding requests to complete
+  pause_mutex.lock(ec);
+
+  if (!ec) {
+    ldout(ctx(), 4) << "frontend paused" << dendl;
+  } else {
+    ldout(ctx(), 1) << "frontend failed to pause: " << ec.message() << dendl;
+  }
+}
+
+// Resume after pause(): install the new store and auth registry, unlock the
+// pause mutex, and start accepting on every listener again.
+void AsioFrontend::unpause(RGWRados* const store,
+                           rgw_auth_registry_ptr_t auth_registry)
+{
+  env.store = store;
+  env.auth_registry = std::move(auth_registry);
+
+  // unpause to unblock connections
+  pause_mutex.unlock();
+
+  // start accepting connections again
+  for (auto& listener : listeners) {
+    auto on_accept = [this, &listener] (boost::system::error_code accept_ec) {
+      accept(listener, accept_ec);
+    };
+    listener.acceptor.async_accept(listener.socket, on_accept);
+  }
+
+  ldout(ctx(), 4) << "frontend unpaused" << dendl;
+}
+
+} // anonymous namespace
+
+// Definition of the Impl class forward-declared in RGWAsioFrontend. It adds
+// nothing to AsioFrontend beyond a forwarding constructor.
+class RGWAsioFrontend::Impl : public AsioFrontend {
+ public:
+ Impl(const RGWProcessEnv& env, RGWFrontendConfig* conf,
+ rgw::dmclock::SchedulerCtx& sched_ctx)
+ : AsioFrontend(env, conf, sched_ctx) {}
+};
+
+// Create the frontend; all arguments are forwarded to the Impl object, which
+// is allocated here and owned through `impl`.
+RGWAsioFrontend::RGWAsioFrontend(const RGWProcessEnv& env,
+ RGWFrontendConfig* conf,
+ rgw::dmclock::SchedulerCtx& sched_ctx)
+ : impl(new Impl(env, conf, sched_ctx))
+{
+}
+
+// Defaulted here, after Impl is defined, rather than in the header where
+// Impl is only forward-declared.
+RGWAsioFrontend::~RGWAsioFrontend() = default;
+
+// Forwards to AsioFrontend::init() and returns its result.
+int RGWAsioFrontend::init()
+{
+ return impl->init();
+}
+
+// Forwards to AsioFrontend::run() and returns its result.
+int RGWAsioFrontend::run()
+{
+ return impl->run();
+}
+
+// Forwards to AsioFrontend::stop().
+void RGWAsioFrontend::stop()
+{
+ impl->stop();
+}
+
+// Forwards to AsioFrontend::join().
+void RGWAsioFrontend::join()
+{
+ impl->join();
+}
+
+// Forwards to AsioFrontend::pause().
+void RGWAsioFrontend::pause_for_new_config()
+{
+ impl->pause();
+}
+
+// Forwards to AsioFrontend::unpause(), handing over the new store and moving
+// the auth registry pointer into the call.
+void RGWAsioFrontend::unpause_with_new_config(
+ RGWRados* const store,
+ rgw_auth_registry_ptr_t auth_registry
+) {
+ impl->unpause(store, std::move(auth_registry));
+}
diff --git a/src/rgw/rgw_asio_frontend.h b/src/rgw/rgw_asio_frontend.h
new file mode 100644
index 00000000..857910bb
--- /dev/null
+++ b/src/rgw/rgw_asio_frontend.h
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_ASIO_FRONTEND_H
+#define RGW_ASIO_FRONTEND_H
+
+#include <memory>
+#include "rgw_frontend.h"
+
+// RGWFrontend implementation for the asio-based frontend. The header only
+// forward-declares Impl and holds it through a unique_ptr; every method
+// forwards to it (see rgw_asio_frontend.cc).
+class RGWAsioFrontend : public RGWFrontend {
+ class Impl;
+ std::unique_ptr<Impl> impl;
+public:
+ RGWAsioFrontend(const RGWProcessEnv& env, RGWFrontendConfig* conf,
+ rgw::dmclock::SchedulerCtx& sched_ctx);
+ // declared here, defined in the .cc file where Impl is a complete type
+ ~RGWAsioFrontend() override;
+
+ int init() override;
+ int run() override;
+ void stop() override;
+ void join() override;
+
+ void pause_for_new_config() override;
+ void unpause_with_new_config(RGWRados *store,
+ rgw_auth_registry_ptr_t auth_registry) override;
+};
+
+#endif // RGW_ASIO_FRONTEND_H
diff --git a/src/rgw/rgw_auth.cc b/src/rgw/rgw_auth.cc
new file mode 100644
index 00000000..a6f84b22
--- /dev/null
+++ b/src/rgw/rgw_auth.cc
@@ -0,0 +1,722 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <array>
+
+#include "rgw_common.h"
+#include "rgw_auth.h"
+#include "rgw_quota.h"
+#include "rgw_user.h"
+#include "rgw_http_client.h"
+#include "rgw_keystone.h"
+
+#include "include/str_list.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+
+namespace rgw {
+namespace auth {
+
+std::unique_ptr<rgw::auth::Identity>
+transform_old_authinfo(const req_state* const s)
+{
+  /* Adapter presenting the legacy authentication data kept in req_state
+   * through the rgw::auth::Identity interface. It is not meant for public
+   * use and should disappear, together with this function, once all APIs
+   * have moved to the new authentication infrastructure. */
+  class DummyIdentityApplier : public rgw::auth::Identity {
+    CephContext* const cct;
+
+    /* Carrying the identity in a plain rgw_user is acceptable here because
+     * that was the convention before the new auth. */
+    const rgw_user id;
+    const int perm_mask;
+    const bool is_admin;
+    const uint32_t type;
+
+  public:
+    DummyIdentityApplier(CephContext* const _cct,
+                         const rgw_user& _auth_id,
+                         const int _perm_mask,
+                         const bool _is_admin,
+                         const uint32_t _type)
+      : cct(_cct),
+        id(_auth_id),
+        perm_mask(_perm_mask),
+        is_admin(_is_admin),
+        type(_type)
+    {
+    }
+
+    uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override {
+      return rgw_perms_from_aclspec_default_strategy(id, aclspec);
+    }
+
+    /* The admin flag is global: the queried account is irrelevant. */
+    bool is_admin_of(const rgw_user& acct_id) const override {
+      return is_admin;
+    }
+
+    bool is_owner_of(const rgw_user& acct_id) const override {
+      return id == acct_id;
+    }
+
+    bool is_identity(const idset_t& ids) const override {
+      for (const auto& principal : ids) {
+        if (principal.is_wildcard()) {
+          return true;
+        }
+        /* Both remaining matches (tenant-wide and per-user) require the
+         * principal to live in our tenant. */
+        if (principal.get_tenant() != id.tenant) {
+          continue;
+        }
+        if (principal.is_tenant()) {
+          return true;
+        }
+        if (principal.is_user() && principal.get_id() == id.id) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    uint32_t get_perm_mask() const override {
+      return perm_mask;
+    }
+
+    uint32_t get_identity_type() const override {
+      return type;
+    }
+
+    /* The legacy data carries neither an account name nor a subuser. */
+    string get_acct_name() const override {
+      return {};
+    }
+
+    string get_subuser() const override {
+      return {};
+    }
+
+    void to_str(std::ostream& out) const override {
+      out << "RGWDummyIdentityApplier(auth_id=" << id;
+      out << ", perm_mask=" << perm_mask;
+      out << ", is_admin=" << is_admin << ")";
+    }
+  };
+
+  /* A system request is treated as admin: the system user is supposed to
+   * pass through any security check. */
+  return std::make_unique<DummyIdentityApplier>(s->cct,
+                                                s->user->user_id,
+                                                s->perm_mask,
+                                                s->system_request,
+                                                s->user->type);
+}
+
+} /* namespace auth */
+} /* namespace rgw */
+
+
+/* Look up @uid (in its string form) in @aclspec and return the permission
+ * stored for it, or 0 when the spec has no entry for that user. */
+uint32_t rgw_perms_from_aclspec_default_strategy(
+  const rgw_user& uid,
+  const rgw::auth::Identity::aclspec_t& aclspec)
+{
+  dout(5) << "Searching permissions for uid=" << uid << dendl;
+
+  const auto entry = aclspec.find(uid.to_str());
+  if (entry == std::end(aclspec)) {
+    dout(5) << "Permissions for user not found" << dendl;
+    return 0;
+  }
+
+  dout(5) << "Found permission: " << entry->second << dendl;
+  return entry->second;
+}
+
+
+/* Build an ACL-spec key of the form "<tenant>:<id>".
+ *
+ * Returned by non-const value: a const-qualified return value cannot be
+ * moved from, so it would force a copy at the call site. */
+static inline std::string make_spec_item(const std::string& tenant,
+                                         const std::string& id)
+{
+  return tenant + ":" + id;
+}
+
+
+/* Decide how a Strategy proceeds after an engine REJECTED the request.
+ * Returns {try_next, new strategy result}. A rejection always stops the
+ * iteration; only the reported result depends on the control flag. */
+static inline std::pair<bool, rgw::auth::Engine::result_t>
+strategy_handle_rejected(rgw::auth::Engine::result_t&& engine_result,
+                         const rgw::auth::Strategy::Control policy,
+                         rgw::auth::Engine::result_t&& strategy_result)
+{
+  using Control = rgw::auth::Strategy::Control;
+  switch (policy) {
+    case Control::REQUISITE:
+    case Control::SUFFICIENT:
+      /* Stop and report what the engine said. */
+      return std::make_pair(false, std::move(engine_result));
+
+    case Control::FALLBACK:
+      /* Stop, but keep the result accumulated so far. */
+      return std::make_pair(false, std::move(strategy_result));
+
+    default:
+      /* Huh, memory corruption? */
+      ceph_abort();
+  }
+}
+
+/* Decide how a Strategy proceeds after an engine DENIED the request.
+ * Returns {try_next, new strategy result}. */
+static inline std::pair<bool, rgw::auth::Engine::result_t>
+strategy_handle_denied(rgw::auth::Engine::result_t&& engine_result,
+                       const rgw::auth::Strategy::Control policy,
+                       rgw::auth::Engine::result_t&& strategy_result)
+{
+  using Control = rgw::auth::Strategy::Control;
+
+  if (policy == Control::REQUISITE) {
+    /* Stop here and report the engine's denial. */
+    return std::make_pair(false, std::move(engine_result));
+  }
+  if (policy == Control::SUFFICIENT) {
+    /* Remember the engine's denial and move on to the next engine. */
+    return std::make_pair(true, std::move(engine_result));
+  }
+  if (policy == Control::FALLBACK) {
+    /* Move on without overriding the result accumulated so far. */
+    return std::make_pair(true, std::move(strategy_result));
+  }
+
+  /* Huh, memory corruption? */
+  ceph_abort();
+}
+
+/* Decide how a Strategy proceeds after an engine GRANTED access.
+ * Returns {try_next, new strategy result}. The engine's result is adopted
+ * in every case; only REQUISITE keeps iterating. */
+static inline std::pair<bool, rgw::auth::Engine::result_t>
+strategy_handle_granted(rgw::auth::Engine::result_t&& engine_result,
+                        const rgw::auth::Strategy::Control policy,
+                        rgw::auth::Engine::result_t&& strategy_result)
+{
+  using Control = rgw::auth::Strategy::Control;
+  switch (policy) {
+    case Control::REQUISITE:
+      /* Try next. */
+      return std::make_pair(true, std::move(engine_result));
+
+    case Control::SUFFICIENT:
+    case Control::FALLBACK:
+      /* Don't try next. */
+      return std::make_pair(false, std::move(engine_result));
+
+    default:
+      /* Huh, memory corruption? */
+      ceph_abort();
+  }
+}
+
+/* Run the stacked engines in insertion order and fold their verdicts into
+ * a single result according to each engine's Control flag. */
+rgw::auth::Engine::result_t
+rgw::auth::Strategy::authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const
+{
+  /* Stays a plain denial when the stack is empty. */
+  result_t strategy_result = result_t::deny();
+
+  for (const stack_item_t& item : auth_stack) {
+    const rgw::auth::Engine& engine = item.first;
+    const auto& policy = item.second;
+
+    ldpp_dout(dpp, 20) << get_name() << ": trying " << engine.get_name() << dendl;
+
+    /* An int thrown by the engine is turned into a denial carrying that
+     * value as the reason. */
+    result_t engine_result = result_t::deny();
+    try {
+      engine_result = engine.authenticate(dpp, s);
+    } catch (const int err) {
+      engine_result = result_t::deny(err);
+    }
+
+    bool try_next = true;
+    const auto status = engine_result.get_status();
+    if (status == result_t::Status::REJECTED) {
+      ldpp_dout(dpp, 20) << engine.get_name() << " rejected with reason="
+                         << engine_result.get_reason() << dendl;
+
+      std::tie(try_next, strategy_result) =
+        strategy_handle_rejected(std::move(engine_result), policy,
+                                 std::move(strategy_result));
+    } else if (status == result_t::Status::DENIED) {
+      ldpp_dout(dpp, 20) << engine.get_name() << " denied with reason="
+                         << engine_result.get_reason() << dendl;
+
+      std::tie(try_next, strategy_result) =
+        strategy_handle_denied(std::move(engine_result), policy,
+                               std::move(strategy_result));
+    } else if (status == result_t::Status::GRANTED) {
+      ldpp_dout(dpp, 20) << engine.get_name() << " granted access" << dendl;
+
+      std::tie(try_next, strategy_result) =
+        strategy_handle_granted(std::move(engine_result), policy,
+                                std::move(strategy_result));
+    } else {
+      ceph_abort();
+    }
+
+    if (! try_next) {
+      break;
+    }
+  }
+
+  return strategy_result;
+}
+
+/* Authenticate @s with @auth_strategy and, on success, let the resulting
+ * applier/completer update the request state. Returns 0 on success,
+ * otherwise the failure reason or the int thrown along the way. */
+int
+rgw::auth::Strategy::apply(const DoutPrefixProvider *dpp, const rgw::auth::Strategy& auth_strategy,
+                           req_state* const s) noexcept
+{
+  try {
+    auto result = auth_strategy.authenticate(dpp, s);
+    if (result.get_status() != result_t::Status::GRANTED) {
+      /* Anything other than GRANTED fails the request; report the reason
+       * recorded in the result. */
+      ldpp_dout(dpp, 5) << "Failed the auth strategy, reason="
+                        << result.get_reason() << dendl;
+      return result.get_reason();
+    }
+
+    try {
+      rgw::auth::IdentityApplier::aplptr_t apl = result.get_applier();
+      rgw::auth::Completer::cmplptr_t cmpl = result.get_completer();
+
+      /* Account used by a given RGWOp is decoupled from identity employed
+       * in the authorization phase (RGWOp::verify_permissions). */
+      apl->load_acct_info(dpp, *s->user);
+      s->perm_mask = apl->get_perm_mask();
+
+      /* The only place where req_state travels as pointer-to-non-const,
+       * i.e. where modifying it is allowed. At the time of writing only
+       * RGWTempURLEngine needed that feature. */
+      apl->modify_request_state(dpp, s);
+      if (cmpl) {
+        cmpl->modify_request_state(dpp, s);
+      }
+
+      s->auth.identity = std::move(apl);
+      s->auth.completer = std::move(cmpl);
+
+      return 0;
+    } catch (const int err) {
+      ldpp_dout(dpp, 5) << "applier throwed err=" << err << dendl;
+      return err;
+    }
+  } catch (const int err) {
+    ldpp_dout(dpp, 5) << "auth engine throwed err=" << err << dendl;
+    return err;
+  }
+
+  /* We never should be here. */
+  return -EPERM;
+}
+
+/* Append @engine, tagged with @ctrl_flag, to the end of the stack. Only a
+ * reference is stored, so the engine has to outlive this Strategy. */
+void
+rgw::auth::Strategy::add_engine(const Control ctrl_flag,
+                                const Engine& engine) noexcept
+{
+  auth_stack.emplace_back(std::cref(engine), ctrl_flag);
+}
+
+/* Write a one-line description built from the token claims to @out. */
+void rgw::auth::WebIdentityApplier::to_str(std::ostream& out) const
+{
+  out << "rgw::auth::WebIdentityApplier(sub =" << token_claims.sub;
+  out << ", user_name=" << token_claims.user_name;
+  out << ", aud =" << token_claims.aud;
+  out << ", provider_id =" << token_claims.iss << ")";
+}
+
+/* Return the token issuer with the first occurrence of "http://" removed,
+ * or, when there is none, with the first occurrence of "https://" removed.
+ * The issuer is returned unchanged if neither is present. */
+string rgw::auth::WebIdentityApplier::get_idp_url() const
+{
+  string idp_url = token_claims.iss;
+
+  const auto http_pos = idp_url.find("http://");
+  if (http_pos != std::string::npos) {
+    idp_url.erase(http_pos, 7);
+    return idp_url;
+  }
+
+  const auto https_pos = idp_url.find("https://");
+  if (https_pos != std::string::npos) {
+    idp_url.erase(https_pos, 8);
+  }
+  return idp_url;
+}
+
+/* Publish the token claims to the request: "sub", "aud" and "provider_id"
+ * go into the request args, and the audience is stored in the request env
+ * under the key "<idp url>:app_id". */
+void rgw::auth::WebIdentityApplier::modify_request_state(const DoutPrefixProvider *dpp, req_state* s) const
+{
+  auto& args = s->info.args;
+  args.append("sub", token_claims.sub);
+  args.append("aud", token_claims.aud);
+  args.append("provider_id", token_claims.iss);
+
+  const string condition = get_idp_url() + ":app_id";
+  s->env.emplace(condition, token_claims.aud);
+}
+
+/* Return true only when @ids holds at most one principal and that
+ * principal is an OIDC provider whose URL equals this token's issuer URL
+ * (as produced by get_idp_url()). Sets with more than one element never
+ * match. */
+bool rgw::auth::WebIdentityApplier::is_identity(const idset_t& ids) const
+{
+  if (ids.size() > 1) {
+    return false;
+  }
+
+  /* Loop-invariant, so compute it once instead of per element. */
+  const string idp_url = get_idp_url();
+
+  /* Iterate by reference: copying a Principal on every step is pointless. */
+  for (const auto& id : ids) {
+    if (id.is_oidc_provider() && id.get_idp_url() == idp_url) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* rgw::auth::RemoteApplier */
+/* OR together every permission @aclspec grants to this identity: by the
+ * account user, by its implicit-tenant form, and by the optional extra
+ * strategy supplied by the auth engine. */
+uint32_t rgw::auth::RemoteApplier::get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const
+{
+  const rgw_user& acct_user = info.acct_user;
+
+  /* For backward compatibility with ACLOwner. */
+  uint32_t perm = rgw_perms_from_aclspec_default_strategy(acct_user, aclspec);
+
+  /* Also cover the case where rgw_keystone_implicit_tenants was enabled:
+   * the account may be known under the tenant named after the user. */
+  if (acct_user.tenant.empty()) {
+    const rgw_user tenanted_acct_user(acct_user.id, acct_user.id);
+    perm |= rgw_perms_from_aclspec_default_strategy(tenanted_acct_user,
+                                                    aclspec);
+  }
+
+  /* Finally, the additional strategy supplied by a specific auth engine,
+   * if there is one. */
+  if (extra_acl_strategy) {
+    perm |= extra_acl_strategy(aclspec);
+  }
+
+  ldpp_dout(dpp, 20) << "from ACL got perm=" << perm << dendl;
+  return perm;
+}
+
+/* The admin flag comes straight from the auth info; @uid is not consulted. */
+bool rgw::auth::RemoteApplier::is_admin_of(const rgw_user& uid) const {
+  return info.is_admin;
+}
+
+/* True when @uid is the account user itself or, for an account without a
+ * tenant, its implicit-tenant form (tenant named after the user id). */
+bool rgw::auth::RemoteApplier::is_owner_of(const rgw_user& uid) const
+{
+  const rgw_user& acct_user = info.acct_user;
+
+  if (acct_user.tenant.empty()) {
+    const rgw_user tenanted_acct_user(acct_user.id, acct_user.id);
+    if (tenanted_acct_user == uid) {
+      return true;
+    }
+  }
+
+  return acct_user == uid;
+}
+
+/* Check whether any principal in @ids designates this identity: a
+ * wildcard, our tenant, or our user within our tenant. */
+bool rgw::auth::RemoteApplier::is_identity(const idset_t& ids) const {
+  const rgw_user& acct_user = info.acct_user;
+
+  /* To cover the case where rgw_keystone_implicit_tenants was enabled, an
+   * account without a tenant is matched against the tenant named after
+   * its user id. */
+  const std::string& effective_tenant =
+    acct_user.tenant.empty() ? acct_user.id : acct_user.tenant;
+
+  for (const auto& id : ids) {
+    if (id.is_wildcard()) {
+      return true;
+    }
+    if (id.is_tenant() && effective_tenant == id.get_tenant()) {
+      return true;
+    }
+    if (id.is_user() &&
+        acct_user.id == id.get_id() &&
+        effective_tenant == id.get_tenant()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Write a one-line description built from the auth info to @out. */
+void rgw::auth::RemoteApplier::to_str(std::ostream& out) const
+{
+  out << "rgw::auth::RemoteApplier(acct_user=" << info.acct_user;
+  out << ", acct_name=" << info.acct_name;
+  out << ", perm_mask=" << info.perm_mask;
+  out << ", is_admin=" << info.is_admin << ")";
+}
+
+/* Parse "rgw_keystone_implicit_tenants" (case-insensitively) and cache the
+ * resulting flag set in `saved`:
+ *   both / true / 1   -> S3 and Swift
+ *   0 / none / false  -> nothing
+ *   s3                -> S3 only
+ *   swift             -> Swift only
+ *   anything else     -> IMPLICIT_TENANTS_BAD (including the empty string) */
+void rgw::auth::ImplicitTenants::recompute_value(const ConfigProxy& c)
+{
+  const std::string s = c.get_val<std::string>("rgw_keystone_implicit_tenants");
+
+  /* Start from the "unrecognized" value and overwrite it on a match. */
+  int v = IMPLICIT_TENANTS_BAD;
+  if (boost::iequals(s, "both") ||
+      boost::iequals(s, "true") ||
+      boost::iequals(s, "1")) {
+    v = IMPLICIT_TENANTS_S3|IMPLICIT_TENANTS_SWIFT;
+  } else if (boost::iequals(s, "0") ||
+             boost::iequals(s, "none") ||
+             boost::iequals(s, "false")) {
+    v = 0;
+  } else if (boost::iequals(s, "s3")) {
+    v = IMPLICIT_TENANTS_S3;
+  } else if (boost::iequals(s, "swift")) {
+    v = IMPLICIT_TENANTS_SWIFT;
+  }
+
+  saved = v;
+}
+
+/* Return the nullptr-terminated list of configuration keys this observer
+ * wants to be notified about. */
+const char **rgw::auth::ImplicitTenants::get_tracked_conf_keys() const
+{
+  static const char *tracked_keys[] = {
+    "rgw_keystone_implicit_tenants",
+    nullptr
+  };
+  return tracked_keys;
+}
+
+/* Re-parse the cached value, but only if our option is among @changed. */
+void rgw::auth::ImplicitTenants::handle_conf_change(const ConfigProxy& c,
+                                                    const std::set <std::string> &changed)
+{
+  if (changed.count("rgw_keystone_implicit_tenants") == 0) {
+    return;
+  }
+  recompute_value(c);
+}
+
+/* Fill @user_info for a brand new account derived from @acct_user and
+ * persist it via rgw_store_user_info(). With @implicit_tenant set, an
+ * account without a tenant is placed in the tenant named after its id.
+ * Throws the (negative) int return code if storing fails. */
+void rgw::auth::RemoteApplier::create_account(const DoutPrefixProvider* dpp,
+                                              const rgw_user& acct_user,
+                                              bool implicit_tenant,
+                                              RGWUserInfo& user_info) const /* out */
+{
+  if (info.acct_type) {
+    /* ldap/keystone for s3 users */
+    user_info.type = info.acct_type;
+  }
+
+  /* An upper layer may enforce creating new accounts within their own
+   * tenants. */
+  rgw_user new_acct_user = acct_user;
+  if (implicit_tenant && new_acct_user.tenant.empty()) {
+    new_acct_user.tenant = new_acct_user.id;
+  }
+
+  user_info.user_id = new_acct_user;
+  user_info.display_name = info.acct_name;
+
+  /* Defaults taken from the configuration. */
+  user_info.max_buckets =
+    cct->_conf.get_val<int64_t>("rgw_user_max_buckets");
+  rgw_apply_default_bucket_quota(user_info.bucket_quota, cct->_conf);
+  rgw_apply_default_user_quota(user_info.user_quota, cct->_conf);
+
+  const int ret = rgw_store_user_info(store, user_info, nullptr, nullptr,
+                                      real_time(), true);
+  if (ret >= 0) {
+    return;
+  }
+
+  ldpp_dout(dpp, 0) << "ERROR: failed to store new user info: user="
+                    << user_info.user_id << " ret=" << ret << dendl;
+  throw ret;
+}
+
+/* TODO(rzarzynski): we need to handle display_name changes. */
+/* Load into @user_info the account that belongs to the authenticated
+ * identity, creating it when no matching account exists yet. Errors from
+ * account creation propagate as the int thrown by create_account(). */
+void rgw::auth::RemoteApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */
+{
+  /* This applier loads the account of the authenticated identity itself.
+   * The original note says another policy may be applied by wrapping it
+   * in a decorator. */
+  const rgw_user& acct_user = info.acct_user;
+  auto implicit_value = implicit_tenant_context.get_value();
+  const bool implicit_tenant = implicit_value.implicit_tenants_for_(implicit_tenant_bit);
+  const bool split_mode = implicit_value.is_split_mode();
+
+  /* Normally, empty "tenant" field of acct_user means the authenticated
+   * identity has the legacy, global tenant. However, due to inclusion
+   * of multi-tenancy, we got some special compatibility kludge for remote
+   * backends like Keystone.
+   * If the global tenant is the requested one, we try the same tenant as
+   * the user name first. If that RGWUserInfo exists, we use it. This way,
+   * migrated OpenStack users can get their namespaced containers and
+   * nobody's the wiser.
+   * If that fails, we look up in the requested (possibly empty) tenant.
+   * If that fails too, we create the account within the global or separated
+   * namespace depending on rgw_keystone_implicit_tenants.
+   * For compatibility with previous versions of ceph, it is possible
+   * to enable implicit_tenants for only s3 or only swift. In this mode
+   * ("split_mode"), we must constrain the id lookups to only use the
+   * identifier space that would be used if the id were to be created. */
+
+  /* 1st lookup: <id>$<id>. Suppressed in split mode when this protocol
+   * does not use implicit tenants (that id belongs to the "other" one). */
+  const bool lookup_tenanted = !(split_mode && !implicit_tenant);
+  if (lookup_tenanted && acct_user.tenant.empty()) {
+    const rgw_user tenanted_uid(acct_user.id, acct_user.id);
+
+    if (rgw_get_user_info_by_uid(store, tenanted_uid, user_info) >= 0) {
+      /* Succeeded. */
+      return;
+    }
+  }
+
+  /* 2nd lookup: the id exactly as requested. Suppressed in split mode when
+   * this protocol does use implicit tenants. */
+  const bool lookup_requested = !(split_mode && implicit_tenant);
+  if (lookup_requested &&
+      rgw_get_user_info_by_uid(store, acct_user, user_info) >= 0) {
+    /* Succeeded. */
+    return;
+  }
+
+  /* Log through the request's prefix provider, as create_account() does. */
+  ldpp_dout(dpp, 0) << "NOTICE: couldn't map swift user " << acct_user << dendl;
+  create_account(dpp, acct_user, implicit_tenant, user_info);
+
+  /* Succeeded if we are here (create_account() has not thrown). */
+}
+
+/* rgw::auth::LocalApplier */
+/* Out-of-class definition of the static member; default-constructed, so
+ * NO_SUBUSER is the empty string. */
+const std::string rgw::auth::LocalApplier::NO_SUBUSER;
+
+/* Permissions @aclspec grants to the local user, via the default strategy. */
+uint32_t rgw::auth::LocalApplier::get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const {
+  const rgw_user& uid = user_info.user_id;
+  return rgw_perms_from_aclspec_default_strategy(uid, aclspec);
+}
+
+/* Admin and system users are admins of every account; @uid is not
+ * consulted. */
+bool rgw::auth::LocalApplier::is_admin_of(const rgw_user& uid) const {
+  if (user_info.admin) {
+    return true;
+  }
+  return user_info.system;
+}
+
+/* The local user owns exactly the account with its own user id. */
+bool rgw::auth::LocalApplier::is_owner_of(const rgw_user& uid) const {
+  return uid == user_info.user_id;
+}
+
+/* Check whether any principal in @ids designates this identity. Matches:
+ * a wildcard; our tenant; or, within our tenant, a user principal whose id
+ * is "<user>", "<user>:*" or (when a subuser is set) "<user>:<subuser>". */
+bool rgw::auth::LocalApplier::is_identity(const idset_t& ids) const {
+  const rgw_user& uid = user_info.user_id;
+
+  for (const auto& id : ids) {
+    if (id.is_wildcard()) {
+      return true;
+    }
+    /* Every remaining match requires the principal to be in our tenant. */
+    if (id.get_tenant() != uid.tenant) {
+      continue;
+    }
+    if (id.is_tenant()) {
+      return true;
+    }
+    if (! id.is_user()) {
+      continue;
+    }
+
+    if (id.get_id() == uid.id) {
+      return true;
+    }
+    if (id.get_id() == uid.id + ":*") {
+      return true;
+    }
+    if (subuser != NO_SUBUSER && id.get_id() == uid.id + ":" + subuser) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Write a one-line description of the local user and subuser to @out. */
+void rgw::auth::LocalApplier::to_str(std::ostream& out) const {
+  out << "rgw::auth::LocalApplier(acct_user=" << user_info.user_id;
+  out << ", acct_name=" << user_info.display_name;
+  out << ", subuser=" << subuser;
+  out << ", perm_mask=" << get_perm_mask();
+  out << ", is_admin=" << static_cast<bool>(user_info.admin) << ")";
+}
+
+/* Permission mask for @subuser_name within @uinfo:
+ *  - no subuser (empty or NO_SUBUSER): RGW_PERM_FULL_CONTROL,
+ *  - known subuser: the mask stored for it,
+ *  - unknown subuser: RGW_PERM_NONE. */
+uint32_t rgw::auth::LocalApplier::get_perm_mask(const std::string& subuser_name,
+                                                const RGWUserInfo &uinfo) const
+{
+  if (subuser_name.empty() || subuser_name == NO_SUBUSER) {
+    /* Due to backward compatibility. */
+    return RGW_PERM_FULL_CONTROL;
+  }
+
+  const auto found = uinfo.subusers.find(subuser_name);
+  if (found == std::end(uinfo.subusers)) {
+    /* Subuser specified but not found. */
+    return RGW_PERM_NONE;
+  }
+  return found->second.perm_mask;
+}
+
+/* Copy the user info this applier already holds into @user_info. Per the
+ * original note, the account is that of the authenticated identity, so an
+ * extra call to RADOS can be safely skipped. */
+void rgw::auth::LocalApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */
+{
+  const RGWUserInfo& own_info = this->user_info;
+  user_info = own_info;
+}
+
+/* Write a one-line description (role name followed by every attached
+ * policy) to @out.
+ *
+ * The prefix names RoleApplier; it previously printed
+ * "rgw::auth::LocalApplier", a copy-paste slip that made log lines
+ * indistinguishable from those of LocalApplier. */
+void rgw::auth::RoleApplier::to_str(std::ostream& out) const {
+  out << "rgw::auth::RoleApplier(role name =" << role_name;
+  /* By reference: printing does not need a copy of each policy. */
+  for (const auto& policy : role_policies) {
+    out << ", role policy =" << policy;
+  }
+  out << ")";
+}
+
+/* Check whether any principal in @ids designates this role: a wildcard,
+ * or a role principal whose name ("<id>", or "<tenant>$<id>" when it has
+ * a tenant) equals role_name. */
+bool rgw::auth::RoleApplier::is_identity(const idset_t& ids) const {
+  for (const auto& p : ids) {
+    if (p.is_wildcard()) {
+      return true;
+    }
+    if (! p.is_role()) {
+      continue;
+    }
+
+    const string tenant = p.get_tenant();
+    const string name = tenant.empty() ? string(p.get_id())
+                                       : tenant + "$" + p.get_id();
+    if (name == role_name) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Only the user id is filled in; the rest of @user_info is left as is. */
+void rgw::auth::RoleApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */
+{
+  const auto& own_id = this->user_id;
+  user_info.user_id = own_id;
+}
+
+/* Parse every role policy and append it to the request's list of IAM user
+ * policies. A policy that fails to parse is logged and skipped. */
+void rgw::auth::RoleApplier::modify_request_state(const DoutPrefixProvider *dpp, req_state* s) const
+{
+  /* Each policy text is deliberately copied per iteration: the original
+   * passes a mutable copy to static_from_string(), presumably because that
+   * helper needs a non-const string -- TODO confirm. */
+  for (auto policy_text : role_policies) {
+    try {
+      bufferlist bl = bufferlist::static_from_string(policy_text);
+      /* Not const: std::move() on a const object silently copies. */
+      rgw::IAM::Policy p(s->cct, s->user->user_id.tenant, bl);
+      s->iam_user_policies.push_back(std::move(p));
+    } catch (const rgw::IAM::PolicyParseException& e) {
+      /* Control shouldn't reach here as the policy has already been
+       * verified earlier. */
+      ldpp_dout(dpp, 20) << "failed to parse policy: " << e.what() << dendl;
+    }
+  }
+}
+
+/* Grant access as the anonymous user when the engine is applicable to the
+ * request; otherwise deny with -EPERM. */
+rgw::auth::Engine::result_t
+rgw::auth::AnonymousEngine::authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const
+{
+  if (! is_applicable(s)) {
+    return result_t::deny(-EPERM);
+  }
+
+  RGWUserInfo user_info;
+  rgw_get_anon_user(user_info);
+
+  /* Local applier for the anonymous account, without any subuser. */
+  auto apl = apl_factory->create_apl_local(cct, s, user_info,
+                                           rgw::auth::LocalApplier::NO_SUBUSER,
+                                           boost::none);
+  return result_t::grant(std::move(apl));
+}
diff --git a/src/rgw/rgw_auth.h b/src/rgw/rgw_auth.h
new file mode 100644
index 00000000..be7a102a
--- /dev/null
+++ b/src/rgw/rgw_auth.h
@@ -0,0 +1,696 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#ifndef CEPH_RGW_AUTH_H
+#define CEPH_RGW_AUTH_H
+
+#include <functional>
+#include <ostream>
+#include <type_traits>
+#include <system_error>
+#include <utility>
+
+#include "rgw_common.h"
+#include "rgw_keystone.h"
+#include "rgw_web_idp.h"
+
+#define RGW_USER_ANON_ID "anonymous"
+
+namespace rgw {
+namespace auth {
+
+using Exception = std::system_error;
+
+
+/* Information about an identity, used by RGWOp to authorize any operation
+ * that comes from an authenticated user. */
+class Identity {
+public:
+  using aclspec_t = std::map<std::string, int>;
+  using idset_t = boost::container::flat_set<Principal>;
+
+  virtual ~Identity() = default;
+
+  /* Translate the ACL given in @aclspec into the concrete permission set
+   * used during the authorization phase (RGWOp::verify_permission).
+   * On error throws rgw::auth::Exception storing the reason.
+   *
+   * NOTE: the implementation gives the items of @aclspec their actual
+   * meaning, so it may differ between appliers. */
+  virtual uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const = 0;
+
+  /* Whether this identity *can be treated as* an admin of the rgw_user
+   * (account in Swift's terminology) given in @uid. On error throws
+   * rgw::auth::Exception storing the reason. */
+  virtual bool is_admin_of(const rgw_user& uid) const = 0;
+
+  /* Whether this identity *is* the owner of the rgw_user (account in
+   * Swift's terminology) given in @uid. On internal error throws
+   * rgw::auth::Exception storing the reason. */
+  virtual bool is_owner_of(const rgw_user& uid) const = 0;
+
+  /* Permission mask narrowing down the set of operations allowed for this
+   * identity; it reflects the idea of a subuser tied to RGWUserInfo. On
+   * error throws rgw::auth::Exception with the reason. */
+  virtual uint32_t get_perm_mask() const = 0;
+
+  /* An identity is anonymous when it owns the anonymous account
+   * (rgw_user). On error throws rgw::auth::Exception storing the reason. */
+  virtual bool is_anonymous() const {
+    const rgw_user anon_user(RGW_USER_ANON_ID);
+    return is_owner_of(anon_user);
+  }
+
+  /* Write a textual description of the identity to @out. */
+  virtual void to_str(std::ostream& out) const = 0;
+
+  /* Whether this identity corresponds to an identity in the provided set. */
+  virtual bool is_identity(const idset_t& ids) const = 0;
+
+  /* Identity Type: RGW/ LDAP/ Keystone */
+  virtual uint32_t get_identity_type() const = 0;
+
+  /* Name of Account */
+  virtual string get_acct_name() const = 0;
+
+  /* Subuser of Account */
+  virtual string get_subuser() const = 0;
+};
+
+/* Stream an Identity by delegating to its to_str(). */
+inline std::ostream& operator<<(std::ostream& out,
+                                const rgw::auth::Identity& id)
+{
+  id.to_str(out);
+  return out;
+}
+
+
+/* Wrap the legacy authentication data carried by @s (user id, permission
+ * mask, system-request flag and user type) into an Identity object. */
+std::unique_ptr<Identity> transform_old_authinfo(const req_state* const s);
+
+
+/* Interface for classes applying the changes to request state / RADOS
+ * store imposed by a particular rgw::auth::Engine.
+ *
+ * Unlike rgw::auth::Engine, implementations of this interface may handle
+ * req_state or RGWRados in a read-write manner.
+ *
+ * Most (if not all) implementations are expected to conform to the
+ * rgw::auth::Identity interface as well, to provide the authorization
+ * policy (ACLs, account's ownership and entitlement). */
+class IdentityApplier : public Identity {
+public:
+  using aplptr_t = std::unique_ptr<IdentityApplier>;
+
+  virtual ~IdentityApplier() = default;
+
+  /* Fill @user_info with information about the account RGWOp will operate
+   * on. Errors are handled solely through exceptions.
+   *
+   * XXX: the "account" term refers to rgw_user; the naming is legacy. */
+  virtual void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const = 0; /* out */
+
+  /* Apply any changes to request state; does nothing unless overridden.
+   * This method will be most useful for TempURL of Swift API. */
+  virtual void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const {} /* in/out */
+};
+
+
+/* Interface class for the second step of the two-step authentication
+ * process: complete() should be called after Engine::authenticate() but
+ * before *committing* results of an RGWOp (or sending a response in the
+ * case of non-mutating ops).
+ *
+ * It exists for authentication schemas that require message integrity
+ * verification *without* in-memory data buffering. Typical examples are
+ * AWS Auth v4 and the auth mechanism of browser upload facilities in both
+ * the S3 and Swift APIs (see RGWPostObj). From the authentication point of
+ * view a request goes through:
+ *  A. authenticate (Engine::authenticate),
+ *  B. authorize (see RGWOp::verify_permissions),
+ *  C. execute-prepare (init potential data modifications),
+ *  D. authenticate-complete - (Completer::complete),
+ *  E. execute-commit - commit the modifications from point C. */
+class Completer {
+public:
+  /* Completers are expected to implement many interfaces and to be used
+   * not only in req_state::auth::completer, hence the ref-counted handle. */
+  using cmplptr_t = std::shared_ptr<Completer>;
+
+  virtual ~Completer() = default;
+
+  /* Complete the authentication process. Returns whether the completion
+   * succeeded. On error throws rgw::auth::Exception storing the reason. */
+  virtual bool complete() = 0;
+
+  /* Apply any changes to request state. The initial use case was injecting
+   * the AWSv4 filter over rgw::io::RestfulClient in req_state. */
+  virtual void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) = 0; /* in/out */
+};
+
+
+/* Interface class for authentication backends (auth engines) in RadosGW.
+ *
+ * An engine is supposed only to authenticate (not authorize!) requests
+ * basing on their req_state and - if access has been granted - provide
+ * an upper layer with:
+ * - rgw::auth::IdentityApplier to commit all changes to the request state as
+ * well as to the RADOS store (creating an account, synchronizing
+ * user-related information with external databases and so on).
+ * - rgw::auth::Completer (optionally) to finish the authentication
+ * of the request. Typical use case is verifying message integrity
+ * in AWS Auth v4 and browser uploads (RGWPostObj).
+ *
+ * Both of them are supposed to be wrapped in Engine::AuthResult.
+ *
+ * The authentication process consists of two steps:
+ * - Engine::authenticate() which should be called before *initiating*
+ * any modifications to RADOS store that are related to an operation
+ * a client wants to perform (RGWOp::execute).
+ * - Completer::complete() supposed to be called, if completer has been
+ * returned, after the authenticate() step but before *committing*
+ * those modifications or sending a response (RGWOp::complete).
+ *
+ * An engine outlives both Applier and Completer. It's intended to live
+ * since RadosGW's initialization and handle multiple requests till
+ * a reconfiguration.
+ *
+ * Auth engine MUST NOT make any changes to req_state nor RADOS store.
+ * This is solely an Applier's responsibility!
+ *
+ * Separation between authentication and global state modification has
+ * been introduced because many auth engines are orthogonal to appliers
+ * and thus they can be decoupled. Additional motivation is to clearly
+ * distinguish all portions of code modifying data structures. */
+class Engine {
+public:
+ virtual ~Engine() = default;
+
+ /* Outcome of a single authenticate() call. Instances can only be created
+ * through the static reject()/deny()/grant() factories below, as every
+ * constructor is private. */
+ class AuthResult {
+ struct rejection_mark_t {};
+ bool is_rejected = false;
+ int reason = 0;
+
+ /* Applier (first) and optional completer (second) of a granted result;
+ * a null applier means the result is not a grant. */
+ std::pair<IdentityApplier::aplptr_t, Completer::cmplptr_t> result_pair;
+
+ explicit AuthResult(const int reason)
+ : reason(reason) {
+ }
+
+ AuthResult(rejection_mark_t&&, const int reason)
+ : is_rejected(true),
+ reason(reason) {
+ }
+
+ /* Allow only the reasonable combinations - returning just Completer
+ * without accompanying IdentityApplier is strictly prohibited! */
+ explicit AuthResult(IdentityApplier::aplptr_t&& applier)
+ : result_pair(std::move(applier), nullptr) {
+ }
+
+ AuthResult(IdentityApplier::aplptr_t&& applier,
+ Completer::cmplptr_t&& completer)
+ : result_pair(std::move(applier), std::move(completer)) {
+ }
+
+ public:
+ enum class Status {
+ /* Engine doesn't grant the access but also doesn't reject it. */
+ DENIED,
+
+ /* Engine successfully authenticated requester. */
+ GRANTED,
+
+ /* Engine strictly indicates that a request should be rejected
+ * without trying any further engine. */
+ REJECTED
+ };
+
+ /* REJECTED takes precedence; otherwise the status is derived from
+ * whether an applier is (still) held. */
+ Status get_status() const {
+ if (is_rejected) {
+ return Status::REJECTED;
+ } else if (! result_pair.first) {
+ return Status::DENIED;
+ } else {
+ return Status::GRANTED;
+ }
+ }
+
+ int get_reason() const {
+ return reason;
+ }
+
+ /* Moves the applier out of the result: afterwards no applier is held,
+ * so get_status() no longer reports GRANTED. */
+ IdentityApplier::aplptr_t get_applier() {
+ return std::move(result_pair.first);
+ }
+
+ /* Returns an rvalue reference to the stored completer; the completer
+ * only leaves the result if the caller actually moves from it. */
+ Completer::cmplptr_t&& get_completer() {
+ return std::move(result_pair.second);
+ }
+
+ static AuthResult reject(const int reason = -EACCES) {
+ return AuthResult(rejection_mark_t(), reason);
+ }
+
+ static AuthResult deny(const int reason = -EACCES) {
+ return AuthResult(reason);
+ }
+
+ static AuthResult grant(IdentityApplier::aplptr_t&& applier) {
+ return AuthResult(std::move(applier));
+ }
+
+ static AuthResult grant(IdentityApplier::aplptr_t&& applier,
+ Completer::cmplptr_t&& completer) {
+ return AuthResult(std::move(applier), std::move(completer));
+ }
+ };
+
+ using result_t = AuthResult;
+
+ /* Get name of the auth engine. */
+ virtual const char* get_name() const noexcept = 0;
+
+ /* Throwing method for identity verification. When the check is positive
+ * an implementation should return Engine::result_t containing:
+ * - a non-null pointer to an object conforming to the Applier interface.
+ * Otherwise, the authentication is treated as failed.
+ * - a (potentially null) pointer to an object conforming to the Completer
+ * interface.
+ *
+ * On error throws rgw::auth::Exception containing the reason. */
+ virtual result_t authenticate(const DoutPrefixProvider* dpp, const req_state* s) const = 0;
+};
+
+
+/* Interface for extracting a token from the data carried by req_state. */
+class TokenExtractor {
+public:
+  virtual ~TokenExtractor() = default;
+
+  /* Return the token found in @s as a string. */
+  virtual std::string get_token(const req_state* s) const = 0;
+};
+
+
+/* Abstract class for stacking sub-engines to expose them as a single
+ * Engine. It is responsible for ordering its sub-engines and managing
+ * fall-backs between them. A derived class is supposed to encapsulate
+ * engine instances and add them using the add_engine() method in the
+ * order it wants them to be tried during the call to authenticate().
+ *
+ * Each new Strategy should be exposed to StrategyRegistry for handling
+ * the dynamic reconfiguration. */
+class Strategy : public Engine {
+public:
+ /* Specifiers controlling what happens when an associated engine fails.
+ * The names and semantics have been borrowed mostly from libpam. */
+ enum class Control {
+ /* Failure of an engine injected with the REQUISITE specifier aborts
+ * the strategy's authentication process immediately. No other engine
+ * will be tried. */
+ REQUISITE,
+
+ /* Success of an engine injected with the SUFFICIENT specifier ends
+ * strategy's authentication process successfully. However, denying
+ * doesn't abort it -- there will be fall-back to following engine
+ * if the one that failed wasn't the last one. */
+ SUFFICIENT,
+
+ /* Like SUFFICIENT with the exception that on failure the reason code
+ * is not overridden. Instead, it's taken directly from the last tried
+ * non-FALLBACK engine. If there was no previous non-FALLBACK engine
+ * in a Strategy, then the result_t::deny(reason = -EACCES) is used. */
+ FALLBACK,
+ };
+
+ /* Tries the stacked engines in the order they were added; final, so
+ * derived strategies cannot replace the iteration logic. */
+ Engine::result_t authenticate(const DoutPrefixProvider* dpp, const req_state* s) const override final;
+
+ /* True when no engine has been added yet. */
+ bool is_empty() const {
+ return auth_stack.empty();
+ }
+
+ /* Authenticate @s with @auth_strategy and apply the outcome to the
+ * request state; returns an int status instead of throwing (noexcept). */
+ static int apply(const DoutPrefixProvider* dpp, const Strategy& auth_strategy, req_state* s) noexcept;
+
+private:
+ /* Using the reference wrapper here to explicitly point out we are not
+ * interested in storing nulls while preserving the dynamic polymorphism. */
+ using stack_item_t = std::pair<std::reference_wrapper<const Engine>,
+ Control>;
+ std::vector<stack_item_t> auth_stack;
+
+protected:
+ /* Append @engine with the given control flag; only a reference to the
+ * engine is stored. */
+ void add_engine(Control ctrl_flag, const Engine& engine) noexcept;
+};
+
+
+/* A class aggregating the knowledge about all Strategies in RadosGW. It is
+ * responsible for handling the dynamic reconfiguration on e.g. realm update.
+ * The definition is in rgw/rgw_auth_registry.h.
+ *
+ * Each new Strategy should be exposed to it. */
+class StrategyRegistry;
+
+class WebIdentityApplier : public IdentityApplier { /* applier for identities described by web identity token claims */
+protected:
+ CephContext* const cct;
+ RGWRados* const store;
+ rgw::web_idp::WebTokenClaims token_claims; /* stored by value (copied in the constructor) */
+
+ string get_idp_url() const;
+
+public:
+ WebIdentityApplier( CephContext* const cct,
+ RGWRados* const store,
+ const rgw::web_idp::WebTokenClaims& token_claims)
+ : cct(cct),
+ store(store),
+ token_claims(token_claims) {
+ }
+
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override { /* out: fills only user_id and display_name */
+ user_info.user_id = rgw_user(token_claims.sub);
+ user_info.display_name = token_claims.user_name;
+ }
+
+ void modify_request_state(const DoutPrefixProvider *dpp, req_state* s) const override;
+
+ uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override { /* ACLs never grant anything to this identity */
+ return RGW_PERM_NONE;
+ }
+
+ bool is_admin_of(const rgw_user& uid) const override { /* never an admin of any user */
+ return false;
+ }
+
+ bool is_owner_of(const rgw_user& uid) const override { /* never an owner of any user */
+ return false;
+ }
+
+ uint32_t get_perm_mask() const override {
+ return RGW_PERM_NONE;
+ }
+
+ void to_str(std::ostream& out) const override;
+
+ bool is_identity(const idset_t& ids) const override;
+
+ uint32_t get_identity_type() const override {
+ return TYPE_WEB;
+ }
+
+ string get_acct_name() const override { /* the user_name claim doubles as the account name */
+ return token_claims.user_name;
+ }
+
+ string get_subuser() const override { /* no subuser: always empty */
+ return {};
+ }
+
+ struct Factory { /* abstract factory producing appliers of this kind */
+ virtual ~Factory() {}
+
+ virtual aplptr_t create_apl_web_identity( CephContext* cct,
+ const req_state* s,
+ const rgw::web_idp::WebTokenClaims& token) const = 0;
+ };
+};
+
+class ImplicitTenants: public md_config_obs_t { /* config observer exposing the implicit-tenants setting as a bit mask */
+public:
+ enum implicit_tenant_flag_bits {IMPLICIT_TENANTS_SWIFT=1,
+ IMPLICIT_TENANTS_S3=2, IMPLICIT_TENANTS_BAD = -1, }; /* SWIFT and S3 are OR-able bits; BAD is a sentinel the accessors assert against */
+private:
+ int saved; /* value handed out by get_value(); presumably written by recompute_value() -- confirm in the .cc */
+ void recompute_value(const ConfigProxy& );
+ class ImplicitTenantValue { /* snapshot of `saved`; constructible only by ImplicitTenants (private ctor + friend) */
+ friend class ImplicitTenants;
+ int v;
+ ImplicitTenantValue(int v) : v(v) {};
+ public:
+ bool inline is_split_mode() /* true when exactly one of the SWIFT / S3 bits is set */
+ {
+ assert(v != IMPLICIT_TENANTS_BAD);
+ return v == IMPLICIT_TENANTS_SWIFT || v == IMPLICIT_TENANTS_S3;
+ }
+ bool inline implicit_tenants_for_(const implicit_tenant_flag_bits bit) /* true when `bit` is set in the snapshot */
+ {
+ assert(v != IMPLICIT_TENANTS_BAD);
+ return static_cast<bool>(v&bit);
+ }
+ };
+public:
+ ImplicitTenants(const ConfigProxy& c) { recompute_value(c);} /* computes the initial value from the given config */
+ ImplicitTenantValue get_value() { /* returns a by-value snapshot of the current setting */
+ return ImplicitTenantValue(saved);
+ }
+private:
+ const char** get_tracked_conf_keys() const override;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set <std::string> &changed) override;
+};
+
+std::tuple<bool,bool> implicit_tenants_enabled_for_swift(CephContext * const cct);
+std::tuple<bool,bool> implicit_tenants_enabled_for_s3(CephContext * const cct);
+
+/* rgw::auth::RemoteApplier targets those authentication engines which don't
+ * need to ask the RADOS store while performing the auth process. Instead,
+ * they obtain credentials from an external source like Keystone or LDAP.
+ *
+ * As the authenticated user may not have an account yet, RemoteApplier
+ * must be able to create it based on data passed by an auth engine. Those
+ * data will be used to fill RGWUserInfo structure. */
+class RemoteApplier : public IdentityApplier {
+public:
+ class AuthInfo { /* immutable bundle of account data supplied by the auth engine */
+ friend class RemoteApplier;
+ protected:
+ const rgw_user acct_user;
+ const std::string acct_name;
+ const uint32_t perm_mask;
+ const bool is_admin;
+ const uint32_t acct_type;
+
+ public:
+ enum class acct_privilege_t {
+ IS_ADMIN_ACCT,
+ IS_PLAIN_ACCT
+ };
+
+ AuthInfo(const rgw_user& acct_user,
+ const std::string& acct_name,
+ const uint32_t perm_mask,
+ const acct_privilege_t level,
+ const uint32_t acct_type=TYPE_NONE)
+ : acct_user(acct_user),
+ acct_name(acct_name),
+ perm_mask(perm_mask),
+ is_admin(acct_privilege_t::IS_ADMIN_ACCT == level), /* the privilege enum is collapsed into a bool here */
+ acct_type(acct_type) {
+ }
+ };
+
+ using aclspec_t = rgw::auth::Identity::aclspec_t;
+ typedef std::function<uint32_t(const aclspec_t&)> acl_strategy_t;
+
+protected:
+ CephContext* const cct;
+
+ /* Read-write is intentional here due to RGWUserInfo creation process. */
+ RGWRados* const store;
+
+ /* Supplemental strategy for extracting permissions from ACLs. Its results
+ * will be combined (ORed) with a default strategy that is responsible for
+ * handling backward compatibility. */
+ const acl_strategy_t extra_acl_strategy;
+
+ const AuthInfo info;
+ rgw::auth::ImplicitTenants& implicit_tenant_context; /* held by reference, not owned */
+ const rgw::auth::ImplicitTenants::implicit_tenant_flag_bits implicit_tenant_bit;
+
+ virtual void create_account(const DoutPrefixProvider* dpp,
+ const rgw_user& acct_user,
+ bool implicit_tenant,
+ RGWUserInfo& user_info) const; /* out */
+
+public:
+ RemoteApplier(CephContext* const cct,
+ RGWRados* const store,
+ acl_strategy_t&& extra_acl_strategy,
+ const AuthInfo& info,
+ rgw::auth::ImplicitTenants& implicit_tenant_context,
+ rgw::auth::ImplicitTenants::implicit_tenant_flag_bits implicit_tenant_bit)
+ : cct(cct),
+ store(store),
+ extra_acl_strategy(std::move(extra_acl_strategy)),
+ info(info),
+ implicit_tenant_context(implicit_tenant_context),
+ implicit_tenant_bit(implicit_tenant_bit) {
+ }
+
+ uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override;
+ bool is_admin_of(const rgw_user& uid) const override;
+ bool is_owner_of(const rgw_user& uid) const override;
+ bool is_identity(const idset_t& ids) const override;
+
+ uint32_t get_perm_mask() const override { return info.perm_mask; }
+ void to_str(std::ostream& out) const override;
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */
+ uint32_t get_identity_type() const override { return info.acct_type; }
+ string get_acct_name() const override { return info.acct_name; }
+ string get_subuser() const override { return {}; }
+
+ struct Factory {
+ virtual ~Factory() {}
+ /* Providing r-value reference here is required intentionally. Callee is
+ * thus disallowed to handle std::function in a way that could inhibit
+ * the move behaviour (like forgetting about std::moving a l-value). */
+ virtual aplptr_t create_apl_remote(CephContext* cct,
+ const req_state* s,
+ acl_strategy_t&& extra_acl_strategy,
+ const AuthInfo &info) const = 0;
+ };
+};
+
+
+/* rgw::auth::LocalApplier targets those auth engines that are based on the data
+ * enclosed in the RGWUserInfo control structure. As a side effect of doing
+ * the authentication process, they must have it loaded. Leveraging this is
+ * a way to avoid unnecessary calls to underlying RADOS store. */
+class LocalApplier : public IdentityApplier {
+ using aclspec_t = rgw::auth::Identity::aclspec_t;
+
+protected:
+ const RGWUserInfo user_info;
+ const std::string subuser;
+ uint32_t perm_mask; /* RGW_PERM_INVALID means "not provided": get_perm_mask() then derives it */
+
+ uint32_t get_perm_mask(const std::string& subuser_name,
+ const RGWUserInfo &uinfo) const;
+
+public:
+ static const std::string NO_SUBUSER;
+
+ LocalApplier(CephContext* const cct, /* cct is not used by this constructor */
+ const RGWUserInfo& user_info,
+ std::string subuser,
+ const boost::optional<uint32_t>& perm_mask)
+ : user_info(user_info),
+ subuser(std::move(subuser)) {
+ if (perm_mask) {
+ this->perm_mask = perm_mask.get();
+ } else {
+ this->perm_mask = RGW_PERM_INVALID;
+ }
+ }
+
+
+ uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override;
+ bool is_admin_of(const rgw_user& uid) const override;
+ bool is_owner_of(const rgw_user& uid) const override;
+ bool is_identity(const idset_t& ids) const override;
+ uint32_t get_perm_mask() const override { /* explicit mask if one was given, otherwise computed from subuser and user_info */
+ if (this->perm_mask == RGW_PERM_INVALID) {
+ return get_perm_mask(subuser, user_info);
+ } else {
+ return this->perm_mask;
+ }
+ }
+ void to_str(std::ostream& out) const override;
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */
+ uint32_t get_identity_type() const override { return TYPE_RGW; }
+ string get_acct_name() const override { return {}; }
+ string get_subuser() const override { return subuser; }
+
+ struct Factory { /* abstract factory producing appliers of this kind */
+ virtual ~Factory() {}
+ virtual aplptr_t create_apl_local(CephContext* cct,
+ const req_state* s,
+ const RGWUserInfo& user_info,
+ const std::string& subuser,
+ const boost::optional<uint32_t>& perm_mask) const = 0;
+ };
+};
+
+class RoleApplier : public IdentityApplier { /* applier for an identity acting under a role with attached policies */
+protected:
+ const string role_name;
+ const rgw_user user_id;
+ vector<std::string> role_policies; /* copied from the constructor argument */
+
+public:
+
+ RoleApplier(CephContext* const cct, /* cct is not used by this constructor */
+ const string& role_name,
+ const rgw_user& user_id,
+ const vector<std::string>& role_policies)
+ : role_name(role_name),
+ user_id(user_id),
+ role_policies(role_policies) {}
+
+ uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override { /* ACLs never grant anything to a role */
+ return 0;
+ }
+ bool is_admin_of(const rgw_user& uid) const override { /* never an admin of any user */
+ return false;
+ }
+ bool is_owner_of(const rgw_user& uid) const override { /* never an owner of any user */
+ return false;
+ }
+ bool is_identity(const idset_t& ids) const override;
+ uint32_t get_perm_mask() const override {
+ return RGW_PERM_NONE;
+ }
+ void to_str(std::ostream& out) const override;
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */
+ uint32_t get_identity_type() const override { return TYPE_ROLE; }
+ string get_acct_name() const override { return {}; }
+ string get_subuser() const override { return {}; }
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const override;
+
+ struct Factory { /* abstract factory producing appliers of this kind */
+ virtual ~Factory() {}
+ virtual aplptr_t create_apl_role( CephContext* cct,
+ const req_state* s,
+ const string& role_name,
+ const rgw_user& user_id,
+ const vector<std::string>& role_policies) const = 0;
+ };
+};
+
+/* The anonymous abstract engine. Subclasses may narrow when it applies by overriding is_applicable(). */
+class AnonymousEngine : public Engine {
+ CephContext* const cct;
+ const rgw::auth::LocalApplier::Factory* const apl_factory; /* non-owning pointer to the factory of LocalApplier instances */
+
+public:
+ AnonymousEngine(CephContext* const cct,
+ const rgw::auth::LocalApplier::Factory* const apl_factory)
+ : cct(cct),
+ apl_factory(apl_factory) {
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::AnonymousEngine";
+ }
+
+ Engine::result_t authenticate(const DoutPrefixProvider* dpp, const req_state* s) const override final;
+
+protected:
+ virtual bool is_applicable(const req_state*) const noexcept { /* default: applicable to every request */
+ return true;
+ }
+};
+
+} /* namespace auth */
+} /* namespace rgw */
+
+
+uint32_t rgw_perms_from_aclspec_default_strategy(
+ const rgw_user& uid,
+ const rgw::auth::Identity::aclspec_t& aclspec);
+
+#endif /* CEPH_RGW_AUTH_H */
diff --git a/src/rgw/rgw_auth_filters.h b/src/rgw/rgw_auth_filters.h
new file mode 100644
index 00000000..58436022
--- /dev/null
+++ b/src/rgw/rgw_auth_filters.h
@@ -0,0 +1,290 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_AUTH_FILTERS_H
+#define CEPH_RGW_AUTH_FILTERS_H
+
+#include <type_traits>
+
+#include <boost/logic/tribool.hpp>
+#include <boost/optional.hpp>
+
+#include "rgw_common.h"
+#include "rgw_auth.h"
+
+namespace rgw {
+namespace auth {
+
+/* Abstract decorator over any implementation of rgw::auth::IdentityApplier
+ * which could be provided either as a pointer-to-object or as the object itself. */
+template <typename DecorateeT>
+class DecoratedApplier : public rgw::auth::IdentityApplier {
+ typedef typename std::remove_pointer<DecorateeT>::type DerefedDecorateeT;
+
+ static_assert(std::is_base_of<rgw::auth::IdentityApplier,
+ DerefedDecorateeT>::value,
+ "DecorateeT must be a subclass of rgw::auth::IdentityApplier");
+
+ DecorateeT decoratee;
+
+ /* There is an indirection layer over accessing decoratee to share the same
+ * code base between dynamic and static decorators. The difference is about
+ * what we store internally: pointer to a decorated object versus the whole
+ * object itself. Googling for "SFINAE" can help to understand the code. */
+ template <typename T = void,
+ typename std::enable_if<
+ std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ DerefedDecorateeT& get_decoratee() { /* pointer flavour: dereference */
+ return *decoratee;
+ }
+
+ template <typename T = void,
+ typename std::enable_if<
+ ! std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ DerefedDecorateeT& get_decoratee() { /* by-value flavour: return the member itself */
+ return decoratee;
+ }
+
+ template <typename T = void,
+ typename std::enable_if<
+ std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ const DerefedDecorateeT& get_decoratee() const { /* const pointer flavour */
+ return *decoratee;
+ }
+
+ template <typename T = void,
+ typename std::enable_if<
+ ! std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ const DerefedDecorateeT& get_decoratee() const { /* const by-value flavour */
+ return decoratee;
+ }
+
+public:
+ explicit DecoratedApplier(DecorateeT&& decoratee)
+ : decoratee(std::forward<DecorateeT>(decoratee)) {
+ }
+
+ /* Every override below simply forwards to the decorated applier. */
+ uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override {
+ return get_decoratee().get_perms_from_aclspec(dpp, aclspec);
+ }
+
+ bool is_admin_of(const rgw_user& uid) const override {
+ return get_decoratee().is_admin_of(uid);
+ }
+
+ bool is_owner_of(const rgw_user& uid) const override {
+ return get_decoratee().is_owner_of(uid);
+ }
+
+ bool is_anonymous() const override {
+ return get_decoratee().is_anonymous();
+ }
+
+ uint32_t get_perm_mask() const override {
+ return get_decoratee().get_perm_mask();
+ }
+
+ uint32_t get_identity_type() const override {
+ return get_decoratee().get_identity_type();
+ }
+
+ string get_acct_name() const override {
+ return get_decoratee().get_acct_name();
+ }
+
+ string get_subuser() const override {
+ return get_decoratee().get_subuser();
+ }
+
+ bool is_identity(
+ const boost::container::flat_set<Principal>& ids) const override {
+ return get_decoratee().is_identity(ids);
+ }
+
+ void to_str(std::ostream& out) const override {
+ get_decoratee().to_str(out);
+ }
+
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override { /* out */
+ return get_decoratee().load_acct_info(dpp, user_info);
+ }
+
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state * s) const override { /* in/out */
+ return get_decoratee().modify_request_state(dpp, s);
+ }
+};
+
+
+template <typename T>
+class ThirdPartyAccountApplier : public DecoratedApplier<T> { /* decorator that can redirect load_acct_info() to another user's account */
+ /* const */RGWRados* const store;
+ const rgw_user acct_user_override;
+
+public:
+ /* A value representing situations where there is no requested account
+ * override. In other words, acct_user_override will be equal to this
+ * constant when the request isn't a cross-tenant one. */
+ static const rgw_user UNKNOWN_ACCT;
+
+ template <typename U>
+ ThirdPartyAccountApplier(RGWRados* const store,
+ const rgw_user &acct_user_override,
+ U&& decoratee)
+ : DecoratedApplier<T>(std::move(decoratee)), /* NOTE(review): std::move on a forwarding reference also moves from l-values -- confirm callers only pass r-values */
+ store(store),
+ acct_user_override(acct_user_override) {
+ }
+
+ void to_str(std::ostream& out) const override;
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */
+};
+
+/* static definition: UNKNOWN_ACCT will be an empty rgw_user that is a result
+ * of the default construction. */
+template <typename T>
+const rgw_user ThirdPartyAccountApplier<T>::UNKNOWN_ACCT;
+
+template <typename T>
+void ThirdPartyAccountApplier<T>::to_str(std::ostream& out) const
+{ /* prints this decorator with its override account, then " -> " and the decorated applier */
+ out << "rgw::auth::ThirdPartyAccountApplier(" + acct_user_override.to_str() + ")"
+ << " -> ";
+ DecoratedApplier<T>::to_str(out);
+}
+
+template <typename T>
+void ThirdPartyAccountApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const
+{ /* fills user_info (out); throws an int error code when the overridden account cannot be loaded */
+ if (UNKNOWN_ACCT == acct_user_override) {
+ /* There is no override specified by the upper layer. This means that we'll
+ * load the account owned by the authenticated identity (aka auth_user). */
+ DecoratedApplier<T>::load_acct_info(dpp, user_info);
+ } else if (DecoratedApplier<T>::is_owner_of(acct_user_override)) {
+ /* The override has been specified but the account belongs to the authenticated
+ * identity. We may safely forward the call to a next stage. */
+ DecoratedApplier<T>::load_acct_info(dpp, user_info);
+ } else if (this->is_anonymous()) {
+ /* If the user was authed by the anonymous engine then scope the ANON user
+ * to the correct tenant. Only user_info.user_id is written in this branch. */
+ if (acct_user_override.tenant.empty())
+ user_info.user_id = rgw_user(acct_user_override.id, RGW_USER_ANON_ID); /* no tenant given: the override's id is used as the tenant */
+ else
+ user_info.user_id = rgw_user(acct_user_override.tenant, RGW_USER_ANON_ID);
+ } else {
+ /* Compatibility mechanism for multi-tenancy. For more details refer to
+ * load_acct_info method of rgw::auth::RemoteApplier. */
+ if (acct_user_override.tenant.empty()) {
+ const rgw_user tenanted_uid(acct_user_override.id, acct_user_override.id); /* first try tenant == id */
+
+ if (rgw_get_user_info_by_uid(store, tenanted_uid, user_info) >= 0) {
+ /* Succeeded. */
+ return;
+ }
+ }
+
+ const int ret = rgw_get_user_info_by_uid(store, acct_user_override, user_info);
+ if (ret < 0) {
+ /* We aren't trying to recover from ENOENT here. It's supposed that creating
+ * someone else's account isn't a thing we want to support in this filter. */
+ if (ret == -ENOENT) {
+ throw -EACCES; /* a missing account is reported as an access error */
+ } else {
+ throw ret;
+ }
+ }
+
+ }
+}
+
+template <typename T> static inline
+ThirdPartyAccountApplier<T> add_3rdparty(RGWRados* const store, /* helper wrapping t in a ThirdPartyAccountApplier with T deduced from the argument */
+ const rgw_user &acct_user_override,
+ T&& t) {
+ return ThirdPartyAccountApplier<T>(store, acct_user_override,
+ std::forward<T>(t));
+}
+
+
+template <typename T>
+class SysReqApplier : public DecoratedApplier<T> { /* decorator adding system-request handling on top of the decorated applier */
+ CephContext* const cct;
+ /*const*/ RGWRados* const store;
+ const RGWHTTPArgs& args; /* reference into s->info.args; presumably the req_state must outlive this object -- confirm */
+ mutable boost::tribool is_system; /* indeterminate until load_acct_info() has run; mutable because that method is const */
+
+public:
+ template <typename U>
+ SysReqApplier(CephContext* const cct,
+ /*const*/ RGWRados* const store,
+ const req_state* const s,
+ U&& decoratee)
+ : DecoratedApplier<T>(std::forward<T>(decoratee)), /* NOTE(review): forwards as T although the parameter is U&& -- confirm this is intended */
+ cct(cct),
+ store(store),
+ args(s->info.args),
+ is_system(boost::logic::indeterminate) {
+ }
+
+ void to_str(std::ostream& out) const override;
+ void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const override; /* in/out */
+};
+
+template <typename T>
+void SysReqApplier<T>::to_str(std::ostream& out) const
+{ /* prints this decorator's name, then " -> " and the decorated applier */
+ out << "rgw::auth::SysReqApplier" << " -> ";
+ DecoratedApplier<T>::to_str(out);
+}
+
+template <typename T>
+void SysReqApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const
+{ /* loads the account via the decoratee, records whether it is a system one, and honours a requested effective uid */
+ DecoratedApplier<T>::load_acct_info(dpp, user_info);
+ is_system = user_info.system; /* cached for modify_request_state() */
+
+ if (is_system) {
+ //ldpp_dout(dpp, 20) << "system request" << dendl;
+
+ rgw_user effective_uid(args.sys_get(RGW_SYS_PARAM_PREFIX "uid")); /* optional uid a system user asks to act as */
+ if (! effective_uid.empty()) {
+ /* We aren't writing directly to user_info for consistency and security
+ * reasons. rgw_get_user_info_by_uid doesn't trigger the operator=() but
+ * calls ::decode instead. */
+ RGWUserInfo euser_info;
+ if (rgw_get_user_info_by_uid(store, effective_uid, euser_info) < 0) {
+ //ldpp_dout(dpp, 0) << "User lookup failed!" << dendl;
+ throw -EACCES; /* any lookup failure is reported as an access error */
+ }
+ user_info = euser_info; /* replace the loaded account with the effective user's one */
+ }
+ }
+}
+
+template <typename T>
+void SysReqApplier<T>::modify_request_state(const DoutPrefixProvider* dpp, req_state* const s) const
+{ /* marks the request as a system one when the account is a system account, then delegates to the decoratee */
+ if (boost::logic::indeterminate(is_system)) {
+ RGWUserInfo unused_info; /* load_acct_info() is called only for its side effect of setting is_system */
+ load_acct_info(dpp, unused_info);
+ }
+
+ if (is_system) {
+ s->info.args.set_system();
+ s->system_request = true;
+ }
+ DecoratedApplier<T>::modify_request_state(dpp, s);
+}
+
+template <typename T> static inline
+SysReqApplier<T> add_sysreq(CephContext* const cct, /* helper wrapping t in a SysReqApplier with T deduced from the argument */
+ /* const */ RGWRados* const store,
+ const req_state* const s,
+ T&& t) {
+ return SysReqApplier<T>(cct, store, s, std::forward<T>(t));
+}
+
+} /* namespace auth */
+} /* namespace rgw */
+
+#endif /* CEPH_RGW_AUTH_FILTERS_H */
diff --git a/src/rgw/rgw_auth_keystone.cc b/src/rgw/rgw_auth_keystone.cc
new file mode 100644
index 00000000..5a325425
--- /dev/null
+++ b/src/rgw/rgw_auth_keystone.cc
@@ -0,0 +1,491 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <vector>
+
+#include <errno.h>
+#include <fnmatch.h>
+
+#include "rgw_b64.h"
+
+#include "common/errno.h"
+#include "common/ceph_json.h"
+#include "include/types.h"
+#include "include/str_list.h"
+
+#include "rgw_common.h"
+#include "rgw_keystone.h"
+#include "rgw_auth_keystone.h"
+#include "rgw_rest_s3.h"
+#include "rgw_auth_s3.h"
+
+#include "common/ceph_crypto_cms.h"
+#include "common/armor.h"
+#include "common/Cond.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+namespace rgw {
+namespace auth {
+namespace keystone {
+
+bool
+TokenEngine::is_applicable(const std::string& token) const noexcept
+{ /* applicable only when a token was supplied and rgw_keystone_url is configured */
+ return ! token.empty() && ! cct->_conf->rgw_keystone_url.empty();
+}
+
+TokenEngine::token_envelope_t
+TokenEngine::decode_pki_token(const DoutPrefixProvider* dpp, const std::string& token) const
+{ /* decodes the token locally and parses it into an envelope; throws the negative int error code on failure */
+ ceph::buffer::list token_body_bl;
+ int ret = rgw_decode_b64_cms(cct, token, token_body_bl);
+ if (ret < 0) {
+ ldpp_dout(dpp, 20) << "cannot decode pki token" << dendl;
+ throw ret;
+ } else {
+ ldpp_dout(dpp, 20) << "successfully decoded pki token" << dendl;
+ }
+
+ TokenEngine::token_envelope_t token_body;
+ ret = token_body.parse(cct, token, token_body_bl, config.get_api_version());
+ if (ret < 0) {
+ throw ret;
+ }
+
+ return token_body;
+}
+
+boost::optional<TokenEngine::token_envelope_t>
+TokenEngine::get_from_keystone(const DoutPrefixProvider* dpp, const std::string& token) const
+{ /* validates the token against Keystone: boost::none on 401/404, the parsed envelope otherwise; throws an int error code on other failures */
+ /* Unfortunately, we can't use the short form of "using" here. It's because
+ * we're aliasing a class' member, not namespace. */
+ using RGWValidateKeystoneToken = \
+ rgw::keystone::Service::RGWValidateKeystoneToken;
+
+ /* The container for plain response obtained from Keystone. It will be
+ * parsed by the token_envelope_t::parse method. */
+ ceph::bufferlist token_body_bl;
+ RGWValidateKeystoneToken validate(cct, "GET", "", &token_body_bl);
+
+ std::string url = config.get_endpoint_url();
+ if (url.empty()) {
+ throw -EINVAL;
+ }
+
+ const auto keystone_version = config.get_api_version();
+ if (keystone_version == rgw::keystone::ApiVersion::VER_2) {
+ url.append("v2.0/tokens/" + token); /* v2: the token travels in the URL */
+ } else if (keystone_version == rgw::keystone::ApiVersion::VER_3) {
+ url.append("v3/auth/tokens");
+ validate.append_header("X-Subject-Token", token); /* v3: the token travels in a header */
+ } /* for any other version the bare endpoint URL is used as is */
+
+ std::string admin_token;
+ if (rgw::keystone::Service::get_admin_token(cct, token_cache, config,
+ admin_token) < 0) {
+ throw -EINVAL;
+ }
+
+ validate.append_header("X-Auth-Token", admin_token);
+ validate.set_send_length(0);
+
+ validate.set_url(url);
+
+ int ret = validate.process();
+ if (ret < 0) {
+ throw ret;
+ }
+
+ /* NULL terminate for debug output. */
+ token_body_bl.append(static_cast<char>(0));
+
+ /* Detect Keystone rejection earlier than during the token parsing.
+ * Although failure at the parsing phase doesn't impose a threat,
+ * this allows to return proper error code (EACCES instead of EINVAL
+ * or similar) and thus improves logging. */
+ if (validate.get_http_status() ==
+ /* Most likely: wrong admin credentials or admin token. */
+ RGWValidateKeystoneToken::HTTP_STATUS_UNAUTHORIZED ||
+ validate.get_http_status() ==
+ /* Most likely: non-existent token supplied by the client. */
+ RGWValidateKeystoneToken::HTTP_STATUS_NOTFOUND) {
+ ldpp_dout(dpp, 5) << "Failed keystone auth from " << url << " with "
+ << validate.get_http_status() << dendl;
+ return boost::none;
+ }
+
+ ldpp_dout(dpp, 20) << "received response status=" << validate.get_http_status()
+ << ", body=" << token_body_bl.c_str() << dendl;
+
+ TokenEngine::token_envelope_t token_body;
+ ret = token_body.parse(cct, token, token_body_bl, config.get_api_version());
+ if (ret < 0) {
+ throw ret;
+ }
+
+ return token_body;
+}
+
+TokenEngine::auth_info_t
+TokenEngine::get_creds_info(const TokenEngine::token_envelope_t& token,
+ const std::vector<std::string>& admin_roles
+ ) const noexcept
+{ /* builds the auth info for a validated token; admin level is granted when the token holds any of admin_roles */
+ using acct_privilege_t = rgw::auth::RemoteApplier::AuthInfo::acct_privilege_t;
+
+ /* Check whether the user has an admin status. */
+ acct_privilege_t level = acct_privilege_t::IS_PLAIN_ACCT;
+ for (const auto& admin_role : admin_roles) {
+ if (token.has_role(admin_role)) {
+ level = acct_privilege_t::IS_ADMIN_ACCT;
+ break;
+ }
+ }
+
+ return auth_info_t {
+ /* Suggested account name for the authenticated user (the project id). */
+ rgw_user(token.get_project_id()),
+ /* User's display name (aka real name); the project name is used here. */
+ token.get_project_name(),
+ /* Keystone doesn't support RGW's subuser concept, so we cannot cut down
+ * the access rights through the perm_mask. At least at this layer. */
+ RGW_PERM_FULL_CONTROL,
+ level,
+ TYPE_KEYSTONE,
+ };
+}
+
+static inline const std::string
+make_spec_item(const std::string& tenant, const std::string& id)
+{ /* builds an ACL spec key of the form "<tenant>:<id>" */
+ return tenant + ":" + id;
+}
+
+TokenEngine::acl_strategy_t
+TokenEngine::get_acl_strategy(const TokenEngine::token_envelope_t& token) const
+{ /* returns a callable that ORs together the aclspec permissions of every spec item matching this token */
+ /* The primary identity is constructed upon UUIDs. */
+ const auto& tenant_uuid = token.get_project_id();
+ const auto& user_uuid = token.get_user_id();
+
+ /* For Keystone v2 an alias may be also used. */
+ const auto& tenant_name = token.get_project_name();
+ const auto& user_name = token.get_user_name();
+
+ /* Construct all possible combinations including Swift's wildcards. */
+ const std::array<std::string, 6> allowed_items = {
+ make_spec_item(tenant_uuid, user_uuid),
+ make_spec_item(tenant_name, user_name),
+
+ /* Wildcards. */
+ make_spec_item(tenant_uuid, "*"),
+ make_spec_item(tenant_name, "*"),
+ make_spec_item("*", user_uuid),
+ make_spec_item("*", user_name),
+ };
+
+ /* Lambda will obtain a copy of (not a reference to!) allowed_items. */
+ return [allowed_items](const rgw::auth::Identity::aclspec_t& aclspec) {
+ uint32_t perm = 0;
+
+ for (const auto& allowed_item : allowed_items) {
+ const auto iter = aclspec.find(allowed_item);
+
+ if (std::end(aclspec) != iter) {
+ perm |= iter->second; /* accumulate the permissions of every matching entry */
+ }
+ }
+
+ return perm;
+ };
+}
+
+TokenEngine::result_t
+TokenEngine::authenticate(const DoutPrefixProvider* dpp,
+ const std::string& token,
+ const req_state* const s) const
+{ /* grants when the token is cached, or is valid, unexpired and holds an accepted role; denies otherwise */
+ boost::optional<TokenEngine::token_envelope_t> t;
+
+ /* This will be initialized on the first call to this method. In C++11 it's
+ * also thread-safe. The role lists are therefore read from the config only once. */
+ static const struct RolesCacher {
+ explicit RolesCacher(CephContext* const cct) {
+ get_str_vec(cct->_conf->rgw_keystone_accepted_roles, plain);
+ get_str_vec(cct->_conf->rgw_keystone_accepted_admin_roles, admin);
+
+ /* Let's suppose that having an admin role implies also a regular one. */
+ plain.insert(std::end(plain), std::begin(admin), std::end(admin));
+ }
+
+ std::vector<std::string> plain;
+ std::vector<std::string> admin;
+ } roles(cct);
+
+ if (! is_applicable(token)) {
+ return result_t::deny();
+ }
+
+ /* Token ID is a concept that makes dealing with PKI tokens more effective.
+ * Instead of storing several kilobytes, a short hash can be buried. */
+ const auto& token_id = rgw_get_token_id(token);
+ ldpp_dout(dpp, 20) << "token_id=" << token_id << dendl;
+
+ /* Check cache first. */
+ t = token_cache.find(token_id);
+ if (t) {
+ ldpp_dout(dpp, 20) << "cached token.project.id=" << t->get_project_id()
+ << dendl;
+ auto apl = apl_factory->create_apl_remote(cct, s, get_acl_strategy(*t),
+ get_creds_info(*t, roles.admin));
+ return result_t::grant(std::move(apl)); /* a cache hit skips the expiration and role checks below */
+ }
+
+ /* Retrieve token. */
+ if (rgw_is_pki_token(token)) {
+ try {
+ t = decode_pki_token(dpp, token);
+ } catch (...) {
+ /* Last resort. */
+ t = get_from_keystone(dpp, token);
+ }
+ } else {
+ /* Can't decode, just go to the Keystone server for validation. */
+ t = get_from_keystone(dpp, token);
+ }
+
+ if (! t) {
+ return result_t::deny(-EACCES);
+ }
+
+ /* Verify expiration. */
+ if (t->expired()) {
+ ldpp_dout(dpp, 0) << "got expired token: " << t->get_project_name()
+ << ":" << t->get_user_name()
+ << " expired: " << t->get_expires() << dendl;
+ return result_t::deny(-EPERM);
+ }
+
+ /* Check for necessary roles. */
+ for (const auto& role : roles.plain) {
+ if (t->has_role(role) == true) {
+ ldpp_dout(dpp, 0) << "validated token: " << t->get_project_name()
+ << ":" << t->get_user_name()
+ << " expires: " << t->get_expires() << dendl;
+ token_cache.add(token_id, *t); /* only tokens that passed every check get cached */
+ auto apl = apl_factory->create_apl_remote(cct, s, get_acl_strategy(*t),
+ get_creds_info(*t, roles.admin));
+ return result_t::grant(std::move(apl));
+ }
+ }
+
+ ldpp_dout(dpp, 0) << "user does not hold a matching role; required roles: "
+ << g_conf()->rgw_keystone_accepted_roles << dendl;
+
+ return result_t::deny(-EPERM);
+}
+
+
+/*
+ * Try to validate S3 auth against keystone s3token interface. Returns {envelope, 0} on success, {boost::none, -ERR_SIGNATURE_NO_MATCH} on HTTP 401, {boost::none, -ERR_INVALID_ACCESS_KEY} on HTTP 404; throws a negative int error code on any other failure.
+ */
+std::pair<boost::optional<rgw::keystone::TokenEnvelope>, int>
+EC2Engine::get_from_keystone(const DoutPrefixProvider* dpp, const boost::string_view& access_key_id,
+ const std::string& string_to_sign,
+ const boost::string_view& signature) const
+{
+ /* prepare keystone url */
+ std::string keystone_url = config.get_endpoint_url();
+ if (keystone_url.empty()) {
+ throw -EINVAL;
+ }
+
+ const auto api_version = config.get_api_version(); /* queried once; reused for the URL and for parsing the response */
+ if (api_version == rgw::keystone::ApiVersion::VER_3) {
+ keystone_url.append("v3/s3tokens");
+ } else {
+ keystone_url.append("v2.0/s3tokens");
+ }
+
+ /* get authentication token for Keystone. */
+ std::string admin_token;
+ int ret = rgw::keystone::Service::get_admin_token(cct, token_cache, config,
+ admin_token);
+ if (ret < 0) {
+ ldpp_dout(dpp, 2) << "s3 keystone: cannot get token for keystone access"
+ << dendl;
+ throw ret;
+ }
+
+ using RGWValidateKeystoneToken
+ = rgw::keystone::Service::RGWValidateKeystoneToken;
+
+ /* The container for plain response obtained from Keystone. It will be
+ * parsed by the token_envelope_t::parse method. */
+ ceph::bufferlist token_body_bl;
+ RGWValidateKeystoneToken validate(cct, "POST", keystone_url, &token_body_bl);
+
+ /* set required headers for keystone request */
+ validate.append_header("X-Auth-Token", admin_token);
+ validate.append_header("Content-Type", "application/json");
+
+ /* check if we want to verify keystone's ssl certs */
+ validate.set_verify_ssl(cct->_conf->rgw_keystone_verify_ssl);
+
+ /* create json credentials request body */
+ JSONFormatter credentials(false);
+ credentials.open_object_section("");
+ credentials.open_object_section("credentials");
+ credentials.dump_string("access", sview2cstr(access_key_id).data());
+ credentials.dump_string("token", rgw::to_base64(string_to_sign));
+ credentials.dump_string("signature", sview2cstr(signature).data());
+ credentials.close_section();
+ credentials.close_section();
+
+ std::stringstream os;
+ credentials.flush(os);
+ validate.set_post_data(os.str());
+ validate.set_send_length(os.str().length());
+
+ /* send request */
+ ret = validate.process();
+ if (ret < 0) {
+ ldpp_dout(dpp, 2) << "s3 keystone: token validation ERROR: "
+ << token_body_bl.c_str() << dendl;
+ throw ret;
+ }
+
+ /* if the supplied signature is wrong, we will get 401 from Keystone */
+ if (validate.get_http_status() ==
+ decltype(validate)::HTTP_STATUS_UNAUTHORIZED) {
+ return std::make_pair(boost::none, -ERR_SIGNATURE_NO_MATCH);
+ } else if (validate.get_http_status() ==
+ decltype(validate)::HTTP_STATUS_NOTFOUND) {
+ return std::make_pair(boost::none, -ERR_INVALID_ACCESS_KEY);
+ }
+
+ /* now parse response */
+ rgw::keystone::TokenEnvelope token_envelope;
+ ret = token_envelope.parse(cct, std::string(), token_body_bl, api_version);
+ if (ret < 0) {
+ ldpp_dout(dpp, 2) << "s3 keystone: token parsing failed, ret=" << ret
+ << dendl;
+ throw ret;
+ }
+
+ return std::make_pair(std::move(token_envelope), 0);
+}
+
+/* Return the extra ACL strategy for a Keystone-authenticated EC2 request.
+ * The token envelope argument is ignored and nullptr is always returned,
+ * i.e. no additional ACL spec is supplied by this engine. */
+EC2Engine::acl_strategy_t
+EC2Engine::get_acl_strategy(const EC2Engine::token_envelope_t&) const
+{
+ /* This is based on the assumption that the default acl strategy in
+ * get_perms_from_aclspec will take care of it. Extra acl spec is not
+ * required. */
+ return nullptr;
+}
+
+/* Assemble the RemoteApplier::AuthInfo for the user described by `token`.
+ * The account is flagged as an admin one when the token holds at least one
+ * of `admin_roles`; otherwise it is a plain account. */
+EC2Engine::auth_info_t
+EC2Engine::get_creds_info(const EC2Engine::token_envelope_t& token,
+                          const std::vector<std::string>& admin_roles
+                         ) const noexcept
+{
+  using acct_privilege_t = \
+    rgw::auth::RemoteApplier::AuthInfo::acct_privilege_t;
+
+  /* True as soon as any of the admin roles is found in the token. */
+  const auto holds_admin_role = [&token, &admin_roles]() {
+    for (const auto& candidate : admin_roles) {
+      if (token.has_role(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  const acct_privilege_t privilege = holds_admin_role()
+    ? acct_privilege_t::IS_ADMIN_ACCT
+    : acct_privilege_t::IS_PLAIN_ACCT;
+
+  return auth_info_t {
+    /* Suggested account name for the authenticated user. */
+    rgw_user(token.get_project_id()),
+    /* User's display name (aka real name). */
+    token.get_project_name(),
+    /* Keystone has no counterpart of RGW's subusers, so the perm_mask
+     * cannot be used to narrow the access rights at this layer. */
+    RGW_PERM_FULL_CONTROL,
+    privilege,
+    TYPE_KEYSTONE,
+  };
+}
+
+/* Authenticate an AWS-signed request by letting Keystone verify the
+ * signature. The request is denied when Keystone rejects the credentials,
+ * when the returned token is expired or when the token holds none of the
+ * accepted roles; otherwise a remote applier is granted. */
+rgw::auth::Engine::result_t EC2Engine::authenticate(
+  const DoutPrefixProvider* dpp,
+  const boost::string_view& access_key_id,
+  const boost::string_view& signature,
+  const boost::string_view& session_token,
+  const string_to_sign_t& string_to_sign,
+  const signature_factory_t&,
+  const completer_factory_t& completer_factory,
+  /* Passthrough only! */
+  const req_state* s) const
+{
+  /* The accepted roles are read from the configuration on the first call
+   * only. Initialization of a function-local static is thread-safe since
+   * C++11. */
+  static const struct RolesCacher {
+    explicit RolesCacher(CephContext* const cct) {
+      get_str_vec(cct->_conf->rgw_keystone_accepted_roles, plain);
+      get_str_vec(cct->_conf->rgw_keystone_accepted_admin_roles, admin);
+
+      /* An admin role is treated as implying a regular one as well. */
+      plain.insert(plain.end(), admin.begin(), admin.end());
+    }
+
+    std::vector<std::string> plain;
+    std::vector<std::string> admin;
+  } accepted_roles(cct);
+
+  boost::optional<token_envelope_t> envelope;
+  int deny_code;
+  std::tie(envelope, deny_code) =
+    get_from_keystone(dpp, access_key_id, string_to_sign, signature);
+  if (! envelope) {
+    return result_t::deny(deny_code);
+  }
+
+  /* Verify expiration. */
+  if (envelope->expired()) {
+    ldpp_dout(dpp, 0) << "got expired token: " << envelope->get_project_name()
+                      << ":" << envelope->get_user_name()
+                      << " expired: " << envelope->get_expires() << dendl;
+    return result_t::deny();
+  }
+
+  /* Look for the first accepted role the token holds. */
+  const auto& candidates = accepted_roles.plain;
+  auto matching = std::begin(candidates);
+  while (matching != std::end(candidates) && ! envelope->has_role(*matching)) {
+    ++matching;
+  }
+
+  if (matching == std::end(candidates)) {
+    ldpp_dout(dpp, 5) << "s3 keystone: user does not hold a matching role;"
+                         " required roles: "
+                      << cct->_conf->rgw_keystone_accepted_roles << dendl;
+    return result_t::deny();
+  }
+
+  /* everything seems fine, continue with this user */
+  ldpp_dout(dpp, 5) << "s3 keystone: validated token: " << envelope->get_project_name()
+                    << ":" << envelope->get_user_name()
+                    << " expires: " << envelope->get_expires() << dendl;
+
+  auto apl = apl_factory->create_apl_remote(cct, s, get_acl_strategy(*envelope),
+                            get_creds_info(*envelope, accepted_roles.admin));
+  return result_t::grant(std::move(apl), completer_factory(boost::none));
+}
+
+}; /* namespace keystone */
+}; /* namespace auth */
+}; /* namespace rgw */
diff --git a/src/rgw/rgw_auth_keystone.h b/src/rgw/rgw_auth_keystone.h
new file mode 100644
index 00000000..e63ba1e3
--- /dev/null
+++ b/src/rgw/rgw_auth_keystone.h
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#ifndef CEPH_RGW_AUTH_KEYSTONE_H
+#define CEPH_RGW_AUTH_KEYSTONE_H
+
+#include <utility>
+#include <boost/optional.hpp>
+#include <boost/utility/string_view.hpp>
+
+#include "rgw_auth.h"
+#include "rgw_rest_s3.h"
+#include "rgw_common.h"
+#include "rgw_keystone.h"
+
+namespace rgw {
+namespace auth {
+namespace keystone {
+
+/* Dedicated namespace for Keystone-related auth engines. We need it because
+ * Keystone offers three different authentication mechanisms (token, EC2 and
+ * regular user/pass). RadosGW actually does support the first two. */
+
+/* Authentication engine for requests carrying a Keystone token. The public
+ * authenticate() override obtains the token string from the configured
+ * TokenExtractor and hands it to the private token-based overload. */
+class TokenEngine : public rgw::auth::Engine {
+ CephContext* const cct;
+
+ using acl_strategy_t = rgw::auth::RemoteApplier::acl_strategy_t;
+ using auth_info_t = rgw::auth::RemoteApplier::AuthInfo;
+ using result_t = rgw::auth::Engine::result_t;
+ using token_envelope_t = rgw::keystone::TokenEnvelope;
+
+ /* Collaborators supplied through the constructor; the engine only keeps
+ * pointers/references to them. */
+ const rgw::auth::TokenExtractor* const extractor;
+ const rgw::auth::RemoteApplier::Factory* const apl_factory;
+ rgw::keystone::Config& config;
+ rgw::keystone::TokenCache& token_cache;
+
+ /* Helper methods. They are only declared here; their definitions are not
+ * part of this header. */
+ bool is_applicable(const std::string& token) const noexcept;
+ token_envelope_t decode_pki_token(const DoutPrefixProvider* dpp, const std::string& token) const;
+
+ boost::optional<token_envelope_t>
+ get_from_keystone(const DoutPrefixProvider* dpp, const std::string& token) const;
+
+ acl_strategy_t get_acl_strategy(const token_envelope_t& token) const;
+ auth_info_t get_creds_info(const token_envelope_t& token,
+ const std::vector<std::string>& admin_roles
+ ) const noexcept;
+ /* Token-based overload the public authenticate() forwards to. */
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const std::string& token,
+ const req_state* s) const;
+
+public:
+ /* Stores the given collaborators as-is; nothing else happens here. */
+ TokenEngine(CephContext* const cct,
+ const rgw::auth::TokenExtractor* const extractor,
+ const rgw::auth::RemoteApplier::Factory* const apl_factory,
+ rgw::keystone::Config& config,
+ rgw::keystone::TokenCache& token_cache)
+ : cct(cct),
+ extractor(extractor),
+ apl_factory(apl_factory),
+ config(config),
+ token_cache(token_cache) {
+ }
+
+ /* Name of this engine. */
+ const char* get_name() const noexcept override {
+ return "rgw::auth::keystone::TokenEngine";
+ }
+
+ /* Engine entry point: extract the token from the request state `s` and
+ * delegate to the token-based overload. */
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const override {
+ return authenticate(dpp, extractor->get_token(s), s);
+ }
+}; /* class TokenEngine */
+
+
+/* AWS-style (EC2 credentials) authentication engine that validates the
+ * request signature through Keystone. It plugs into the S3 machinery by
+ * deriving from AWSEngine and overriding its credential-based
+ * authenticate(). */
+class EC2Engine : public rgw::auth::s3::AWSEngine {
+ using acl_strategy_t = rgw::auth::RemoteApplier::acl_strategy_t;
+ using auth_info_t = rgw::auth::RemoteApplier::AuthInfo;
+ using result_t = rgw::auth::Engine::result_t;
+ using token_envelope_t = rgw::keystone::TokenEnvelope;
+
+ /* Collaborators supplied through the constructor; the engine only keeps
+ * a pointer/references to them. */
+ const rgw::auth::RemoteApplier::Factory* const apl_factory;
+ rgw::keystone::Config& config;
+ rgw::keystone::TokenCache& token_cache;
+
+ /* Helper methods. */
+ acl_strategy_t get_acl_strategy(const token_envelope_t& token) const;
+ auth_info_t get_creds_info(const token_envelope_t& token,
+ const std::vector<std::string>& admin_roles
+ ) const noexcept;
+ /* Yields the token envelope on success, or no envelope plus an error
+ * code for the caller to deny with. */
+ std::pair<boost::optional<token_envelope_t>, int>
+ get_from_keystone(const DoutPrefixProvider* dpp, const boost::string_view& access_key_id,
+ const std::string& string_to_sign,
+ const boost::string_view& signature) const;
+ /* Credential-based hook overridden from AWSEngine. */
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const boost::string_view& access_key_id,
+ const boost::string_view& signature,
+ const boost::string_view& session_token,
+ const string_to_sign_t& string_to_sign,
+ const signature_factory_t&,
+ const completer_factory_t& completer_factory,
+ const req_state* s) const override;
+public:
+ /* `ver_abstractor` is dereferenced when initializing the base class, so
+ * it must not be null. */
+ EC2Engine(CephContext* const cct,
+ const rgw::auth::s3::AWSEngine::VersionAbstractor* const ver_abstractor,
+ const rgw::auth::RemoteApplier::Factory* const apl_factory,
+ rgw::keystone::Config& config,
+ /* The token cache is used ONLY for retrieving the admin token.
+ * Due to the architecture of AWS Auth, S3 credentials cannot be
+ * cached at all. */
+ rgw::keystone::TokenCache& token_cache)
+ : AWSEngine(cct, *ver_abstractor),
+ apl_factory(apl_factory),
+ config(config),
+ token_cache(token_cache) {
+ }
+
+ /* Keep the base class' authenticate() overloads visible next to the
+ * override declared above. */
+ using AWSEngine::authenticate;
+
+ /* Name of this engine. */
+ const char* get_name() const noexcept override {
+ return "rgw::auth::keystone::EC2Engine";
+ }
+
+}; /* class EC2Engine */
+
+}; /* namespace keystone */
+}; /* namespace auth */
+}; /* namespace rgw */
+
+#endif /* CEPH_RGW_AUTH_KEYSTONE_H */
diff --git a/src/rgw/rgw_auth_registry.h b/src/rgw/rgw_auth_registry.h
new file mode 100644
index 00000000..696f40cd
--- /dev/null
+++ b/src/rgw/rgw_auth_registry.h
@@ -0,0 +1,101 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#ifndef CEPH_RGW_AUTH_REGISTRY_H
+#define CEPH_RGW_AUTH_REGISTRY_H
+
+#include <functional>
+#include <memory>
+#include <ostream>
+#include <type_traits>
+#include <utility>
+
+#include "rgw_auth.h"
+#include "rgw_auth_s3.h"
+#include "rgw_swift_auth.h"
+#include "rgw_rest_sts.h"
+
+namespace rgw {
+namespace auth {
+
+/* A class aggregating the knowledge about all Strategies in RadosGW. It is
+ * responsible for handling the dynamic reconfiguration on e.g. realm update.
+ *
+ * It owns one strategy object per front door (S3 main, S3 POST/browser
+ * upload, Swift, STS) and exposes each of them through a const getter. */
+class StrategyRegistry {
+ /* Shorthand for an AWS auth strategy built on a given version abstractor;
+ * anonymous access is disabled unless explicitly requested. */
+ template <class AbstractorT,
+ bool AllowAnonAccessT = false>
+ using s3_strategy_t = \
+ rgw::auth::s3::AWSAuthStrategy<AbstractorT, AllowAnonAccessT>;
+
+ /* Main S3 strategy: a composite of two AWS strategies. The plain one is
+ * registered with SUFFICIENT control and the boto2-compatible one with
+ * FALLBACK control. */
+ struct s3_main_strategy_t : public Strategy {
+ using s3_main_strategy_plain_t = \
+ s3_strategy_t<rgw::auth::s3::AWSGeneralAbstractor, true>;
+ using s3_main_strategy_boto2_t = \
+ s3_strategy_t<rgw::auth::s3::AWSGeneralBoto2Abstractor>;
+
+ s3_main_strategy_plain_t s3_main_strategy_plain;
+ s3_main_strategy_boto2_t s3_main_strategy_boto2;
+
+ /* Construct both sub-strategies from the same arguments and register
+ * them as engines of this composite. */
+ s3_main_strategy_t(CephContext* const cct,
+ ImplicitTenants& implicit_tenant_context,
+ RGWRados* const store)
+ : s3_main_strategy_plain(cct, implicit_tenant_context, store),
+ s3_main_strategy_boto2(cct, implicit_tenant_context, store) {
+ add_engine(Strategy::Control::SUFFICIENT, s3_main_strategy_plain);
+ add_engine(Strategy::Control::FALLBACK, s3_main_strategy_boto2);
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::StrategyRegistry::s3_main_strategy_t";
+ }
+ } s3_main_strategy;
+
+ /* Strategy built on the browser-upload abstractor. */
+ using s3_post_strategy_t = \
+ s3_strategy_t<rgw::auth::s3::AWSBrowserUploadAbstractor>;
+ s3_post_strategy_t s3_post_strategy;
+
+ rgw::auth::swift::DefaultStrategy swift_strategy;
+
+ rgw::auth::sts::DefaultStrategy sts_strategy;
+
+public:
+ /* Build every strategy. Note that the STS strategy is the only one that
+ * does not receive the implicit-tenant context. */
+ StrategyRegistry(CephContext* const cct,
+ ImplicitTenants& implicit_tenant_context,
+ RGWRados* const store)
+ : s3_main_strategy(cct, implicit_tenant_context, store),
+ s3_post_strategy(cct, implicit_tenant_context, store),
+ swift_strategy(cct, implicit_tenant_context, store),
+ sts_strategy(cct, store) {
+ }
+
+ /* Read-only accessors to the owned strategies. */
+ const s3_main_strategy_t& get_s3_main() const {
+ return s3_main_strategy;
+ }
+
+ const s3_post_strategy_t& get_s3_post() const {
+ return s3_post_strategy;
+ }
+
+ const rgw::auth::swift::DefaultStrategy& get_swift() const {
+ return swift_strategy;
+ }
+
+ const rgw::auth::sts::DefaultStrategy& get_sts() const {
+ return sts_strategy;
+ }
+
+ /* Factory returning a shared_ptr-owned registry built from the same
+ * arguments as the constructor. */
+ static std::shared_ptr<StrategyRegistry>
+ create(CephContext* const cct,
+ ImplicitTenants& implicit_tenant_context,
+ RGWRados* const store) {
+ return std::make_shared<StrategyRegistry>(cct, implicit_tenant_context, store);
+ }
+};
+
+} /* namespace auth */
+} /* namespace rgw */
+
+using rgw_auth_registry_t = rgw::auth::StrategyRegistry;
+using rgw_auth_registry_ptr_t = std::shared_ptr<rgw_auth_registry_t>;
+
+#endif /* CEPH_RGW_AUTH_REGISTRY_H */
diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc
new file mode 100644
index 00000000..f7a8af67
--- /dev/null
+++ b/src/rgw/rgw_auth_s3.cc
@@ -0,0 +1,1135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <algorithm>
+#include <map>
+#include <iterator>
+#include <string>
+#include <vector>
+
+#include "common/armor.h"
+#include "common/utf8.h"
+#include "rgw_rest_s3.h"
+#include "rgw_auth_s3.h"
+#include "rgw_common.h"
+#include "rgw_client_io.h"
+#include "rgw_rest.h"
+#include "rgw_crypt_sanitize.h"
+
+#include <boost/container/small_vector.hpp>
+#include <boost/utility/string_view.hpp>
+#include <boost/algorithm/string/trim_all.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Names of the sub-resources that get_canon_resource() appends to the
+ * canonical resource. They are emitted in the order listed here, so the
+ * order of this table influences the string that gets signed.
+ * NOTE(review): every entry but the trailing "object-lock" is in
+ * lexicographic order -- confirm whether its position is intentional. */
+static const auto signed_subresources = {
+ "acl",
+ "cors",
+ "delete",
+ "lifecycle",
+ "location",
+ "logging",
+ "notification",
+ "partNumber",
+ "policy",
+ "requestPayment",
+ "response-cache-control",
+ "response-content-disposition",
+ "response-content-encoding",
+ "response-content-language",
+ "response-content-type",
+ "response-expires",
+ "tagging",
+ "torrent",
+ "uploadId",
+ "uploads",
+ "versionId",
+ "versioning",
+ "versions",
+ "website",
+ "object-lock"
+};
+
+/*
+ * Serialize a metadata map into consecutive "name:value\n" lines, following
+ * the iteration order of the map.
+ */
+
+static std::string
+get_canon_amz_hdr(const meta_map_t& meta_map)
+{
+  std::string canon;
+
+  for (const auto& entry : meta_map) {
+    canon.append(entry.first)
+         .append(":")
+         .append(entry.second)
+         .append("\n");
+  }
+
+  return canon;
+}
+
+/*
+ * Build the canonical resource: the request URI (when given) followed by
+ * those entries of `sub_resources` that are listed in signed_subresources.
+ * The first one is introduced with '?', the following ones with '&'; a
+ * value is appended as "=value" only when it is not empty.
+ */
+static std::string
+get_canon_resource(const char* const request_uri,
+                   const std::map<std::string, std::string>& sub_resources)
+{
+  std::string dest = request_uri ? request_uri : "";
+
+  /* Switches to "&" once the first sub-resource has been emitted. */
+  const char* separator = "?";
+  for (const auto& subresource : signed_subresources) {
+    const auto iter = sub_resources.find(subresource);
+    if (iter != std::end(sub_resources)) {
+      dest.append(separator).append(iter->first);
+      separator = "&";
+
+      if (! iter->second.empty()) {
+        dest.append("=").append(iter->second);
+      }
+    }
+  }
+
+  dout(10) << "get_canon_resource(): dest=" << dest << dendl;
+  return dest;
+}
+
+/*
+ * Assemble the canonical header used to compute a request's signature.
+ *
+ * The result consists of four newline-terminated fields (method,
+ * Content-MD5, Content-Type, date; a null pointer yields an empty field),
+ * followed by the serialized `meta_map`, the serialized `qs_map` and the
+ * canonical resource. It is stored in `dest_str`.
+ */
+void rgw_create_s3_canonical_header(
+  const char* const method,
+  const char* const content_md5,
+  const char* const content_type,
+  const char* const date,
+  const meta_map_t& meta_map,
+  const meta_map_t& qs_map,
+  const char* const request_uri,
+  const std::map<std::string, std::string>& sub_resources,
+  std::string& dest_str)
+{
+  std::string canon;
+
+  /* Append an optional field followed by the mandatory line break. */
+  const auto add_line = [&canon](const char* const field) {
+    if (field) {
+      canon.append(field);
+    }
+    canon.append("\n");
+  };
+
+  add_line(method);
+  add_line(content_md5);
+  add_line(content_type);
+  add_line(date);
+
+  canon.append(get_canon_amz_hdr(meta_map));
+  canon.append(get_canon_amz_hdr(qs_map));
+  canon.append(get_canon_resource(request_uri, sub_resources));
+
+  dest_str = canon;
+}
+
+/* Tell whether `c` may appear in a Content-MD5 header value: alphanumerics,
+ * whitespace and the '+', '/' and '=' characters are accepted. */
+static inline bool is_base64_for_content_md5(unsigned char c) {
+  switch (c) {
+  case '+':
+  case '/':
+  case '=':
+    return true;
+  default:
+    return isalnum(c) || isspace(c);
+  }
+}
+
+/* Collect the query-string parameters whose lower-cased name starts with
+ * "x-amz-meta-" and add them to `qs_map` via add_amz_meta_header(). */
+static inline void get_v2_qs_map(const req_info& info,
+                                 meta_map_t& qs_map) {
+  for (const auto& param : const_cast<RGWHTTPArgs&>(info.args).get_params()) {
+    std::string lowered = boost::algorithm::to_lower_copy(param.first);
+    /* rfind() anchored at offset 0 can only match a prefix. */
+    if (lowered.rfind("x-amz-meta-", 0) != 0) {
+      continue;
+    }
+    add_amz_meta_header(qs_map, lowered, param.second);
+  }
+}
+
+/*
+ * Gather from the request everything needed to compute its signature and
+ * build the canonical header in `dest`.
+ *
+ * With `qsr` set, the date field is taken from the "Expires" argument and
+ * the x-amz-meta-* query parameters are included. Otherwise the date comes
+ * from the X-Amz-Date header or, when that one is absent, from Date; only
+ * in the latter case is it copied into the date field. When `header_time`
+ * is given it receives the parsed request date.
+ *
+ * Returns false when Content-MD5 contains a character that is not accepted,
+ * when no date is available, or when the date cannot be parsed or predates
+ * the epoch.
+ */
+bool rgw_create_s3_canonical_header(const req_info& info,
+                                    utime_t* const header_time,
+                                    std::string& dest,
+                                    const bool qsr)
+{
+  const char* const content_md5 = info.env->get("HTTP_CONTENT_MD5");
+  for (const char* cur = content_md5; cur && *cur; ++cur) {
+    if (is_base64_for_content_md5(*cur)) {
+      continue;
+    }
+    dout(0) << "NOTICE: bad content-md5 provided (not base64),"
+            << " aborting request p=" << *cur << " " << static_cast<int>(*cur) << dendl;
+    return false;
+  }
+
+  const char* const content_type = info.env->get("CONTENT_TYPE");
+
+  std::string date;
+  meta_map_t qs_map;
+
+  if (qsr) {
+    get_v2_qs_map(info, qs_map); // handle qs metadata
+    date = info.args.get("Expires");
+  } else {
+    /* Prefer X-Amz-Date; the date field stays empty when it is present. */
+    const char* req_date = info.env->get("HTTP_X_AMZ_DATE");
+    if (req_date == nullptr) {
+      req_date = info.env->get("HTTP_DATE");
+      if (req_date == nullptr) {
+        dout(0) << "NOTICE: missing date for auth header" << dendl;
+        return false;
+      }
+      date = req_date;
+    }
+
+    if (header_time) {
+      struct tm parsed;
+      if (!parse_rfc2616(req_date, &parsed)) {
+        dout(0) << "NOTICE: failed to parse date for auth header" << dendl;
+        return false;
+      }
+      if (parsed.tm_year < 70) {
+        dout(0) << "NOTICE: bad date (predates epoch): " << req_date << dendl;
+        return false;
+      }
+      *header_time = utime_t(internal_timegm(&parsed), 0);
+    }
+  }
+
+  /* The effective URI wins over the plain request URI when it is set. */
+  std::string request_uri;
+  if (! info.effective_uri.empty()) {
+    request_uri = info.effective_uri;
+  } else {
+    request_uri = info.request_uri;
+  }
+
+  rgw_create_s3_canonical_header(info.method, content_md5, content_type,
+                                 date.c_str(), info.x_meta_map, qs_map,
+                                 request_uri.c_str(),
+                                 info.args.get_sub_resources(), dest);
+  return true;
+}
+
+
+namespace rgw {
+namespace auth {
+namespace s3 {
+
+/* Check that request time `t` is within RGW_AUTH_GRACE of the current
+ * coarse real-clock time, in either direction. Both time points are logged
+ * when the skew is too big. */
+bool is_time_skew_ok(time_t t)
+{
+  const auto req_tp = ceph::coarse_real_clock::from_time_t(t);
+  const auto cur_tp = ceph::coarse_real_clock::now();
+
+  if (!(std::chrono::abs(cur_tp - req_tp) > RGW_AUTH_GRACE)) {
+    return true;
+  }
+
+  dout(10) << "NOTICE: request time skew too big." << dendl;
+  using ceph::operator<<;
+  dout(10) << "req_tp=" << req_tp << ", cur_tp=" << cur_tp << dendl;
+  return false;
+}
+
+/* Extract the AWSv4 authentication parameters of a presigned URL from the
+ * query string. Returns 0 on success and -EPERM when a required parameter
+ * is missing or empty, when X-Amz-Date cannot be parsed, when X-Amz-Expires
+ * is outside of [1, 604800] seconds or when the URL has already expired. */
+static inline int parse_v4_query_string(const req_info& info,              /* in */
+                                        boost::string_view& credential,    /* out */
+                                        boost::string_view& signedheaders, /* out */
+                                        boost::string_view& signature,     /* out */
+                                        boost::string_view& date,          /* out */
+                                        boost::string_view& sessiontoken)  /* out */
+{
+  /* auth ships with req params ... */
+
+  /* look for required params */
+  credential = info.args.get("X-Amz-Credential");
+  if (credential.empty()) {
+    return -EPERM;
+  }
+
+  date = info.args.get("X-Amz-Date");
+  struct tm request_tm;
+  if (!parse_iso8601(sview2cstr(date).data(), &request_tm, nullptr, false)) {
+    return -EPERM;
+  }
+
+  const boost::string_view expires = info.args.get("X-Amz-Expires");
+  if (expires.size() == 0) {
+    return -EPERM;
+  }
+  /* X-Amz-Expires is the validity period of the presigned URL in seconds.
+     The smallest allowed value is 1 and the largest is 604800, which is
+     seven days. */
+  const time_t exp = atoll(expires.data());
+  if (exp < 1 || exp > 7*24*60*60) {
+    dout(10) << "NOTICE: exp out of range, exp = " << exp << dendl;
+    return -EPERM;
+  }
+  /* handle expiration in epoch time */
+  const uint64_t req_sec = static_cast<uint64_t>(internal_timegm(&request_tm));
+  const uint64_t now = ceph_clock_now();
+  if (now >= req_sec + exp) {
+    dout(10) << "NOTICE: now = " << now << ", req_sec = " << req_sec << ", exp = " << exp << dendl;
+    return -EPERM;
+  }
+
+  signedheaders = info.args.get("X-Amz-SignedHeaders");
+  if (signedheaders.empty()) {
+    return -EPERM;
+  }
+
+  signature = info.args.get("X-Amz-Signature");
+  if (signature.empty()) {
+    return -EPERM;
+  }
+
+  /* The security token is optional, but must not be empty when present. */
+  if (info.args.exists("X-Amz-Security-Token")) {
+    sessiontoken = info.args.get("X-Amz-Security-Token");
+    if (sessiontoken.empty()) {
+      return -EPERM;
+    }
+  }
+
+  return 0;
+}
+
+/* Find the next token of `s` at or after `pos`, where tokens are separated
+ * by any character of `delims`. On success the token is stored in `token`,
+ * `pos` is moved past the delimiter terminating it (or to the end of `s`)
+ * and true is returned. When only delimiters remain, `pos` is set to the
+ * end of `s` and false is returned. */
+static bool get_next_token(const boost::string_view& s,
+                           size_t& pos,
+                           const char* const delims,
+                           boost::string_view& token)
+{
+  const size_t first = s.find_first_not_of(delims, pos);
+  if (first == boost::string_view::npos) {
+    pos = s.size();
+    return false;
+  }
+
+  const size_t delim_at = s.find_first_of(delims, first);
+  const bool terminated = (delim_at != boost::string_view::npos);
+  const size_t last = terminated ? delim_at : s.size();
+
+  pos = terminated ? delim_at + 1 : s.size();
+  token = s.substr(first, last - first);
+  return true;
+}
+
+/* Split `str` on any character of `delims` and return the non-empty tokens
+ * as views into `str`. ExpectedStrNum is the inline capacity of the
+ * returned small_vector. */
+template<std::size_t ExpectedStrNum>
+boost::container::small_vector<boost::string_view, ExpectedStrNum>
+get_str_vec(const boost::string_view& str, const char* const delims)
+{
+  boost::container::small_vector<boost::string_view, ExpectedStrNum> tokens;
+
+  boost::string_view current;
+  /* get_next_token() advances the cursor on every call. */
+  for (size_t cursor = 0; cursor < str.size(); ) {
+    if (get_next_token(str, cursor, delims, current) && ! current.empty()) {
+      tokens.push_back(current);
+    }
+  }
+
+  return tokens;
+}
+
+/* Same as the two-argument overload, using the default delimiter set:
+ * semicolon, comma, equals sign, space and tab. */
+template<std::size_t ExpectedStrNum>
+boost::container::small_vector<boost::string_view, ExpectedStrNum>
+get_str_vec(const boost::string_view& str)
+{
+  return get_str_vec<ExpectedStrNum>(str, ";,= \t");
+}
+
+/* Extract the AWSv4 authentication parameters from the Authorization header
+ * and the accompanying X-Amz-* headers.
+ *
+ * Returns 0 on success, -EINVAL when the header is too short, malformed or
+ * lacks a required key, -EACCES when X-Amz-Date cannot be parsed and
+ * -ERR_REQUEST_TIME_SKEWED when the request time is out of the allowed
+ * window. */
+static inline int parse_v4_auth_header(const req_info& info,              /* in */
+                                       boost::string_view& credential,    /* out */
+                                       boost::string_view& signedheaders, /* out */
+                                       boost::string_view& signature,     /* out */
+                                       boost::string_view& date,          /* out */
+                                       boost::string_view& sessiontoken)  /* out */
+{
+  boost::string_view input(info.env->get("HTTP_AUTHORIZATION", ""));
+  try {
+    /* Skip the algorithm name and the character following it. */
+    input = input.substr(::strlen(AWS4_HMAC_SHA256_STR) + 1);
+  } catch (std::out_of_range&) {
+    /* This is not expected to happen: the presence of AWS4_HMAC_SHA256_STR
+     * had been verified earlier. */
+    dout(10) << "credentials string is too short" << dendl;
+    return -EINVAL;
+  }
+
+  std::map<boost::string_view, boost::string_view> kv;
+  for (const auto& s : get_str_vec<4>(input, ",")) {
+    const auto parsed_pair = parse_key_value(s);
+    if (! parsed_pair) {
+      dout(10) << "NOTICE: failed to parse auth header (s=" << s << ")"
+               << dendl;
+      return -EINVAL;
+    }
+    kv[parsed_pair->first] = parsed_pair->second;
+  }
+
+  static const std::array<boost::string_view, 3> required_keys = {
+    "Credential",
+    "SignedHeaders",
+    "Signature"
+  };
+
+  /* Every required key must have been supplied. */
+  for (const auto& k : required_keys) {
+    if (kv.find(k) == std::end(kv)) {
+      dout(10) << "NOTICE: auth header missing key: " << k << dendl;
+      return -EINVAL;
+    }
+  }
+
+  credential = kv["Credential"];
+  signedheaders = kv["SignedHeaders"];
+  signature = kv["Signature"];
+
+  /* sig hex str */
+  dout(10) << "v4 signature format = " << signature << dendl;
+
+  /* ------------------------- handle x-amz-date header */
+
+  /* grab date */
+
+  const char* const d = info.env->get("HTTP_X_AMZ_DATE");
+  struct tm t;
+  if (!parse_iso8601(d, &t, nullptr, false)) {
+    dout(10) << "error reading date via http_x_amz_date" << dendl;
+    return -EACCES;
+  }
+  date = d;
+
+  if (!is_time_skew_ok(internal_timegm(&t))) {
+    return -ERR_REQUEST_TIME_SKEWED;
+  }
+
+  /* The session token is optional. */
+  if (info.env->exists("HTTP_X_AMZ_SECURITY_TOKEN")) {
+    sessiontoken = info.env->get("HTTP_X_AMZ_SECURITY_TOKEN");
+  }
+
+  return 0;
+}
+
+/* Parse the AWSv4 credentials of a request, taken either from the query
+ * string (`using_qs`) or from the Authorization header, and split the
+ * credential into the access key id and the credential scope.
+ *
+ * The credential has the form
+ *   access_key/YYYYMMDD/region/service/aws4_request
+ * i.e. it holds exactly four slashes and terminates with "aws4_request".
+ *
+ * Returns 0 on success, the error of the underlying parser when it fails,
+ * or -EINVAL when the credential does not have the form above. */
+int parse_v4_credentials(const req_info& info,                     /* in */
+                         boost::string_view& access_key_id,        /* out */
+                         boost::string_view& credential_scope,     /* out */
+                         boost::string_view& signedheaders,        /* out */
+                         boost::string_view& signature,            /* out */
+                         boost::string_view& date,                 /* out */
+                         boost::string_view& session_token,        /* out */
+                         const bool using_qs)                      /* in */
+{
+  boost::string_view credential;
+  int ret;
+  if (using_qs) {
+    ret = parse_v4_query_string(info, credential, signedheaders,
+                                signature, date, session_token);
+  } else {
+    ret = parse_v4_auth_header(info, credential, signedheaders,
+                               signature, date, session_token);
+  }
+
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* access_key/YYYYMMDD/region/service/aws4_request */
+  dout(10) << "v4 credential format = " << credential << dendl;
+
+  if (std::count(credential.begin(), credential.end(), '/') != 4) {
+    return -EINVAL;
+  }
+
+  /* credential must end with 'aws4_request'. A plain substring search would
+   * also accept the terminator in any other position, e.g. inside of the
+   * access key, so require it to be the whole last segment. */
+  if (! credential.ends_with(boost::string_view("/aws4_request"))) {
+    return -EINVAL;
+  }
+
+  /* grab access key id; the slash count above guarantees a match */
+  const size_t pos = credential.find("/");
+  access_key_id = credential.substr(0, pos);
+  dout(10) << "access key id = " << access_key_id << dendl;
+
+  /* grab credential scope */
+  credential_scope = credential.substr(pos + 1);
+  dout(10) << "credential scope = " << credential_scope << dendl;
+
+  return 0;
+}
+
+/* Build the canonical query string for AWS signature version 4.
+ *
+ * Each '+' of the raw query string is first turned into "%20". The
+ * parameters are then re-encoded with aws4_uri_recode(), sorted by name and
+ * joined as "name=value" pairs separated by '&'. With `using_qs` set, the
+ * X-Amz-Signature parameter is left out.
+ *
+ * Returns an empty string when there is no parameter to emit. */
+std::string get_v4_canonical_qs(const req_info& info, const bool using_qs)
+{
+  const std::string *params = &info.request_params;
+  std::string copy_params;
+  if (params->empty()) {
+    /* Optimize the typical flow. */
+    return std::string();
+  }
+  if (params->find_first_of('+') != std::string::npos) {
+    copy_params = *params;
+    boost::replace_all(copy_params, "+", "%20");
+    params = &copy_params;
+  }
+
+  /* Handle case when query string exists. Step 3 described in: http://docs.
+   * aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html */
+  std::map<std::string, std::string> canonical_qs_map;
+  for (const auto& s : get_str_vec<5>(*params, "&")) {
+    boost::string_view key, val;
+    const auto parsed_pair = parse_key_value(s);
+    if (parsed_pair) {
+      std::tie(key, val) = *parsed_pair;
+    } else {
+      /* Handling a parameter without any value (even the empty one). That's
+       * it, we've encountered something like "this_param&other_param=val"
+       * which is used by S3 for subresources. */
+      key = s;
+    }
+
+    if (using_qs && key == "X-Amz-Signature") {
+      /* Preserving the original behaviour of get_v4_canonical_qs() here. */
+      continue;
+    }
+
+    // while awsv4 specs ask for all slashes to be encoded, s3 itself is relaxed
+    // in its implementation allowing non-url-encoded slashes to be present in
+    // presigned urls for instance
+    canonical_qs_map[aws4_uri_recode(key, true)] = aws4_uri_recode(val, true);
+  }
+
+  /* The early exit above does NOT guarantee a non-empty map: a query made
+   * of delimiters only (e.g. "&"), or holding nothing but X-Amz-Signature
+   * when using_qs is set, produces no entry at all. Hence the join must not
+   * dereference begin() unconditionally. */
+  std::string canonical_qs;
+  for (const auto& param : canonical_qs_map) {
+    /* Every emitted pair holds at least the "=", so the string being
+     * non-empty means that a pair has already been written. */
+    if (! canonical_qs.empty()) {
+      canonical_qs.append("&");
+    }
+    canonical_qs.append(param.first)
+                .append("=")
+                .append(param.second);
+  }
+
+  return canonical_qs;
+}
+
+/* Build the canonical headers block for AWS signature version 4.
+ *
+ * For every name of the semicolon-separated `signedheaders` list the value
+ * is looked up in the request environment (under the upper-cased name with
+ * '-' replaced by '_', prefixed with "HTTP_" except for Content-Length and
+ * Content-Type). Headers that are not available are skipped. The result
+ * consists of "name:value\n" lines sorted by name.
+ *
+ * With both `force_boto2_compat` and `using_qs` set, a non-default port is
+ * appended to the value of the "host" header.
+ *
+ * Returns boost::none when Content-MD5 is signed and contains a character
+ * that is not accepted. */
+boost::optional<std::string>
+get_v4_canonical_headers(const req_info& info,
+                         const boost::string_view& signedheaders,
+                         const bool using_qs,
+                         const bool force_boto2_compat)
+{
+  std::map<boost::string_view, std::string> canonical_hdrs_map;
+  for (const auto& token : get_str_vec<5>(signedheaders, ";")) {
+    /* TODO(rzarzynski): we'd like to switch to sstring here but it should
+     * get push_back() and reserve() first. */
+    std::string token_env = "HTTP_";
+    token_env.reserve(token.length() + std::strlen("HTTP_") + 1);
+
+    /* The header names come from the client. std::toupper() has undefined
+     * behaviour for values that are neither EOF nor representable as
+     * unsigned char, so each character is taken as unsigned char instead of
+     * being sign-extended to int. */
+    std::transform(std::begin(token), std::end(token),
+                   std::back_inserter(token_env),
+                   [](const unsigned char c) -> char {
+                     return c == '-' ? '_'
+                                     : static_cast<char>(std::toupper(c));
+                   });
+
+    if (token_env == "HTTP_CONTENT_LENGTH") {
+      token_env = "CONTENT_LENGTH";
+    } else if (token_env == "HTTP_CONTENT_TYPE") {
+      token_env = "CONTENT_TYPE";
+    }
+    const char* const t = info.env->get(token_env.c_str());
+    if (!t) {
+      dout(10) << "warning env var not available" << dendl;
+      continue;
+    }
+
+    std::string token_value(t);
+    if (token_env == "HTTP_CONTENT_MD5" &&
+        !std::all_of(std::begin(token_value), std::end(token_value),
+                     is_base64_for_content_md5)) {
+      dout(0) << "NOTICE: bad content-md5 provided (not base64)"
+              << ", aborting request" << dendl;
+      return boost::none;
+    }
+
+    if (force_boto2_compat && using_qs && token == "host") {
+      boost::string_view port = info.env->get("SERVER_PORT", "");
+      boost::string_view secure_port = info.env->get("SERVER_PORT_SECURE", "");
+
+      /* The secure port takes precedence; default ports are not appended. */
+      if (!secure_port.empty()) {
+        if (secure_port != "443") {
+          token_value.append(":", std::strlen(":"))
+                     .append(secure_port.data(), secure_port.length());
+        }
+      } else if (!port.empty()) {
+        if (port != "80") {
+          token_value.append(":", std::strlen(":"))
+                     .append(port.data(), port.length());
+        }
+      }
+    }
+
+    canonical_hdrs_map[token] = rgw_trim_whitespace(token_value);
+  }
+
+  std::string canonical_hdrs;
+  for (const auto& header : canonical_hdrs_map) {
+    const boost::string_view& name = header.first;
+    std::string value = header.second;
+    boost::trim_all<std::string>(value);
+
+    canonical_hdrs.append(name.data(), name.length())
+                  .append(":", std::strlen(":"))
+                  .append(value)
+                  .append("\n", std::strlen("\n"));
+  }
+
+  return canonical_hdrs;
+}
+
+/*
+ * Create the canonical request for signature version 4 and return its
+ * SHA-256 hash. The canonical request is made of the given parts joined
+ * with line breaks.
+ *
+ * http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ */
+sha256_digest_t
+get_v4_canon_req_hash(CephContext* cct,
+                      const boost::string_view& http_verb,
+                      const std::string& canonical_uri,
+                      const std::string& canonical_qs,
+                      const std::string& canonical_hdrs,
+                      const boost::string_view& signed_hdrs,
+                      const boost::string_view& request_payload_hash)
+{
+  ldout(cct, 10) << "payload request hash = " << request_payload_hash << dendl;
+
+  const auto joined_request =
+    string_join_reserve("\n", http_verb, canonical_uri, canonical_qs,
+                        canonical_hdrs, signed_hdrs, request_payload_hash);
+  const auto request_digest = calc_hash_sha256(joined_request);
+
+  /* The request is passed through the log sanitizer before being logged. */
+  ldout(cct, 10) << "canonical request = "
+                 << rgw::crypt_sanitize::log_content{joined_request} << dendl;
+  ldout(cct, 10) << "canonical request hash = "
+                 << request_digest << dendl;
+
+  return request_digest;
+}
+
+/*
+ * Create the string to sign for signature version 4: the algorithm, the
+ * request date, the credential scope and the textual form of the canonical
+ * request hash, joined with line breaks.
+ *
+ * http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+ */
+AWSEngine::VersionAbstractor::string_to_sign_t
+get_v4_string_to_sign(CephContext* const cct,
+                      const boost::string_view& algorithm,
+                      const boost::string_view& request_date,
+                      const boost::string_view& credential_scope,
+                      const sha256_digest_t& canonreq_hash)
+{
+  /* Keep the textual hash alive for as long as the view onto it is used. */
+  const auto hash_text = canonreq_hash.to_str();
+  const boost::string_view hash_view(hash_text);
+
+  const auto to_sign = string_join_reserve("\n", algorithm, request_date,
+                                           credential_scope, hash_view);
+
+  ldout(cct, 10) << "string to sign = "
+                 << rgw::crypt_sanitize::log_content{to_sign}
+                 << dendl;
+
+  return to_sign;
+}
+
+
+/* Split a credential scope of the form date/region/service/... into its
+ * first three slash-separated components. */
+static inline std::tuple<boost::string_view,            /* date */
+                         boost::string_view,            /* region */
+                         boost::string_view>            /* service */
+parse_cred_scope(boost::string_view credential_scope)
+{
+  /* Cut the leading component off the scope and return it. */
+  const auto pop_component = [&credential_scope]() {
+    const size_t pos = credential_scope.find("/");
+    const auto component = credential_scope.substr(0, pos);
+    credential_scope = credential_scope.substr(pos + 1);
+    return component;
+  };
+
+  /* Separate statements keep the order of extraction well-defined. */
+  const auto date_cs = pop_component();
+  const auto region_cs = pop_component();
+  const auto service_cs = pop_component();
+
+  return std::make_tuple(date_cs, region_cs, service_cs);
+}
+
+/* Produce the key for the first HMAC round of the signing key derivation:
+ * the bytes "AWS4" followed by every character of the secret access key
+ * passed through encode_utf8(). */
+static inline std::vector<unsigned char>
+transform_secret_key(const boost::string_view& secret_access_key)
+{
+  /* TODO(rzarzynski): switch to constexpr when C++14 becomes available. */
+  static const std::initializer_list<unsigned char> AWS4 { 'A', 'W', 'S', '4' };
+
+  /* boost::container::small_vector could be used here to avoid even more
+   * dynamic allocations. */
+  std::vector<unsigned char> key_bytes;
+  key_bytes.reserve(AWS4.size() + secret_access_key.size());
+  key_bytes.insert(std::end(key_bytes), std::begin(AWS4), std::end(AWS4));
+
+  std::array<unsigned char, MAX_UTF8_SZ> encoded;
+  for (auto it = std::begin(secret_access_key);
+       it != std::end(secret_access_key); ++it) {
+    const auto c = *it;
+    const size_t n = encode_utf8(c, encoded.data());
+    key_bytes.insert(std::end(key_bytes),
+                     encoded.data(), encoded.data() + n);
+  }
+
+  return key_bytes;
+}
+
+/*
+ * calculate the SigningKey of AWS auth version 4
+ *
+ * The key is derived by a chain of calc_hmac_sha256() calls in which every
+ * result becomes the first argument of the next call:
+ *
+ *   date_k    <- (transform_secret_key(secret_access_key), date)
+ *   region_k  <- (date_k, region)
+ *   service_k <- (region_k, service)
+ *   signing_k <- (service_k, "aws4_request")
+ *
+ * date, region and service are the components parse_cred_scope() extracts
+ * from credential_scope.
+ *
+ * NOTE(review): all four derived keys, the final signing key included, are
+ * written to the log at level 10. Anybody able to read such a log obtains
+ * key material -- confirm this is acceptable for the deployment.
+ */
+static sha256_digest_t
+get_v4_signing_key(CephContext* const cct,
+ const boost::string_view& credential_scope,
+ const boost::string_view& secret_access_key)
+{
+ boost::string_view date, region, service;
+ std::tie(date, region, service) = parse_cred_scope(credential_scope);
+
+ const auto utfed_sec_key = transform_secret_key(secret_access_key);
+ const auto date_k = calc_hmac_sha256(utfed_sec_key, date);
+ const auto region_k = calc_hmac_sha256(date_k, region);
+ const auto service_k = calc_hmac_sha256(region_k, service);
+
+ /* aws4_request */
+ const auto signing_key = calc_hmac_sha256(service_k,
+ boost::string_view("aws4_request"));
+
+ ldout(cct, 10) << "date_k = " << date_k << dendl;
+ ldout(cct, 10) << "region_k = " << region_k << dendl;
+ ldout(cct, 10) << "service_k = " << service_k << dendl;
+ ldout(cct, 10) << "signing_k = " << signing_key << dendl;
+
+ return signing_key;
+}
+
+/*
+ * calculate the AWS signature version 4
+ *
+ * http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+ *
+ * srv_signature_t is an alias over Ceph's basic_sstring. We're using
+ * it to keep everything within the stack boundaries instead of doing
+ * dynamic allocations.
+ *
+ * Steps: derive the signing key from credential_scope and secret_key with
+ * get_v4_signing_key(), run calc_hmac_sha256(signing key, string_to_sign)
+ * and render the digest with buf_to_hex() into a signature of
+ * 2 * digest.SIZE characters. The generated signature is logged at level 10.
+ */
+AWSEngine::VersionAbstractor::server_signature_t
+get_v4_signature(const boost::string_view& credential_scope,
+ CephContext* const cct,
+ const boost::string_view& secret_key,
+ const AWSEngine::VersionAbstractor::string_to_sign_t& string_to_sign)
+{
+ auto signing_key = get_v4_signing_key(cct, credential_scope, secret_key);
+
+ /* The server-side generated digest for comparison. */
+ const auto digest = calc_hmac_sha256(signing_key, string_to_sign);
+
+ /* TODO(rzarzynski): I would love to see our sstring having reserve() and
+ * the non-const data() variant like C++17's std::string. */
+ using srv_signature_t = AWSEngine::VersionAbstractor::server_signature_t;
+ srv_signature_t signature(srv_signature_t::initialized_later(),
+ digest.SIZE * 2);
+ buf_to_hex(digest.v, digest.SIZE, signature.begin());
+
+ ldout(cct, 10) << "generated signature = " << signature << dendl;
+
+ return signature;
+}
+
+/*
+ * Calculate the AWS signature version 2: calc_hmac_sha1(secret_key,
+ * string_to_sign) armored with ceph_armor().
+ *
+ * Throws -EINVAL (as an int) for an empty secret_key and the negative
+ * ceph_armor() result when the armoring fails.
+ */
+AWSEngine::VersionAbstractor::server_signature_t
+get_v2_signature(CephContext* const cct,
+                 const std::string& secret_key,
+                 const AWSEngine::VersionAbstractor::string_to_sign_t& string_to_sign)
+{
+  using srv_signature_t = AWSEngine::VersionAbstractor::server_signature_t;
+
+  if (secret_key.empty()) {
+    throw -EINVAL;
+  }
+
+  const auto hmac = calc_hmac_sha1(secret_key, string_to_sign);
+  const char* const raw_begin = reinterpret_cast<const char *>(hmac.v);
+  const char* const raw_end = reinterpret_cast<const char *>(hmac.v + hmac.SIZE);
+
+  /* 64 is really enough */
+  char armored[64];
+  const int armored_len = ceph_armor(std::begin(armored), std::end(armored),
+                                     raw_begin, raw_end);
+  if (armored_len < 0) {
+    ldout(cct, 10) << "ceph_armor failed" << dendl;
+    throw armored_len;
+  }
+
+  armored[armored_len] = '\0';
+  return srv_signature_t(armored, armored_len);
+}
+
+/* True when stream_pos lies at or behind the end of this chunk's data, i.e.
+ * the position no longer belongs to the chunk described by this object. */
+bool AWSv4ComplMulti::ChunkMeta::is_new_chunk_in_stream(size_t stream_pos) const
+{
+  const size_t chunk_end = data_offset_in_stream + data_length;
+  return chunk_end <= stream_pos;
+}
+
+/* Number of data bytes of this chunk still to be delivered when the stream
+ * is at stream_pos. */
+size_t AWSv4ComplMulti::ChunkMeta::get_data_size(size_t stream_pos) const
+{
+  const size_t chunk_end = data_offset_in_stream + data_length;
+
+  /* A position past the chunk's end means: Data in parsing_buf. The whole
+   * length is reported then. */
+  return (stream_pos > chunk_end) ? data_length
+                                  : chunk_end - stream_pos;
+}
+
+
+/* AWSv4 completers begin. */
+/*
+ * Parse the metadata that precedes the payload of a chunk and create the
+ * ChunkMeta describing that chunk.
+ *
+ * metabuf/metabuf_len is the raw stream fragment that starts right behind
+ * the payload of the previous chunk. The expected layout is
+ *
+ *   ["\r\n"]<hex data length>;<name>=<SIG_SIZE characters>"\r\n"
+ *
+ * where the leading "\r\n" is present in front of every non-first chunk.
+ *
+ * old is the metadata of the previous chunk; its offset and length are
+ * needed to compute where the payload of the new chunk starts in the stream.
+ *
+ * Returns the new ChunkMeta together with the number of bytes of metabuf
+ * the metadata occupies. The count is derived from the parsed positions, so
+ * it never exceeds metabuf_len and always agrees with the data offset stored
+ * in the returned ChunkMeta. For the regular "chunk-signature" attribute it
+ * equals semicolon_pos + 83.
+ *
+ * Throws rgw::io::Exception(EINVAL) when the metadata is malformed.
+ */
+std::pair<AWSv4ComplMulti::ChunkMeta, size_t /* consumed */>
+AWSv4ComplMulti::ChunkMeta::create_next(CephContext* const cct,
+                                        ChunkMeta&& old,
+                                        const char* const metabuf,
+                                        const size_t metabuf_len)
+{
+  boost::string_ref metastr(metabuf, metabuf_len);
+
+  const size_t semicolon_pos = metastr.find(";");
+  if (semicolon_pos == boost::string_ref::npos) {
+    ldout(cct, 20) << "AWSv4ComplMulti cannot find the ';' separator"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  char* data_field_end;
+  /* strtoull ignores the "\r\n" sequence after each non-first chunk. The
+   * buffer isn't NUL-terminated, but the parse stops at the ';' found above
+   * at the latest, so it stays within metabuf_len. */
+  const size_t data_length = std::strtoull(metabuf, &data_field_end, 16);
+  if (data_length == 0 && data_field_end == metabuf) {
+    ldout(cct, 20) << "AWSv4ComplMulti: cannot parse the data size"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  /* Parse the chunk_signature=... part. */
+  const auto signature_part = metastr.substr(semicolon_pos + 1);
+  const size_t eq_sign_pos = signature_part.find("=");
+  if (eq_sign_pos == boost::string_ref::npos) {
+    ldout(cct, 20) << "AWSv4ComplMulti: cannot find the '=' separator"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  /* OK, we have at least the beginning of a signature. */
+  const size_t data_sep_pos = signature_part.find("\r\n");
+  if (data_sep_pos == boost::string_ref::npos) {
+    ldout(cct, 20) << "AWSv4ComplMulti: no new line at signature end"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  /* The line end must follow the '='. Otherwise the length computed below
+   * would wrap around and the "signature" would span the line break. */
+  if (data_sep_pos < eq_sign_pos) {
+    ldout(cct, 20) << "AWSv4ComplMulti: new line before the '=' separator"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  const auto signature = \
+    signature_part.substr(eq_sign_pos + 1, data_sep_pos - 1 - eq_sign_pos);
+  if (signature.length() != SIG_SIZE) {
+    ldout(cct, 20) << "AWSv4ComplMulti: signature.length() != 64"
+                   << dendl;
+    throw rgw::io::Exception(EINVAL, std::system_category());
+  }
+
+  /* Size of the metadata as it was actually parsed. Using a hard-coded
+   * constant instead would disagree with data_starts_in_stream (and might
+   * point past the buffer) whenever the attribute name isn't exactly
+   * "chunk-signature". */
+  const size_t meta_len = \
+    semicolon_pos + strlen(";") + data_sep_pos + strlen("\r\n");
+
+  const size_t data_starts_in_stream = \
+    meta_len + old.data_offset_in_stream + old.data_length;
+
+  ldout(cct, 20) << "parsed new chunk; signature=" << signature
+                 << ", data_length=" << data_length
+                 << ", data_starts_in_stream=" << data_starts_in_stream
+                 << dendl;
+
+  return std::make_pair(ChunkMeta(data_starts_in_stream,
+                                  data_length,
+                                  signature),
+                        meta_len);
+}
+/*
+ * Compute the signature expected for a chunk whose payload hashes to
+ * payload_hash.
+ *
+ * The string to sign is the '\n'-joined sequence of
+ *   AWS4_HMAC_SHA256_PAYLOAD_STR, date, credential_scope,
+ *   prev_chunk_signature, AWS4_EMPTY_PAYLOAD_HASH, payload_hash
+ * and it is processed by calc_hmac_sha256() together with signing_key.
+ * Because prev_chunk_signature is part of the input, every signature
+ * depends on the one of the preceding chunk.
+ *
+ * Returns the to_str() form of the resulting digest. The string to sign is
+ * logged at level 20.
+ */
+std::string
+AWSv4ComplMulti::calc_chunk_signature(const std::string& payload_hash) const
+{
+ const auto string_to_sign = string_join_reserve("\n",
+ AWS4_HMAC_SHA256_PAYLOAD_STR,
+ date,
+ credential_scope,
+ prev_chunk_signature,
+ AWS4_EMPTY_PAYLOAD_HASH,
+ payload_hash);
+
+ ldout(cct, 20) << "AWSv4ComplMulti: string_to_sign=\n" << string_to_sign
+ << dendl;
+
+ /* new chunk signature */
+ const auto sig = calc_hmac_sha256(signing_key, string_to_sign);
+ /* FIXME(rzarzynski): std::string here is really unnecessary. */
+ return sig.to_str();
+}
+
+
+/*
+ * Check the signature declared for the current chunk against the one
+ * calculated from the payload hashed so far. The running SHA-256 is
+ * restarted as a side effect.
+ *
+ * Returns true on a mismatch. On a match the declared signature becomes
+ * prev_chunk_signature and false is returned.
+ */
+bool AWSv4ComplMulti::is_signature_mismatched()
+{
+  /* The validity of previous chunk can be verified only after getting meta-
+   * data of the next one. */
+  const auto payload_hash = calc_hash_sha256_restart_stream(&sha256_hash);
+  const auto expected_sig = calc_chunk_signature(payload_hash);
+  const std::string& declared_sig = chunk_meta.get_signature();
+
+  if (declared_sig == expected_sig) {
+    prev_chunk_signature = declared_sig;
+    return false;
+  }
+
+  ldout(cct, 20) << "AWSv4ComplMulti: ERROR: chunk signature mismatch"
+                 << dendl;
+  ldout(cct, 20) << "AWSv4ComplMulti: declared signature="
+                 << declared_sig << dendl;
+  ldout(cct, 20) << "AWSv4ComplMulti: calculated signature="
+                 << expected_sig << dendl;
+  return true;
+}
+/*
+ * Read up to buf_max bytes of chunk payload into buf.
+ *
+ * The stream received from the client interleaves per-chunk metadata with
+ * payload. This method parses and drops the metadata, feeds every payload
+ * byte into the running SHA-256 and hands only payload over to the caller.
+ * A single call delivers at most the remainder of the current chunk.
+ *
+ * Returns the number of bytes stored in buf.
+ *
+ * Throws rgw::io::Exception carrying ERR_SIGNATURE_NO_MATCH when the
+ * signature of the previous chunk doesn't verify. ChunkMeta::create_next()
+ * throws on malformed metadata.
+ */
+size_t AWSv4ComplMulti::recv_body(char* const buf, const size_t buf_max)
+{
+ /* Buffer stores only parsed stream. Raw values reflect the stream
+ * we're getting from a client. */
+ size_t buf_pos = 0;
+
+ if (chunk_meta.is_new_chunk_in_stream(stream_pos)) {
+ /* Verify signature of the previous chunk. We aren't doing that for new
+ * one as the procedure requires calculation of payload hash. This code
+ * won't be triggered for the last, zero-length chunk. Instead, it will
+ * be checked in the complete() method. */
+ if (stream_pos >= ChunkMeta::META_MAX_SIZE && is_signature_mismatched()) {
+ throw rgw::io::Exception(ERR_SIGNATURE_NO_MATCH, std::system_category());
+ }
+
+ /* We don't have metadata for this range. This means a new chunk, so we
+ * need to parse a fresh portion of the stream. Let's start. */
+ size_t to_extract = parsing_buf.capacity() - parsing_buf.size();
+ do {
+ const size_t orig_size = parsing_buf.size();
+ parsing_buf.resize(parsing_buf.size() + to_extract);
+ const size_t received = io_base_t::recv_body(parsing_buf.data() + orig_size,
+ to_extract);
+ parsing_buf.resize(parsing_buf.size() - (to_extract - received));
+ if (received == 0) {
+ break;
+ }
+
+ stream_pos += received;
+ to_extract -= received;
+ } while (to_extract > 0);
+
+ size_t consumed;
+ std::tie(chunk_meta, consumed) = \
+ ChunkMeta::create_next(cct, std::move(chunk_meta),
+ parsing_buf.data(), parsing_buf.size());
+
+ /* We can drop the bytes consumed during metadata parsing. The remainder
+ * can be chunk's data plus possibly beginning of next chunks' metadata. */
+ parsing_buf.erase(std::begin(parsing_buf),
+ std::begin(parsing_buf) + consumed);
+ }
+
+ size_t stream_pos_was = stream_pos - parsing_buf.size();
+
+ size_t to_extract = \
+ std::min(chunk_meta.get_data_size(stream_pos_was), buf_max);
+ dout(30) << "AWSv4ComplMulti: stream_pos_was=" << stream_pos_was << ", to_extract=" << to_extract << dendl;
+
+ /* It's quite probable we have a couple of real data bytes stored together
+ * with meta-data in the parsing_buf. We need to extract them and move to
+ * the final buffer. This is a trade-off between frontend's read overhead
+ * and memcpy. */
+ if (to_extract > 0 && parsing_buf.size() > 0) {
+ const auto data_len = std::min(to_extract, parsing_buf.size());
+ const auto data_end_iter = std::begin(parsing_buf) + data_len;
+ dout(30) << "AWSv4ComplMulti: to_extract=" << to_extract << ", data_len=" << data_len << dendl;
+
+ std::copy(std::begin(parsing_buf), data_end_iter, buf);
+ parsing_buf.erase(std::begin(parsing_buf), data_end_iter);
+
+ calc_hash_sha256_update_stream(sha256_hash, buf, data_len);
+
+ to_extract -= data_len;
+ buf_pos += data_len;
+ }
+
+ /* Now we can do the bulk read directly from RestfulClient without any extra
+ * buffering. */
+ while (to_extract > 0) {
+ const size_t received = io_base_t::recv_body(buf + buf_pos, to_extract);
+ dout(30) << "AWSv4ComplMulti: to_extract=" << to_extract << ", received=" << received << dendl;
+
+ if (received == 0) {
+ break;
+ }
+
+ calc_hash_sha256_update_stream(sha256_hash, buf + buf_pos, received);
+
+ buf_pos += received;
+ stream_pos += received;
+ to_extract -= received;
+ }
+
+ dout(20) << "AWSv4ComplMulti: filled=" << buf_pos << dendl;
+ return buf_pos;
+}
+
+/*
+ * Adjust the request state for a chunk-signed body and hook this object
+ * into the request's I/O chain.
+ *
+ * The length fields of s_rw are taken from the
+ * HTTP_X_AMZ_DECODED_CONTENT_LENGTH environment entry. Throws -EINVAL (as
+ * an int) when the entry is missing or parses to a negative value.
+ */
+void AWSv4ComplMulti::modify_request_state(const DoutPrefixProvider* dpp, req_state* const s_rw)
+{
+  const char* const decoded_len_hdr =
+    s_rw->info.env->get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH");
+
+  if (decoded_len_hdr == nullptr) {
+    throw -EINVAL;
+  }
+
+  s_rw->length = decoded_len_hdr;
+  s_rw->content_length = parse_content_length(decoded_len_hdr);
+  if (s_rw->content_length < 0) {
+    ldpp_dout(dpp, 10) << "negative AWSv4's content length, aborting" << dendl;
+    throw -EINVAL;
+  }
+
+  /* Install the filter over rgw::io::RestfulClient. */
+  AWS_AUTHv4_IO(s_rw)->add_filter(
+    std::static_pointer_cast<io_base_t>(shared_from_this()));
+}
+
+/* Final step of the streamed upload. Returns true when the signature of
+ * the closing chunk verifies, false otherwise. */
+bool AWSv4ComplMulti::complete()
+{
+  /* Now it's time to verify the signature of the last, zero-length chunk. */
+  const bool mismatched = is_signature_mismatched();
+  if (mismatched) {
+    ldout(cct, 10) << "ERROR: signature of last chunk does not match"
+                   << dendl;
+  }
+
+  return !mismatched;
+}
+/*
+ * Factory of the completer for streamed uploads.
+ *
+ * Derives the AWSv4 signing key from credential_scope and *secret_key and
+ * passes it, together with date, credential_scope and seed_signature, to
+ * the new AWSv4ComplMulti instance.
+ *
+ * Throws -ERR_NOT_IMPLEMENTED (as an int) when secret_key holds no value.
+ */
+rgw::auth::Completer::cmplptr_t
+AWSv4ComplMulti::create(const req_state* const s,
+ boost::string_view date,
+ boost::string_view credential_scope,
+ boost::string_view seed_signature,
+ const boost::optional<std::string>& secret_key)
+{
+ if (!secret_key) {
+ /* Some external authorizers (like Keystone) aren't fully compliant with
+ * AWSv4. They do not provide the secret_key which is necessary to handle
+ * the streamed upload. */
+ throw -ERR_NOT_IMPLEMENTED;
+ }
+
+ const auto signing_key = \
+ rgw::auth::s3::get_v4_signing_key(s->cct, credential_scope, *secret_key);
+
+ return std::make_shared<AWSv4ComplMulti>(s,
+ std::move(date),
+ std::move(credential_scope),
+ std::move(seed_signature),
+ signing_key);
+}
+
+/* Pass-through read: forwards to the decorated client and feeds whatever
+ * has been received into the running SHA-256. Returns the byte count
+ * reported by the decorated client. */
+size_t AWSv4ComplSingle::recv_body(char* const buf, const size_t max)
+{
+  const auto nread = io_base_t::recv_body(buf, max);
+
+  calc_hash_sha256_update_stream(sha256_hash, buf, nread);
+  return nread;
+}
+/*
+ * Hook this object into the I/O chain of the request so that recv_body()
+ * sees the body. dpp is not used here.
+ */
+void AWSv4ComplSingle::modify_request_state(const DoutPrefixProvider* dpp, req_state* const s_rw)
+{
+ /* Install the filter over rgw::io::RestfulClient. */
+ AWS_AUTHv4_IO(s_rw)->add_filter(
+ std::static_pointer_cast<io_base_t>(shared_from_this()));
+}
+
+/*
+ * Close the running SHA-256 and compare it with the payload hash expected
+ * for the request. Returns true when both are equal; a mismatch is logged
+ * at level 10 and false is returned.
+ */
+bool AWSv4ComplSingle::complete()
+{
+  /* The completer is only for the cases where signed payload has been
+   * requested. It won't be used, for instance, during the query string-based
+   * authentication. */
+  const auto actual_hash = calc_hash_sha256_close_stream(&sha256_hash);
+
+  /* Validate x-amz-sha256 */
+  const bool hash_differs =
+    actual_hash.compare(expected_request_payload_hash) != 0;
+  if (hash_differs) {
+    ldout(cct, 10) << "ERROR: x-amz-content-sha256 does not match"
+                   << dendl;
+    ldout(cct, 10) << "ERROR: grab_aws4_sha256_hash()="
+                   << actual_hash << dendl;
+    ldout(cct, 10) << "ERROR: expected_request_payload_hash="
+                   << expected_request_payload_hash << dendl;
+    return false;
+  }
+
+  return true;
+}
+/*
+ * Remember the CephContext and the payload hash expected for the request
+ * (see get_v4_exp_payload_hash()) and open the SHA-256 stream the body is
+ * going to be hashed with. The decorated client is initialized with
+ * nullptr here.
+ */
+AWSv4ComplSingle::AWSv4ComplSingle(const req_state* const s)
+ : io_base_t(nullptr),
+ cct(s->cct),
+ expected_request_payload_hash(get_v4_exp_payload_hash(s->info)),
+ sha256_hash(calc_hash_sha256_open_stream()) {
+}
+/*
+ * Factory of the single-chunk completer. The optional secret key is
+ * accepted but ignored.
+ */
+rgw::auth::Completer::cmplptr_t
+AWSv4ComplSingle::create(const req_state* const s,
+ const boost::optional<std::string>&)
+{
+ return std::make_shared<AWSv4ComplSingle>(s);
+}
+
+} /* namespace s3 */
+} /* namespace auth */
+} /* namespace rgw */
diff --git a/src/rgw/rgw_auth_s3.h b/src/rgw/rgw_auth_s3.h
new file mode 100644
index 00000000..519f8395
--- /dev/null
+++ b/src/rgw/rgw_auth_s3.h
@@ -0,0 +1,615 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_AUTH_S3_H
+#define CEPH_RGW_AUTH_S3_H
+
+#include <array>
+#include <memory>
+#include <string>
+#include <tuple>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/container/static_vector.hpp>
+#include <boost/utility/string_ref.hpp>
+#include <boost/utility/string_view.hpp>
+
+#include "common/sstring.hh"
+#include "rgw_common.h"
+#include "rgw_rest_s3.h"
+#include "rgw_auth.h"
+#include "rgw_auth_filters.h"
+#include "rgw_auth_keystone.h"
+
+
+namespace rgw {
+namespace auth {
+namespace s3 {
+
+static constexpr auto RGW_AUTH_GRACE = std::chrono::minutes{15};
+
+// returns true if the request time is within RGW_AUTH_GRACE of the current time
+bool is_time_skew_ok(time_t t);
+/*
+ * Authentication strategy wrapping the STSEngine.
+ *
+ * The class also serves as the factory of remote, local and role appliers
+ * handed to the engine; every applier it creates is wrapped with
+ * rgw::auth::add_sysreq(). The engine is registered, as SUFFICIENT, only
+ * when the rgw_s3_auth_use_sts option is set.
+ */
+class STSAuthStrategy : public rgw::auth::Strategy,
+ public rgw::auth::RemoteApplier::Factory,
+ public rgw::auth::LocalApplier::Factory,
+ public rgw::auth::RoleApplier::Factory {
+ typedef rgw::auth::IdentityApplier::aplptr_t aplptr_t;
+ RGWRados* const store;
+ rgw::auth::ImplicitTenants& implicit_tenant_context;
+
+ STSEngine sts_engine;
+
+ aplptr_t create_apl_remote(CephContext* const cct,
+ const req_state* const s,
+ rgw::auth::RemoteApplier::acl_strategy_t&& acl_alg,
+ const rgw::auth::RemoteApplier::AuthInfo &info
+ ) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::RemoteApplier(cct, store, std::move(acl_alg), info,
+ implicit_tenant_context,
+ rgw::auth::ImplicitTenants::IMPLICIT_TENANTS_S3));
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+ aplptr_t create_apl_local(CephContext* const cct,
+ const req_state* const s,
+ const RGWUserInfo& user_info,
+ const std::string& subuser,
+ const boost::optional<uint32_t>& perm_mask) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::LocalApplier(cct, user_info, subuser, perm_mask));
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+ aplptr_t create_apl_role(CephContext* const cct,
+ const req_state* const s,
+ const string& role_name,
+ const rgw_user& user_id,
+ const vector<std::string>& role_policies) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::RoleApplier(cct, role_name, user_id, role_policies));
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+public:
+ STSAuthStrategy(CephContext* const cct,
+ RGWRados* const store,
+ rgw::auth::ImplicitTenants& implicit_tenant_context,
+ AWSEngine::VersionAbstractor* const ver_abstractor)
+ : store(store),
+ implicit_tenant_context(implicit_tenant_context),
+ sts_engine(cct, store, *ver_abstractor,
+ static_cast<rgw::auth::LocalApplier::Factory*>(this),
+ static_cast<rgw::auth::RemoteApplier::Factory*>(this),
+ static_cast<rgw::auth::RoleApplier::Factory*>(this)) {
+ if (cct->_conf->rgw_s3_auth_use_sts) {
+ add_engine(Control::SUFFICIENT, sts_engine);
+ }
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::STSAuthStrategy";
+ }
+};
+/*
+ * Strategy grouping the engines that authenticate against external
+ * services: Keystone's EC2Engine and the LDAPEngine.
+ *
+ * The Keystone engine is constructed and registered only when
+ * rgw_s3_auth_use_keystone is set and rgw_keystone_url isn't empty. The
+ * LDAP engine is registered only when ldap_engine.valid() holds. Both are
+ * added as SUFFICIENT. The class is also the factory of the remote
+ * appliers used by these engines.
+ *
+ * NOTE(review): get_name() reports "AWSv2ExternalAuthStrategy" while the
+ * class is named ExternalAuthStrategy -- confirm whether that's intended.
+ */
+class ExternalAuthStrategy : public rgw::auth::Strategy,
+ public rgw::auth::RemoteApplier::Factory {
+ typedef rgw::auth::IdentityApplier::aplptr_t aplptr_t;
+ RGWRados* const store;
+ rgw::auth::ImplicitTenants& implicit_tenant_context;
+
+ using keystone_config_t = rgw::keystone::CephCtxConfig;
+ using keystone_cache_t = rgw::keystone::TokenCache;
+ using EC2Engine = rgw::auth::keystone::EC2Engine;
+
+ boost::optional <EC2Engine> keystone_engine;
+ LDAPEngine ldap_engine;
+
+ aplptr_t create_apl_remote(CephContext* const cct,
+ const req_state* const s,
+ rgw::auth::RemoteApplier::acl_strategy_t&& acl_alg,
+ const rgw::auth::RemoteApplier::AuthInfo &info
+ ) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::RemoteApplier(cct, store, std::move(acl_alg), info,
+ implicit_tenant_context,
+ rgw::auth::ImplicitTenants::IMPLICIT_TENANTS_S3));
+ /* TODO(rzarzynski): replace with static_ptr. */
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+public:
+ ExternalAuthStrategy(CephContext* const cct,
+ RGWRados* const store,
+ rgw::auth::ImplicitTenants& implicit_tenant_context,
+ AWSEngine::VersionAbstractor* const ver_abstractor)
+ : store(store),
+ implicit_tenant_context(implicit_tenant_context),
+ ldap_engine(cct, store, *ver_abstractor,
+ static_cast<rgw::auth::RemoteApplier::Factory*>(this)) {
+
+ if (cct->_conf->rgw_s3_auth_use_keystone &&
+ ! cct->_conf->rgw_keystone_url.empty()) {
+
+ keystone_engine.emplace(cct, ver_abstractor,
+ static_cast<rgw::auth::RemoteApplier::Factory*>(this),
+ keystone_config_t::get_instance(),
+ keystone_cache_t::get_instance<keystone_config_t>());
+ add_engine(Control::SUFFICIENT, *keystone_engine);
+
+ }
+
+ if (ldap_engine.valid()) {
+ add_engine(Control::SUFFICIENT, ldap_engine);
+ }
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::AWSv2ExternalAuthStrategy";
+ }
+};
+
+/*
+ * Top-level S3 authentication strategy.
+ *
+ * AbstractorT is the signature-version abstractor (it must derive from
+ * AWSEngine::VersionAbstractor); AllowAnonAccessT decides whether the
+ * anonymous engine is registered, which happens before anything else.
+ *
+ * The remaining engines -- "sts", "external" and "local" -- are registered
+ * in the order given by the rgw_s3_auth_order option:
+ *  - parse_auth_order() splits the option on ',' and ' ' and falls back to
+ *    the default order { sts, external, local } as soon as one token isn't
+ *    one of these three names;
+ *  - only engines that are available take part: sts and external when the
+ *    respective strategy isn't empty, local when rgw_s3_auth_use_rados is
+ *    set;
+ *  - add_engines() registers them as SUFFICIENT, except that the entry
+ *    listed last is registered as FALLBACK when more than one engine is
+ *    available.
+ */
+template <class AbstractorT,
+ bool AllowAnonAccessT = false>
+class AWSAuthStrategy : public rgw::auth::Strategy,
+ public rgw::auth::LocalApplier::Factory {
+ typedef rgw::auth::IdentityApplier::aplptr_t aplptr_t;
+
+ static_assert(std::is_base_of<rgw::auth::s3::AWSEngine::VersionAbstractor,
+ AbstractorT>::value,
+ "AbstractorT must be a subclass of rgw::auth::s3::VersionAbstractor");
+
+ RGWRados* const store;
+ AbstractorT ver_abstractor;
+
+ S3AnonymousEngine anonymous_engine;
+ ExternalAuthStrategy external_engines;
+ STSAuthStrategy sts_engine;
+ LocalEngine local_engine;
+
+ aplptr_t create_apl_local(CephContext* const cct,
+ const req_state* const s,
+ const RGWUserInfo& user_info,
+ const std::string& subuser,
+ const boost::optional<uint32_t>& perm_mask) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::LocalApplier(cct, user_info, subuser, perm_mask));
+ /* TODO(rzarzynski): replace with static_ptr. */
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+public:
+ using engine_map_t = std::map <std::string, std::reference_wrapper<const Engine>>;
+ void add_engines(const std::vector <std::string>& auth_order,
+ engine_map_t eng_map)
+ {
+ auto ctrl_flag = Control::SUFFICIENT;
+ for (const auto &eng : auth_order) {
+ // fallback to the last engine, in case of multiple engines, since ctrl
+ // flag is sufficient for others, error from earlier engine is returned
+ if (&eng == &auth_order.back() && eng_map.size() > 1) {
+ ctrl_flag = Control::FALLBACK;
+ }
+ if (const auto kv = eng_map.find(eng);
+ kv != eng_map.end()) {
+ add_engine(ctrl_flag, kv->second);
+ }
+ }
+ }
+
+ auto parse_auth_order(CephContext* const cct)
+ {
+ std::vector <std::string> result;
+
+ const std::set <std::string_view> allowed_auth = { "sts", "external", "local" };
+ std::vector <std::string> default_order = { "sts", "external", "local" };
+ // supplied strings may contain a space, so let's bypass that
+ boost::split(result, cct->_conf->rgw_s3_auth_order,
+ boost::is_any_of(", "), boost::token_compress_on);
+
+ if (std::any_of(result.begin(), result.end(),
+ [allowed_auth](std::string_view s)
+ { return allowed_auth.find(s) == allowed_auth.end();})){
+ return default_order;
+ }
+ return result;
+ }
+
+ AWSAuthStrategy(CephContext* const cct,
+ rgw::auth::ImplicitTenants& implicit_tenant_context,
+ RGWRados* const store)
+ : store(store),
+ ver_abstractor(cct),
+ anonymous_engine(cct,
+ static_cast<rgw::auth::LocalApplier::Factory*>(this)),
+ external_engines(cct, store, implicit_tenant_context, &ver_abstractor),
+ sts_engine(cct, store, implicit_tenant_context, &ver_abstractor),
+ local_engine(cct, store, ver_abstractor,
+ static_cast<rgw::auth::LocalApplier::Factory*>(this)) {
+ /* The anonymous auth. */
+ if (AllowAnonAccessT) {
+ add_engine(Control::SUFFICIENT, anonymous_engine);
+ }
+
+ auto auth_order = parse_auth_order(cct);
+ engine_map_t engine_map;
+
+ /* STS Auth*/
+ if (! sts_engine.is_empty()) {
+ engine_map.insert(std::make_pair("sts", std::cref(sts_engine)));
+ }
+
+ /* The external auth. */
+ if (! external_engines.is_empty()) {
+ engine_map.insert(std::make_pair("external", std::cref(external_engines)));
+ }
+ /* The local auth. */
+ if (cct->_conf->rgw_s3_auth_use_rados) {
+ engine_map.insert(std::make_pair("local", std::cref(local_engine)));
+ }
+
+ add_engines(auth_order, engine_map);
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::AWSAuthStrategy";
+ }
+};
+
+/*
+ * Completer for AWSv4 uploads whose body is transferred as a sequence of
+ * individually signed chunks.
+ *
+ * The object is installed as a filter over the request's RestfulClient:
+ * recv_body() strips the per-chunk metadata, hashes the payload and
+ * verifies the signature of each chunk once the metadata of the following
+ * one has arrived; complete() verifies the last, zero-length chunk.
+ *
+ * date and credential_scope are kept as string_views, so the memory they
+ * refer to has to outlive the object. sha256_hash is a raw pointer that is
+ * closed in the destructor.
+ */
+class AWSv4ComplMulti : public rgw::auth::Completer,
+ public rgw::io::DecoratedRestfulClient<rgw::io::RestfulClient*>,
+ public std::enable_shared_from_this<AWSv4ComplMulti> {
+ using io_base_t = rgw::io::DecoratedRestfulClient<rgw::io::RestfulClient*>;
+ using signing_key_t = sha256_digest_t;
+
+ CephContext* const cct;
+
+ const boost::string_view date;
+ const boost::string_view credential_scope;
+ const signing_key_t signing_key;
+
+ class ChunkMeta {
+ size_t data_offset_in_stream = 0;
+ size_t data_length = 0;
+ std::string signature;
+
+ ChunkMeta(const size_t data_starts_in_stream,
+ const size_t data_length,
+ const boost::string_ref signature)
+ : data_offset_in_stream(data_starts_in_stream),
+ data_length(data_length),
+ signature(signature.to_string()) {
+ }
+
+ explicit ChunkMeta(const boost::string_view& signature)
+ : signature(signature.to_string()) {
+ }
+
+ public:
+ static constexpr size_t SIG_SIZE = 64;
+
+ /* Let's suppose the data length fields can't exceed uint64_t. */
+ static constexpr size_t META_MAX_SIZE = \
+ sarrlen("\r\nffffffffffffffff;chunk-signature=") + SIG_SIZE + sarrlen("\r\n");
+
+ /* The metadata size for the last, empty chunk. */
+ static constexpr size_t META_MIN_SIZE = \
+ sarrlen("0;chunk-signature=") + SIG_SIZE + sarrlen("\r\n");
+
+ /* Detect whether a given stream_pos fits in boundaries of a chunk. */
+ bool is_new_chunk_in_stream(size_t stream_pos) const;
+
+ /* Get the remaining data size. */
+ size_t get_data_size(size_t stream_pos) const;
+
+ const std::string& get_signature() const {
+ return signature;
+ }
+
+ /* Factory: create an object representing metadata of first, initial chunk
+ * in a stream. */
+ static ChunkMeta create_first(const boost::string_view& seed_signature) {
+ return ChunkMeta(seed_signature);
+ }
+
+ /* Factory: parse a block of META_MAX_SIZE bytes and creates an object
+ * representing non-first chunk in a stream. As the process is sequential
+ * and depends on the previous chunk, caller must pass it. */
+ static std::pair<ChunkMeta, size_t> create_next(CephContext* cct,
+ ChunkMeta&& prev,
+ const char* metabuf,
+ size_t metabuf_len);
+ } chunk_meta;
+
+ size_t stream_pos;
+ boost::container::static_vector<char, ChunkMeta::META_MAX_SIZE> parsing_buf;
+ ceph::crypto::SHA256* sha256_hash;
+ std::string prev_chunk_signature;
+
+ bool is_signature_mismatched();
+ std::string calc_chunk_signature(const std::string& payload_hash) const;
+
+public:
+ /* We need the constructor to be public because of the std::make_shared that
+ * is employed by the create() method. */
+ AWSv4ComplMulti(const req_state* const s,
+ boost::string_view date,
+ boost::string_view credential_scope,
+ boost::string_view seed_signature,
+ const signing_key_t& signing_key)
+ : io_base_t(nullptr),
+ cct(s->cct),
+ date(std::move(date)),
+ credential_scope(std::move(credential_scope)),
+ signing_key(signing_key),
+
+ /* The evolving state. */
+ chunk_meta(ChunkMeta::create_first(seed_signature)),
+ stream_pos(0),
+ sha256_hash(calc_hash_sha256_open_stream()),
+ prev_chunk_signature(std::move(seed_signature)) {
+ }
+
+ ~AWSv4ComplMulti() {
+ if (sha256_hash) {
+ calc_hash_sha256_close_stream(&sha256_hash);
+ }
+ }
+
+ /* rgw::io::DecoratedRestfulClient. */
+ size_t recv_body(char* buf, size_t max) override;
+
+ /* rgw::auth::Completer. */
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s_rw) override;
+ bool complete() override;
+
+ /* Factories. */
+ static cmplptr_t create(const req_state* s,
+ boost::string_view date,
+ boost::string_view credential_scope,
+ boost::string_view seed_signature,
+ const boost::optional<std::string>& secret_key);
+
+};
+/*
+ * Completer for AWSv4 requests whose signed payload arrives as a single
+ * body.
+ *
+ * Installed as a filter over the request's RestfulClient, it hashes
+ * everything passing through recv_body(); complete() compares the result
+ * with expected_request_payload_hash. That member is a non-owning pointer,
+ * and sha256_hash is a raw pointer closed in the destructor unless already
+ * reset.
+ */
+class AWSv4ComplSingle : public rgw::auth::Completer,
+ public rgw::io::DecoratedRestfulClient<rgw::io::RestfulClient*>,
+ public std::enable_shared_from_this<AWSv4ComplSingle> {
+ using io_base_t = rgw::io::DecoratedRestfulClient<rgw::io::RestfulClient*>;
+
+ CephContext* const cct;
+ const char* const expected_request_payload_hash;
+ ceph::crypto::SHA256* sha256_hash = nullptr;
+
+public:
+ /* Defined in rgw_auth_s3.cc because of get_v4_exp_payload_hash(). We need
+ * the constructor to be public because of the std::make_shared employed by
+ * the create() method. */
+ explicit AWSv4ComplSingle(const req_state* const s);
+
+ ~AWSv4ComplSingle() {
+ if (sha256_hash) {
+ calc_hash_sha256_close_stream(&sha256_hash);
+ }
+ }
+
+ /* rgw::io::DecoratedRestfulClient. */
+ size_t recv_body(char* buf, size_t max) override;
+
+ /* rgw::auth::Completer. */
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s_rw) override;
+ bool complete() override;
+
+ /* Factories. */
+ static cmplptr_t create(const req_state* s,
+ const boost::optional<std::string>&);
+
+};
+
+} /* namespace s3 */
+} /* namespace auth */
+} /* namespace rgw */
+
+void rgw_create_s3_canonical_header(
+ const char *method,
+ const char *content_md5,
+ const char *content_type,
+ const char *date,
+ const meta_map_t& meta_map,
+ const meta_map_t& qs_map,
+ const char *request_uri,
+ const std::map<std::string, std::string>& sub_resources,
+ std::string& dest_str);
+bool rgw_create_s3_canonical_header(const req_info& info,
+ utime_t *header_time, /* out */
+ std::string& dest, /* out */
+ bool qsr);
+/*
+ * Convenience overload that returns its results instead of using output
+ * parameters. The tuple holds, in this order: the success flag of the
+ * underlying call, the canonical header and the header time.
+ */
+static inline std::tuple<bool, std::string, utime_t>
+rgw_create_s3_canonical_header(const req_info& info, const bool qsr) {
+  std::string dest;
+  utime_t header_time;
+
+  const bool ok = rgw_create_s3_canonical_header(info, &header_time, dest, qsr);
+
+  /* dest isn't needed anymore, so its buffer is handed over to the tuple
+   * instead of being copied. */
+  return std::make_tuple(ok, std::move(dest), header_time);
+}
+
+namespace rgw {
+namespace auth {
+namespace s3 {
+/*
+ * String literals of the AWS signature version 4 scheme:
+ *  - AWS4_HMAC_SHA256_STR / AWS4_HMAC_SHA256_PAYLOAD_STR: algorithm names;
+ *    the latter opens the string to sign of every chunk of a streamed
+ *    upload;
+ *  - AWS4_EMPTY_PAYLOAD_HASH: hex SHA-256 of an empty payload;
+ *  - AWS4_UNSIGNED_PAYLOAD_HASH: payload hash literal used when no
+ *    x-amz-content-sha256 has been supplied (see get_v4_exp_payload_hash());
+ *  - AWS4_STREAMING_PAYLOAD_HASH: payload hash literal announcing a
+ *    streamed upload (see is_v4_payload_streamed()).
+ */
+static constexpr char AWS4_HMAC_SHA256_STR[] = "AWS4-HMAC-SHA256";
+static constexpr char AWS4_HMAC_SHA256_PAYLOAD_STR[] = "AWS4-HMAC-SHA256-PAYLOAD";
+
+static constexpr char AWS4_EMPTY_PAYLOAD_HASH[] = \
+ "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
+
+static constexpr char AWS4_UNSIGNED_PAYLOAD_HASH[] = "UNSIGNED-PAYLOAD";
+
+static constexpr char AWS4_STREAMING_PAYLOAD_HASH[] = \
+ "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
+
+int parse_v4_credentials(const req_info& info, /* in */
+ boost::string_view& access_key_id, /* out */
+ boost::string_view& credential_scope, /* out */
+ boost::string_view& signedheaders, /* out */
+ boost::string_view& signature, /* out */
+ boost::string_view& date, /* out */
+ boost::string_view& session_token, /* out */
+ const bool using_qs); /* in */
+
+/*
+ * Tell whether c has to be percent-escaped in an AWSv4 canonical URI.
+ *
+ * Never escaped: ASCII letters, digits and the characters '-', '_', '.'
+ * and '~'. The slash is escaped only when encode_slash is set. Everything
+ * else is always escaped.
+ */
+static inline bool char_needs_aws4_escaping(const char c, bool encode_slash)
+{
+  const bool is_lower = (c >= 'a' && c <= 'z');
+  const bool is_upper = (c >= 'A' && c <= 'Z');
+  const bool is_digit = (c >= '0' && c <= '9');
+  if (is_lower || is_upper || is_digit) {
+    return false;
+  }
+
+  if (c == '-' || c == '_' || c == '.' || c == '~') {
+    return false;
+  }
+
+  /* Only the slash is left as a candidate for being passed verbatim. */
+  return c != '/' || encode_slash;
+}
+/*
+ * Return a copy of src in which every character selected by
+ * char_needs_aws4_escaping(c, encode_slash) has been replaced by the output
+ * of rgw_uri_escape_char(); all other characters are copied verbatim.
+ */
+static inline std::string aws4_uri_encode(const std::string& src, bool encode_slash)
+{
+ std::string result;
+
+ for (const std::string::value_type c : src) {
+ if (char_needs_aws4_escaping(c, encode_slash)) {
+ rgw_uri_escape_char(c, result);
+ } else {
+ result.push_back(c);
+ }
+ }
+
+ return result;
+}
+/*
+ * Normalize the escaping of src: the text is decoded with url_decode()
+ * first and encoded again with aws4_uri_encode() afterwards.
+ */
+static inline std::string aws4_uri_recode(const boost::string_view& src, bool encode_slash)
+{
+ std::string decoded = url_decode(src);
+ return aws4_uri_encode(decoded, encode_slash);
+}
+/*
+ * Return the canonical URI of the request: info.request_uri_aws4 run
+ * through aws4_uri_recode() with slashes left alone. An empty result is
+ * turned into "/"; otherwise every '+' that is still present is replaced
+ * by "%20".
+ */
+static inline std::string get_v4_canonical_uri(const req_info& info) {
+ /* The code should normalize according to RFC 3986 but S3 does NOT do path
+ * normalization that SigV4 typically does. This code follows the same
+ * approach as the boto library. See auth.py:canonical_uri(...). */
+
+ std::string canonical_uri = aws4_uri_recode(info.request_uri_aws4, false);
+
+ if (canonical_uri.empty()) {
+ canonical_uri = "/";
+ } else {
+ boost::replace_all(canonical_uri, "+", "%20");
+ }
+
+ return canonical_uri;
+}
+/*
+ * Hash the whole payload in one go: a SHA-256 stream is opened, updated
+ * with the payload and closed; the value produced by closing the stream is
+ * returned.
+ */
+static inline const string calc_v4_payload_hash(const string& payload)
+{
+ ceph::crypto::SHA256* sha256_hash = calc_hash_sha256_open_stream();
+ calc_hash_sha256_update_stream(sha256_hash, payload.c_str(), payload.length());
+ const auto payload_hash = calc_hash_sha256_close_stream(&sha256_hash);
+ return payload_hash;
+}
+/*
+ * Return the payload hash the client declared for the request, i.e. the
+ * value of the HTTP_X_AMZ_CONTENT_SHA256 environment entry, or
+ * AWS4_UNSIGNED_PAYLOAD_HASH when the entry is absent. The returned
+ * pointer is not owned by the caller.
+ */
+static inline const char* get_v4_exp_payload_hash(const req_info& info)
+{
+ /* In AWSv4 the hash of real, transferred payload IS NOT necessary to form
+ * a Canonical Request, and thus verify a Signature. x-amz-content-sha256
+ * header lets get the information very early -- before seeing first byte
+ * of HTTP body. As a consequence, we can decouple Signature verification
+ * from payload's fingerprint check. */
+ const char *expected_request_payload_hash = \
+ info.env->get("HTTP_X_AMZ_CONTENT_SHA256");
+
+ if (!expected_request_payload_hash) {
+ /* An HTTP client MUST send x-amz-content-sha256. The single exception
+ * is the case of using the Query Parameters where "UNSIGNED-PAYLOAD"
+ * literals are used for crafting Canonical Request:
+ *
+ * You don't include a payload hash in the Canonical Request, because
+ * when you create a presigned URL, you don't know the payload content
+ * because the URL is used to upload an arbitrary payload. Instead, you
+ * use a constant string UNSIGNED-PAYLOAD. */
+ expected_request_payload_hash = AWS4_UNSIGNED_PAYLOAD_HASH;
+ }
+
+ return expected_request_payload_hash;
+}
+/* True when exp_payload_hash equals the AWS4_UNSIGNED_PAYLOAD_HASH literal.
+ */
+static inline bool is_v4_payload_unsigned(const char* const exp_payload_hash)
+{
+ return boost::equals(exp_payload_hash, AWS4_UNSIGNED_PAYLOAD_HASH);
+}
+/* True when the request carries no body: its content_length is 0 and there
+ * is no HTTP_TRANSFER_ENCODING entry in the request environment. */
+static inline bool is_v4_payload_empty(const req_state* const s)
+{
+ /* from rfc2616 - 4.3 Message Body
+ *
+ * "The presence of a message-body in a request is signaled by the inclusion
+ * of a Content-Length or Transfer-Encoding header field in the request's
+ * message-headers." */
+ return s->content_length == 0 &&
+ s->info.env->get("HTTP_TRANSFER_ENCODING") == nullptr;
+}
+/* True when exp_payload_hash equals the AWS4_STREAMING_PAYLOAD_HASH
+ * literal. */
+static inline bool is_v4_payload_streamed(const char* const exp_payload_hash)
+{
+ return boost::equals(exp_payload_hash, AWS4_STREAMING_PAYLOAD_HASH);
+}
+
+std::string get_v4_canonical_qs(const req_info& info, bool using_qs);
+
+boost::optional<std::string>
+get_v4_canonical_headers(const req_info& info,
+ const boost::string_view& signedheaders,
+ bool using_qs,
+ bool force_boto2_compat);
+
+extern sha256_digest_t
+get_v4_canon_req_hash(CephContext* cct,
+ const boost::string_view& http_verb,
+ const std::string& canonical_uri,
+ const std::string& canonical_qs,
+ const std::string& canonical_hdrs,
+ const boost::string_view& signed_hdrs,
+ const boost::string_view& request_payload_hash);
+
+AWSEngine::VersionAbstractor::string_to_sign_t
+get_v4_string_to_sign(CephContext* cct,
+ const boost::string_view& algorithm,
+ const boost::string_view& request_date,
+ const boost::string_view& credential_scope,
+ const sha256_digest_t& canonreq_hash);
+
+extern AWSEngine::VersionAbstractor::server_signature_t
+get_v4_signature(const boost::string_view& credential_scope,
+ CephContext* const cct,
+ const boost::string_view& secret_key,
+ const AWSEngine::VersionAbstractor::string_to_sign_t& string_to_sign);
+
+extern AWSEngine::VersionAbstractor::server_signature_t
+get_v2_signature(CephContext*,
+ const std::string& secret_key,
+ const AWSEngine::VersionAbstractor::string_to_sign_t& string_to_sign);
+} /* namespace s3 */
+} /* namespace auth */
+} /* namespace rgw */
+
+#endif
diff --git a/src/rgw/rgw_b64.h b/src/rgw/rgw_b64.h
new file mode 100644
index 00000000..c4ad9880
--- /dev/null
+++ b/src/rgw/rgw_b64.h
@@ -0,0 +1,87 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_B64_H
+#define RGW_B64_H
+
+#include <boost/utility/string_ref.hpp>
+#include <boost/utility/string_view.hpp>
+#include <boost/archive/iterators/base64_from_binary.hpp>
+#include <boost/archive/iterators/binary_from_base64.hpp>
+#include <boost/archive/iterators/insert_linebreaks.hpp>
+#include <boost/archive/iterators/transform_width.hpp>
+#include <boost/archive/iterators/remove_whitespace.hpp>
+#include <limits>
+
+namespace rgw {
+
+ /*
+ * A header-only Base64 encoder built on boost::archive. The
+ * formula is based on a class proposed for inclusion in boost in
+ * 2011 by Denis Shevchenko (abandoned), updated slightly
+ * (e.g., uses boost::string_view).
+ *
+ * Also, wrap_width added as template argument, based on
+ * feedback from Marcus.
+ */
+
+ /*
+  * Encode the bytes of sview as Base64 text.
+  *
+  * wrap_width is handed to boost's insert_linebreaks adaptor; the default
+  * (INT_MAX) effectively disables wrapping.  RFC 2045 asks for a line break
+  * at least every 76 characters (MIME-compliance), which callers can request
+  * through the template argument.
+  *
+  * The boost iterators emit only the data characters, so the '=' padding is
+  * appended here: one '=' for every input byte missing from the final
+  * 3-byte group (0, 1 or 2 characters).
+  */
+ template<int wrap_width = std::numeric_limits<int>::max()>
+ inline std::string to_base64(boost::string_view sview)
+ {
+   namespace bai = boost::archive::iterators;
+
+   using encoder =
+     bai::insert_linebreaks<
+       bai::base64_from_binary<
+         bai::transform_width<boost::string_view::const_iterator, 6, 8>
+       >,
+       wrap_width
+     >;
+
+   const char* const first = sview.data();
+   const char* const last = first + sview.size();
+   std::string encoded(encoder(first), encoder(last));
+
+   // number of bytes needed to round the input up to a multiple of 3
+   const size_t pad_count = (3 - (sview.size() % 3)) % 3;
+   encoded.append(pad_count, '=');
+
+   return encoded;
+ }
+
+ /*
+  * Decode Base64 text into the bytes it represents.
+  *
+  * Trailing '=' padding is stripped before decoding because the boost
+  * binary_from_base64 adaptor only understands data characters.  Whitespace
+  * inside the input is skipped (MIME-compliant input carries line breaks).
+  *
+  * Returns an empty string when the input is empty or consists solely of
+  * '=' characters.
+  */
+ inline std::string from_base64(boost::string_view sview)
+ {
+   using namespace boost::archive::iterators;
+
+   /* MIME-compliant input will have line-breaks, so we have to
+    * filter WS */
+   typedef
+     transform_width<
+       binary_from_base64<
+         remove_whitespace<
+           boost::string_view::const_iterator>>
+       ,8,6
+     > b64_iter;
+
+   // Re-check emptiness on every pass: back() on an empty view is undefined,
+   // and input such as "=" or "==" shrinks to nothing while being stripped.
+   while (!sview.empty() && sview.back() == '=')
+     sview.remove_suffix(1);
+
+   if (sview.empty())
+     return std::string();
+
+   std::string outstr(b64_iter(sview.data()),
+                      b64_iter(sview.data() + sview.size()));
+
+   return outstr;
+ }
+} /* namespace */
+
+#endif /* RGW_B64_H */
diff --git a/src/rgw/rgw_basic_types.cc b/src/rgw/rgw_basic_types.cc
new file mode 100644
index 00000000..b1db690b
--- /dev/null
+++ b/src/rgw/rgw_basic_types.cc
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include "rgw_basic_types.h"
+#include "rgw_xml.h"
+#include "common/ceph_json.h"
+
+using std::string;
+using std::stringstream;
+
+// Fill val from a JSON node by handing the node's data string to
+// rgw_user::from_str().
+void decode_json_obj(rgw_user& val, JSONObj *obj)
+{
+  const auto& text = obj->get_data();
+  val.from_str(text);
+}
+
+// Emit val under the given name as a single string, using the
+// rgw_user::to_str() representation.
+void encode_json(const char *name, const rgw_user& val, Formatter *f)
+{
+  const std::string text = val.to_str();
+  f->dump_string(name, text);
+}
+
+// Emit val under the given name by forwarding its rgw_user::to_str()
+// representation to the string flavour of encode_xml().
+void encode_xml(const char *name, const rgw_user& val, Formatter *f)
+{
+  const std::string text = val.to_str();
+  encode_xml(name, text, f);
+}
+
+namespace rgw {
+namespace auth {
+// Print a Principal: "*" for the wildcard, otherwise
+// "arn:aws:iam:<tenant>:" followed by "root" for a tenant principal,
+// "user/<id>" for a user, and "role/<id>" for every remaining kind.
+ostream& operator <<(ostream& m, const Principal& p) {
+  if (p.is_wildcard()) {
+    m << "*";
+    return m;
+  }
+
+  m << "arn:aws:iam:" << p.get_tenant() << ":";
+
+  if (p.is_tenant()) {
+    m << "root";
+  } else {
+    const char* const kind = p.is_user() ? "user/" : "role/";
+    m << kind << p.get_id();
+  }
+  return m;
+}
+}
+}
diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h
new file mode 100644
index 00000000..c8d3abb7
--- /dev/null
+++ b/src/rgw/rgw_basic_types.h
@@ -0,0 +1,213 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_BASIC_TYPES_H
+#define CEPH_RGW_BASIC_TYPES_H
+
+#include <string>
+
+#include "include/types.h"
+
+// A user identity: an optional tenant plus an id.  The textual form is
+// "<tenant>$<id>" when a tenant is set and just "<id>" otherwise.
+struct rgw_user {
+  std::string tenant;
+  std::string id;
+
+  rgw_user() {}
+  // Implicit on purpose: lets a plain string be used where an rgw_user is
+  // expected.
+  // cppcheck-suppress noExplicitConstructor
+  rgw_user(const std::string& s) {
+    from_str(s);
+  }
+  rgw_user(const std::string& tenant, const std::string& id)
+    : tenant(tenant), id(id) {}
+  rgw_user(std::string&& tenant, std::string&& id)
+    : tenant(std::move(tenant)), id(std::move(id)) {}
+
+  // Serialise tenant then id (encoding version 1).
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(tenant, bl);
+    encode(id, bl);
+    ENCODE_FINISH(bl);
+  }
+  // Inverse of encode(): reads tenant then id.
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(tenant, bl);
+    decode(id, bl);
+    DECODE_FINISH(bl);
+  }
+
+  // Write the textual form into str, replacing its previous contents.
+  void to_str(std::string& str) const {
+    if (tenant.empty()) {
+      str = id;
+      return;
+    }
+    str = tenant + '$' + id;
+  }
+
+  void clear() {
+    tenant.clear();
+    id.clear();
+  }
+
+  // Only the id decides emptiness; the tenant is not consulted.
+  bool empty() const {
+    return id.empty();
+  }
+
+  // Return the textual form by value.
+  string to_str() const {
+    string out;
+    to_str(out);
+    return out;
+  }
+
+  // Parse the textual form: everything before the first '$' is the tenant,
+  // the rest is the id.  Without a '$' the tenant is cleared.
+  void from_str(const std::string& str) {
+    const size_t sep = str.find('$');
+    if (sep == std::string::npos) {
+      tenant.clear();
+      id = str;
+      return;
+    }
+    tenant = str.substr(0, sep);
+    id = str.substr(sep + 1);
+  }
+
+  rgw_user& operator=(const string& str) {
+    from_str(str);
+    return *this;
+  }
+
+  // Three-way comparison ordered by tenant first, then id.
+  int compare(const rgw_user& u) const {
+    const int by_tenant = tenant.compare(u.tenant);
+    return (by_tenant != 0) ? by_tenant : id.compare(u.id);
+  }
+  int compare(const string& str) const {
+    return compare(rgw_user(str));
+  }
+
+  bool operator!=(const rgw_user& rhs) const {
+    return !(compare(rhs) == 0);
+  }
+  bool operator==(const rgw_user& rhs) const {
+    return compare(rhs) == 0;
+  }
+  // Same ordering as compare(): tenant first, then id.
+  bool operator<(const rgw_user& rhs) const {
+    return compare(rhs) < 0;
+  }
+};
+WRITE_CLASS_ENCODER(rgw_user)
+
+// Represents an identity. This is more wide-ranging than a
+// 'User'. Its purpose is to be matched against by an
+// IdentityApplier. The internal representation will doubtless change as
+// more types are added. We may want to expose the type enum and make
+// the member public so people can switch/case on it.
+
+namespace rgw {
+namespace auth {
+// An identity that a policy can name: a user, a role, a whole tenant, the
+// wildcard, or an OIDC provider.  Users, roles and tenants are described by
+// the embedded rgw_user (tenant + id); an OIDC provider is described by its
+// URL only.  Instances are created through the static factory functions.
+class Principal {
+  enum types { User, Role, Tenant, Wildcard, OidcProvider };
+  types t;
+  rgw_user u;
+  string idp_url;
+
+  explicit Principal(types t)
+    : t(t) {}
+
+  Principal(types t, std::string&& n, std::string i)
+    : t(t), u(std::move(n), std::move(i)) {}
+
+  // explicit: a bare string must never silently turn into an OIDC principal
+  explicit Principal(string&& idp_url)
+    : t(OidcProvider), idp_url(std::move(idp_url)) {}
+
+public:
+
+  static Principal wildcard() {
+    return Principal(Wildcard);
+  }
+
+  static Principal user(std::string&& t, std::string&& u) {
+    return Principal(User, std::move(t), std::move(u));
+  }
+
+  static Principal role(std::string&& t, std::string&& u) {
+    return Principal(Role, std::move(t), std::move(u));
+  }
+
+  static Principal tenant(std::string&& t) {
+    return Principal(Tenant, std::move(t), {});
+  }
+
+  static Principal oidc_provider(string&& idp_url) {
+    return Principal(std::move(idp_url));
+  }
+
+  bool is_wildcard() const {
+    return t == Wildcard;
+  }
+
+  bool is_user() const {
+    return t == User;
+  }
+
+  bool is_role() const {
+    return t == Role;
+  }
+
+  bool is_tenant() const {
+    return t == Tenant;
+  }
+
+  bool is_oidc_provider() const {
+    return t == OidcProvider;
+  }
+
+  const std::string& get_tenant() const {
+    return u.tenant;
+  }
+
+  const std::string& get_id() const {
+    return u.id;
+  }
+
+  const string& get_idp_url() const {
+    return idp_url;
+  }
+
+  // Equality covers every identifying member.  idp_url has to take part:
+  // an OIDC provider leaves `u` empty, so without it all OIDC principals
+  // would compare equal.  For the other kinds idp_url is empty on both
+  // sides and the result is unchanged.
+  bool operator ==(const Principal& o) const {
+    return (t == o.t) && (u == o.u) && (idp_url == o.idp_url);
+  }
+
+  // Strict weak ordering over (type, user, idp_url), consistent with
+  // operator== so ordered containers keep distinct OIDC providers apart.
+  bool operator <(const Principal& o) const {
+    if (t != o.t) {
+      return t < o.t;
+    }
+    if (u != o.u) {
+      return u < o.u;
+    }
+    return idp_url < o.idp_url;
+  }
+};
+
+std::ostream& operator <<(std::ostream& m, const Principal& p);
+}
+}
+
+class JSONObj;
+
+void decode_json_obj(rgw_user& val, JSONObj *obj);
+void encode_json(const char *name, const rgw_user& val, Formatter *f);
+void encode_xml(const char *name, const rgw_user& val, Formatter *f);
+
+// Stream an rgw_user in its textual form (see rgw_user::to_str()).
+inline ostream& operator<<(ostream& out, const rgw_user &u) {
+  return out << u.to_str();
+}
+
+
+#endif
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
new file mode 100644
index 00000000..f022222f
--- /dev/null
+++ b/src/rgw/rgw_bucket.cc
@@ -0,0 +1,3178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include <string>
+#include <map>
+#include <sstream>
+
+#include <boost/utility/string_ref.hpp>
+#include <boost/format.hpp>
+
+#include "common/errno.h"
+#include "common/ceph_json.h"
+#include "include/scope_guard.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+
+#include "include/types.h"
+#include "rgw_bucket.h"
+#include "rgw_user.h"
+#include "rgw_string.h"
+#include "rgw_multi.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+
+#include "include/rados/librados.hpp"
+// until everything is moved from rgw_common
+#include "rgw_common.h"
+#include "rgw_reshard.h"
+#include "rgw_lc.h"
+#include "cls/user/cls_user_types.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+#define BUCKET_TAG_TIMEOUT 30
+
+// default number of entries to list with each bucket listing call
+// (use marker to bridge between calls)
+static constexpr size_t listing_max_entries = 1000;
+
+
+static RGWMetadataHandler *bucket_meta_handler = NULL;
+static RGWMetadataHandler *bucket_instance_meta_handler = NULL;
+
+// Build the id of the object that holds a user's bucket list: the user's
+// textual form followed by RGW_BUCKETS_OBJ_SUFFIX.
+// define as static when RGWBucket implementation completes
+void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id)
+{
+  user_id.to_str(buckets_obj_id);
+  buckets_obj_id += RGW_BUCKETS_OBJ_SUFFIX;
+}
+
+/*
+ * Build the bucket entry name used in RADOS pools: "<tenant>/<bucket>", or
+ * just "<bucket>" when there is no tenant.  An empty bucket name always
+ * yields an empty entry.
+ *
+ * Note that this is not a reversal of parse_bucket(). That one deals
+ * with the syntax we need in metadata and such. This one deals with
+ * the representation in RADOS pools. We chose '/' because it's not
+ * acceptable in bucket names and thus qualified buckets cannot conflict
+ * with the legacy or S3 buckets.
+ */
+std::string rgw_make_bucket_entry_name(const std::string& tenant_name,
+                                       const std::string& bucket_name) {
+  if (bucket_name.empty()) {
+    return std::string();
+  }
+  if (tenant_name.empty()) {
+    return bucket_name;
+  }
+
+  std::string qualified(tenant_name);
+  qualified += "/";
+  qualified += bucket_name;
+  return qualified;
+}
+
+/*
+ * Split an S3 URL bucket specification into tenant and bucket name.
+ *
+ * Tenants are separated from buckets in URLs by a colon in S3.
+ * This function is not to be used on Swift URLs, not even for COPY arguments.
+ *
+ * bucket       the specification, either "tenant:bucket" or "bucket"
+ * auth_tenant  tenant to fall back to when no colon is present
+ * tenant_name  [out] resulting tenant
+ * bucket_name  [out] resulting bucket name
+ */
+void rgw_parse_url_bucket(const std::string &bucket, const std::string& auth_tenant,
+                          std::string &tenant_name, std::string &bucket_name) {
+
+  // Compare against npos directly rather than narrowing the position to int.
+  const std::string::size_type pos = bucket.find(':');
+  if (pos == std::string::npos) {
+    tenant_name = auth_tenant;
+    bucket_name = bucket;
+    return;
+  }
+
+  /*
+   * N.B.: We allow ":bucket" syntax with explicit empty tenant in order
+   * to refer to the legacy tenant, in case users in new named tenants
+   * want to access old global buckets.
+   */
+  tenant_name = bucket.substr(0, pos);
+  bucket_name = bucket.substr(pos + 1);
+}
+
+/**
+ * Read the buckets recorded for a user into an RGWUserBuckets.
+ *
+ * Listing starts at `marker`, is bounded by `end_marker`, and fetches at most
+ * `max` entries (`default_amount` when `max` is 0), issuing as many
+ * cls_user_list_buckets calls as needed.  A missing listing object (-ENOENT)
+ * is treated as an empty list.  When `need_stats` is set the entries are
+ * passed through update_containers_stats() afterwards.
+ *
+ * is_truncated, if non-null, receives the truncation flag of the last
+ * listing call.
+ *
+ * Returns: 0 on success, -ERR# on failure.
+ */
+int rgw_read_user_buckets(RGWRados * store,
+                          const rgw_user& user_id,
+                          RGWUserBuckets& buckets,
+                          const string& marker,
+                          const string& end_marker,
+                          uint64_t max,
+                          bool need_stats,
+                          bool *is_truncated,
+                          uint64_t default_amount)
+{
+  buckets.clear();
+
+  std::string buckets_obj_id;
+  rgw_get_buckets_obj(user_id, buckets_obj_id);
+  rgw_raw_obj obj(store->svc.zone->get_zone_params().user_uid_pool, buckets_obj_id);
+
+  if (max == 0) {
+    max = default_amount;
+  }
+
+  string next_marker = marker;
+  bool more = false;
+  uint64_t fetched = 0;
+
+  do {
+    std::list<cls_user_bucket_entry> batch;
+    const int r = store->cls_user_list_buckets(obj, next_marker, end_marker,
+                                               max - fetched, batch,
+                                               &next_marker, &more);
+    // a user without a listing object simply has no buckets
+    if (r < 0 && r != -ENOENT) {
+      return r;
+    }
+
+    for (auto& item : batch) {
+      buckets.add(RGWBucketEnt(user_id, std::move(item)));
+      ++fetched;
+    }
+  } while (more && fetched < max);
+
+  if (is_truncated != nullptr) {
+    *is_truncated = more;
+  }
+
+  if (!need_stats) {
+    return 0;
+  }
+
+  map<string, RGWBucketEnt>& entries = buckets.get_buckets();
+  const int ret = store->update_containers_stats(entries);
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(store->ctx(), 0) << "ERROR: could not get stats for buckets" << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+// Sync the stats of one bucket into the given user's bucket-list object
+// (located in the zone's user_uid_pool).  Returns the result of
+// cls_user_sync_bucket_stats().
+int rgw_bucket_sync_user_stats(RGWRados *store, const rgw_user& user_id, const RGWBucketInfo& bucket_info)
+{
+  string oid;
+  rgw_get_buckets_obj(user_id, oid);
+
+  const auto& pool = store->svc.zone->get_zone_params().user_uid_pool;
+  rgw_raw_obj user_buckets_obj(pool, oid);
+
+  return store->cls_user_sync_bucket_stats(user_buckets_obj, bucket_info);
+}
+
+// Look up a bucket by tenant and name, then sync its stats into the bucket
+// owner's bucket-list object.  Returns 0 on success or the negative error of
+// the step that failed (each failure is logged).
+int rgw_bucket_sync_user_stats(RGWRados *store, const string& tenant_name, const string& bucket_name)
+{
+  RGWBucketInfo bucket_info;
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  const int lookup = store->get_bucket_info(obj_ctx, tenant_name, bucket_name, bucket_info, NULL);
+  if (lookup < 0) {
+    ldout(store->ctx(), 0) << "ERROR: could not fetch bucket info: ret=" << lookup << dendl;
+    return lookup;
+  }
+
+  const int synced = rgw_bucket_sync_user_stats(store, bucket_info.owner, bucket_info);
+  if (synced < 0) {
+    ldout(store->ctx(), 0) << "ERROR: could not sync user stats for bucket " << bucket_name << ": ret=" << synced << dendl;
+    return synced;
+  }
+
+  return 0;
+}
+
+/**
+ * Link a bucket to a user: add it to the user's bucket-list object and,
+ * when update_entrypoint is set, mark the bucket entry point as linked and
+ * owned by user_id.
+ *
+ * creation_time is stored with the new list entry; a zero value is replaced
+ * by the current time.
+ *
+ * On failure of either write the function calls rgw_unlink_bucket() as
+ * cleanup and returns the original error.
+ * Returns: 0 on success, -ERR# on failure.
+ */
+int rgw_link_bucket(RGWRados* const store,
+ const rgw_user& user_id,
+ rgw_bucket& bucket,
+ ceph::real_time creation_time,
+ bool update_entrypoint)
+{
+ int ret;
+ string& tenant_name = bucket.tenant;
+ string& bucket_name = bucket.name;
+
+ cls_user_bucket_entry new_bucket;
+
+ RGWBucketEntryPoint ep;
+ RGWObjVersionTracker ot;
+
+ // describe the bucket for the user's bucket list
+ bucket.convert(&new_bucket.bucket);
+ new_bucket.size = 0;
+ if (real_clock::is_zero(creation_time))
+ new_bucket.creation_time = real_clock::now();
+ else
+ new_bucket.creation_time = creation_time;
+
+ map<string, bufferlist> attrs;
+ RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ if (update_entrypoint) {
+ // load the current entry point (plus version and attrs) so it can be
+ // rewritten below; a read error other than -ENOENT is logged and
+ // processing continues
+ ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, ep, &ot, NULL, &attrs);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: store->get_bucket_entrypoint_info() returned: "
+ << cpp_strerror(-ret) << dendl;
+ }
+ }
+
+ string buckets_obj_id;
+ rgw_get_buckets_obj(user_id, buckets_obj_id);
+
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().user_uid_pool, buckets_obj_id);
+ ret = store->cls_user_add_bucket(obj, new_bucket);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: error adding bucket to directory: "
+ << cpp_strerror(-ret) << dendl;
+ goto done_err;
+ }
+
+ if (!update_entrypoint)
+ return 0;
+
+ ep.linked = true;
+ ep.owner = user_id;
+ ep.bucket = bucket;
+ ret = store->put_bucket_entrypoint_info(tenant_name, bucket_name, ep, false, ot, real_time(), &attrs);
+ if (ret < 0)
+ goto done_err;
+
+ return 0;
+done_err:
+ // best-effort rollback; its own failure is only logged and `ret` (the
+ // original error) is what the caller sees
+ int r = rgw_unlink_bucket(store, user_id, bucket.tenant, bucket.name);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: failed unlinking bucket on error cleanup: "
+ << cpp_strerror(-r) << dendl;
+ }
+ return ret;
+}
+
+/**
+ * Unlink a bucket from a user: remove it from the user's bucket-list object
+ * and, when update_entrypoint is set, clear the `linked` flag of the bucket
+ * entry point.
+ *
+ * A failure to remove the list entry is logged but does not stop the
+ * function.  With update_entrypoint unset the result is always 0.  A missing
+ * or already-unlinked entry point also yields 0; an entry point owned by a
+ * different user yields -EINVAL.
+ */
+int rgw_unlink_bucket(RGWRados *store, const rgw_user& user_id, const string& tenant_name, const string& bucket_name, bool update_entrypoint)
+{
+  string oid;
+  rgw_get_buckets_obj(user_id, oid);
+
+  cls_user_bucket list_entry;
+  list_entry.name = bucket_name;
+  rgw_raw_obj user_buckets_obj(store->svc.zone->get_zone_params().user_uid_pool, oid);
+
+  int ret = store->cls_user_remove_bucket(user_buckets_obj, list_entry);
+  if (ret < 0) {
+    ldout(store->ctx(), 0) << "ERROR: error removing bucket from directory: "
+                           << cpp_strerror(-ret)<< dendl;
+  }
+
+  if (!update_entrypoint) {
+    return 0;
+  }
+
+  RGWBucketEntryPoint entry_point;
+  RGWObjVersionTracker version;
+  map<string, bufferlist> attrs;
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, entry_point, &version, NULL, &attrs);
+  if (ret < 0) {
+    // nothing to unlink when the entry point does not exist
+    return (ret == -ENOENT) ? 0 : ret;
+  }
+
+  if (!entry_point.linked) {
+    return 0;
+  }
+
+  if (entry_point.owner != user_id) {
+    ldout(store->ctx(), 0) << "bucket entry point user mismatch, can't unlink bucket: " << entry_point.owner << " != " << user_id << dendl;
+    return -EINVAL;
+  }
+
+  entry_point.linked = false;
+  return store->put_bucket_entrypoint_info(tenant_name, bucket_name, entry_point, false, version, real_time(), &attrs);
+}
+
+// Store a bucket metadata entry through the metadata manager, using the
+// bucket metadata handler.  Returns the result of put_entry().
+int rgw_bucket_store_info(RGWRados *store, const string& bucket_name, bufferlist& bl, bool exclusive,
+                          map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker,
+                          real_time mtime) {
+  auto& meta_mgr = store->meta_mgr;
+  return meta_mgr->put_entry(bucket_meta_handler, bucket_name, bl, exclusive,
+                             objv_tracker, mtime, pattrs);
+}
+
+// Store a bucket instance metadata entry through the metadata manager, using
+// the bucket instance metadata handler.  Returns the result of put_entry().
+int rgw_bucket_instance_store_info(RGWRados *store, string& entry, bufferlist& bl, bool exclusive,
+                                   map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker,
+                                   real_time mtime) {
+  auto& meta_mgr = store->meta_mgr;
+  return meta_mgr->put_entry(bucket_instance_meta_handler, entry, bl, exclusive,
+                             objv_tracker, mtime, pattrs);
+}
+
+// Remove a bucket instance metadata entry through the metadata manager.
+// Returns the result of remove_entry().
+int rgw_bucket_instance_remove_entry(RGWRados *store, const string& entry,
+                                     RGWObjVersionTracker *objv_tracker) {
+  auto& meta_mgr = store->meta_mgr;
+  return meta_mgr->remove_entry(bucket_instance_meta_handler, entry, objv_tracker);
+}
+
+// Convert a bucket instance metadata key to its oid form, in place.
+//
+// 'tenant/' is used in bucket instance keys for sync to avoid parsing ambiguity
+// with the existing instance[:shard] format. once we parse the shard, the / is
+// replaced with a : to match the [tenant:]instance format
+void rgw_bucket_instance_key_to_oid(string& key)
+{
+  const auto slash = key.find('/');
+  if (slash == string::npos) {
+    return;  // no tenant part, nothing to rewrite
+  }
+  // only the first '/' separates the tenant
+  key[slash] = ':';
+}
+
+// convert bucket instance oids back to the tenant/ format for metadata keys,
+// in place. it's safe to parse 'tenant:' only for oids, because they won't
+// contain the optional :shard at the end
+void rgw_bucket_instance_oid_to_key(string& oid)
+{
+  // first ':' is either tenant:bucket or bucket:instance
+  const auto first_colon = oid.find(':');
+  if (first_colon == string::npos) {
+    return;
+  }
+
+  // a second ':' means the first one separated the tenant
+  const bool has_second_colon = oid.find(':', first_colon + 1) != string::npos;
+  if (has_second_colon) {
+    oid[first_colon] = '/';
+  }
+}
+
+// Split "<bucket>:<instance>[:<shard>]" into the instance part and the shard.
+//
+// With a single ':' the whole input is the target instance and *shard_id is
+// set to -1.  With more than one, everything before the last ':' is the
+// target instance and the remainder is parsed as a base-10 shard id.
+// Returns 0 on success, -EINVAL when there is no ':' at all or the shard id
+// does not parse (the outputs have already been written in the latter case).
+int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *target_bucket_instance, int *shard_id)
+{
+  const auto last_colon = bucket_instance.rfind(':');
+  if (last_colon == string::npos) {
+    return -EINVAL;
+  }
+
+  const string head = bucket_instance.substr(0, last_colon);
+  const string tail = bucket_instance.substr(last_colon + 1);
+
+  if (head.find(':') == string::npos) {
+    // only one separator: there is no shard suffix
+    *shard_id = -1;
+    *target_bucket_instance = bucket_instance;
+    return 0;
+  }
+
+  *target_bucket_instance = head;
+
+  string parse_error;
+  *shard_id = strict_strtol(tail.c_str(), 10, &parse_error);
+  return parse_error.empty() ? 0 : -EINVAL;
+}
+
+// parse key in format: [tenant/]name:instance[:shard_id]
+//
+// Fills bucket->tenant, bucket->name and bucket->bucket_id from the key and
+// stores the shard in *shard_id (-1 when the key has no shard part).
+// Returns 0 on success, -EINVAL when the shard part is not a valid base-10
+// number; in that case tenant and name have already been written.
+int rgw_bucket_parse_bucket_key(CephContext *cct, const string& key,
+ rgw_bucket *bucket, int *shard_id)
+{
+ // both views alias `key`; nothing is copied until assigned into *bucket
+ boost::string_ref name{key};
+ boost::string_ref instance;
+
+ // split tenant/name
+ auto pos = name.find('/');
+ if (pos != boost::string_ref::npos) {
+ auto tenant = name.substr(0, pos);
+ bucket->tenant.assign(tenant.begin(), tenant.end());
+ name = name.substr(pos + 1);
+ } else {
+ bucket->tenant.clear();
+ }
+
+ // split name:instance
+ pos = name.find(':');
+ if (pos != boost::string_ref::npos) {
+ instance = name.substr(pos + 1);
+ name = name.substr(0, pos);
+ }
+ bucket->name.assign(name.begin(), name.end());
+
+ // split instance:shard
+ pos = instance.find(':');
+ if (pos == boost::string_ref::npos) {
+ bucket->bucket_id.assign(instance.begin(), instance.end());
+ *shard_id = -1;
+ return 0;
+ }
+
+ // parse shard id
+ // `shard` runs to the end of `key`, so shard.data() is terminated by the
+ // std::string's own trailing NUL; the same holds for instance.data() in
+ // the log line below, which therefore prints "instance:shard"
+ auto shard = instance.substr(pos + 1);
+ string err;
+ auto id = strict_strtol(shard.data(), 10, &err);
+ if (!err.empty()) {
+ ldout(cct, 0) << "ERROR: failed to parse bucket shard '"
+ << instance.data() << "': " << err << dendl;
+ return -EINVAL;
+ }
+
+ *shard_id = id;
+ instance = instance.substr(0, pos);
+ bucket->bucket_id.assign(instance.begin(), instance.end());
+ return 0;
+}
+
+// Persist a bucket's info together with the given attrs as a bucket
+// instance metadata entry.  Buckets that still lack an instance object are
+// converted first.  Returns 0 on success, -ERR# on failure.
+int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
+                         map<string, bufferlist>& attrs,
+                         RGWObjVersionTracker *objv_tracker)
+{
+  rgw_bucket& target = bucket_info.bucket;
+
+  if (!bucket_info.has_instance_obj) {
+    /* an old bucket object, need to convert it */
+    RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+    const int converted = store->convert_old_bucket_info(obj_ctx, target.tenant, target.name);
+    if (converted < 0) {
+      ldout(store->ctx(), 0) << "ERROR: failed converting old bucket info: " << converted << dendl;
+      return converted;
+    }
+  }
+
+  /* we want the bucket instance name without the oid prefix cruft */
+  string instance_key = target.get_key();
+
+  bufferlist encoded_info;
+  encode(bucket_info, encoded_info);
+
+  return rgw_bucket_instance_store_info(store, instance_key, encoded_info, false,
+                                        &attrs, objv_tracker, real_time());
+}
+
+// Emit one "object" string per index key in objs_to_unlink, using the key's
+// name.
+static void dump_mulipart_index_results(list<rgw_obj_index_key>& objs_to_unlink,
+                                        Formatter *f)
+{
+  for (auto it = objs_to_unlink.cbegin(); it != objs_to_unlink.cend(); ++it) {
+    f->dump_string("object", it->name);
+  }
+}
+
+// Walk a user's bucket list and compare each entry with the bucket info
+// looked up by name.  Mismatches in name, tenant, marker or bucket id are
+// reported on stdout; with `fix` set the bucket is re-linked to the user
+// using the looked-up data.  Buckets whose info cannot be read are logged
+// and skipped.
+void check_bad_user_bucket_mapping(RGWRados *store, const rgw_user& user_id,
+                                   bool fix)
+{
+  CephContext *cct = store->ctx();
+  size_t chunk_size = cct->_conf->rgw_list_buckets_max_chunk;
+
+  RGWUserBuckets listing;
+  string marker;
+  bool more = false;
+
+  do {
+    int ret = rgw_read_user_buckets(store, user_id, listing, marker,
+                                    string(), chunk_size, false,
+                                    &more);
+    if (ret < 0) {
+      ldout(store->ctx(), 0) << "failed to read user buckets: "
+                             << cpp_strerror(-ret) << dendl;
+      return;
+    }
+
+    map<string, RGWBucketEnt>& entries = listing.get_buckets();
+    for (auto& kv : entries) {
+      // remember where to resume the next listing call
+      marker = kv.first;
+
+      rgw_bucket& listed = kv.second.bucket;
+
+      RGWBucketInfo bucket_info;
+      real_time mtime;
+      RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+      int r = store->get_bucket_info(obj_ctx, user_id.tenant, listed.name, bucket_info, &mtime);
+      if (r < 0) {
+        ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << listed << dendl;
+        continue;
+      }
+
+      rgw_bucket& actual = bucket_info.bucket;
+
+      const bool matches = actual.name == listed.name &&
+                           actual.tenant == listed.tenant &&
+                           actual.marker == listed.marker &&
+                           actual.bucket_id == listed.bucket_id;
+      if (matches) {
+        continue;
+      }
+
+      cout << "bucket info mismatch: expected " << actual << " got " << listed << std::endl;
+      if (!fix) {
+        continue;
+      }
+
+      cout << "fixing" << std::endl;
+      r = rgw_link_bucket(store, user_id, actual,
+                          bucket_info.creation_time);
+      if (r < 0) {
+        cerr << "failed to fix bucket: " << cpp_strerror(-r) << std::endl;
+      }
+    }
+  } while (more);
+}
+
+// Filter predicate: returns whatever rgw_obj_key::oid_to_key_in_ns() reports
+// for the oid when called with an empty namespace string.
+static bool bucket_object_check_filter(const string& oid)
+{
+  rgw_obj_key parsed;
+  string empty_ns;
+  const bool accepted = rgw_obj_key::oid_to_key_in_ns(oid, &parsed, empty_ns);
+  return accepted;
+}
+
+// Delete one object from a bucket, honouring the bucket's versioning status.
+// A key without an instance is given the instance "null" first; note that
+// this modifies the caller's key.  Returns the result of delete_obj().
+int rgw_remove_object(RGWRados *store, const RGWBucketInfo& bucket_info, const rgw_bucket& bucket, rgw_obj_key& key)
+{
+  RGWObjectCtx obj_ctx(store);
+
+  if (key.instance.empty()) {
+    key.instance = "null";
+  }
+
+  rgw_obj target(bucket, key);
+  const auto versioning = bucket_info.versioning_status();
+
+  return store->delete_obj(obj_ctx, bucket_info, target, versioning);
+}
+
+/**
+ * Remove a bucket, optionally removing the objects in it first.
+ *
+ * All object versions are listed (unordered) in chunks of
+ * listing_max_entries.  If anything is found and delete_children is false
+ * the function fails with -ENOTEMPTY; otherwise every listed object is
+ * removed.  Pending multipart uploads are then aborted, user stats are
+ * synced (a failure there is only a warning), the bucket is deleted and
+ * finally unlinked from its owner.
+ *
+ * Returns: 0 on success, -ERR# on failure.  A failure of the final unlink
+ * is logged and returned even though the bucket itself is already gone.
+ */
+int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
+{
+  int ret;
+  map<RGWObjCategory, RGWStorageStats> stats;
+  std::vector<rgw_bucket_dir_entry> objs;
+  map<string, bool> common_prefixes;
+  RGWBucketInfo info;
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  string bucket_ver, master_ver;
+
+  ret = store->get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL);
+  if (ret < 0)
+    return ret;
+
+  ret = store->get_bucket_stats(info, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, NULL);
+  if (ret < 0)
+    return ret;
+
+  RGWRados::Bucket target(store, info);
+  RGWRados::Bucket::List list_op(&target);
+  CephContext *cct = store->ctx();
+
+  list_op.params.list_versions = true;
+  list_op.params.allow_unordered = true;
+
+  bool is_truncated = false;
+  do {
+    objs.clear();
+
+    // chunk size comes from the file-wide listing constant
+    ret = list_op.list_objects(listing_max_entries, &objs, &common_prefixes, &is_truncated);
+    if (ret < 0)
+      return ret;
+
+    if (!objs.empty() && !delete_children) {
+      lderr(store->ctx()) << "ERROR: could not remove non-empty bucket " << bucket.name << dendl;
+      return -ENOTEMPTY;
+    }
+
+    for (const auto& obj : objs) {
+      rgw_obj_key key(obj.key);
+      ret = rgw_remove_object(store, info, bucket, key);
+      // an object that vanished in the meantime is not an error
+      if (ret < 0 && ret != -ENOENT) {
+        return ret;
+      }
+    }
+  } while(is_truncated);
+
+  string prefix, delimiter;
+
+  ret = abort_bucket_multiparts(store, cct, info, prefix, delimiter);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = rgw_bucket_sync_user_stats(store, info.owner, info);
+  if ( ret < 0) {
+    dout(1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
+  }
+
+  RGWObjVersionTracker objv_tracker;
+
+  // if we deleted children above we will force delete, as any that
+  // remain are detritus from a prior bug
+  ret = store->delete_bucket(info, objv_tracker, !delete_children);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: could not remove bucket " <<
+      bucket.name << dendl;
+    return ret;
+  }
+
+  ret = rgw_unlink_bucket(store, info.owner, bucket.tenant, bucket.name, false);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: unable to remove user bucket information" << dendl;
+  }
+
+  return ret;
+}
+
+// Block until the completion is safe, fetch its return value, release the
+// completion and hand the value back.  The handle must not be used again.
+static int aio_wait(librados::AioCompletion *handle)
+{
+  handle->wait_for_safe();
+  const int result = handle->get_return_value();
+  handle->release();
+  return result;
+}
+
+// Wait for and release every pending completion, leaving the list empty.
+// Returns 0 when all succeeded, otherwise the error of the last completion
+// that failed.
+static int drain_handles(list<librados::AioCompletion *>& pending)
+{
+  int last_error = 0;
+  while (!pending.empty()) {
+    librados::AioCompletion *completion = pending.front();
+    pending.pop_front();
+
+    const int r = aio_wait(completion);
+    last_error = (r < 0) ? r : last_error;
+  }
+  return last_error;
+}
+
+/**
+ * Remove a bucket and its objects by issuing the RADOS deletes directly
+ * through asynchronous operations.
+ *
+ * Pending multipart uploads are aborted first.  Then all object versions are
+ * listed (unordered, 1000 per call); for each object that has a manifest the
+ * non-head raw objects are deleted via delete_raw_obj_aio() and the head via
+ * delete_obj_aio().  Outstanding completions are drained whenever the
+ * max_aio countdown reaches zero and once more after the listing finishes.
+ * Finally user stats are synced (failure is only a warning), the bucket is
+ * deleted without the children check and it is unlinked from its owner.
+ *
+ * concurrent_max          value the max_aio countdown is (re)set to
+ * keep_index_consistent   forwarded to delete_obj_aio()
+ *
+ * Returns: 0 on success, -ERR# on failure.
+ *
+ * NOTE(review): the early `return ret` paths inside the listing loop leave
+ * whatever is still queued in `handles` un-drained -- confirm whether those
+ * completions need to be waited for / released.
+ * NOTE(review): with concurrent_max == 0 the `max_aio--` loop condition is
+ * false on its first evaluation, so the manifest loop body would not run for
+ * that object -- confirm callers always pass a positive value.
+ */
+int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket,
+ int concurrent_max, bool keep_index_consistent)
+{
+ int ret;
+ map<RGWObjCategory, RGWStorageStats> stats;
+ std::vector<rgw_bucket_dir_entry> objs;
+ map<string, bool> common_prefixes;
+ RGWBucketInfo info;
+ RGWObjectCtx obj_ctx(store);
+ RGWSysObjectCtx sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+ CephContext *cct = store->ctx();
+
+ string bucket_ver, master_ver;
+
+ ret = store->get_bucket_info(sysobj_ctx, bucket.tenant, bucket.name, info, NULL);
+ if (ret < 0)
+ return ret;
+
+ ret = store->get_bucket_stats(info, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, NULL);
+ if (ret < 0)
+ return ret;
+
+ string prefix, delimiter;
+
+ ret = abort_bucket_multiparts(store, cct, info, prefix, delimiter);
+ if (ret < 0) {
+ return ret;
+ }
+
+ RGWRados::Bucket target(store, info);
+ RGWRados::Bucket::List list_op(&target);
+
+ list_op.params.list_versions = true;
+ list_op.params.allow_unordered = true;
+
+ // completions of the deletes issued so far and not yet waited for
+ std::list<librados::AioCompletion*> handles;
+
+ int max = 1000;
+ int max_aio = concurrent_max;
+ bool is_truncated = true;
+
+ while (is_truncated) {
+ objs.clear();
+ ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
+ if (ret < 0)
+ return ret;
+
+ std::vector<rgw_bucket_dir_entry>::iterator it = objs.begin();
+ for (; it != objs.end(); ++it) {
+ RGWObjState *astate = NULL;
+ rgw_obj obj(bucket, (*it).key);
+
+ ret = store->get_obj_state(&obj_ctx, info, obj, &astate, false);
+ if (ret == -ENOENT) {
+ dout(1) << "WARNING: cannot find obj state for obj " << obj.get_oid() << dendl;
+ continue;
+ }
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: get obj state returned with error " << ret << dendl;
+ return ret;
+ }
+
+ // deletes are only issued for objects that carry a manifest
+ if (astate->has_manifest) {
+ RGWObjManifest& manifest = astate->manifest;
+ RGWObjManifest::obj_iterator miter = manifest.obj_begin();
+ rgw_obj head_obj = manifest.get_obj();
+ rgw_raw_obj raw_head_obj;
+ store->obj_to_raw(info.placement_rule, head_obj, &raw_head_obj);
+
+
+ // max_aio is decremented by the loop condition itself; when it hits
+ // zero the body drains and resets it before the next evaluation
+ for (; miter != manifest.obj_end() && max_aio--; ++miter) {
+ if (!max_aio) {
+ ret = drain_handles(handles);
+ if (ret < 0 && ret != -ENOENT) {
+ lderr(store->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl;
+ return ret;
+ }
+ max_aio = concurrent_max;
+ }
+
+ rgw_raw_obj last_obj = miter.get_location().get_raw_obj(store);
+ if (last_obj == raw_head_obj) {
+ // have the head obj deleted at the end
+ continue;
+ }
+
+ ret = store->delete_raw_obj_aio(last_obj, handles);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: delete obj aio failed with " << ret << dendl;
+ return ret;
+ }
+ } // for all shadow objs
+
+ ret = store->delete_obj_aio(head_obj, info, astate, handles, keep_index_consistent);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: delete obj aio failed with " << ret << dendl;
+ return ret;
+ }
+ }
+
+ if (!max_aio) {
+ ret = drain_handles(handles);
+ if (ret < 0 && ret != -ENOENT) {
+ lderr(store->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl;
+ return ret;
+ }
+ max_aio = concurrent_max;
+ }
+ // drop the cached state for this object from the object context
+ obj_ctx.invalidate(obj);
+ } // for all RGW objects
+ }
+
+ // wait for everything still in flight before touching the bucket itself
+ ret = drain_handles(handles);
+ if (ret < 0 && ret != -ENOENT) {
+ lderr(store->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl;
+ return ret;
+ }
+
+ ret = rgw_bucket_sync_user_stats(store, info.owner, info);
+ if (ret < 0) {
+ dout(1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
+ }
+
+ RGWObjVersionTracker objv_tracker;
+
+ // this function can only be run if caller wanted children to be
+ // deleted, so we can ignore the check for children as any that
+ // remain are detritus from a prior bug
+ ret = store->delete_bucket(info, objv_tracker, false);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: could not remove bucket " << bucket.name << dendl;
+ return ret;
+ }
+
+ ret = rgw_unlink_bucket(store, info.owner, bucket.tenant, bucket.name, false);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: unable to remove user bucket information" << dendl;
+ }
+
+ return ret;
+}
+
+// Remove a bucket's metadata entry.  The entry key is the RADOS entry name
+// built from tenant and bucket name ("<tenant>/<bucket>", or "<bucket>"
+// without a tenant).  Returns the result of remove_entry().
+int rgw_bucket_delete_bucket_obj(RGWRados *store,
+                                 const string& tenant_name,
+                                 const string& bucket_name,
+                                 RGWObjVersionTracker& objv_tracker)
+{
+  // use the value-returning rgw_make_bucket_entry_name() defined earlier in
+  // this file
+  std::string key = rgw_make_bucket_entry_name(tenant_name, bucket_name);
+
+  return store->meta_mgr->remove_entry(bucket_meta_handler, key, &objv_tracker);
+}
+
+// Store msg in *sink.  Does nothing when sink is null or msg is empty, so an
+// earlier message is never overwritten by an empty one.
+static void set_err_msg(std::string *sink, std::string msg)
+{
+  if (sink == nullptr || msg.empty()) {
+    return;
+  }
+  *sink = msg;
+}
+
+// Prepare this RGWBucket for an admin operation.
+//
+// Records the store, tenant and bucket name taken from op_state.  When a
+// bucket name is given its info is loaded and pushed back into op_state;
+// when a user id is given the user info is loaded and the display name is
+// copied into op_state.  Returns -EINVAL when storage is null or when
+// neither a bucket name nor a user id was supplied, otherwise 0 or the
+// error of the failing lookup.
+int RGWBucket::init(RGWRados *storage, RGWBucketAdminOpState& op_state)
+{
+  if (storage == nullptr) {
+    return -EINVAL;
+  }
+
+  store = storage;
+
+  rgw_user user_id = op_state.get_user_id();
+  tenant = user_id.tenant;
+  bucket_name = op_state.get_bucket_name();
+  RGWUserBuckets user_buckets;
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  const bool have_bucket = !bucket_name.empty();
+  const bool have_user = !user_id.empty();
+
+  if (!have_bucket && !have_user) {
+    return -EINVAL;
+  }
+
+  if (have_bucket) {
+    const int r = store->get_bucket_info(obj_ctx, tenant, bucket_name, bucket_info, NULL);
+    if (r < 0) {
+      ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket_name << dendl;
+      return r;
+    }
+
+    op_state.set_bucket(bucket_info.bucket);
+  }
+
+  if (have_user) {
+    const int r = rgw_get_user_info_by_uid(store, user_id, user_info);
+    if (r < 0) {
+      return r;
+    }
+
+    op_state.display_name = user_info.display_name;
+  }
+
+  clear_failure();
+  return 0;
+}
+
+// Scan the "bucket.instance" metadata keys, starting at `marker`, for a
+// bucket whose bucket_id equals `bucket_id`.
+//
+// Returns true when a match is found, with *bucket_out describing it.
+// Returns false when nothing matches or when listing the keys fails (the
+// failure is reported on stderr).  *bucket_out is used as scratch space
+// while scanning, so its contents are unspecified on a false return.
+bool rgw_find_bucket_by_id(CephContext *cct, RGWMetadataManager *mgr,
+                           const string& marker, const string& bucket_id, rgw_bucket* bucket_out)
+{
+  void *handle = NULL;
+  bool truncated = false;
+
+  int ret = mgr->list_keys_init("bucket.instance", marker, &handle);
+  if (ret < 0) {
+    cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
+    // only complete a listing that was actually opened
+    // NOTE(review): assumes a failed init leaves `handle` null -- confirm
+    if (handle) {
+      mgr->list_keys_complete(handle);
+    }
+    // the function returns bool: a non-zero error code here would read as
+    // "found", so report failure explicitly
+    return false;
+  }
+
+  do {
+    list<string> keys;
+    ret = mgr->list_keys_next(handle, 1000, keys, &truncated);
+    if (ret < 0) {
+      cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
+      mgr->list_keys_complete(handle);
+      return false;
+    }
+
+    for (const auto& key : keys) {
+      int shard_id;
+      ret = rgw_bucket_parse_bucket_key(cct, key, bucket_out, &shard_id);
+      if (ret < 0) {
+        // skip keys that do not parse
+        continue;
+      }
+      if (bucket_id == bucket_out->bucket_id) {
+        mgr->list_keys_complete(handle);
+        return true;
+      }
+    }
+  } while (truncated);
+
+  mgr->list_keys_complete(handle);
+  return false;
+}
+
+/*
+ * Link the bucket carried by op_state to the user loaded in user_info.
+ *
+ * Requires a user operation and a non-empty bucket instance id, otherwise
+ * -EINVAL is returned with an explanatory err_msg.  When the bucket instance
+ * carries an ACL attribute, the bucket is unlinked from the owner recorded
+ * in that ACL, the bucket owner is updated, a default ACL for the new user
+ * is written to both the bucket entry object and the bucket instance object,
+ * and finally the bucket is linked to the new user.
+ *
+ * If the instance has no ACL attribute none of these steps run and 0 is
+ * returned.  Returns 0 on success or the negative error of the failing step.
+ */
+int RGWBucket::link(RGWBucketAdminOpState& op_state, std::string *err_msg)
+{
+ if (!op_state.is_user_op()) {
+ set_err_msg(err_msg, "empty user id");
+ return -EINVAL;
+ }
+
+ string bucket_id = op_state.get_bucket_id();
+ if (bucket_id.empty()) {
+ set_err_msg(err_msg, "empty bucket instance id");
+ return -EINVAL;
+ }
+
+ std::string display_name = op_state.get_user_display_name();
+ rgw_bucket bucket = op_state.get_bucket();
+
+ // raw object of the bucket entry in the zone's domain root pool
+ const rgw_pool& root_pool = store->svc.zone->get_zone_params().domain_root;
+ std::string bucket_entry;
+ rgw_make_bucket_entry_name(tenant, bucket_name, bucket_entry);
+ rgw_raw_obj obj(root_pool, bucket_entry);
+ RGWObjVersionTracker objv_tracker;
+
+ map<string, bufferlist> attrs;
+ // NOTE: this local shadows the bucket_info member for the rest of the function
+ RGWBucketInfo bucket_info;
+
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ int r = store->get_bucket_instance_info(obj_ctx, bucket, bucket_info, NULL, &attrs);
+ if (r < 0) {
+ return r;
+ }
+
+ map<string, bufferlist>::iterator aiter = attrs.find(RGW_ATTR_ACL);
+ if (aiter != attrs.end()) {
+ bufferlist aclbl = aiter->second;
+ RGWAccessControlPolicy policy;
+ ACLOwner owner;
+ // decode the stored ACL to learn who currently owns the bucket
+ try {
+ auto iter = aclbl.cbegin();
+ decode(policy, iter);
+ owner = policy.get_owner();
+ } catch (buffer::error& err) {
+ set_err_msg(err_msg, "couldn't decode policy");
+ return -EIO;
+ }
+
+ // detach the bucket from its previous owner
+ r = rgw_unlink_bucket(store, owner.get_id(), bucket.tenant, bucket.name, false);
+ if (r < 0) {
+ set_err_msg(err_msg, "could not unlink policy from user " + owner.get_id().to_str());
+ return r;
+ }
+
+ // now update the user for the bucket...
+ if (display_name.empty()) {
+ ldout(store->ctx(), 0) << "WARNING: user " << user_info.user_id << " has no display name set" << dendl;
+ }
+ policy.create_default(user_info.user_id, display_name);
+
+ owner = policy.get_owner();
+ r = store->set_bucket_owner(bucket_info.bucket, owner);
+ if (r < 0) {
+ set_err_msg(err_msg, "failed to set bucket owner: " + cpp_strerror(-r));
+ return r;
+ }
+
+ // ...and encode the acl
+ aclbl.clear();
+ policy.encode(aclbl);
+
+ // write the new ACL onto the bucket entry object
+ auto sysobj = obj_ctx.get_obj(obj);
+ r = sysobj.wop()
+ .set_objv_tracker(&objv_tracker)
+ .write_attr(RGW_ATTR_ACL, aclbl);
+ if (r < 0) {
+ return r;
+ }
+
+ // build the same default ACL again for the bucket instance object
+ RGWAccessControlPolicy policy_instance;
+ policy_instance.create_default(user_info.user_id, display_name);
+ aclbl.clear();
+ policy_instance.encode(aclbl);
+
+ rgw_raw_obj obj_bucket_instance;
+ store->get_bucket_instance_obj(bucket, obj_bucket_instance);
+ auto inst_sysobj = obj_ctx.get_obj(obj_bucket_instance);
+ // NOTE(review): the same objv_tracker is reused for this second write — confirm that is intended
+ r = inst_sysobj.wop()
+ .set_objv_tracker(&objv_tracker)
+ .write_attr(RGW_ATTR_ACL, aclbl);
+ if (r < 0) {
+ return r;
+ }
+
+ // finally attach the bucket to the new owner
+ r = rgw_link_bucket(store, user_info.user_id, bucket_info.bucket,
+ ceph::real_time());
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Unlink the bucket carried by op_state from the user loaded in user_info.
+ *
+ * Returns -EINVAL (with err_msg set) when op_state is not a user operation;
+ * otherwise returns the result of rgw_unlink_bucket(), filling err_msg on
+ * failure.
+ */
+int RGWBucket::unlink(RGWBucketAdminOpState& op_state, std::string *err_msg)
+{
+  rgw_bucket bucket = op_state.get_bucket();
+
+  if (!op_state.is_user_op()) {
+    set_err_msg(err_msg, "could not fetch user or user bucket info");
+    return -EINVAL;
+  }
+
+  int r = rgw_unlink_bucket(store, user_info.user_id, bucket.tenant, bucket.name);
+  if (r < 0) {
+    // keep a separator between the message and the errno text
+    set_err_msg(err_msg, "error unlinking bucket: " + cpp_strerror(-r));
+  }
+
+  return r;
+}
+
+/*
+ * Replace the quota stored in the bucket's info with op_state.quota.
+ *
+ * The bucket info and its attributes are re-read by tenant/name, the quota
+ * field is overwritten and the instance info is written back together with
+ * the attributes that were read.  err_msg is filled on failure.
+ *
+ * Returns the negative error of the failing step, otherwise the result of
+ * put_bucket_instance_info().
+ */
+int RGWBucket::set_quota(RGWBucketAdminOpState& op_state, std::string *err_msg)
+{
+  rgw_bucket target = op_state.get_bucket();
+  RGWBucketInfo info;
+  map<string, bufferlist> bucket_attrs;
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  int ret = store->get_bucket_info(sysobj_ctx, target.tenant, target.name,
+                                   info, NULL, &bucket_attrs);
+  if (ret < 0) {
+    set_err_msg(err_msg, "could not get bucket info for bucket=" + target.name + ": " + cpp_strerror(-ret));
+    return ret;
+  }
+
+  info.quota = op_state.quota;
+
+  ret = store->put_bucket_instance_info(info, false, real_time(), &bucket_attrs);
+  if (ret < 0) {
+    set_err_msg(err_msg, "ERROR: failed writing bucket instance info: " + cpp_strerror(-ret));
+  }
+  return ret;
+}
+
+/*
+ * Remove the bucket carried by op_state.
+ *
+ * With bypass_gc set, objects must be purged as well (op_state's
+ * delete-children flag); otherwise -EINVAL is returned.  In that mode the
+ * removal goes through rgw_remove_bucket_bypass_gc(), else through
+ * rgw_remove_bucket().
+ *
+ * Returns 0 on success or a negative error, with err_msg describing it.
+ */
+int RGWBucket::remove(RGWBucketAdminOpState& op_state, bool bypass_gc,
+                      bool keep_index_consistent, std::string *err_msg)
+{
+  bool delete_children = op_state.will_delete_children();
+  rgw_bucket bucket = op_state.get_bucket();
+  int ret;
+
+  if (bypass_gc) {
+    if (delete_children) {
+      ret = rgw_remove_bucket_bypass_gc(store, bucket, op_state.get_max_aio(), keep_index_consistent);
+    } else {
+      set_err_msg(err_msg, "purge objects should be set for gc to be bypassed");
+      return -EINVAL;
+    }
+  } else {
+    ret = rgw_remove_bucket(store, bucket, delete_children);
+  }
+
+  if (ret < 0) {
+    // keep a separator between the message and the errno text
+    set_err_msg(err_msg, "unable to remove bucket: " + cpp_strerror(-ret));
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Remove the object named in op_state from the bucket carried by op_state,
+ * using the bucket_info member loaded earlier.
+ *
+ * Returns 0 on success or the negative error from rgw_remove_object(), with
+ * err_msg describing it.
+ */
+int RGWBucket::remove_object(RGWBucketAdminOpState& op_state, std::string *err_msg)
+{
+  rgw_bucket bucket = op_state.get_bucket();
+  std::string object_name = op_state.get_object_name();
+
+  rgw_obj_key key(object_name);
+
+  int ret = rgw_remove_object(store, bucket_info, bucket, key);
+  if (ret < 0) {
+    // keep a separator between the message and the errno text
+    set_err_msg(err_msg, "unable to remove object: " + cpp_strerror(-ret));
+    return ret;
+  }
+
+  return 0;
+}
+
+// Emit one "object" string per key of `result`.  The map is taken by const
+// reference so a full listing chunk is not copied on every call.
+static void dump_bucket_index(const map<string, rgw_bucket_dir_entry>& result, Formatter *f)
+{
+  for (const auto& entry : result) {
+    f->dump_string("object", entry.first);
+  }
+}
+
+// Dump per-category storage stats as a "usage" object: one nested section
+// per category, named by rgw_obj_category_name(), holding that category's
+// RGWStorageStats dump.
+static void dump_bucket_usage(map<RGWObjCategory, RGWStorageStats>& stats, Formatter *formatter)
+{
+  formatter->open_object_section("usage");
+  for (auto& category_entry : stats) {
+    const char *section_name = rgw_obj_category_name(category_entry.first);
+    RGWStorageStats& category_stats = category_entry.second;
+
+    formatter->open_object_section(section_name);
+    category_stats.dump(formatter);
+    formatter->close_section();
+  }
+  formatter->close_section();
+}
+
+// Dump the result of an index check as a "check_result" object containing
+// the usage of the existing header and of the recalculated header.
+static void dump_index_check(map<RGWObjCategory, RGWStorageStats> existing_stats,
+                             map<RGWObjCategory, RGWStorageStats> calculated_stats,
+                             Formatter *formatter)
+{
+  // wraps one usage dump in its own named section
+  auto dump_header = [formatter](const char *section,
+                                 map<RGWObjCategory, RGWStorageStats>& header_stats) {
+    formatter->open_object_section(section);
+    dump_bucket_usage(header_stats, formatter);
+    formatter->close_section();
+  };
+
+  formatter->open_object_section("check_result");
+  dump_header("existing_header", existing_stats);
+  dump_header("calculated_header", calculated_stats);
+  formatter->close_section();
+}
+
+/*
+ * Find multipart index entries that have no matching ".meta" entry.
+ *
+ * All entries of the multipart namespace are listed (versions included).
+ * For each entry the oid is split at its last '.': entries with suffix
+ * "meta" are remembered by their name, every other entry is recorded
+ * together with its name (or the whole oid when there is no '.').  Entries
+ * whose name has no recorded meta are reported in the
+ * "invalid_multipart_entries" array and, when op_state asks for the index
+ * to be fixed, removed from the bucket index.
+ *
+ * Returns 0 on success or the negative error of the failing step; err_msg
+ * is filled for listing and index-removal failures.
+ */
+int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher ,std::string *err_msg)
+{
+ bool fix_index = op_state.will_fix_index();
+ rgw_bucket bucket = op_state.get_bucket();
+
+ // listing chunk size, also used as the flush threshold for pending removals
+ size_t max = 1000;
+
+ map<string, bool> common_prefixes;
+
+ bool is_truncated;
+ map<string, bool> meta_objs;
+ map<rgw_obj_index_key, string> all_objs;
+
+ // NOTE: this local shadows the bucket_info member for the rest of the function
+ RGWBucketInfo bucket_info;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ int r = store->get_bucket_instance_info(obj_ctx, bucket, bucket_info, nullptr, nullptr);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: " << __func__ << "(): get_bucket_instance_info(bucket=" << bucket << ") returned r=" << r << dendl;
+ return r;
+ }
+
+ RGWRados::Bucket target(store, bucket_info);
+ RGWRados::Bucket::List list_op(&target);
+
+ list_op.params.list_versions = true;
+ list_op.params.ns = RGW_OBJ_NS_MULTIPART;
+
+ // pass 1: collect meta names and all other multipart entries
+ do {
+ vector<rgw_bucket_dir_entry> result;
+ int r = list_op.list_objects(max, &result, &common_prefixes, &is_truncated);
+ if (r < 0) {
+ set_err_msg(err_msg, "failed to list objects in bucket=" + bucket.name +
+ " err=" + cpp_strerror(-r));
+
+ return r;
+ }
+
+ vector<rgw_bucket_dir_entry>::iterator iter;
+ for (iter = result.begin(); iter != result.end(); ++iter) {
+ rgw_obj_index_key key = iter->key;
+ rgw_obj obj(bucket, key);
+ string oid = obj.get_oid();
+
+ // find_last_of() returns npos when there is no '.'; stored in an int it is tested as negative
+ int pos = oid.find_last_of('.');
+ if (pos < 0) {
+ /* obj has no suffix */
+ all_objs[key] = oid;
+ } else {
+ /* obj has suffix */
+ string name = oid.substr(0, pos);
+ string suffix = oid.substr(pos + 1);
+
+ if (suffix.compare("meta") == 0) {
+ meta_objs[name] = true;
+ } else {
+ all_objs[key] = name;
+ }
+ }
+ }
+
+ } while (is_truncated);
+
+ list<rgw_obj_index_key> objs_to_unlink;
+ Formatter *f = flusher.get_formatter();
+
+ f->open_array_section("invalid_multipart_entries");
+
+ // pass 2: every entry without a matching meta is queued for reporting/removal
+ for (auto aiter = all_objs.begin(); aiter != all_objs.end(); ++aiter) {
+ string& name = aiter->second;
+
+ if (meta_objs.find(name) == meta_objs.end()) {
+ objs_to_unlink.push_back(aiter->first);
+ }
+
+ // process the queue in batches once it grows beyond `max` entries
+ if (objs_to_unlink.size() > max) {
+ if (fix_index) {
+ int r = store->remove_objs_from_index(bucket_info, objs_to_unlink);
+ if (r < 0) {
+ set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " +
+ cpp_strerror(-r));
+ return r;
+ }
+ }
+
+ dump_mulipart_index_results(objs_to_unlink, flusher.get_formatter());
+ flusher.flush();
+ objs_to_unlink.clear();
+ }
+ }
+
+ // handle whatever is left in the final, partially filled batch
+ if (fix_index) {
+ int r = store->remove_objs_from_index(bucket_info, objs_to_unlink);
+ if (r < 0) {
+ set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " +
+ cpp_strerror(-r));
+
+ return r;
+ }
+ }
+
+ dump_mulipart_index_results(objs_to_unlink, f);
+ f->close_section();
+ flusher.flush();
+
+ return 0;
+}
+
+/*
+ * Walk the whole bucket index with the object-check filter and dump every
+ * listed entry into an "objects" section.
+ *
+ * Requires op_state's fix-index flag; otherwise -EINVAL is returned with
+ * err_msg set.  The bucket tag timeout is raised to BUCKET_TAG_TIMEOUT for
+ * the duration of the walk and reset to 0 afterwards.
+ *
+ * A listing that fails with -ENOENT ends the walk normally.  Any other
+ * listing error stops the walk and is returned to the caller instead of
+ * being retried with the same marker; the section is still closed and the
+ * tag timeout still reset.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int RGWBucket::check_object_index(RGWBucketAdminOpState& op_state,
+                                  RGWFormatterFlusher& flusher,
+                                  std::string *err_msg)
+{
+
+  bool fix_index = op_state.will_fix_index();
+
+  if (!fix_index) {
+    set_err_msg(err_msg, "check-objects flag requires fix index enabled");
+    return -EINVAL;
+  }
+
+  store->cls_obj_set_bucket_tag_timeout(bucket_info, BUCKET_TAG_TIMEOUT);
+
+  string prefix;
+  rgw_obj_index_key marker;
+  bool is_truncated = true;
+  int ret = 0;
+
+  Formatter *formatter = flusher.get_formatter();
+  formatter->open_object_section("objects");
+  uint16_t expansion_factor = 1;
+  while (is_truncated) {
+    map<string, rgw_bucket_dir_entry> result;
+
+    int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD,
+                                           marker, prefix,
+                                           listing_max_entries, true,
+                                           expansion_factor,
+                                           result, &is_truncated, &marker,
+                                           bucket_object_check_filter);
+    if (r == -ENOENT) {
+      break;
+    } else if (r < 0) {
+      set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
+      ret = r;
+      break;
+    }
+
+    // adapt how much the listing over-fetches to how full the last chunk was
+    if (result.size() < listing_max_entries / 8) {
+      ++expansion_factor;
+    } else if (result.size() > listing_max_entries * 7 / 8 &&
+               expansion_factor > 1) {
+      --expansion_factor;
+    }
+
+    dump_bucket_index(result, formatter);
+    flusher.flush();
+  }
+
+  formatter->close_section();
+
+  store->cls_obj_set_bucket_tag_timeout(bucket_info, 0);
+
+  return ret;
+}
+
+
+/*
+ * Check the bucket index of the bucket_info member and, when op_state asks
+ * for it, rebuild the index afterwards.
+ *
+ * existing_stats / calculated_stats receive the stats reported by
+ * bucket_check_index().  Returns 0 on success or the negative error of the
+ * failing step, with err_msg describing it.
+ */
+int RGWBucket::check_index(RGWBucketAdminOpState& op_state,
+                           map<RGWObjCategory, RGWStorageStats>& existing_stats,
+                           map<RGWObjCategory, RGWStorageStats>& calculated_stats,
+                           std::string *err_msg)
+{
+  const bool rebuild_requested = op_state.will_fix_index();
+
+  int ret = store->bucket_check_index(bucket_info, &existing_stats, &calculated_stats);
+  if (ret < 0) {
+    set_err_msg(err_msg, "failed to check index error=" + cpp_strerror(-ret));
+    return ret;
+  }
+
+  if (!rebuild_requested) {
+    return 0;
+  }
+
+  ret = store->bucket_rebuild_index(bucket_info);
+  if (ret < 0) {
+    set_err_msg(err_msg, "failed to rebuild index err=" + cpp_strerror(-ret));
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Decode an S3 access control policy from `bl` and write it as XML to `o`.
+ *
+ * Returns 0 on success.  When decoding fails the error is logged and
+ * returned, and nothing is written to `o` (previously an undecoded policy
+ * was serialized and 0 returned).
+ */
+int RGWBucket::policy_bl_to_stream(bufferlist& bl, ostream& o)
+{
+  RGWAccessControlPolicy_S3 policy(g_ceph_context);
+  int ret = decode_bl(bl, policy);
+  if (ret < 0) {
+    ldout(store->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl;
+    return ret;
+  }
+  policy.to_xml(o);
+  return 0;
+}
+
+/*
+ * Read a single attribute of `obj` into `out_bl`.
+ *
+ * A fresh object context and read operation are created for the call.
+ * Returns the result of RGWRados::Object::Read::get_attr().
+ */
+int rgw_object_get_attr(RGWRados* store, const RGWBucketInfo& bucket_info,
+                        const rgw_obj& obj, const char* attr_name,
+                        bufferlist& out_bl)
+{
+  RGWObjectCtx read_ctx(store);
+  RGWRados::Object target(store, bucket_info, read_ctx, obj);
+  RGWRados::Object::Read read_op(&target);
+
+  const int ret = read_op.get_attr(attr_name, out_bl);
+  return ret;
+}
+
+/*
+ * Load the access control policy of the bucket in op_state, or of the
+ * object named in op_state when an object name is set.
+ *
+ * The object policy is read from the object's ACL attribute; the bucket
+ * policy comes from the ACL attribute returned with the bucket info
+ * (-ENOENT when the bucket has none).  A decode failure is logged and its
+ * error returned.
+ *
+ * Returns the decode result on success or the negative error of the
+ * failing step.
+ */
+int RGWBucket::get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy)
+{
+  std::string object_name = op_state.get_object_name();
+  rgw_bucket bucket = op_state.get_bucket();
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  RGWBucketInfo bucket_info;
+  map<string, bufferlist> attrs;
+  int ret = store->get_bucket_info(sysobj_ctx, bucket.tenant, bucket.name, bucket_info, NULL, &attrs);
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (object_name.empty()) {
+    // bucket policy: taken from the attributes fetched with the bucket info
+    auto acl_attr = attrs.find(RGW_ATTR_ACL);
+    if (acl_attr == attrs.end()) {
+      return -ENOENT;
+    }
+    ret = decode_bl(acl_attr->second, policy);
+  } else {
+    // object policy: read the ACL attribute from the object itself
+    bufferlist bl;
+    rgw_obj obj(bucket, object_name);
+
+    ret = rgw_object_get_attr(store, bucket_info, obj, RGW_ATTR_ACL, bl);
+    if (ret < 0) {
+      return ret;
+    }
+    ret = decode_bl(bl, policy);
+  }
+
+  if (ret < 0) {
+    ldout(store->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl;
+  }
+  return ret;
+}
+
+
+/*
+ * Initialize an RGWBucket helper from op_state and fetch the requested
+ * policy into `policy`.
+ *
+ * Returns 0 on success or the negative error from init or get_policy.
+ */
+int RGWBucketAdminOp::get_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                 RGWAccessControlPolicy& policy)
+{
+  RGWBucket helper;
+
+  const int init_ret = helper.init(store, op_state);
+  if (init_ret < 0) {
+    return init_ret;
+  }
+
+  const int policy_ret = helper.get_policy(op_state, policy);
+  return (policy_ret < 0) ? policy_ret : 0;
+}
+
+/* Wrappers to facilitate RESTful interface */
+
+
+/*
+ * Fetch the policy selected by op_state and dump it through the flusher's
+ * formatter as a "policy" object.
+ *
+ * Returns 0 on success or the negative error from the policy lookup, in
+ * which case nothing is written.
+ */
+int RGWBucketAdminOp::get_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                 RGWFormatterFlusher& flusher)
+{
+  RGWAccessControlPolicy policy(store->ctx());
+
+  const int ret = get_policy(store, op_state, policy);
+  if (ret < 0) {
+    return ret;
+  }
+
+  Formatter *f = flusher.get_formatter();
+  flusher.start(0);
+
+  f->open_object_section("policy");
+  policy.dump(f);
+  f->close_section();
+
+  flusher.flush();
+  return 0;
+}
+
+/*
+ * Fetch the policy selected by op_state as an S3 policy and write its XML
+ * form to `os`.
+ *
+ * Returns 0 on success or the negative error from the policy lookup.
+ */
+int RGWBucketAdminOp::dump_s3_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                     ostream& os)
+{
+  RGWAccessControlPolicy_S3 s3_policy(store->ctx());
+
+  const int ret = get_policy(store, op_state, s3_policy);
+  if (ret < 0) {
+    return ret;
+  }
+
+  s3_policy.to_xml(os);
+  return 0;
+}
+
+// Initialize an RGWBucket helper from op_state and unlink the bucket from
+// its user.  Returns the init error, or the result of RGWBucket::unlink().
+int RGWBucketAdminOp::unlink(RGWRados *store, RGWBucketAdminOpState& op_state)
+{
+  RGWBucket helper;
+
+  const int init_ret = helper.init(store, op_state);
+  if (init_ret < 0) {
+    return init_ret;
+  }
+
+  return helper.unlink(op_state);
+}
+
+// Initialize an RGWBucket helper from op_state and link the bucket to the
+// user, forwarding `err` for error text.  Returns the init error, or the
+// result of RGWBucket::link().
+int RGWBucketAdminOp::link(RGWRados *store, RGWBucketAdminOpState& op_state, string *err)
+{
+  RGWBucket helper;
+
+  const int init_ret = helper.init(store, op_state);
+  if (init_ret < 0) {
+    return init_ret;
+  }
+
+  return helper.link(op_state, err);
+}
+
+/*
+ * Run the full index check for the bucket in op_state: the multipart
+ * check, the object index check and the index stats check, in that order.
+ * The stats comparison is dumped through the flusher at the end.
+ *
+ * Returns 0 on success or the negative error of the first failing step.
+ */
+int RGWBucketAdminOp::check_index(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                  RGWFormatterFlusher& flusher)
+{
+  map<RGWObjCategory, RGWStorageStats> existing_stats;
+  map<RGWObjCategory, RGWStorageStats> calculated_stats;
+
+  RGWBucket helper;
+
+  int ret = helper.init(store, op_state);
+  if (ret < 0) {
+    return ret;
+  }
+
+  Formatter *f = flusher.get_formatter();
+  flusher.start(0);
+
+  ret = helper.check_bad_index_multipart(op_state, flusher);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = helper.check_object_index(op_state, flusher);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = helper.check_index(op_state, existing_stats, calculated_stats);
+  if (ret < 0) {
+    return ret;
+  }
+
+  dump_index_check(existing_stats, calculated_stats, f);
+  flusher.flush();
+
+  return 0;
+}
+
+/*
+ * Initialize an RGWBucket helper from op_state and remove the bucket.
+ *
+ * Any error text produced by the removal is logged.  Returns the init
+ * error, or the result of RGWBucket::remove().
+ */
+int RGWBucketAdminOp::remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                    bool bypass_gc, bool keep_index_consistent)
+{
+  RGWBucket helper;
+
+  int ret = helper.init(store, op_state);
+  if (ret < 0) {
+    return ret;
+  }
+
+  std::string removal_error;
+  ret = helper.remove(op_state, bypass_gc, keep_index_consistent, &removal_error);
+  if (!removal_error.empty()) {
+    lderr(store->ctx()) << "ERROR: " << removal_error << dendl;
+  }
+  return ret;
+}
+
+// Initialize an RGWBucket helper from op_state and remove the object named
+// there.  Returns the init error, or the result of
+// RGWBucket::remove_object().
+int RGWBucketAdminOp::remove_object(RGWRados *store, RGWBucketAdminOpState& op_state)
+{
+  RGWBucket helper;
+
+  const int init_ret = helper.init(store, op_state);
+  if (init_ret < 0) {
+    return init_ret;
+  }
+
+  return helper.remove_object(op_state);
+}
+
+/*
+ * Dump a "stats" object for one bucket: identity, placement, index type,
+ * owner, versions, mtime, max marker, per-category usage and quota.
+ *
+ * Returns 0 on success, or the negative error from reading the bucket info
+ * or the bucket stats (the latter is also reported on stderr).  Nothing is
+ * written to the formatter on failure.
+ */
+static int bucket_stats(RGWRados *store, const std::string& tenant_name, const std::string& bucket_name, Formatter *formatter)
+{
+  RGWBucketInfo bucket_info;
+  map<RGWObjCategory, RGWStorageStats> stats;
+  real_time mtime;
+
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  int ret = store->get_bucket_info(obj_ctx, tenant_name, bucket_name, bucket_info, &mtime);
+  if (ret < 0) {
+    return ret;
+  }
+
+  rgw_bucket& bucket = bucket_info.bucket;
+
+  string bucket_ver;
+  string master_ver;
+  string max_marker;
+  ret = store->get_bucket_stats(bucket_info, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, &max_marker);
+  if (ret < 0) {
+    cerr << "error getting bucket stats bucket=" << bucket.name << " ret=" << ret << std::endl;
+    return ret;
+  }
+
+  utime_t ut(mtime);
+
+  formatter->open_object_section("stats");
+
+  // identity and placement
+  formatter->dump_string("bucket", bucket.name);
+  formatter->dump_int("num_shards", bucket_info.num_shards);
+  formatter->dump_string("tenant", bucket.tenant);
+  formatter->dump_string("zonegroup", bucket_info.zonegroup);
+  formatter->dump_string("placement_rule", bucket_info.placement_rule.to_str());
+  ::encode_json("explicit_placement", bucket.explicit_placement, formatter);
+  formatter->dump_string("id", bucket.bucket_id);
+  formatter->dump_string("marker", bucket.marker);
+  formatter->dump_stream("index_type") << bucket_info.index_type;
+  ::encode_json("owner", bucket_info.owner, formatter);
+
+  // versions and timestamps
+  formatter->dump_string("ver", bucket_ver);
+  formatter->dump_string("master_ver", master_ver);
+  ut.gmtime(formatter->dump_stream("mtime"));
+  formatter->dump_string("max_marker", max_marker);
+
+  // usage and quota
+  dump_bucket_usage(stats, formatter);
+  encode_json("bucket_quota", bucket_info.quota, formatter);
+
+  formatter->close_section();
+
+  return 0;
+}
+
+/*
+ * For every user in `user_ids`, list the user's buckets and report how full
+ * each bucket's index shards are relative to the configured
+ * rgw_safe_max_objects_per_shard.
+ *
+ * A bucket is reported as "OVER <pct>%" above 100%, "WARN <pct>%" at or
+ * above the warning threshold (rgw_shard_warning_threshold, replaced by 90
+ * when configured above 100) and "OK" otherwise.  With `warnings_only` set,
+ * "OK" buckets are omitted.
+ *
+ * Buckets whose info or stats cannot be read are skipped; their error code
+ * is kept local so it no longer leaks into the function's return value.
+ *
+ * Returns a negative error only when listing a user's buckets fails,
+ * otherwise the (non-negative) result of the last bucket listing.
+ */
+int RGWBucketAdminOp::limit_check(RGWRados *store,
+                                  RGWBucketAdminOpState& op_state,
+                                  const std::list<std::string>& user_ids,
+                                  RGWFormatterFlusher& flusher,
+                                  bool warnings_only)
+{
+  int ret = 0;
+  const size_t max_entries =
+    store->ctx()->_conf->rgw_list_buckets_max_chunk;
+
+  // NOTE(review): used as a divisor below; a configured value of 0 is not
+  // guarded against here - confirm the option cannot be 0
+  const size_t safe_max_objs_per_shard =
+    store->ctx()->_conf->rgw_safe_max_objects_per_shard;
+
+  uint16_t shard_warn_pct =
+    store->ctx()->_conf->rgw_shard_warning_threshold;
+  if (shard_warn_pct > 100)
+    shard_warn_pct = 90;
+
+  Formatter *formatter = flusher.get_formatter();
+  flusher.start(0);
+
+  formatter->open_array_section("users");
+
+  for (const auto& user_id : user_ids) {
+
+    formatter->open_object_section("user");
+    formatter->dump_string("user_id", user_id);
+    formatter->open_array_section("buckets");
+
+    string marker;
+    bool is_truncated{false};
+    do {
+      RGWUserBuckets buckets;
+
+      ret = rgw_read_user_buckets(store, user_id, buckets,
+                                  marker, string(), max_entries, false,
+                                  &is_truncated);
+      if (ret < 0)
+        return ret;
+
+      map<string, RGWBucketEnt>& m_buckets = buckets.get_buckets();
+
+      for (const auto& iter : m_buckets) {
+        auto& bucket = iter.second.bucket;
+        uint32_t num_shards = 1;
+        uint64_t num_objects = 0;
+
+        /* need info for num_shards */
+        RGWBucketInfo info;
+        auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+        marker = bucket.name; /* Casey's location for marker update,
+                               * as we may now not reach the end of
+                               * the loop body */
+
+        /* per-bucket failures only skip the bucket, so they use their own
+         * status variable instead of clobbering `ret` */
+        int r = store->get_bucket_info(obj_ctx, bucket.tenant, bucket.name,
+                                       info, nullptr);
+        if (r < 0)
+          continue;
+
+        /* need stats for num_entries */
+        string bucket_ver, master_ver;
+        std::map<RGWObjCategory, RGWStorageStats> stats;
+        r = store->get_bucket_stats(info, RGW_NO_SHARD, &bucket_ver,
+                                    &master_ver, stats, nullptr);
+
+        if (r < 0)
+          continue;
+
+        for (const auto& s : stats) {
+          num_objects += s.second.num_objects;
+        }
+
+        num_shards = info.num_shards;
+        uint64_t objs_per_shard =
+          (num_shards) ? num_objects/num_shards : num_objects;
+        {
+          bool warn;
+          stringstream ss;
+          uint64_t fill_pct = objs_per_shard * 100 / safe_max_objs_per_shard;
+          if (fill_pct > 100) {
+            ss << "OVER " << fill_pct << "%";
+            warn = true;
+          } else if (fill_pct >= shard_warn_pct) {
+            ss << "WARN " << fill_pct << "%";
+            warn = true;
+          } else {
+            ss << "OK";
+            warn = false;
+          }
+
+          if (warn || !warnings_only) {
+            formatter->open_object_section("bucket");
+            formatter->dump_string("bucket", bucket.name);
+            formatter->dump_string("tenant", bucket.tenant);
+            formatter->dump_int("num_objects", num_objects);
+            formatter->dump_int("num_shards", num_shards);
+            formatter->dump_int("objects_per_shard", objs_per_shard);
+            formatter->dump_string("fill_status", ss.str());
+            formatter->close_section();
+          }
+        }
+      }
+      formatter->flush(cout);
+    } while (is_truncated); /* foreach: bucket */
+
+    formatter->close_section();
+    formatter->close_section();
+    formatter->flush(cout);
+
+  } /* foreach: user_id */
+
+  formatter->close_section();
+  formatter->flush(cout);
+
+  return ret;
+} /* RGWBucketAdminOp::limit_check */
+
+/*
+ * Dump bucket information through the flusher.
+ *
+ * - User operation: lists the user's buckets (optionally restricted to the
+ *   bucket named in op_state), as names or as full stats.
+ * - Bucket name only: dumps the stats of that bucket.
+ * - Neither: lists all keys of the "bucket" metadata section, as names or
+ *   as full stats.
+ *
+ * Returns -ERR_NO_SUCH_BUCKET when the named bucket does not exist, a
+ * negative error when initialization, the user bucket listing or the single
+ * bucket stats fail, and 0 otherwise.
+ */
+int RGWBucketAdminOp::info(RGWRados *store, RGWBucketAdminOpState& op_state,
+                           RGWFormatterFlusher& flusher)
+{
+  RGWBucket bucket;
+  int ret = 0;
+  const std::string& bucket_name = op_state.get_bucket_name();
+  if (!bucket_name.empty()) {
+    ret = bucket.init(store, op_state);
+    if (-ENOENT == ret)
+      return -ERR_NO_SUCH_BUCKET;
+    else if (ret < 0)
+      return ret;
+  }
+
+  Formatter *formatter = flusher.get_formatter();
+  flusher.start(0);
+
+  CephContext *cct = store->ctx();
+
+  const size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk;
+
+  const bool show_stats = op_state.will_fetch_stats();
+  const rgw_user& user_id = op_state.get_user_id();
+  if (op_state.is_user_op()) {
+    formatter->open_array_section("buckets");
+
+    RGWUserBuckets buckets;
+    string marker;
+    const std::string empty_end_marker;
+    constexpr bool no_need_stats = false; // set need_stats to false
+
+    bool is_truncated = false;
+    do {
+      buckets.clear();
+      ret = rgw_read_user_buckets(store, op_state.get_user_id(), buckets,
+                                  marker, empty_end_marker, max_entries, no_need_stats,
+                                  &is_truncated);
+      if (ret < 0) {
+        return ret;
+      }
+
+      const std::string* marker_cursor = nullptr;
+      map<string, RGWBucketEnt>& m = buckets.get_buckets();
+
+      for (const auto& i : m) {
+        const std::string& obj_name = i.first;
+
+        // Advance the cursor for every listed entry, including the ones
+        // filtered out below; otherwise a truncated chunk without a match
+        // would be requested again with the same marker forever.
+        marker_cursor = &obj_name;
+
+        if (!bucket_name.empty() && bucket_name != obj_name) {
+          continue;
+        }
+
+        if (show_stats) {
+          bucket_stats(store, user_id.tenant, obj_name, formatter);
+        } else {
+          formatter->dump_string("bucket", obj_name);
+        }
+      } // for loop
+      if (marker_cursor) {
+        marker = *marker_cursor;
+      }
+
+      flusher.flush();
+    } while (is_truncated);
+
+    formatter->close_section();
+  } else if (!bucket_name.empty()) {
+    ret = bucket_stats(store, user_id.tenant, bucket_name, formatter);
+    if (ret < 0) {
+      return ret;
+    }
+  } else {
+    void *handle = nullptr;
+    bool truncated = true;
+
+    formatter->open_array_section("buckets");
+    ret = store->meta_mgr->list_keys_init("bucket", &handle);
+    while (ret == 0 && truncated) {
+      std::list<std::string> buckets;
+      constexpr int max_keys = 1000;
+      ret = store->meta_mgr->list_keys_next(handle, max_keys, buckets,
+                                            &truncated);
+      for (auto& bucket_name : buckets) {
+        if (show_stats) {
+          bucket_stats(store, user_id.tenant, bucket_name, formatter);
+        } else {
+          formatter->dump_string("bucket", bucket_name);
+        }
+      }
+    }
+    // only release a handle that list_keys_init() actually handed out
+    if (handle) {
+      store->meta_mgr->list_keys_complete(handle);
+    }
+
+    formatter->close_section();
+  }
+
+  flusher.flush();
+
+  return 0;
+}
+
+// Initialize an RGWBucket helper from op_state and apply the quota carried
+// by op_state.  Returns the init error, or the result of
+// RGWBucket::set_quota().
+int RGWBucketAdminOp::set_quota(RGWRados *store, RGWBucketAdminOpState& op_state)
+{
+  RGWBucket helper;
+
+  const int init_ret = helper.init(store, op_state);
+  if (init_ret < 0) {
+    return init_ret;
+  }
+
+  return helper.set_quota(op_state);
+}
+
+/*
+ * Remove the bucket index object of every shard of a bucket instance.
+ *
+ * A bucket with num_shards == 0 is treated as having a single index object
+ * addressed with shard id -1.  Stops at the first failure, reports it on
+ * stderr and returns its negative error; returns 0 when every shard was
+ * removed.
+ */
+static int purge_bucket_instance(RGWRados *store, const RGWBucketInfo& bucket_info)
+{
+  const bool sharded = bucket_info.num_shards > 0;
+  const int shard_count = (sharded ? bucket_info.num_shards : 1);
+
+  for (int idx = 0; idx < shard_count; ++idx) {
+    RGWRados::BucketShard bs(store);
+    const int shard_id = (sharded ? idx : -1);
+
+    int ret = bs.init(bucket_info.bucket, shard_id, nullptr);
+    if (ret < 0) {
+      cerr << "ERROR: bs.init(bucket=" << bucket_info.bucket << ", shard=" << shard_id
+           << "): " << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+
+    ret = store->bi_remove(bs);
+    if (ret < 0) {
+      cerr << "ERROR: failed to remove bucket index object: "
+           << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+  }
+  return 0;
+}
+
+// Split "tenant/bucket" at the first '/' into {tenant, bucket}.  A name
+// without '/' yields an empty tenant and the unchanged name.
+inline auto split_tenant(const std::string& bucket_name){
+  const auto slash = bucket_name.find('/');
+  if (slash == std::string::npos) {
+    return std::make_pair(std::string(), bucket_name);
+  }
+  return std::make_pair(bucket_name.substr(0, slash),
+                        bucket_name.substr(slash + 1));
+}
+
+using bucket_instance_ls = std::vector<RGWBucketInfo>;
+
+/*
+ * From the bucket instance keys in `lst` (all belonging to `bucket_name`,
+ * optionally "tenant/bucket"), append the instances considered stale to
+ * `stale_instances`.
+ *
+ * - Instances whose reshard status is CLS_RGW_RESHARD_DONE are always stale.
+ * - If the bucket itself no longer exists, every readable instance is stale.
+ * - If the bucket cannot be read, or is currently resharding, only the
+ *   instances from the first rule are returned.
+ * - Otherwise the remaining instances, minus the bucket's current instance
+ *   and its new_bucket_instance_id, are added while the reshard lock is held.
+ *
+ * Instances that cannot be read are logged and skipped.
+ */
+void get_stale_instances(RGWRados *store, const std::string& bucket_name,
+                         const vector<std::string>& lst,
+                         bucket_instance_ls& stale_instances)
+{
+
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  bucket_instance_ls other_instances;
+// first iterate over the entries, and pick up the done buckets; these
+// are guaranteed to be stale
+  for (const auto& bucket_instance : lst){
+    RGWBucketInfo binfo;
+    int r = store->get_bucket_instance_info(obj_ctx, bucket_instance,
+                                            binfo, nullptr,nullptr);
+    if (r < 0){
+      // this can only happen if someone deletes us right when we're processing
+      lderr(store->ctx()) << "Bucket instance is invalid: " << bucket_instance
+                          << ": " << cpp_strerror(-r) << dendl;
+      continue;
+    }
+    if (binfo.reshard_status == CLS_RGW_RESHARD_DONE)
+      stale_instances.emplace_back(std::move(binfo));
+    else {
+      other_instances.emplace_back(std::move(binfo));
+    }
+  }
+
+  // Read the cur bucket info, if the bucket doesn't exist we can simply return
+  // all the instances
+  auto [tenant, bucket] = split_tenant(bucket_name);
+  RGWBucketInfo cur_bucket_info;
+  int r = store->get_bucket_info(obj_ctx, tenant, bucket, cur_bucket_info, nullptr);
+  if (r < 0) {
+    if (r == -ENOENT) {
+      // bucket doesn't exist, everything is stale then
+      stale_instances.insert(std::end(stale_instances),
+                             std::make_move_iterator(other_instances.begin()),
+                             std::make_move_iterator(other_instances.end()));
+    } else {
+      // all bets are off if we can't read the bucket, just return the sureshot stale instances
+      lderr(store->ctx()) << "error: reading bucket info for bucket: "
+                          << bucket << ": " << cpp_strerror(-r) << dendl;
+    }
+    return;
+  }
+
+  // Don't process further in this round if bucket is resharding
+  if (cur_bucket_info.reshard_status == CLS_RGW_RESHARD_IN_PROGRESS)
+    return;
+
+  // the current instance and the pending new instance are never stale
+  other_instances.erase(std::remove_if(other_instances.begin(), other_instances.end(),
+                                       [&cur_bucket_info](const RGWBucketInfo& b){
+                                         return (b.bucket.bucket_id == cur_bucket_info.bucket.bucket_id ||
+                                                 b.bucket.bucket_id == cur_bucket_info.new_bucket_instance_id);
+                                       }),
+                        other_instances.end());
+
+  // check if there are still instances left
+  if (other_instances.empty()) {
+    return;
+  }
+
+  // Now we have a bucket with instances where the reshard status is none, this
+  // usually happens when the reshard process couldn't complete, lockdown the
+  // bucket and walk through these instances to make sure no one else interferes
+  // with these
+  {
+    RGWBucketReshardLock reshard_lock(store, cur_bucket_info, true);
+    r = reshard_lock.lock();
+    if (r < 0) {
+      // most likely bucket is under reshard, return the sureshot stale instances
+      ldout(store->ctx(), 5) << __func__
+                             << ": failed to take reshard lock; reshard underway likely" << dendl;
+      return;
+    }
+    auto sg = make_scope_guard([&reshard_lock](){ reshard_lock.unlock();} );
+    // this should be fast enough that we may not need to renew locks and check
+    // exit status?, should we read the values of the instances again?
+    stale_instances.insert(std::end(stale_instances),
+                           std::make_move_iterator(other_instances.begin()),
+                           std::make_move_iterator(other_instances.end()));
+  }
+
+  return;
+}
+
+/*
+ * Walk all "bucket.instance" metadata keys, group them by bucket, determine
+ * the stale instances of each bucket and hand them to `process_f`.
+ *
+ * Output is wrapped in a "keys" array which is closed and flushed to cout,
+ * together with releasing the listing handle, when the function exits.
+ * A listing chunk that fails with -ENOENT is skipped.
+ *
+ * Returns 0 on success or the negative error of the failing listing call.
+ */
+static int process_stale_instances(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                   RGWFormatterFlusher& flusher,
+                                   std::function<void(const bucket_instance_ls&,
+                                                      Formatter *,
+                                                      RGWRados*)> process_f)
+{
+  std::string marker;
+  void *handle = nullptr;
+  Formatter *formatter = flusher.get_formatter();
+  static constexpr auto default_max_keys = 1000;
+
+  int ret = store->meta_mgr->list_keys_init("bucket.instance", marker, &handle);
+  if (ret < 0) {
+    cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+
+  // initialized so the loop condition never reads an indeterminate value
+  // when list_keys_next() fails with -ENOENT
+  bool truncated = false;
+
+  formatter->open_array_section("keys");
+  auto g = make_scope_guard([&store, &handle, &formatter]() {
+    store->meta_mgr->list_keys_complete(handle);
+    formatter->close_section(); // keys
+    formatter->flush(cout);
+  });
+
+  do {
+    list<std::string> keys;
+
+    ret = store->meta_mgr->list_keys_next(handle, default_max_keys, keys, &truncated);
+    if (ret < 0 && ret != -ENOENT) {
+      cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+    if (ret != -ENOENT) {
+      // partition the list of buckets by buckets as the listing is un sorted,
+      // since it would minimize the reads to bucket_info
+      std::unordered_map<std::string, std::vector<std::string>> bucket_instance_map;
+      for (auto &key: keys) {
+        auto pos = key.find(':');
+        if(pos != std::string::npos)
+          bucket_instance_map[key.substr(0,pos)].emplace_back(std::move(key));
+      }
+      for (const auto& kv: bucket_instance_map) {
+        bucket_instance_ls stale_lst;
+        get_stale_instances(store, kv.first, kv.second, stale_lst);
+        process_f(stale_lst, formatter, store);
+      }
+    }
+  } while (truncated);
+
+  return 0;
+}
+
+// List stale bucket instances: every stale instance found by
+// process_stale_instances() is dumped as a "key" string.
+int RGWBucketAdminOp::list_stale_instances(RGWRados *store,
+                                           RGWBucketAdminOpState& op_state,
+                                           RGWFormatterFlusher& flusher)
+{
+  auto dump_keys = [](const bucket_instance_ls& stale,
+                      Formatter *f,
+                      RGWRados*){
+    for (const auto& instance : stale) {
+      f->dump_string("key", instance.bucket.get_key());
+    }
+  };
+  return process_stale_instances(store, op_state, flusher, dump_keys);
+}
+
+
+/*
+ * Delete stale bucket instances: for every stale instance the index shard
+ * objects are purged and, if that succeeded, the "bucket.instance" metadata
+ * entry is removed.  A "delete_status" object with the instance key and the
+ * (negated) result is dumped for each instance.
+ */
+int RGWBucketAdminOp::clear_stale_instances(RGWRados *store,
+                                            RGWBucketAdminOpState& op_state,
+                                            RGWFormatterFlusher& flusher)
+{
+  auto purge_and_report = [](const bucket_instance_ls& stale,
+                             Formatter *f,
+                             RGWRados *rados){
+    for (const auto &instance : stale) {
+      int status = purge_bucket_instance(rados, instance);
+      if (status == 0){
+        auto md_key = "bucket.instance:" + instance.bucket.get_key();
+        status = rados->meta_mgr->remove(md_key);
+      }
+
+      f->open_object_section("delete_status");
+      f->dump_string("bucket_instance", instance.bucket.get_key());
+      f->dump_int("status", -status);
+      f->close_section();
+    }
+  };
+
+  return process_stale_instances(store, op_state, flusher, purge_and_report);
+}
+
+/*
+ * Read the info and attributes of one bucket and pass them to
+ * rgw::lc::fix_lc_shard_entry().
+ *
+ * Returns the negative error from reading the bucket info, otherwise the
+ * result of fix_lc_shard_entry().
+ */
+static int fix_single_bucket_lc(RGWRados *store,
+                                const std::string& tenant_name,
+                                const std::string& bucket_name)
+{
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+  RGWBucketInfo info;
+  map <std::string, bufferlist> attrs;
+
+  const int ret = store->get_bucket_info(sysobj_ctx, tenant_name, bucket_name,
+                                         info, nullptr, &attrs);
+  if (ret < 0) {
+    // TODO: Should we handle the case where the bucket could've been removed between
+    // listing and fetching?
+    return ret;
+  }
+
+  return rgw::lc::fix_lc_shard_entry(store, info, attrs);
+}
+
+// Dump one "bucket_entry" object holding the bucket (prefixed with
+// "tenant/" when a tenant is set) and the given status.
+static void format_lc_status(Formatter* formatter,
+                             const std::string& tenant_name,
+                             const std::string& bucket_name,
+                             int status)
+{
+  std::string entry;
+  if (tenant_name.empty()) {
+    entry = bucket_name;
+  } else {
+    entry = tenant_name + "/" + bucket_name;
+  }
+
+  formatter->open_object_section("bucket_entry");
+  formatter->dump_string("bucket", entry);
+  formatter->dump_int("status", status);
+  formatter->close_section(); // bucket_entry
+}
+
+// Fix the lifecycle shard entry of one bucket and dump the negated result
+// as its status.
+static void process_single_lc_entry(RGWRados *store, Formatter *formatter,
+                                    const std::string& tenant_name,
+                                    const std::string& bucket_name)
+{
+  const int fix_ret = fix_single_bucket_lc(store, tenant_name, bucket_name);
+  const int status = -fix_ret;
+  format_lc_status(formatter, tenant_name, bucket_name, status);
+}
+
+/*
+ * Fix lifecycle shard entries.
+ *
+ * With a bucket name in op_state only that bucket (in the tenant of
+ * op_state's user id) is processed.  Otherwise every key of the "bucket"
+ * metadata section is processed and the per-bucket results are dumped in
+ * an "lc_fix_status" array, flushed to cout after each listing chunk.
+ * A listing chunk that fails with -ENOENT is skipped.
+ *
+ * Returns 0 on success or the negative error of the failing listing call.
+ */
+int RGWBucketAdminOp::fix_lc_shards(RGWRados *store,
+                                    RGWBucketAdminOpState& op_state,
+                                    RGWFormatterFlusher& flusher)
+{
+  std::string marker;
+  void *handle = nullptr;
+  Formatter *formatter = flusher.get_formatter();
+  static constexpr auto default_max_keys = 1000;
+
+  // initialized so the loop condition never reads an indeterminate value
+  // when list_keys_next() fails with -ENOENT
+  bool truncated = false;
+  if (const std::string& bucket_name = op_state.get_bucket_name();
+      ! bucket_name.empty()) {
+    const rgw_user user_id = op_state.get_user_id();
+    process_single_lc_entry(store, formatter, user_id.tenant, bucket_name);
+    formatter->flush(cout);
+  } else {
+    int ret = store->meta_mgr->list_keys_init("bucket", marker, &handle);
+    if (ret < 0) {
+      std::cerr << "ERROR: can't get key: " << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+
+    {
+      formatter->open_array_section("lc_fix_status");
+      auto sg = make_scope_guard([&store, &handle, &formatter](){
+        store->meta_mgr->list_keys_complete(handle);
+        formatter->close_section(); // lc_fix_status
+        formatter->flush(cout);
+      });
+      do {
+        list<std::string> keys;
+        ret = store->meta_mgr->list_keys_next(handle, default_max_keys, keys, &truncated);
+        if (ret < 0 && ret != -ENOENT) {
+          std::cerr << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << std::endl;
+          return ret;
+        }
+        if (ret != -ENOENT) {
+          for (const auto &key:keys) {
+            auto [tenant_name, bucket_name] = split_tenant(key);
+            process_single_lc_entry(store, formatter, tenant_name, bucket_name);
+          }
+        }
+        formatter->flush(cout); // regularly flush every 1k entries
+      } while (truncated);
+    }
+
+  }
+  return 0;
+
+}
+
+/*
+ * Report whether an object carries an RGW_ATTR_DELETE_AT attribute whose
+ * time is non-zero and not later than the current clock.
+ *
+ * delete_at receives the decoded timestamp when the attribute could be read
+ * and decoded. A missing or undecodable attribute yields false.
+ */
+static bool has_object_expired(RGWRados *store, const RGWBucketInfo& bucket_info,
+                               const rgw_obj_key& key, utime_t& delete_at)
+{
+  rgw_obj target(bucket_info.bucket, key);
+  bufferlist attr_bl;
+
+  if (rgw_object_get_attr(store, bucket_info, target, RGW_ATTR_DELETE_AT, attr_bl) < 0) {
+    return false; // no delete at attr, proceed
+  }
+
+  if (decode_bl(attr_bl, delete_at) < 0) {
+    return false; // failed to parse
+  }
+
+  return delete_at <= ceph_clock_now() && !delete_at.is_zero();
+}
+
+/*
+ * Walk all objects of a resharded bucket (bucket_id differs from marker) and
+ * report those whose delete-at time has passed; unless dry_run is set each
+ * such object is also removed and the removal result dumped as "status".
+ *
+ * Buckets whose bucket_id equals their marker are skipped with return 0.
+ * Returns 0 on completion or the negative error of a failed listing.
+ */
+static int fix_bucket_obj_expiry(RGWRados *store, const RGWBucketInfo& bucket_info,
+                                 RGWFormatterFlusher& flusher, bool dry_run)
+{
+  if (bucket_info.bucket.bucket_id == bucket_info.bucket.marker) {
+    lderr(store->ctx()) << "Not a resharded bucket skipping" << dendl;
+    return 0; // not a resharded bucket, move along
+  }
+
+  Formatter *formatter = flusher.get_formatter();
+  formatter->open_array_section("expired_deletion_status");
+  // closes and flushes the array on every exit path
+  auto sg = make_scope_guard([&formatter] {
+    formatter->close_section();
+    formatter->flush(std::cout);
+  });
+
+  RGWRados::Bucket target(store, bucket_info);
+  RGWRados::Bucket::List lister(&target);
+  lister.params.list_versions = bucket_info.versioned();
+  lister.params.allow_unordered = true;
+
+  constexpr auto max_objects = 1000;
+  bool more {false};
+  do {
+    std::vector<rgw_bucket_dir_entry> batch;
+
+    int ret = lister.list_objects(max_objects, &batch, nullptr, &more);
+    if (ret < 0) {
+      lderr(store->ctx()) << "ERROR failed to list objects in the bucket" << dendl;
+      return ret;
+    }
+
+    for (const auto& entry : batch) {
+      rgw_obj_key key(entry.key);
+      utime_t delete_at;
+      if (!has_object_expired(store, bucket_info, key, delete_at)) {
+        continue;
+      }
+
+      formatter->open_object_section("object_status");
+      formatter->dump_string("object", key.name);
+      formatter->dump_stream("delete_at") << delete_at;
+
+      if (!dry_run) {
+        ret = rgw_remove_object(store, bucket_info, bucket_info.bucket, key);
+        formatter->dump_int("status", ret);
+      }
+
+      formatter->close_section(); // object_status
+    }
+    formatter->flush(cout); // regularly flush every 1k entries
+  } while (more);
+
+  return 0;
+}
+
+/*
+ * Admin entry point: initialise an RGWBucket from op_state and run
+ * fix_bucket_obj_expiry() on its bucket info. An init failure is logged and
+ * returned.
+ */
+int RGWBucketAdminOp::fix_obj_expiry(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                     RGWFormatterFlusher& flusher, bool dry_run)
+{
+  RGWBucket admin_bucket;
+  if (const int rc = admin_bucket.init(store, op_state); rc < 0) {
+    lderr(store->ctx()) << "failed to initialize bucket" << dendl;
+    return rc;
+  }
+
+  const auto& info = admin_bucket.get_bucket_info();
+  return fix_bucket_obj_expiry(store, info, flusher, dry_run);
+}
+
+/*
+ * Dump as "entity_type" ("bucket" for ENTITY_TYPE_BUCKET, "unknown" for
+ * anything else), "key" and "timestamp".
+ */
+void rgw_data_change::dump(Formatter *f) const
+{
+  const string type = (entity_type == ENTITY_TYPE_BUCKET) ? "bucket" : "unknown";
+
+  encode_json("entity_type", type, f);
+  encode_json("key", key, f);
+  encode_json("timestamp", utime_t(timestamp), f);
+}
+
+/*
+ * Inverse of dump(): any "entity_type" string other than "bucket" maps to
+ * ENTITY_TYPE_UNKNOWN.
+ */
+void rgw_data_change::decode_json(JSONObj *obj) {
+  string type_name;
+  JSONDecoder::decode_json("entity_type", type_name, obj);
+  entity_type = (type_name == "bucket") ? ENTITY_TYPE_BUCKET : ENTITY_TYPE_UNKNOWN;
+
+  JSONDecoder::decode_json("key", key, obj);
+
+  utime_t stamp;
+  JSONDecoder::decode_json("timestamp", stamp, obj);
+  timestamp = stamp.to_real_time();
+}
+
+/* Dump as "log_id", "log_timestamp" and the nested "entry", in that order. */
+void rgw_data_change_log_entry::dump(Formatter *f) const
+{
+  encode_json("log_id", log_id, f);
+  encode_json("log_timestamp", utime_t(log_timestamp), f);
+  encode_json("entry", entry, f);
+}
+
+/* Inverse of dump(): reads "log_id", "log_timestamp" and "entry". */
+void rgw_data_change_log_entry::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("log_id", log_id, obj);
+
+  utime_t stamp;
+  JSONDecoder::decode_json("log_timestamp", stamp, obj);
+  log_timestamp = stamp.to_real_time();
+
+  JSONDecoder::decode_json("entry", entry, obj);
+}
+
+/*
+ * Map a bucket shard to a data-log shard index: hash of the bucket name plus
+ * the shard id (non-positive ids count as 0), modulo num_shards.
+ */
+int RGWDataChangesLog::choose_oid(const rgw_bucket_shard& bs) {
+  int shard_shift = 0;
+  if (bs.shard_id > 0) {
+    shard_shift = bs.shard_id;
+  }
+
+  const string& bucket_name = bs.bucket.name;
+  uint32_t slot = (ceph_str_hash_linux(bucket_name.c_str(), bucket_name.size()) + shard_shift) % num_shards;
+
+  return static_cast<int>(slot);
+}
+
+/*
+ * Re-log every bucket shard registered in cur_cycle since the last run.
+ *
+ * The pending set is swapped out under the lock, one log entry is prepared
+ * per shard and grouped by data-log shard index, and each group is written
+ * with a single time_log_add(). After a successful write the expiration of
+ * every shard in the group is moved to (time before the write +
+ * rgw_data_log_window).
+ *
+ * Returns 0 when data logging is not needed or everything was written,
+ * otherwise the first negative error from time_log_add().
+ */
+int RGWDataChangesLog::renew_entries()
+{
+  if (!store->svc.zone->need_to_log_data())
+    return 0;
+
+  /* we can't keep the bucket name as part of the cls_log_entry, and we need
+   * it later, so we keep two lists under the map */
+  map<int, pair<list<rgw_bucket_shard>, list<cls_log_entry> > > by_index;
+
+  map<rgw_bucket_shard, bool> pending;
+  lock.Lock();
+  pending.swap(cur_cycle);
+  lock.Unlock();
+
+  string section;
+  real_time ut = real_clock::now();
+  for (const auto& item : pending) {
+    const rgw_bucket_shard& bs = item.first;
+
+    rgw_data_change change;
+    change.entity_type = ENTITY_TYPE_BUCKET;
+    change.key = bs.get_key();
+    change.timestamp = ut;
+
+    bufferlist bl;
+    encode(change, bl);
+
+    cls_log_entry entry;
+    store->time_log_prepare_entry(entry, ut, section, change.key, bl);
+
+    auto& group = by_index[choose_oid(bs)];
+    group.first.push_back(bs);
+    group.second.emplace_back(std::move(entry));
+  }
+
+  for (auto& slot : by_index) {
+    list<cls_log_entry>& log_batch = slot.second.second;
+
+    real_time now = real_clock::now();
+
+    int ret = store->time_log_add(oids[slot.first], log_batch, NULL);
+    if (ret < 0) {
+      /* we don't really need to have a special handling for failed cases here,
+       * as this is just an optimization. */
+      lderr(cct) << "ERROR: store->time_log_add() returned " << ret << dendl;
+      return ret;
+    }
+
+    real_time expiration = now;
+    expiration += make_timespan(cct->_conf->rgw_data_log_window);
+
+    for (auto& renewed : slot.second.first) {
+      update_renewed(renewed, expiration);
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Fetch the ChangeStatus tracked for a bucket shard, creating and
+ * registering a fresh one when none is found. Caller must hold `lock`.
+ */
+void RGWDataChangesLog::_get_change(const rgw_bucket_shard& bs, ChangeStatusPtr& status)
+{
+  ceph_assert(lock.is_locked());
+  if (changes.find(bs, status)) {
+    return;
+  }
+
+  status = ChangeStatusPtr(new ChangeStatus);
+  changes.add(bs, status);
+}
+
+/* Mark a bucket shard in cur_cycle (under `lock`) for the next renew_entries(). */
+void RGWDataChangesLog::register_renew(rgw_bucket_shard& bs)
+{
+  Mutex::Locker guard(lock);
+  cur_cycle[bs] = true;
+}
+
+/*
+ * Set the current expiration of a bucket shard's change status, creating the
+ * status first if it is not tracked yet. Takes `lock`.
+ */
+void RGWDataChangesLog::update_renewed(rgw_bucket_shard& bs, real_time& expiration)
+{
+  Mutex::Locker guard(lock);
+
+  ChangeStatusPtr tracked;
+  _get_change(bs, tracked);
+
+  ldout(cct, 20) << "RGWDataChangesLog::update_renewd() bucket_name=" << bs.bucket.name << " shard_id=" << bs.shard_id << " expiration=" << expiration << dendl;
+  tracked->cur_expiration = expiration;
+}
+
+/* Data-log shard index for (bucket, shard_id), as computed by choose_oid(). */
+int RGWDataChangesLog::get_log_shard_id(rgw_bucket& bucket, int shard_id) {
+  return choose_oid(rgw_bucket_shard(bucket, shard_id));
+}
+
+// Record in the data log that a bucket shard changed.
+//
+// Nothing is written when data logging is not needed (returns 0), when the
+// shard's current expiration is still in the future (the shard is only
+// registered for renewal), or when another caller is already writing the
+// entry (this caller waits for that write and returns its result).
+// Otherwise this caller writes the entry itself and returns the result of
+// time_log_add().
+int RGWDataChangesLog::add_entry(rgw_bucket& bucket, int shard_id) {
+ if (!store->svc.zone->need_to_log_data())
+ return 0;
+
+ if (observer) {
+ observer->on_bucket_changed(bucket.get_key());
+ }
+
+ rgw_bucket_shard bs(bucket, shard_id);
+
+ int index = choose_oid(bs);
+ mark_modified(index, bs);
+
+ // The log-wide lock only covers the lookup/creation of the status object;
+ // everything after that is serialised on the per-status lock.
+ lock.Lock();
+
+ ChangeStatusPtr status;
+ _get_change(bs, status);
+
+ lock.Unlock();
+
+ real_time now = real_clock::now();
+
+ status->lock->Lock();
+
+ ldout(cct, 20) << "RGWDataChangesLog::add_entry() bucket.name=" << bucket.name << " shard_id=" << shard_id << " now=" << now << " cur_expiration=" << status->cur_expiration << dendl;
+
+ if (now < status->cur_expiration) {
+ /* no need to send, recently completed */
+ status->lock->Unlock();
+
+ register_renew(bs);
+ return 0;
+ }
+
+ RefCountedCond *cond;
+
+ if (status->pending) {
+ // Another caller is writing this entry: take a reference on its
+ // condition before releasing the status lock, then wait for its result.
+ cond = status->cond;
+
+ ceph_assert(cond);
+
+ status->cond->get();
+ status->lock->Unlock();
+
+ int ret = cond->wait();
+ cond->put();
+ if (!ret) {
+ register_renew(bs);
+ }
+ return ret;
+ }
+
+ // This caller becomes the writer; later callers will find pending == true
+ // and wait on this condition.
+ status->cond = new RefCountedCond;
+ status->pending = true;
+
+ string& oid = oids[index];
+ real_time expiration;
+
+ int ret;
+
+ // Write the entry; repeat while the write succeeded but the window computed
+ // from its start time has already elapsed by the time it returned. The
+ // status lock is released around the write and re-taken afterwards.
+ do {
+ status->cur_sent = now;
+
+ expiration = now;
+ expiration += ceph::make_timespan(cct->_conf->rgw_data_log_window);
+
+ status->lock->Unlock();
+
+ bufferlist bl;
+ rgw_data_change change;
+ change.entity_type = ENTITY_TYPE_BUCKET;
+ change.key = bs.get_key();
+ change.timestamp = now;
+ encode(change, bl);
+ string section;
+
+ ldout(cct, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl;
+
+ ret = store->time_log_add(oid, now, section, change.key, bl);
+
+ now = real_clock::now();
+
+ status->lock->Lock();
+
+ } while (!ret && real_clock::now() > expiration);
+
+ // Detach the condition from the status while still holding the status lock,
+ // then publish the result to the waiters after releasing it.
+ cond = status->cond;
+
+ status->pending = false;
+ status->cur_expiration = status->cur_sent; /* time of when operation started, not completed */
+ status->cur_expiration += make_timespan(cct->_conf->rgw_data_log_window);
+ status->cond = NULL;
+ status->lock->Unlock();
+
+ cond->done(ret);
+ cond->put();
+
+ return ret;
+}
+
+/*
+ * List the entries of one data-log shard and append them, decoded, to
+ * `entries`.
+ *
+ * Returns -EINVAL when `shard` is outside [0, num_shards), the negative
+ * error of time_log_list() on listing failure, -EIO when an entry's payload
+ * cannot be decoded, and 0 otherwise. marker/out_marker/truncated are passed
+ * through to time_log_list().
+ */
+int RGWDataChangesLog::list_entries(int shard, const real_time& start_time, const real_time& end_time, int max_entries,
+                                    list<rgw_data_change_log_entry>& entries,
+                                    const string& marker,
+                                    string *out_marker,
+                                    bool *truncated) {
+  // A negative index would read before the start of oids[], so reject it
+  // together with indexes past the end.
+  if (shard < 0 || shard >= num_shards)
+    return -EINVAL;
+
+  list<cls_log_entry> log_entries;
+
+  int ret = store->time_log_list(oids[shard], start_time, end_time,
+                                 max_entries, log_entries, marker,
+                                 out_marker, truncated);
+  if (ret < 0)
+    return ret;
+
+  for (const auto& raw : log_entries) {
+    rgw_data_change_log_entry log_entry;
+    log_entry.log_id = raw.id;
+    log_entry.log_timestamp = raw.timestamp.to_real_time();
+
+    auto payload = raw.data.cbegin();
+    try {
+      decode(log_entry.entry, payload);
+    } catch (buffer::error& err) {
+      lderr(cct) << "ERROR: failed to decode data changes log entry" << dendl;
+      return -EIO;
+    }
+    // log_entry is not used again, so hand it over instead of copying it
+    entries.push_back(std::move(log_entry));
+  }
+
+  return 0;
+}
+
+/*
+ * List up to max_entries entries across shards, starting at the shard and
+ * in-shard position held in `marker` and advancing `marker` along the way.
+ *
+ * `entries` is cleared first. Shards that report -ENOENT are skipped. When a
+ * shard reports truncation the call stops there with *ptruncated = true and
+ * `marker` still pointing at that shard; otherwise *ptruncated tells whether
+ * shards remain after the loop. Returns 0 or the first other negative error.
+ */
+int RGWDataChangesLog::list_entries(const real_time& start_time, const real_time& end_time, int max_entries,
+                                    list<rgw_data_change_log_entry>& entries, LogMarker& marker, bool *ptruncated) {
+  // Defined starting value so the check below never reads an indeterminate
+  // flag if a per-shard listing leaves it untouched.
+  bool truncated = false;
+  entries.clear();
+
+  for (; marker.shard < num_shards && (int)entries.size() < max_entries;
+       marker.shard++, marker.marker.clear()) {
+    int ret = list_entries(marker.shard, start_time, end_time, max_entries - entries.size(), entries,
+                           marker.marker, NULL, &truncated);
+    if (ret == -ENOENT) {
+      continue;
+    }
+    if (ret < 0) {
+      return ret;
+    }
+    if (truncated) {
+      *ptruncated = true;
+      return 0;
+    }
+  }
+
+  *ptruncated = (marker.shard < num_shards);
+
+  return 0;
+}
+
+/*
+ * Fill `info` with the max marker and last update time taken from the log
+ * header of one shard.
+ *
+ * Returns -EINVAL when shard_id is outside [0, num_shards) and the negative
+ * error of time_log_info() on failure. -ENOENT from time_log_info() is not
+ * an error: `info` is then filled from the default-constructed header.
+ */
+int RGWDataChangesLog::get_info(int shard_id, RGWDataChangesLogInfo *info)
+{
+  // negative ids would index before the start of oids[]
+  if (shard_id < 0 || shard_id >= num_shards)
+    return -EINVAL;
+
+  // bind instead of copying the oid string
+  const string& oid = oids[shard_id];
+
+  cls_log_header header;
+
+  int ret = store->time_log_info(oid, &header);
+  if ((ret < 0) && (ret != -ENOENT))
+    return ret;
+
+  info->marker = header.max_marker;
+  info->last_update = header.max_time.to_real_time();
+
+  return 0;
+}
+
+/*
+ * Trim one data-log shard between the given times/markers.
+ *
+ * Returns -EINVAL when shard_id is outside [0, num_shards), otherwise the
+ * result of time_log_trim().
+ */
+int RGWDataChangesLog::trim_entries(int shard_id, const real_time& start_time, const real_time& end_time,
+                                    const string& start_marker, const string& end_marker)
+{
+  // Valid indexes are 0 .. num_shards-1. The previous `>` test accepted
+  // shard_id == num_shards and read one element past the end of oids[].
+  if (shard_id < 0 || shard_id >= num_shards)
+    return -EINVAL;
+
+  return store->time_log_trim(oids[shard_id], start_time, end_time,
+                              start_marker, end_marker, nullptr);
+}
+
+/*
+ * Take an exclusive lock on the shard's log object in the zone's log pool.
+ * shard_id is used to index oids[] without a range check.
+ */
+int RGWDataChangesLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) {
+  const auto& log_pool = store->svc.zone->get_zone_params().log_pool;
+  return store->lock_exclusive(log_pool, oids[shard_id], duration, zone_id, owner_id);
+}
+
+/*
+ * Release the lock on the shard's log object in the zone's log pool.
+ * shard_id is used to index oids[] without a range check.
+ */
+int RGWDataChangesLog::unlock(int shard_id, string& zone_id, string& owner_id) {
+  const auto& log_pool = store->svc.zone->get_zone_params().log_pool;
+  return store->unlock(log_pool, oids[shard_id], zone_id, owner_id);
+}
+
+/* True once the destructor has set down_flag; polled by the renew thread. */
+bool RGWDataChangesLog::going_down()
+{
+  const bool shutting_down = down_flag;
+  return shutting_down;
+}
+
+// Shut the renew thread down and release owned storage.
+// Order matters: down_flag is raised before the thread is signalled so that
+// the loop in ChangesRenewThread::entry() sees going_down() when it wakes,
+// and the thread is joined before it and the oid array are deleted.
+RGWDataChangesLog::~RGWDataChangesLog() {
+ down_flag = true;
+ renew_thread->stop();
+ renew_thread->join();
+ delete renew_thread;
+ delete[] oids;
+}
+
+/*
+ * Renew-thread body: run renew_entries(), then sleep for three quarters of
+ * rgw_data_log_window (or until stop() signals the condition), and repeat
+ * until the owning log reports going_down(). Renewal errors are logged only.
+ */
+void *RGWDataChangesLog::ChangesRenewThread::entry() {
+  for (;;) {
+    dout(2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl;
+    const int rc = log->renew_entries();
+    if (rc < 0) {
+      dout(0) << "ERROR: RGWDataChangesLog::renew_entries returned error r=" << rc << dendl;
+    }
+
+    if (log->going_down())
+      break;
+
+    const int interval = cct->_conf->rgw_data_log_window * 3 / 4;
+    lock.Lock();
+    cond.WaitInterval(lock, utime_t(interval, 0));
+    lock.Unlock();
+
+    if (log->going_down())
+      break;
+  }
+
+  return NULL;
+}
+
+/* Wake the renew thread out of its timed wait in entry(). */
+void RGWDataChangesLog::ChangesRenewThread::stop()
+{
+  Mutex::Locker guard(lock);
+  cond.Signal();
+}
+
+/*
+ * Remember that a bucket shard key was modified under the given data-log
+ * shard. The lookup runs under the read lock; the write lock is taken only
+ * when the key was not found.
+ */
+void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs)
+{
+  auto key = bs.get_key();
+
+  modified_lock.get_read();
+  map<int, set<string> >::iterator shard_it = modified_shards.find(shard_id);
+  const bool already_marked = shard_it != modified_shards.end() &&
+                              shard_it->second.find(key) != shard_it->second.end();
+  modified_lock.unlock();
+
+  if (already_marked) {
+    return;
+  }
+
+  RWLock::WLocker wl(modified_lock);
+  modified_shards[shard_id].insert(key);
+}
+
+/*
+ * Hand the accumulated modified-shard map to the caller and leave the
+ * internal map empty. Whatever `modified` held before is discarded.
+ */
+void RGWDataChangesLog::read_clear_modified(map<int, set<string> > &modified)
+{
+  RWLock::WLocker exclusive(modified_lock);
+  modified.swap(modified_shards);
+  modified_shards.clear();
+}
+
+// Dump as two fields, in this order: "bucket_info" (info) and "attrs" (attrs).
+void RGWBucketCompleteInfo::dump(Formatter *f) const {
+ encode_json("bucket_info", info, f);
+ encode_json("attrs", attrs, f);
+}
+
+// Inverse of dump(): reads "bucket_info" into info and "attrs" into attrs.
+void RGWBucketCompleteInfo::decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("bucket_info", info, obj);
+ JSONDecoder::decode_json("attrs", attrs, obj);
+}
+
+// Metadata handler for the "bucket" section, i.e. bucket entrypoints.
+// Entries are "[tenant/]bucket" strings as split by parse_bucket(); the
+// backing objects live in the zone's domain_root pool.
+class RGWBucketMetadataHandler : public RGWMetadataHandler {
+
+public:
+ string get_type() override { return "bucket"; }
+
+ // Read the entrypoint for `entry` and return it as a newly allocated
+ // RGWBucketEntryMetadataObject in *obj (owned by the caller).
+ // Returns 0 or the negative error of the entrypoint read.
+ int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override {
+ RGWObjVersionTracker ot;
+ RGWBucketEntryPoint be;
+
+ real_time mtime;
+ map<string, bufferlist> attrs;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ string tenant_name, bucket_name;
+ parse_bucket(entry, &tenant_name, &bucket_name);
+ int ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, be, &ot, &mtime, &attrs);
+ if (ret < 0)
+ return ret;
+
+ RGWBucketEntryMetadataObject *mdo = new RGWBucketEntryMetadataObject(be, ot.read_version, mtime);
+
+ *obj = mdo;
+
+ return 0;
+ }
+
+ // Store the entrypoint decoded from `obj` and then link or unlink the
+ // bucket for its owner according to be.linked.
+ // Returns -EINVAL on a JSON decode error, STATUS_NO_APPLY when
+ // check_versions() rejects the update against the existing entrypoint,
+ // a negative error from the read or write, or the link/unlink result.
+ int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+ real_time mtime, JSONObj *obj, sync_type_t sync_type) override {
+ RGWBucketEntryPoint be, old_be;
+ try {
+ decode_json_obj(be, obj);
+ } catch (JSONDecoder::err& e) {
+ return -EINVAL;
+ }
+
+ real_time orig_mtime;
+ map<string, bufferlist> attrs;
+
+ RGWObjVersionTracker old_ot;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ string tenant_name, bucket_name;
+ parse_bucket(entry, &tenant_name, &bucket_name);
+ // -ENOENT is fine here: it just means there is no existing entrypoint.
+ int ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, old_be, &old_ot, &orig_mtime, &attrs);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+
+ // are we actually going to perform this put, or is it too old?
+ if (ret != -ENOENT &&
+ !check_versions(old_ot.read_version, orig_mtime,
+ objv_tracker.write_version, mtime, sync_type)) {
+ return STATUS_NO_APPLY;
+ }
+
+ objv_tracker.read_version = old_ot.read_version; /* maintain the obj version we just read */
+
+ // attrs read from the existing entrypoint (empty if none) are written back
+ ret = store->put_bucket_entrypoint_info(tenant_name, bucket_name, be, false, objv_tracker, mtime, &attrs);
+ if (ret < 0)
+ return ret;
+
+ /* link bucket */
+ if (be.linked) {
+ ret = rgw_link_bucket(store, be.owner, be.bucket, be.creation_time, false);
+ } else {
+ ret = rgw_unlink_bucket(store, be.owner, be.bucket.tenant,
+ be.bucket.name, false);
+ }
+
+ return ret;
+ }
+
+ // Listing state allocated by list_keys_init() and passed around as the
+ // opaque handle.
+ struct list_keys_info {
+ RGWRados *store;
+ RGWListRawObjsCtx ctx;
+ };
+
+ // Unlink the bucket from its owner and delete the entrypoint object.
+ // Only a failure to read the entrypoint is returned; unlink/delete
+ // failures are logged and the call still returns 0.
+ int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+ RGWBucketEntryPoint be;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ string tenant_name, bucket_name;
+ parse_bucket(entry, &tenant_name, &bucket_name);
+ int ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, be, &objv_tracker, NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * We're unlinking the bucket but we don't want to update the entrypoint here - we're removing
+ * it immediately and don't want to invalidate our cached objv_version or the bucket obj removal
+ * will incorrectly fail.
+ */
+ ret = rgw_unlink_bucket(store, be.owner, tenant_name, bucket_name, false);
+ if (ret < 0) {
+ lderr(store->ctx()) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl;
+ }
+
+ ret = rgw_bucket_delete_bucket_obj(store, tenant_name, bucket_name, objv_tracker);
+ if (ret < 0) {
+ lderr(store->ctx()) << "could not delete bucket=" << entry << dendl;
+ }
+ /* idempotent */
+ return 0;
+ }
+
+ // The object id is the key itself; the pool is the zone's domain_root.
+ void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {
+ oid = key;
+ pool = store->svc.zone->get_zone_params().domain_root;
+ }
+
+ // Start a raw listing of domain_root at `marker`. On success *phandle owns
+ // a list_keys_info that must be released with list_keys_complete(); on
+ // failure the unique_ptr frees it and the error is returned.
+ int list_keys_init(RGWRados *store, const string& marker, void **phandle) override {
+ auto info = std::make_unique<list_keys_info>();
+
+ info->store = store;
+
+ int ret = store->list_raw_objects_init(store->svc.zone->get_zone_params().domain_root, marker,
+ &info->ctx);
+ if (ret < 0) {
+ return ret;
+ }
+ *phandle = (void *)info.release();
+
+ return 0;
+ }
+
+ // Fetch the next batch of raw object names and keep those that do not
+ // start with '.'. `keys` is cleared first, so it may hold fewer than `max`
+ // names. -ENOENT from the raw listing is reported as success with
+ // *truncated = false.
+ int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+
+ string no_filter;
+
+ keys.clear();
+
+ RGWRados *store = info->store;
+
+ list<string> unfiltered_keys;
+
+ int ret = store->list_raw_objects_next(no_filter, max, info->ctx,
+ unfiltered_keys, truncated);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+ if (ret == -ENOENT) {
+ if (truncated)
+ *truncated = false;
+ return 0;
+ }
+
+ // now filter out the system entries
+ list<string>::iterator iter;
+ for (iter = unfiltered_keys.begin(); iter != unfiltered_keys.end(); ++iter) {
+ string& k = *iter;
+
+ if (k[0] != '.') {
+ keys.push_back(k);
+ }
+ }
+
+ return 0;
+ }
+
+ // Free the listing state created by list_keys_init().
+ void list_keys_complete(void *handle) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+ delete info;
+ }
+
+ // Current cursor of the raw listing behind `handle`.
+ string get_marker(void *handle) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+ return info->store->list_raw_objs_get_cursor(info->ctx);
+ }
+};
+
+/*
+ * Compute the MD5 of a bucket entrypoint's JSON dump (non-pretty) and store
+ * it, hex-encoded, in md5_digest.
+ */
+void get_md5_digest(const RGWBucketEntryPoint *be, string& md5_digest) {
+
+  char md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+  unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  bufferlist bl;
+
+  {
+    // Automatic storage instead of new/delete: the formatter is released on
+    // every path out of this scope, including an exception from dump()/flush().
+    JSONFormatter f(false);
+    be->dump(&f);
+    f.flush(bl);
+  }
+
+  MD5 hash;
+  hash.Update((const unsigned char *)bl.c_str(), bl.length());
+  hash.Final(m);
+
+  buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, md5);
+
+  md5_digest = md5;
+}
+
+#define ARCHIVE_META_ATTR RGW_ATTR_PREFIX "zone.archive.info"
+
+/*
+ * Remembers which bucket an archive-zone copy originated from. Stored,
+ * encoded, in the bucket instance attrs under ARCHIVE_META_ATTR.
+ */
+struct archive_meta_info {
+  rgw_bucket orig_bucket;
+
+  /*
+   * Load from attrs. Returns false when the attribute is absent or cannot be
+   * decoded (the latter is logged), true otherwise.
+   */
+  bool from_attrs(CephContext *cct, map<string, bufferlist>& attrs) {
+    auto found = attrs.find(ARCHIVE_META_ATTR);
+    if (found == attrs.end()) {
+      return false;
+    }
+
+    try {
+      auto cursor = found->second.cbegin();
+      decode(cursor);
+      return true;
+    } catch (buffer::error& err) {
+      ldout(cct, 0) << "ERROR: failed to decode archive meta info" << dendl;
+    }
+    return false;
+  }
+
+  /* Encode into attrs[ARCHIVE_META_ATTR], creating the slot if needed. */
+  void store_in_attrs(map<string, bufferlist>& attrs) const {
+    bufferlist& slot = attrs[ARCHIVE_META_ATTR];
+    encode(slot);
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(orig_bucket, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(orig_bucket, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(archive_meta_info)
+
+/*
+ * "bucket" metadata handler for archive zones: removing a bucket does not
+ * drop it but renames it to "<orig name>-deleted-<md5 of entrypoint>".
+ */
+class RGWArchiveBucketMetadataHandler : public RGWBucketMetadataHandler {
+public:
+  /*
+   * Rename instead of remove.
+   *
+   * Copies the bucket instance and entrypoint under the new name, links the
+   * new bucket to the owner, then unlinks and deletes the old entrypoint and
+   * the old instance object. Failures while reading or creating the new
+   * objects are returned; failures while cleaning up the old ones are logged
+   * and the call still returns 0.
+   */
+  int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+    ldout(store->ctx(), 5) << "SKIP: bucket removal is not allowed on archive zone: bucket:" << entry << " ... proceeding to rename" << dendl;
+
+    string tenant_name, bucket_name;
+    parse_bucket(entry, &tenant_name, &bucket_name);
+
+    real_time mtime;
+
+    /* read original entrypoint */
+
+    RGWBucketEntryPoint be;
+    auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+    map<string, bufferlist> attrs;
+    int ret = store->get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, be, &objv_tracker, &mtime, &attrs);
+    if (ret < 0) {
+      return ret;
+    }
+
+    string meta_name = bucket_name + ":" + be.bucket.bucket_id;
+
+    /* read original bucket instance info */
+
+    map<string, bufferlist> attrs_m;
+    ceph::real_time orig_mtime;
+    RGWBucketInfo old_bi;
+
+    ret = store->get_bucket_instance_info(obj_ctx, be.bucket, old_bi, &orig_mtime, &attrs_m);
+    if (ret < 0) {
+      return ret;
+    }
+
+    archive_meta_info ami;
+
+    /* first rename of this bucket: remember where it came from */
+    if (!ami.from_attrs(store->ctx(), attrs_m)) {
+      ami.orig_bucket = old_bi.bucket;
+      ami.store_in_attrs(attrs_m);
+    }
+
+    /* generate a new bucket instance. We could have avoided this if we could just point a new
+     * bucket entry point to the old bucket instance, however, due to limitation in the way
+     * we index buckets under the user, bucket entrypoint and bucket instance of the same
+     * bucket need to have the same name, so we need to copy the old bucket instance into
+     * to a new entry with the new name
+     */
+
+    string new_bucket_name;
+
+    RGWBucketInfo new_bi = old_bi;
+    RGWBucketEntryPoint new_be = be;
+
+    string md5_digest;
+
+    get_md5_digest(&new_be, md5_digest);
+    new_bucket_name = ami.orig_bucket.name + "-deleted-" + md5_digest;
+
+    new_bi.bucket.name = new_bucket_name;
+    new_bi.objv_tracker.clear();
+
+    new_be.bucket.name = new_bucket_name;
+
+    ret = store->put_bucket_instance_info(new_bi, false, orig_mtime, &attrs_m);
+    if (ret < 0) {
+      ldout(store->ctx(), 0) << "ERROR: failed to put new bucket instance info for bucket=" << new_bi.bucket << " ret=" << ret << dendl;
+      return ret;
+    }
+
+    /* store a new entrypoint */
+
+    RGWObjVersionTracker ot;
+    ot.generate_new_write_ver(store->ctx());
+
+    ret = store->put_bucket_entrypoint_info(tenant_name, new_bucket_name, new_be, true, ot, mtime, &attrs);
+    if (ret < 0) {
+      ldout(store->ctx(), 0) << "ERROR: failed to put new bucket entrypoint for bucket=" << new_be.bucket << " ret=" << ret << dendl;
+      return ret;
+    }
+
+    /* link new bucket */
+
+    ret = rgw_link_bucket(store, new_be.owner, new_be.bucket, new_be.creation_time, false);
+    if (ret < 0) {
+      ldout(store->ctx(), 0) << "ERROR: failed to link new bucket for bucket=" << new_be.bucket << " ret=" << ret << dendl;
+      return ret;
+    }
+
+    /* clean up old stuff */
+
+    ret = rgw_unlink_bucket(store, be.owner, tenant_name, bucket_name, false);
+    if (ret < 0) {
+      lderr(store->ctx()) << "could not unlink bucket=" << entry << " owner=" << be.owner << dendl;
+    }
+
+    // if (ret == -ECANCELED) it means that there was a race here, and someone
+    // wrote to the bucket entrypoint just before we removed it. The question is
+    // whether it was a newly created bucket entrypoint ... in which case we
+    // should ignore the error and move forward, or whether it is a higher version
+    // of the same bucket instance ... in which we should retry
+    ret = rgw_bucket_delete_bucket_obj(store, tenant_name, bucket_name, objv_tracker);
+    if (ret < 0) {
+      lderr(store->ctx()) << "could not delete bucket=" << entry << dendl;
+    }
+
+    ret = rgw_delete_system_obj(store, store->svc.zone->get_zone_params().domain_root, RGW_BUCKET_INSTANCE_MD_PREFIX + meta_name, NULL);
+    if (ret < 0 && ret != -ENOENT) {
+      /* still best-effort, but leave a trace like the two cleanup steps above
+       * instead of dropping the error silently */
+      lderr(store->ctx()) << "could not delete bucket instance=" << meta_name << " ret=" << ret << dendl;
+    }
+
+    /* idempotent */
+
+    return 0;
+  }
+
+  /*
+   * Entries whose name contains "-deleted-" are first renamed away through
+   * remove() if they already exist, then the regular put is applied.
+   */
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          real_time mtime, JSONObj *obj, sync_type_t sync_type) override {
+    if (entry.find("-deleted-") != string::npos) {
+      RGWObjVersionTracker ot;
+      RGWMetadataObject *robj;
+      int ret = get(store, entry, &robj);
+      if (ret != -ENOENT) {
+        if (ret < 0) {
+          return ret;
+        }
+        /* robj is only valid (and owned by us) once get() succeeded */
+        ot.read_version = robj->get_version();
+        delete robj;
+
+        ret = remove(store, entry, ot);
+        if (ret < 0) {
+          return ret;
+        }
+      }
+    }
+
+    return RGWBucketMetadataHandler::put(store, entry, objv_tracker,
+                                         mtime, obj, sync_type);
+  }
+
+};
+
+/*
+ * Metadata handler for the "bucket.instance" section. Backing objects live
+ * in the zone's domain_root pool under RGW_BUCKET_INSTANCE_MD_PREFIX.
+ */
+class RGWBucketInstanceMetadataHandler : public RGWMetadataHandler {
+
+public:
+  string get_type() override { return "bucket.instance"; }
+
+  /*
+   * Read bucket instance info plus attrs and return them as a newly
+   * allocated RGWBucketInstanceMetadataObject in *obj (owned by the caller).
+   */
+  int get(RGWRados *store, string& oid, RGWMetadataObject **obj) override {
+    RGWBucketCompleteInfo bci;
+
+    real_time mtime;
+    auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+    int ret = store->get_bucket_instance_info(obj_ctx, oid, bci.info, &mtime, &bci.attrs);
+    if (ret < 0)
+      return ret;
+
+    RGWBucketInstanceMetadataObject *mdo = new RGWBucketInstanceMetadataObject(bci, bci.info.objv_tracker.read_version, mtime);
+
+    *obj = mdo;
+
+    return 0;
+  }
+
+  /*
+   * Store the bucket instance decoded from `obj`.
+   *
+   * New instances (none stored, or stored bucket_id differs) get their
+   * name/id/tenant from `entry` and an index type from the placement rule;
+   * existing ones keep their stored placement. A change of the datasync flag
+   * on an existing instance writes bilog stop/resync entries and one data-log
+   * entry per shard before the version check.
+   *
+   * Returns -EINVAL on JSON decode errors, STATUS_NO_APPLY when
+   * check_versions() rejects the update, a negative error from any step, or
+   * STATUS_APPLIED.
+   */
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          real_time mtime, JSONObj *obj, sync_type_t sync_type) override {
+    RGWBucketCompleteInfo bci, old_bci;
+    try {
+      decode_json_obj(bci, obj);
+    } catch (JSONDecoder::err& e) {
+      return -EINVAL;
+    }
+
+    real_time orig_mtime;
+    auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+    int ret = store->get_bucket_instance_info(obj_ctx, entry, old_bci.info,
+                                              &orig_mtime, &old_bci.attrs);
+    bool exists = (ret != -ENOENT);
+    if (ret < 0 && exists)
+      return ret;
+
+    if (!exists || old_bci.info.bucket.bucket_id != bci.info.bucket.bucket_id) {
+      /* a new bucket, we need to select a new bucket placement for it */
+      auto key(entry);
+      rgw_bucket_instance_oid_to_key(key);
+      string tenant_name;
+      string bucket_name;
+      string bucket_instance;
+      parse_bucket(key, &tenant_name, &bucket_name, &bucket_instance);
+
+      RGWZonePlacementInfo rule_info;
+      bci.info.bucket.name = bucket_name;
+      bci.info.bucket.bucket_id = bucket_instance;
+      bci.info.bucket.tenant = tenant_name;
+      ret = store->svc.zone->select_bucket_location_by_rule(bci.info.placement_rule, &rule_info);
+      if (ret < 0) {
+        ldout(store->ctx(), 0) << "ERROR: select_bucket_placement() returned " << ret << dendl;
+        return ret;
+      }
+      bci.info.index_type = rule_info.index_type;
+    } else {
+      /* existing bucket, keep its placement */
+      bci.info.bucket.explicit_placement = old_bci.info.bucket.explicit_placement;
+      bci.info.placement_rule = old_bci.info.placement_rule;
+    }
+
+    if (exists && old_bci.info.datasync_flag_enabled() != bci.info.datasync_flag_enabled()) {
+      /* unsharded buckets (num_shards == 0) log a single entry with shard id -1 */
+      int shards_num = bci.info.num_shards? bci.info.num_shards : 1;
+      int shard_id = bci.info.num_shards? 0 : -1;
+
+      if (!bci.info.datasync_flag_enabled()) {
+        ret = store->stop_bi_log_entries(bci.info, -1);
+        if (ret < 0) {
+          lderr(store->ctx()) << "ERROR: failed writing bilog" << dendl;
+          return ret;
+        }
+      } else {
+        ret = store->resync_bi_log_entries(bci.info, -1);
+        if (ret < 0) {
+          lderr(store->ctx()) << "ERROR: failed writing bilog" << dendl;
+          return ret;
+        }
+      }
+
+      for (int i = 0; i < shards_num; ++i, ++shard_id) {
+        ret = store->data_log->add_entry(bci.info.bucket, shard_id);
+        if (ret < 0) {
+          lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+          return ret;
+        }
+      }
+    }
+
+    // are we actually going to perform this put, or is it too old?
+    if (exists &&
+        !check_versions(old_bci.info.objv_tracker.read_version, orig_mtime,
+                        objv_tracker.write_version, mtime, sync_type)) {
+      objv_tracker.read_version = old_bci.info.objv_tracker.read_version;
+      return STATUS_NO_APPLY;
+    }
+
+    /* record the read version (if any), store the new version */
+    bci.info.objv_tracker.read_version = old_bci.info.objv_tracker.read_version;
+    bci.info.objv_tracker.write_version = objv_tracker.write_version;
+
+    ret = store->put_bucket_instance_info(bci.info, false, mtime, &bci.attrs);
+    if (ret < 0)
+      return ret;
+
+    objv_tracker = bci.info.objv_tracker;
+
+    ret = store->init_bucket_index(bci.info, bci.info.num_shards);
+    if (ret < 0)
+      return ret;
+
+    return STATUS_APPLIED;
+  }
+
+  /* Listing state allocated by list_keys_init() and passed as opaque handle. */
+  struct list_keys_info {
+    RGWRados *store;
+    RGWListRawObjsCtx ctx;
+  };
+
+  /*
+   * Remove the instance entry. -ENOENT from the info read is tolerated; the
+   * tracker of the (possibly default-constructed) info is passed on, not the
+   * caller's objv_tracker.
+   */
+  int remove(RGWRados *store, string& entry,
+             RGWObjVersionTracker& objv_tracker) override {
+    RGWBucketInfo info;
+    auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+    int ret = store->get_bucket_instance_info(obj_ctx, entry, info, NULL, NULL);
+    if (ret < 0 && ret != -ENOENT)
+      return ret;
+
+    return rgw_bucket_instance_remove_entry(store, entry,
+                                            &info.objv_tracker);
+  }
+
+  /* oid = prefix + key, converted with rgw_bucket_instance_key_to_oid(). */
+  void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {
+    oid = RGW_BUCKET_INSTANCE_MD_PREFIX + key;
+    rgw_bucket_instance_key_to_oid(oid);
+    pool = store->svc.zone->get_zone_params().domain_root;
+  }
+
+  /*
+   * Start a raw listing of domain_root at `marker`. On success *phandle owns
+   * a list_keys_info to be released with list_keys_complete().
+   */
+  int list_keys_init(RGWRados *store, const string& marker, void **phandle) override {
+    auto info = std::make_unique<list_keys_info>();
+
+    info->store = store;
+
+    int ret = store->list_raw_objects_init(store->svc.zone->get_zone_params().domain_root, marker,
+                                           &info->ctx);
+    if (ret < 0) {
+      return ret;
+    }
+    *phandle = (void *)info.release();
+
+    return 0;
+  }
+
+  /*
+   * Fetch the next batch of raw object names and keep those that start with
+   * RGW_BUCKET_INSTANCE_MD_PREFIX, with the prefix stripped and the rest
+   * converted by rgw_bucket_instance_oid_to_key(). -ENOENT from the raw
+   * listing is reported as success with *truncated = false.
+   */
+  int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
+    list_keys_info *info = static_cast<list_keys_info *>(handle);
+
+    string no_filter;
+
+    keys.clear();
+
+    RGWRados *store = info->store;
+
+    list<string> unfiltered_keys;
+
+    int ret = store->list_raw_objects_next(no_filter, max, info->ctx,
+                                           unfiltered_keys, truncated);
+    if (ret < 0 && ret != -ENOENT)
+      return ret;
+    if (ret == -ENOENT) {
+      if (truncated)
+        *truncated = false;
+      return 0;
+    }
+
+    constexpr int prefix_size = sizeof(RGW_BUCKET_INSTANCE_MD_PREFIX) - 1;
+    // now filter in the relevant entries
+    list<string>::iterator iter;
+    for (iter = unfiltered_keys.begin(); iter != unfiltered_keys.end(); ++iter) {
+      string& k = *iter;
+
+      if (k.compare(0, prefix_size, RGW_BUCKET_INSTANCE_MD_PREFIX) == 0) {
+        auto oid = k.substr(prefix_size);
+        rgw_bucket_instance_oid_to_key(oid);
+        keys.emplace_back(std::move(oid));
+      }
+    }
+
+    return 0;
+  }
+
+  /* Free the listing state created by list_keys_init(). */
+  void list_keys_complete(void *handle) override {
+    list_keys_info *info = static_cast<list_keys_info *>(handle);
+    delete info;
+  }
+
+  /* Current cursor of the raw listing behind `handle`. */
+  string get_marker(void *handle) override {
+    list_keys_info *info = static_cast<list_keys_info *>(handle);
+    return info->store->list_raw_objs_get_cursor(info->ctx);
+  }
+
+  /*
+   * hash entry for mdlog placement. Use the same hash key we'd have for the bucket entry
+   * point, so that the log entries end up at the same log shard, so that we process them
+   * in order
+   */
+  void get_hash_key(const string& section, const string& key, string& hash_key) override {
+    /* keep everything before the first ':' (the whole key when there is
+     * none); compare against npos rather than squeezing size_t into an int */
+    const auto pos = key.find(':');
+    string k;
+    if (pos == string::npos)
+      k = key;
+    else
+      k = key.substr(0, pos);
+    hash_key = "bucket:" + k;
+  }
+};
+
+/*
+ * "bucket.instance" metadata handler for archive zones: removal requests are
+ * logged and ignored.
+ */
+class RGWArchiveBucketInstanceMetadataHandler : public RGWBucketInstanceMetadataHandler {
+public:
+
+  /* Always reports success without touching the stored instance. */
+  int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+    ldout(store->ctx(), 0)
+      << "SKIP: bucket instance removal is not allowed on archive zone: bucket.instance:"
+      << entry
+      << dendl;
+    return 0;
+  }
+};
+
+/* Allocate the default "bucket" metadata handler; the caller owns it. */
+RGWMetadataHandler *RGWBucketMetaHandlerAllocator::alloc() {
+  RGWMetadataHandler *handler = new RGWBucketMetadataHandler;
+  return handler;
+}
+
+/* Allocate the default "bucket.instance" metadata handler; the caller owns it. */
+RGWMetadataHandler *RGWBucketInstanceMetaHandlerAllocator::alloc() {
+  RGWMetadataHandler *handler = new RGWBucketInstanceMetadataHandler;
+  return handler;
+}
+
+/* Allocate the archive-zone "bucket" metadata handler; the caller owns it. */
+RGWMetadataHandler *RGWArchiveBucketMetaHandlerAllocator::alloc() {
+  RGWMetadataHandler *handler = new RGWArchiveBucketMetadataHandler;
+  return handler;
+}
+
+/* Allocate the archive-zone "bucket.instance" metadata handler; the caller owns it. */
+RGWMetadataHandler *RGWArchiveBucketInstanceMetaHandlerAllocator::alloc() {
+  RGWMetadataHandler *handler = new RGWArchiveBucketInstanceMetadataHandler;
+  return handler;
+}
+
+/*
+ * Create the bucket and bucket-instance metadata handlers and register them
+ * with the metadata manager. The store's sync module supplies them when one
+ * is present; otherwise the default allocators are used.
+ */
+void rgw_bucket_init(RGWMetadataManager *mm)
+{
+  auto sync_module = mm->get_store()->get_sync_module();
+  if (!sync_module) {
+    bucket_meta_handler = RGWBucketMetaHandlerAllocator::alloc();
+    bucket_instance_meta_handler = RGWBucketInstanceMetaHandlerAllocator::alloc();
+  } else {
+    bucket_meta_handler = sync_module->alloc_bucket_meta_handler();
+    bucket_instance_meta_handler = sync_module->alloc_bucket_instance_meta_handler();
+  }
+
+  mm->register_handler(bucket_meta_handler);
+  mm->register_handler(bucket_instance_meta_handler);
+}
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
new file mode 100644
index 00000000..11623b85
--- /dev/null
+++ b/src/rgw/rgw_bucket.h
@@ -0,0 +1,575 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_BUCKET_H
+#define CEPH_RGW_BUCKET_H
+
+#include <string>
+#include <memory>
+
+#include "include/types.h"
+#include "rgw_common.h"
+#include "rgw_tools.h"
+
+#include "rgw_rados.h"
+
+#include "rgw_string.h"
+
+#include "common/Formatter.h"
+#include "common/lru_map.h"
+#include "common/ceph_time.h"
+#include "rgw_formats.h"
+
+
+// define as static when RGWBucket implementation completes
+extern void rgw_get_buckets_obj(const rgw_user& user_id, string& buckets_obj_id);
+
+extern int rgw_bucket_store_info(RGWRados *store, const string& bucket_name, bufferlist& bl, bool exclusive,
+ map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker,
+ real_time mtime);
+extern int rgw_bucket_instance_store_info(RGWRados *store, string& oid, bufferlist& bl, bool exclusive,
+ map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker,
+ real_time mtime);
+
+extern int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *target_bucket_instance, int *shard_id);
+extern int rgw_bucket_parse_bucket_key(CephContext *cct, const string& key,
+ rgw_bucket* bucket, int *shard_id);
+
+extern int rgw_bucket_instance_remove_entry(RGWRados *store, const string& entry,
+ RGWObjVersionTracker *objv_tracker);
+extern void rgw_bucket_instance_key_to_oid(string& key);
+extern void rgw_bucket_instance_oid_to_key(string& oid);
+
+extern int rgw_bucket_delete_bucket_obj(RGWRados *store,
+ const string& tenant_name,
+ const string& bucket_name,
+ RGWObjVersionTracker& objv_tracker);
+
+extern int rgw_bucket_sync_user_stats(RGWRados *store, const rgw_user& user_id, const RGWBucketInfo& bucket_info);
+extern int rgw_bucket_sync_user_stats(RGWRados *store, const string& tenant_name, const string& bucket_name);
+
+extern std::string rgw_make_bucket_entry_name(const std::string& tenant_name,
+ const std::string& bucket_name);
+static inline void rgw_make_bucket_entry_name(const string& tenant_name, // out-parameter convenience overload
+ const string& bucket_name,
+ std::string& bucket_entry) { // receives the computed entry name
+ bucket_entry = rgw_make_bucket_entry_name(tenant_name, bucket_name); // delegates to the value-returning overload
+}
+
+extern void rgw_parse_url_bucket(const string& bucket,
+ const string& auth_tenant,
+ string &tenant_name, string &bucket_name);
+
+struct RGWBucketCompleteInfo { // bundles a bucket's RGWBucketInfo with its attribute map
+ RGWBucketInfo info;
+ map<string, bufferlist> attrs; // attribute name -> raw value buffer
+
+ void dump(Formatter *f) const; // defined out of line
+ void decode_json(JSONObj *obj); // defined out of line
+};
+
+class RGWBucketEntryMetadataObject : public RGWMetadataObject { // metadata object holding a copy of a bucket entry point
+ RGWBucketEntryPoint ep;
+public:
+ RGWBucketEntryMetadataObject(RGWBucketEntryPoint& _ep, obj_version& v, real_time m) : ep(_ep) { // copies _ep; the arguments are only read here
+ objv = v; // objv and mtime are not declared in this class — presumably inherited from RGWMetadataObject
+ mtime = m;
+ }
+
+ void dump(Formatter *f) const override { // dumping forwards to the stored entry point
+ ep.dump(f);
+ }
+};
+
+class RGWBucketInstanceMetadataObject : public RGWMetadataObject { // metadata object holding a bucket instance's info and attrs
+ RGWBucketCompleteInfo info;
+public:
+ RGWBucketInstanceMetadataObject() {} // leaves info default-constructed, e.g. to be filled by decode_json()
+ RGWBucketInstanceMetadataObject(RGWBucketCompleteInfo& i, obj_version& v, real_time m) : info(i) { // copies i; the arguments are only read here
+ objv = v; // objv and mtime are not declared in this class — presumably inherited from RGWMetadataObject
+ mtime = m;
+ }
+
+ void dump(Formatter *f) const override { // forwards to RGWBucketCompleteInfo::dump
+ info.dump(f);
+ }
+
+ void decode_json(JSONObj *obj) { // forwards to RGWBucketCompleteInfo::decode_json
+ info.decode_json(obj);
+ }
+
+ RGWBucketInfo& get_bucket_info() { return info.info; } // mutable access to the embedded bucket info
+};
+
+/**
+ * Store a list of the user's buckets, with associated functions.
+ *
+ * Buckets are kept in a map keyed by bucket name, so adding a bucket whose
+ * name is already present replaces the earlier entry.
+ */
+class RGWUserBuckets
+{
+  std::map<std::string, RGWBucketEnt> buckets;
+
+public:
+  RGWUserBuckets() = default;
+  RGWUserBuckets(RGWUserBuckets&&) = default;
+
+  RGWUserBuckets& operator=(const RGWUserBuckets&) = default;
+
+  /** Serialize the bucket map into bl. */
+  void encode(bufferlist& bl) const {
+    using ceph::encode;
+    encode(buckets, bl);
+  }
+  /** Replace the bucket map with the one decoded from bl. */
+  void decode(bufferlist::const_iterator& bl) {
+    using ceph::decode;
+    decode(buckets, bl);
+  }
+  /**
+   * Check if the user owns a bucket by the given name.
+   * Takes a const reference and is const itself so it can be used with
+   * temporaries and on const objects.
+   */
+  bool owns(const string& name) const {
+    return buckets.find(name) != buckets.end();
+  }
+
+  /**
+   * Add a (created) bucket to the user's bucket list.
+   */
+  void add(const RGWBucketEnt& bucket) {
+    buckets[bucket.bucket.name] = bucket;
+  }
+
+  /**
+   * Remove a bucket from the user's list by name.
+   * Erasing by key is a no-op when the name is absent.
+   */
+  void remove(const string& name) {
+    buckets.erase(name);
+  }
+
+  /**
+   * Get the user's buckets as a map.
+   */
+  map<string, RGWBucketEnt>& get_buckets() { return buckets; }
+  const map<string, RGWBucketEnt>& get_buckets() const { return buckets; }
+
+  /**
+   * Cleanup data structure
+   */
+  void clear() { buckets.clear(); }
+
+  /** Number of buckets currently held. */
+  size_t count() const { return buckets.size(); }
+};
+WRITE_CLASS_ENCODER(RGWUserBuckets)
+
+class RGWMetadataManager;
+class RGWMetadataHandler;
+
+class RGWBucketMetaHandlerAllocator { // static factory holder for the bucket metadata handler
+public:
+ static RGWMetadataHandler *alloc(); // defined in rgw_bucket.cc; returns a newly allocated handler
+};
+
+class RGWBucketInstanceMetaHandlerAllocator { // static factory holder for the bucket-instance metadata handler
+public:
+ static RGWMetadataHandler *alloc();
+};
+
+class RGWArchiveBucketMetaHandlerAllocator { // archive-zone counterpart of RGWBucketMetaHandlerAllocator
+public:
+ static RGWMetadataHandler *alloc();
+};
+
+class RGWArchiveBucketInstanceMetaHandlerAllocator { // archive-zone counterpart of RGWBucketInstanceMetaHandlerAllocator
+public:
+ static RGWMetadataHandler *alloc();
+};
+
+extern void rgw_bucket_init(RGWMetadataManager *mm);
+/**
+ * Get all the buckets owned by a user and fill up an RGWUserBuckets with them.
+ * Returns: 0 on success, -ERR# on failure.
+ */
+extern int rgw_read_user_buckets(RGWRados *store,
+ const rgw_user& user_id,
+ RGWUserBuckets& buckets,
+ const string& marker,
+ const string& end_marker,
+ uint64_t max,
+ bool need_stats,
+ bool* is_truncated,
+ uint64_t default_amount = 1000);
+
+extern int rgw_link_bucket(RGWRados* store,
+ const rgw_user& user_id,
+ rgw_bucket& bucket,
+ ceph::real_time creation_time,
+ bool update_entrypoint = true);
+extern int rgw_unlink_bucket(RGWRados *store, const rgw_user& user_id,
+ const string& tenant_name, const string& bucket_name, bool update_entrypoint = true);
+
+extern int rgw_remove_object(RGWRados *store, const RGWBucketInfo& bucket_info, const rgw_bucket& bucket, rgw_obj_key& key);
+extern int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children);
+extern int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket, int concurrent_max);
+
+extern int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& attrs,
+ RGWObjVersionTracker *objv_tracker);
+extern int rgw_object_get_attr(RGWRados* store, const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj, const char* attr_name,
+ bufferlist& out_bl);
+
+extern void check_bad_user_bucket_mapping(RGWRados *store, const rgw_user& user_id, bool fix);
+
+/*
+ * Parameter bag describing one administrative bucket operation: the target
+ * user/bucket/object plus the option flags selected by the caller.
+ *
+ * Every flag has an in-class initializer so a default-constructed state is
+ * fully defined. Setters accept const references, and read-only predicates are
+ * const so they can be called through a const reference.
+ */
+struct RGWBucketAdminOpState {
+  rgw_user uid;
+  std::string display_name;
+  std::string bucket_name;
+  std::string bucket_id;
+  std::string object_name;
+
+  bool list_buckets = false;
+  bool stat_buckets = false;
+  bool check_objects = false;
+  bool fix_index = false;
+  bool delete_child_objects = false;
+  bool bucket_stored = false;  // set once set_bucket() has been called
+  int max_aio = 0;
+
+  rgw_bucket bucket;
+
+  RGWQuotaInfo quota;
+
+  void set_fetch_stats(bool value) { stat_buckets = value; }
+  void set_check_objects(bool value) { check_objects = value; }
+  void set_fix_index(bool value) { fix_index = value; }
+  void set_delete_children(bool value) { delete_child_objects = value; }
+
+  void set_max_aio(int value) { max_aio = value; }
+
+  // An empty user id is ignored so an already configured uid is not wiped.
+  void set_user_id(const rgw_user& user_id) {
+    if (!user_id.empty())
+      uid = user_id;
+  }
+  void set_tenant(const std::string& tenant_str) {
+    uid.tenant = tenant_str;
+  }
+  void set_bucket_name(const std::string& bucket_str) {
+    bucket_name = bucket_str;
+  }
+  void set_object(const std::string& object_str) {
+    object_name = object_str;
+  }
+  void set_quota(const RGWQuotaInfo& value) {
+    quota = value;
+  }
+
+
+  rgw_user& get_user_id() { return uid; }
+  std::string& get_user_display_name() { return display_name; }
+  std::string& get_bucket_name() { return bucket_name; }
+  std::string& get_object_name() { return object_name; }
+  std::string& get_tenant() { return uid.tenant; }
+
+  rgw_bucket& get_bucket() { return bucket; }
+  // Stores a copy of the bucket and records that one has been provided.
+  void set_bucket(const rgw_bucket& _bucket) {
+    bucket = _bucket;
+    bucket_stored = true;
+  }
+
+  void set_bucket_id(const string& bi) {
+    bucket_id = bi;
+  }
+  const string& get_bucket_id() const { return bucket_id; }
+
+  bool will_fetch_stats() const { return stat_buckets; }
+  bool will_fix_index() const { return fix_index; }
+  bool will_delete_children() const { return delete_child_objects; }
+  bool will_check_objects() const { return check_objects; }
+  // An operation is a user op when a uid is set, and a system op otherwise.
+  bool is_user_op() const { return !uid.empty(); }
+  bool is_system_op() const { return uid.empty(); }
+  bool has_bucket_stored() const { return bucket_stored; }
+  int get_max_aio() const { return max_aio; }
+
+  RGWBucketAdminOpState() = default;
+};
+
+/*
+ * A simple wrapper class for administrative bucket operations
+ */
+
+class RGWBucket // stateful helper for admin bucket operations; call init() before the other members
+{
+ RGWUserBuckets buckets;
+ RGWRados *store; // not owned here as far as this header shows; set to NULL by the constructor
+ RGWAccessHandle handle;
+
+ RGWUserInfo user_info;
+ std::string tenant;
+ std::string bucket_name;
+
+ bool failure; // cleared by clear_failure(); where it is set is not visible in this header
+
+ RGWBucketInfo bucket_info;
+
+public:
+ RGWBucket() : store(NULL), handle(NULL), failure(false) {}
+ int init(RGWRados *storage, RGWBucketAdminOpState& op_state); // defined out of line
+
+ int check_bad_index_multipart(RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher, std::string *err_msg = NULL); // err_msg is optional (defaults to NULL)
+
+ int check_object_index(RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher,
+ std::string *err_msg = NULL);
+
+ int check_index(RGWBucketAdminOpState& op_state,
+ map<RGWObjCategory, RGWStorageStats>& existing_stats, // presumably output: stats as currently recorded — confirm in rgw_bucket.cc
+ map<RGWObjCategory, RGWStorageStats>& calculated_stats, // presumably output: stats as recomputed — confirm in rgw_bucket.cc
+ std::string *err_msg = NULL);
+
+ int remove(RGWBucketAdminOpState& op_state, bool bypass_gc = false, bool keep_index_consistent = true, std::string *err_msg = NULL);
+ int link(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
+ int unlink(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
+ int set_quota(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
+
+ int remove_object(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
+ int policy_bl_to_stream(bufferlist& bl, ostream& o);
+ int get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy);
+
+ void clear_failure() { failure = false; } // resets the failure flag
+
+ const RGWBucketInfo& get_bucket_info() const { return bucket_info; } // read-only view of the stored bucket info
+};
+
+class RGWBucketAdminOp // groups static entry points for admin bucket operations; holds no state of its own
+{
+public:
+ static int get_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher); // overload taking a formatter flusher
+ static int get_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWAccessControlPolicy& policy); // overload taking a policy object by reference
+ static int dump_s3_policy(RGWRados *store, RGWBucketAdminOpState& op_state,
+ ostream& os);
+
+ static int unlink(RGWRados *store, RGWBucketAdminOpState& op_state);
+ static int link(RGWRados *store, RGWBucketAdminOpState& op_state, string *err_msg = NULL); // err_msg is optional
+
+ static int check_index(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher);
+
+ static int remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state, bool bypass_gc = false, bool keep_index_consistent = true);
+ static int remove_object(RGWRados *store, RGWBucketAdminOpState& op_state);
+ static int info(RGWRados *store, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher);
+ static int limit_check(RGWRados *store, RGWBucketAdminOpState& op_state,
+ const std::list<std::string>& user_ids,
+ RGWFormatterFlusher& flusher,
+ bool warnings_only = false);
+ static int set_quota(RGWRados *store, RGWBucketAdminOpState& op_state);
+
+ static int list_stale_instances(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher);
+
+ static int clear_stale_instances(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher);
+ static int fix_lc_shards(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher);
+ static int fix_obj_expiry(RGWRados *store, RGWBucketAdminOpState& op_state,
+ RGWFormatterFlusher& flusher, bool dry_run = false); // dry_run defaults to false
+};
+
+
+enum DataLogEntityType { // kind of entity a data-log change refers to; encoded as a uint8_t by rgw_data_change
+ ENTITY_TYPE_UNKNOWN = 0,
+ ENTITY_TYPE_BUCKET = 1,
+};
+
+/*
+ * One data-log change record: which kind of entity changed, its key and when.
+ *
+ * entity_type is given a default so that a default-constructed record can be
+ * encoded without reading an indeterminate value.
+ */
+struct rgw_data_change {
+  DataLogEntityType entity_type = ENTITY_TYPE_UNKNOWN;
+  string key;
+  real_time timestamp;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    // The entity type travels as a single byte.
+    uint8_t t = static_cast<uint8_t>(entity_type);
+    encode(t, bl);
+    encode(key, bl);
+    encode(timestamp, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    uint8_t t;
+    decode(t, bl);
+    entity_type = static_cast<DataLogEntityType>(t);
+    decode(key, bl);
+    decode(timestamp, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(rgw_data_change)
+
+struct rgw_data_change_log_entry { // a data change together with the log id and log timestamp it was recorded under
+ string log_id;
+ real_time log_timestamp;
+ rgw_data_change entry; // the wrapped change record
+
+ void encode(bufferlist& bl) const { // field order here must match decode() below
+ ENCODE_START(1, 1, bl);
+ encode(log_id, bl);
+ encode(log_timestamp, bl);
+ encode(entry, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // reads the fields in the order encode() wrote them
+ DECODE_START(1, bl);
+ decode(log_id, bl);
+ decode(log_timestamp, bl);
+ decode(entry, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const; // defined out of line
+ void decode_json(JSONObj *obj); // defined out of line
+};
+WRITE_CLASS_ENCODER(rgw_data_change_log_entry)
+
+struct RGWDataChangesLogInfo { // per-shard log info filled in by RGWDataChangesLog::get_info()
+ string marker;
+ real_time last_update;
+
+ void dump(Formatter *f) const; // defined out of line
+ void decode_json(JSONObj *obj); // defined out of line
+};
+
+namespace rgw {
+struct BucketChangeObserver;
+}
+
+/*
+ * Sharded log of bucket data changes.
+ *
+ * The constructor reads the shard count and object-name prefix from the
+ * configuration, builds one object id per shard ("<prefix>.<shard>") and starts
+ * the renew thread.
+ */
+class RGWDataChangesLog {
+  CephContext *cct;
+  RGWRados *store;
+  rgw::BucketChangeObserver *observer = nullptr;
+
+  int num_shards;
+  string *oids;  // array of num_shards object ids, allocated in the constructor
+
+  Mutex lock;
+  RWLock modified_lock;
+  map<int, set<string> > modified_shards;
+
+  std::atomic<bool> down_flag = { false };
+
+  struct ChangeStatus {
+    real_time cur_expiration;
+    real_time cur_sent;
+    bool pending;
+    RefCountedCond *cond;
+    Mutex *lock;
+
+    ChangeStatus() : pending(false), cond(nullptr) {
+      lock = new Mutex("RGWDataChangesLog::ChangeStatus");
+    }
+
+    // `lock` is an owning raw pointer released in the destructor, so a copy
+    // would delete it twice; instances are meant to be shared via
+    // ChangeStatusPtr instead.
+    ChangeStatus(const ChangeStatus&) = delete;
+    ChangeStatus& operator=(const ChangeStatus&) = delete;
+
+    ~ChangeStatus() {
+      delete lock;
+    }
+  };
+
+  typedef std::shared_ptr<ChangeStatus> ChangeStatusPtr;
+
+  lru_map<rgw_bucket_shard, ChangeStatusPtr> changes;
+
+  map<rgw_bucket_shard, bool> cur_cycle;
+
+  void _get_change(const rgw_bucket_shard& bs, ChangeStatusPtr& status);
+  void register_renew(rgw_bucket_shard& bs);
+  void update_renewed(rgw_bucket_shard& bs, real_time& expiration);
+
+  class ChangesRenewThread : public Thread {
+    CephContext *cct;
+    RGWDataChangesLog *log;
+    Mutex lock;
+    Cond cond;
+
+  public:
+    ChangesRenewThread(CephContext *_cct, RGWDataChangesLog *_log) : cct(_cct), log(_log), lock("ChangesRenewThread::lock") {}
+    void *entry() override;
+    void stop();
+  };
+
+  ChangesRenewThread *renew_thread;
+
+public:
+
+  RGWDataChangesLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store),
+      lock("RGWDataChangesLog::lock"), modified_lock("RGWDataChangesLog::modified_lock"),
+      changes(cct->_conf->rgw_data_log_changes_size) {
+    num_shards = cct->_conf->rgw_data_log_num_shards;
+
+    oids = new string[num_shards];
+
+    string prefix = cct->_conf->rgw_data_log_obj_prefix;
+
+    if (prefix.empty()) {
+      prefix = "data_log";
+    }
+
+    // Build "<prefix>.<shard>" with string concatenation: a fixed 16-byte
+    // snprintf buffer would silently truncate longer prefixes, and could make
+    // different shards end up with the same object id.
+    for (int i = 0; i < num_shards; i++) {
+      oids[i] = prefix + "." + std::to_string(i);
+    }
+
+    renew_thread = new ChangesRenewThread(cct, this);
+    renew_thread->create("rgw_dt_lg_renew");
+  }
+
+  ~RGWDataChangesLog();
+
+  int choose_oid(const rgw_bucket_shard& bs);
+  // No bounds check: shard_id must be within [0, num_shards).
+  const std::string& get_oid(int shard_id) const { return oids[shard_id]; }
+  int add_entry(rgw_bucket& bucket, int shard_id);
+  int get_log_shard_id(rgw_bucket& bucket, int shard_id);
+  int renew_entries();
+  int list_entries(int shard, const real_time& start_time, const real_time& end_time, int max_entries,
+      list<rgw_data_change_log_entry>& entries,
+      const string& marker,
+      string *out_marker,
+      bool *truncated);
+  int trim_entries(int shard_id, const real_time& start_time, const real_time& end_time,
+      const string& start_marker, const string& end_marker);
+  int get_info(int shard_id, RGWDataChangesLogInfo *info);
+  int lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id);
+  int unlock(int shard_id, string& zone_id, string& owner_id);
+  // Cursor for the cross-shard list_entries() overload below.
+  struct LogMarker {
+    int shard;
+    string marker;
+
+    LogMarker() : shard(0) {}
+  };
+  int list_entries(const real_time& start_time, const real_time& end_time, int max_entries,
+      list<rgw_data_change_log_entry>& entries, LogMarker& marker, bool *ptruncated);
+
+  void mark_modified(int shard_id, const rgw_bucket_shard& bs);
+  void read_clear_modified(map<int, set<string> > &modified);
+
+  void set_observer(rgw::BucketChangeObserver *observer) {
+    this->observer = observer;
+  }
+
+  bool going_down();
+};
+
+bool rgw_find_bucket_by_id(CephContext *cct, RGWMetadataManager *mgr, const string& marker,
+ const string& bucket_id, rgw_bucket* bucket_out);
+
+#endif
diff --git a/src/rgw/rgw_cache.cc b/src/rgw/rgw_cache.cc
new file mode 100644
index 00000000..df992b59
--- /dev/null
+++ b/src/rgw/rgw_cache.cc
@@ -0,0 +1,353 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_cache.h"
+#include "rgw_perf_counters.h"
+
+#include <errno.h>
+
+#define dout_subsys ceph_subsys_rgw
+
+
+int ObjectCache::get(const string& name, ObjectCacheInfo& info, uint32_t mask, rgw_cache_entry_info *cache_info)
+{ // Looks up `name`; returns 0 and copies the entry into `info` on a hit, -ENODATA for a cached negative entry, -ENOENT otherwise
+ RWLock::RLocker l(lock); // starts under the read lock; two paths below switch to the write lock
+
+ if (!enabled) {
+ return -ENOENT; // a disabled cache always misses (no perf counter update)
+ }
+
+ auto iter = cache_map.find(name);
+ if (iter == cache_map.end()) {
+ ldout(cct, 10) << "cache get: name=" << name << " : miss" << dendl;
+ if (perfcounter)
+ perfcounter->inc(l_rgw_cache_miss);
+ return -ENOENT;
+ }
+ if (expiry.count() && // an expiry of zero disables the age check
+ (ceph::coarse_mono_clock::now() - iter->second.info.time_added) > expiry) {
+ ldout(cct, 10) << "cache get: name=" << name << " : expiry miss" << dendl;
+ lock.unlock(); // read lock is dropped before the write lock is taken, so other threads may run in between
+ lock.get_write();
+ // check that wasn't already removed by other thread
+ iter = cache_map.find(name);
+ if (iter != cache_map.end()) {
+ for (auto &kv : iter->second.chained_entries) // invalidate dependents before dropping the entry
+ kv.first->invalidate(kv.second);
+ remove_lru(name, iter->second.lru_iter);
+ cache_map.erase(iter); // NOTE(review): the entry's age is not re-checked after re-locking — confirm a concurrent put() refresh cannot be lost here
+ }
+ if(perfcounter)
+ perfcounter->inc(l_rgw_cache_miss);
+ return -ENOENT;
+ }
+
+ ObjectCacheEntry *entry = &iter->second;
+
+ if (lru_counter - entry->lru_promotion_ts > lru_window) { // only re-promote entries that have aged more than lru_window touches
+ ldout(cct, 20) << "cache get: touching lru, lru_counter=" << lru_counter
+ << " promotion_ts=" << entry->lru_promotion_ts << dendl;
+ lock.unlock();
+ lock.get_write(); /* promote lock to writer */
+
+ /* need to redo this because entry might have dropped off the cache */
+ iter = cache_map.find(name);
+ if (iter == cache_map.end()) {
+ ldout(cct, 10) << "lost race! cache get: name=" << name << " : miss" << dendl;
+ if(perfcounter) perfcounter->inc(l_rgw_cache_miss);
+ return -ENOENT;
+ }
+
+ entry = &iter->second;
+ /* check again, we might have lost a race here */
+ if (lru_counter - entry->lru_promotion_ts > lru_window) {
+ touch_lru(name, *entry, iter->second.lru_iter);
+ }
+ }
+
+ ObjectCacheInfo& src = iter->second.info;
+ if(src.status == -ENOENT) { // negative entry: counted as a hit but reported as -ENODATA
+ ldout(cct, 10) << "cache get: name=" << name << " : hit (negative entry)" << dendl;
+ if (perfcounter) perfcounter->inc(l_rgw_cache_hit);
+ return -ENODATA;
+ }
+ if ((src.flags & mask) != mask) { // every flag requested in mask must be cached
+ ldout(cct, 10) << "cache get: name=" << name << " : type miss (requested=0x"
+ << std::hex << mask << ", cached=0x" << src.flags
+ << std::dec << ")" << dendl;
+ if(perfcounter) perfcounter->inc(l_rgw_cache_miss);
+ return -ENOENT;
+ }
+ ldout(cct, 10) << "cache get: name=" << name << " : hit (requested=0x"
+ << std::hex << mask << ", cached=0x" << src.flags
+ << std::dec << ")" << dendl;
+
+ info = src; // copy the cached info out to the caller
+ if (cache_info) { // optional: hand back locator and generation for a later chain_cache_entry()
+ cache_info->cache_locator = name;
+ cache_info->gen = entry->gen;
+ }
+ if(perfcounter) perfcounter->inc(l_rgw_cache_hit);
+
+ return 0;
+}
+
+/*
+ * Attach a chained-cache entry to a set of cached objects.
+ *
+ * Under the write lock, each rgw_cache_entry_info must still name an entry in
+ * cache_map with an unchanged generation; if any does not, nothing is chained
+ * and false is returned. Otherwise the chained cache's chain_cb() is invoked
+ * and the (cache, key) pair is recorded on every verified entry.
+ */
+bool ObjectCache::chain_cache_entry(std::initializer_list<rgw_cache_entry_info*> cache_info_entries,
+    RGWChainedCache::Entry *chained_entry)
+{
+  RWLock::WLocker l(lock);
+
+  if (!enabled) {
+    return false;
+  }
+
+  /* pass 1: collect the entries, bailing out on the first stale one */
+  std::vector<ObjectCacheEntry*> verified;
+  verified.reserve(cache_info_entries.size());
+  for (rgw_cache_entry_info *ci : cache_info_entries) {
+    ldout(cct, 10) << "chain_cache_entry: cache_locator="
+        << ci->cache_locator << dendl;
+    auto found = cache_map.find(ci->cache_locator);
+    if (found == cache_map.end()) {
+      ldout(cct, 20) << "chain_cache_entry: couldn't find cache locator" << dendl;
+      return false;
+    }
+
+    ObjectCacheEntry& cached = found->second;
+    if (cached.gen != ci->gen) {
+      ldout(cct, 20) << "chain_cache_entry: entry.gen (" << cached.gen
+          << ") != cache_info.gen (" << ci->gen << ")"
+          << dendl;
+      return false;
+    }
+    verified.push_back(&cached);
+  }
+
+  /* pass 2: everything checked out, so notify the chained cache and link */
+  chained_entry->cache->chain_cb(chained_entry->key, chained_entry->data);
+
+  for (ObjectCacheEntry *cached : verified) {
+    cached->chained_entries.emplace_back(chained_entry->cache,
+        chained_entry->key);
+  }
+
+  return true;
+}
+
+void ObjectCache::put(const string& name, ObjectCacheInfo& info, rgw_cache_entry_info *cache_info)
+{ // Inserts or updates the entry for `name`, merging only the parts of `info` selected by info.flags
+ RWLock::WLocker l(lock);
+
+ if (!enabled) {
+ return; // a disabled cache stores nothing
+ }
+
+ ldout(cct, 10) << "cache put: name=" << name << " info.flags=0x"
+ << std::hex << info.flags << std::dec << dendl;
+
+ auto [iter, inserted] = cache_map.emplace(name, ObjectCacheEntry{}); // finds the existing entry or creates an empty one
+ ObjectCacheEntry& entry = iter->second;
+ entry.info.time_added = ceph::coarse_mono_clock::now(); // every put refreshes the expiry clock
+ if (inserted) {
+ entry.lru_iter = lru.end(); // end() marks "not yet on the LRU list" for touch_lru()
+ }
+ ObjectCacheInfo& target = entry.info;
+
+ invalidate_lru(entry); // invalidate chained-cache entries that depended on the old content
+
+ entry.chained_entries.clear();
+ entry.gen++; // a new generation makes older rgw_cache_entry_info handles stale
+
+ touch_lru(name, entry, entry.lru_iter);
+
+ target.status = info.status;
+
+ if (info.status < 0) { // negative entry: keep only the status and drop cached content
+ target.flags = 0;
+ target.xattrs.clear();
+ target.data.clear();
+ return; // note: cache_info is not filled in on this path
+ }
+
+ if (cache_info) {
+ cache_info->cache_locator = name;
+ cache_info->gen = entry.gen;
+ }
+
+ // put() must include the latest version if we're going to keep caching it
+ target.flags &= ~CACHE_FLAG_OBJV;
+
+ target.flags |= info.flags; // flags accumulate across puts unless cleared above or below
+
+ if (info.flags & CACHE_FLAG_META)
+ target.meta = info.meta;
+ else if (!(info.flags & CACHE_FLAG_MODIFY_XATTRS))
+ target.flags &= ~CACHE_FLAG_META; // non-meta change should reset meta
+
+ if (info.flags & CACHE_FLAG_XATTRS) { // full replacement of the cached xattrs
+ target.xattrs = info.xattrs;
+ map<string, bufferlist>::iterator iter;
+ for (iter = target.xattrs.begin(); iter != target.xattrs.end(); ++iter) {
+ ldout(cct, 10) << "updating xattr: name=" << iter->first << " bl.length()=" << iter->second.length() << dendl;
+ }
+ } else if (info.flags & CACHE_FLAG_MODIFY_XATTRS) { // incremental update: apply removals first, then additions/overwrites
+ map<string, bufferlist>::iterator iter;
+ for (iter = info.rm_xattrs.begin(); iter != info.rm_xattrs.end(); ++iter) {
+ ldout(cct, 10) << "removing xattr: name=" << iter->first << dendl;
+ target.xattrs.erase(iter->first);
+ }
+ for (iter = info.xattrs.begin(); iter != info.xattrs.end(); ++iter) {
+ ldout(cct, 10) << "appending xattr: name=" << iter->first << " bl.length()=" << iter->second.length() << dendl;
+ target.xattrs[iter->first] = iter->second;
+ }
+ }
+
+ if (info.flags & CACHE_FLAG_DATA)
+ target.data = info.data;
+
+ if (info.flags & CACHE_FLAG_OBJV)
+ target.version = info.version;
+}
+
+/*
+ * Drop `name` from the cache.
+ *
+ * Returns false when the cache is disabled or the name is not cached; otherwise
+ * invalidates the entry's chained-cache dependents, unlinks it from the LRU
+ * list, erases it and returns true.
+ */
+bool ObjectCache::remove(const string& name)
+{
+  RWLock::WLocker l(lock);
+
+  if (!enabled)
+    return false;
+
+  auto found = cache_map.find(name);
+  if (found == cache_map.end()) {
+    return false;
+  }
+
+  ldout(cct, 10) << "removing " << name << " from cache" << dendl;
+
+  ObjectCacheEntry& victim = found->second;
+  invalidate_lru(victim);  /* notifies every chained cache entry */
+  remove_lru(name, victim.lru_iter);
+  cache_map.erase(found);
+  return true;
+}
+
+/*
+ * Make `name` the most recently used entry.
+ *
+ * First trims the LRU list from the front while it holds more than
+ * rgw_cache_lru_size names, then appends `name` (or moves it to the back if it
+ * was already listed) and stamps the entry with a fresh promotion counter.
+ * lru_iter is updated to point at the entry's new list position.
+ */
+void ObjectCache::touch_lru(const string& name, ObjectCacheEntry& entry,
+    std::list<string>::iterator& lru_iter)
+{
+  while (lru_size > (size_t)cct->_conf->rgw_cache_lru_size) {
+    const string& oldest = lru.front();
+    if (oldest == name) {
+      /*
+       * the entry being touched is itself the oldest one: keep it and let
+       * the shrinking happen on a later call
+       */
+      break;
+    }
+    auto victim = cache_map.find(oldest);
+    ldout(cct, 10) << "removing entry: name=" << oldest << " from cache LRU" << dendl;
+    if (victim != cache_map.end()) {
+      invalidate_lru(victim->second);
+      cache_map.erase(victim);
+    }
+    lru.pop_front();
+    --lru_size;
+  }
+
+  const bool not_listed = (lru_iter == lru.end());
+  if (not_listed) {
+    lru.push_back(name);
+    ++lru_size;
+    ldout(cct, 10) << "adding " << name << " to cache LRU end" << dendl;
+  } else {
+    ldout(cct, 10) << "moving " << name << " to cache LRU end" << dendl;
+    lru.erase(lru_iter);
+    lru.push_back(name);
+  }
+  /* in both cases the entry now lives in the last list node */
+  lru_iter = lru.end();
+  --lru_iter;
+
+  entry.lru_promotion_ts = ++lru_counter;
+}
+
+/*
+ * Unlink an entry from the LRU list. An iterator equal to lru.end() means the
+ * entry is not listed and nothing happens; otherwise the node is erased, the
+ * size counter is decremented and lru_iter is reset to lru.end().
+ */
+void ObjectCache::remove_lru(const string& name,
+    std::list<string>::iterator& lru_iter)
+{
+  if (lru_iter != lru.end()) {
+    lru.erase(lru_iter);
+    --lru_size;
+    lru_iter = lru.end();
+  }
+}
+
+/*
+ * Tell every chained cache recorded on `entry` to invalidate its key. The
+ * chained_entries list itself is left untouched.
+ */
+void ObjectCache::invalidate_lru(ObjectCacheEntry& entry)
+{
+  for (auto& chained : entry.chained_entries) {
+    RGWChainedCache *owner = chained.first;
+    owner->invalidate(chained.second);
+  }
+}
+
+/*
+ * Switch the cache on or off under the write lock. Turning it off also drops
+ * all cached state.
+ */
+void ObjectCache::set_enabled(bool status)
+{
+  RWLock::WLocker l(lock);
+
+  enabled = status;
+  if (enabled) {
+    return;
+  }
+
+  do_invalidate_all();
+}
+
+void ObjectCache::invalidate_all()
+{ // public entry point: drops all cached state under the write lock
+ RWLock::WLocker l(lock);
+
+ do_invalidate_all(); // the unlocked worker; the lock is held here on its behalf
+}
+
+void ObjectCache::do_invalidate_all()
+{ // takes no lock itself; both visible callers hold the write lock
+ cache_map.clear();
+ lru.clear();
+
+ lru_size = 0;
+ lru_counter = 0;
+ lru_window = 0; // NOTE(review): only set_ctx() assigns a non-zero window — confirm it is restored after an invalidation
+
+ for (auto& cache : chained_cache) { // propagate the wipe to every registered chained cache
+ cache->invalidate_all();
+ }
+}
+
+void ObjectCache::chain_cache(RGWChainedCache *cache) { // registers a chained cache; no duplicate check is done
+ RWLock::WLocker l(lock);
+ chained_cache.push_back(cache); // stores the raw pointer only
+}
+
+/*
+ * Unregister a chained cache: the first matching pointer is removed from the
+ * registration list and the cache's unregistered() hook is called. Nothing
+ * happens when the cache was never registered.
+ */
+void ObjectCache::unchain_cache(RGWChainedCache *cache) {
+  RWLock::WLocker l(lock);
+
+  for (auto pos = chained_cache.begin(); pos != chained_cache.end(); ++pos) {
+    if (*pos != cache) {
+      continue;
+    }
+    chained_cache.erase(pos);
+    cache->unregistered();
+    break;
+  }
+}
+
+ObjectCache::~ObjectCache()
+{ // on destruction, notifies every still-registered chained cache; no lock is taken here
+ for (auto cache : chained_cache) {
+ cache->unregistered(); // same hook unchain_cache() calls
+ }
+}
+
diff --git a/src/rgw/rgw_cache.h b/src/rgw/rgw_cache.h
new file mode 100644
index 00000000..b0696237
--- /dev/null
+++ b/src/rgw/rgw_cache.h
@@ -0,0 +1,219 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGWCACHE_H
+#define CEPH_RGWCACHE_H
+
+#include "rgw_rados.h"
+#include <string>
+#include <map>
+#include <unordered_map>
+#include "include/types.h"
+#include "include/utime.h"
+#include "include/ceph_assert.h"
+#include "common/RWLock.h"
+
+enum { // cache operation codes — presumably the values carried in RGWCacheNotifyInfo::op; confirm at the use sites
+ UPDATE_OBJ,
+ REMOVE_OBJ,
+};
+
+#define CACHE_FLAG_DATA 0x01
+#define CACHE_FLAG_XATTRS 0x02
+#define CACHE_FLAG_META 0x04
+#define CACHE_FLAG_MODIFY_XATTRS 0x08
+#define CACHE_FLAG_OBJV 0x10
+
+#define mydout(v) lsubdout(T::cct, rgw, v)
+
+struct ObjectMetaInfo { // size and modification time of a cached object
+ uint64_t size;
+ real_time mtime;
+
+ ObjectMetaInfo() : size(0) {} // mtime is left to real_time's default construction
+
+ void encode(bufferlist& bl) const { // field order must match decode()
+ ENCODE_START(2, 2, bl);
+ encode(size, bl);
+ encode(mtime, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ decode(size, bl);
+ decode(mtime, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const; // defined out of line
+ static void generate_test_instances(list<ObjectMetaInfo*>& o); // defined out of line
+};
+WRITE_CLASS_ENCODER(ObjectMetaInfo)
+
+struct ObjectCacheInfo { // cached state of one object: status, which parts are valid (flags) and the parts themselves
+ int status = 0; // negative values mark a negative entry (see ObjectCache::put)
+ uint32_t flags = 0; // bitmask of CACHE_FLAG_* values
+ uint64_t epoch = 0;
+ bufferlist data;
+ map<string, bufferlist> xattrs;
+ map<string, bufferlist> rm_xattrs; // xattr names to remove when CACHE_FLAG_MODIFY_XATTRS is applied
+ ObjectMetaInfo meta;
+ obj_version version = {};
+ ceph::coarse_mono_time time_added; // local insertion time; not part of the encoded form
+
+ ObjectCacheInfo() = default;
+
+ void encode(bufferlist& bl) const { // field order must match decode()
+ ENCODE_START(5, 3, bl);
+ encode(status, bl);
+ encode(flags, bl);
+ encode(data, bl);
+ encode(xattrs, bl);
+ encode(meta, bl);
+ encode(rm_xattrs, bl);
+ encode(epoch, bl);
+ encode(version, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(5, 3, 3, bl);
+ decode(status, bl);
+ decode(flags, bl);
+ decode(data, bl);
+ decode(xattrs, bl);
+ decode(meta, bl);
+ if (struct_v >= 2) // later fields are read only when the encoded version has them
+ decode(rm_xattrs, bl);
+ if (struct_v >= 4)
+ decode(epoch, bl);
+ if (struct_v >= 5)
+ decode(version, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const; // defined out of line
+ static void generate_test_instances(list<ObjectCacheInfo*>& o); // defined out of line
+};
+WRITE_CLASS_ENCODER(ObjectCacheInfo)
+
+struct RGWCacheNotifyInfo { // encodable cache notification: an operation code plus the object and info it concerns
+ uint32_t op; // presumably UPDATE_OBJ or REMOVE_OBJ — confirm at the use sites
+ rgw_raw_obj obj;
+ ObjectCacheInfo obj_info;
+ off_t ofs;
+ string ns;
+
+ RGWCacheNotifyInfo() : op(0), ofs(0) {}
+
+ void encode(bufferlist& obl) const { // field order must match decode()
+ ENCODE_START(2, 2, obl);
+ encode(op, obl);
+ encode(obj, obl);
+ encode(obj_info, obl);
+ encode(ofs, obl);
+ encode(ns, obl);
+ ENCODE_FINISH(obl);
+ }
+ void decode(bufferlist::const_iterator& ibl) {
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, ibl);
+ decode(op, ibl);
+ decode(obj, ibl);
+ decode(obj_info, ibl);
+ decode(ofs, ibl);
+ decode(ns, ibl);
+ DECODE_FINISH(ibl);
+ }
+ void dump(Formatter *f) const; // defined out of line
+ static void generate_test_instances(list<RGWCacheNotifyInfo*>& o); // defined out of line
+};
+WRITE_CLASS_ENCODER(RGWCacheNotifyInfo)
+
+class RGWChainedCache { // interface for caches whose entries depend on ObjectCache entries
+public:
+ virtual ~RGWChainedCache() {}
+ virtual void chain_cb(const string& key, void *data) = 0; // called by ObjectCache::chain_cache_entry once all entries are verified
+ virtual void invalidate(const string& key) = 0; // called when an ObjectCache entry this key was chained to changes or is dropped
+ virtual void invalidate_all() = 0; // called when the whole ObjectCache is invalidated
+ virtual void unregistered() {} // optional hook; called from unchain_cache() and the ObjectCache destructor
+
+ struct Entry { // argument bundle passed to ObjectCache::chain_cache_entry
+ RGWChainedCache *cache;
+ const string& key; // stored as a reference: the referenced string must outlive this Entry
+ void *data;
+
+ Entry(RGWChainedCache *_c, const string& _k, void *_d) : cache(_c), key(_k), data(_d) {}
+ };
+};
+
+
+struct ObjectCacheEntry { // one cache_map value: the cached info plus LRU and chaining bookkeeping
+ ObjectCacheInfo info;
+ std::list<string>::iterator lru_iter; // not set by the constructor; ObjectCache::put assigns lru.end() for new entries
+ uint64_t lru_promotion_ts; // value of lru_counter at the last touch_lru()
+ uint64_t gen; // incremented on every put(); compared in chain_cache_entry()
+ std::vector<pair<RGWChainedCache *, string> > chained_entries; // (chained cache, key) pairs to invalidate with this entry
+
+ ObjectCacheEntry() : lru_promotion_ts(0), gen(0) {}
+};
+
+/*
+ * In-memory object cache with LRU trimming, optional time-based expiry and
+ * support for "chained" caches that must be invalidated together with the
+ * entries they were derived from.
+ *
+ * The cache starts disabled; set_ctx() supplies the configuration and
+ * set_enabled() turns it on. An expiry of zero means entries never expire.
+ */
+class ObjectCache {
+  std::unordered_map<string, ObjectCacheEntry> cache_map;
+  std::list<string> lru;  // least recently used name at the front
+  unsigned long lru_size;
+  unsigned long lru_counter;
+  unsigned long lru_window;
+  RWLock lock;
+  CephContext *cct;
+
+  vector<RGWChainedCache *> chained_cache;
+
+  bool enabled;
+  ceph::timespan expiry;
+
+  void touch_lru(const string& name, ObjectCacheEntry& entry,
+      std::list<string>::iterator& lru_iter);
+  void remove_lru(const string& name, std::list<string>::iterator& lru_iter);
+  void invalidate_lru(ObjectCacheEntry& entry);
+
+  void do_invalidate_all();
+
+public:
+  // expiry is zero-initialized here so it holds a defined value ("no expiry")
+  // until set_ctx() installs the configured interval.
+  ObjectCache() : lru_size(0), lru_counter(0), lru_window(0), lock("ObjectCache"), cct(nullptr), enabled(false), expiry(ceph::timespan::zero()) { }
+  ~ObjectCache();
+  int get(const std::string& name, ObjectCacheInfo& bl, uint32_t mask, rgw_cache_entry_info *cache_info);
+  // Convenience lookup with an empty mask: returns the cached info, or
+  // nullopt whenever the full get() reports an error.
+  std::optional<ObjectCacheInfo> get(const std::string& name) {
+    std::optional<ObjectCacheInfo> info{std::in_place};
+    auto r = get(name, *info, 0, nullptr);
+    return r < 0 ? std::nullopt : info;
+  }
+
+  // Call f(name, entry) for every entry that get() would not treat as expired.
+  // A zero expiry disables expiration, so every entry is visited in that case.
+  template<typename F>
+  void for_each(const F& f) {
+    RWLock::RLocker l(lock);
+    if (enabled) {
+      auto now = ceph::coarse_mono_clock::now();
+      for (const auto& [name, entry] : cache_map) {
+        if (!expiry.count() || (now - entry.info.time_added) <= expiry) {
+          f(name, entry);
+        }
+      }
+    }
+  }
+
+  void put(const std::string& name, ObjectCacheInfo& bl, rgw_cache_entry_info *cache_info);
+  bool remove(const std::string& name);
+  // Installs the context and derives the LRU window (half the configured LRU
+  // size) and the expiry interval from its configuration.
+  void set_ctx(CephContext *_cct) {
+    cct = _cct;
+    lru_window = cct->_conf->rgw_cache_lru_size / 2;
+    expiry = std::chrono::seconds(cct->_conf.get_val<uint64_t>(
+        "rgw_cache_expiry_interval"));
+  }
+  bool chain_cache_entry(std::initializer_list<rgw_cache_entry_info*> cache_info_entries,
+      RGWChainedCache::Entry *chained_entry);
+
+  void set_enabled(bool status);
+
+  void chain_cache(RGWChainedCache *cache);
+  void unchain_cache(RGWChainedCache *cache);
+  void invalidate_all();
+};
+
+#endif
diff --git a/src/rgw/rgw_civetweb.cc b/src/rgw/rgw_civetweb.cc
new file mode 100644
index 00000000..f0f8ef5d
--- /dev/null
+++ b/src/rgw/rgw_civetweb.cc
@@ -0,0 +1,248 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/utility/string_ref.hpp>
+
+#include "civetweb/civetweb.h"
+#include "rgw_civetweb.h"
+#include "rgw_perf_counters.h"
+
+
+#define dout_subsys ceph_subsys_rgw
+
+/* Push exactly @len bytes from @buf to the connection, looping because one
+ * mg_write() call may accept fewer bytes than requested.
+ * Returns @len. Throws rgw::io::Exception(EIO) when mg_write() returns a
+ * negative value or zero. */
+size_t RGWCivetWeb::write_data(const char *buf, const size_t len)
+{
+  const char *cursor = buf;
+  for (size_t remaining = len; remaining > 0; ) {
+    const int written = mg_write(conn, cursor, remaining);
+    if (written <= 0) {
+      /* mg_write() is documented to return -1 on any error without further
+       * detail, so EIO is all we can report. A return of 0 is associated
+       * with writing to a closed connection and is treated the same. */
+      throw rgw::io::Exception(EIO, std::system_category());
+    }
+    cursor += written;
+    remaining -= static_cast<size_t>(written);
+  }
+  return len;
+}
+
+/* Wrap a civetweb connection. All flags start cleared, txbuf is bound to
+ * this object as its sink, and the local port is taken from the
+ * connection's local socket address (-1 when the address family is
+ * neither AF_INET nor AF_INET6). */
+RGWCivetWeb::RGWCivetWeb(mg_connection* const conn)
+  : conn(conn),
+    explicit_keepalive(false),
+    explicit_conn_close(false),
+    got_eof_on_read(false),
+    txbuf(*this)
+{
+  const sockaddr* const local = mg_get_local_addr(conn);
+
+  if (local->sa_family == AF_INET) {
+    port = ntohs(reinterpret_cast<const sockaddr_in*>(local)->sin_port);
+  } else if (local->sa_family == AF_INET6) {
+    port = ntohs(reinterpret_cast<const sockaddr_in6*>(local)->sin6_port);
+  } else {
+    port = -1;
+  }
+}
+
+/* Read up to @len bytes of the request body into @buf, calling mg_read()
+ * until the buffer is full or it reports end of input (0). End of input is
+ * remembered, so later calls return 0 without touching the connection.
+ * Returns the number of bytes stored. Throws rgw::io::Exception(EIO) when
+ * mg_read() returns a negative value. */
+size_t RGWCivetWeb::read_data(char *buf, size_t len)
+{
+  if (got_eof_on_read) {
+    return 0;
+  }
+
+  size_t total = 0;
+  while (total < len) {
+    const int got = mg_read(conn, buf + total, len - total);
+    if (got < 0) {
+      throw rgw::io::Exception(EIO, std::system_category());
+    }
+    if (got == 0) {
+      got_eof_on_read = true;
+      break;
+    }
+    total += got;
+  }
+  return total;
+}
+
+/* Flush buffered response data by synchronising txbuf, whose sink is this
+ * object's write_data(). */
+void RGWCivetWeb::flush()
+{
+ txbuf.pubsync();
+}
+
+/* Finish the request: decrement the l_rgw_qlen and l_rgw_qactive perf
+ * counters (init_env() increments them) and report 0 generated bytes. */
+size_t RGWCivetWeb::complete_request()
+{
+ perfcounter->inc(l_rgw_qlen, -1);
+ perfcounter->inc(l_rgw_qactive, -1);
+ return 0;
+}
+
+/* Populate the request environment (env) from civetweb's request info.
+ *
+ * Content-Length and Content-Type are exported only as CONTENT_LENGTH and
+ * CONTENT_TYPE. Every other header is exported as HTTP_<NAME>, with the
+ * name upper-cased and '-' replaced by '_'. The Connection header also
+ * records whether keep-alive or close was requested. The request line
+ * details, the optional query string and remote user, and the server port
+ * are stored afterwards.
+ *
+ * Returns 0 on success, or -EINVAL when civetweb has no request info or a
+ * header carries a null name or value. */
+int RGWCivetWeb::init_env(CephContext *cct)
+{
+  env.init(cct);
+  const struct mg_request_info* info = mg_get_request_info(conn);
+
+  if (! info) {
+    // request info is NULL; we have no info about the connection
+    return -EINVAL;
+  }
+
+  for (int i = 0; i < info->num_headers; i++) {
+    const auto header = &info->http_headers[i];
+
+    if (header->name == nullptr || header->value==nullptr) {
+      lderr(cct) << "client supplied malformatted headers" << dendl;
+      return -EINVAL;
+    }
+
+    const boost::string_ref name(header->name);
+    const auto& value = header->value;
+
+    if (boost::algorithm::iequals(name, "content-length")) {
+      env.set("CONTENT_LENGTH", value);
+      continue;
+    }
+    if (boost::algorithm::iequals(name, "content-type")) {
+      env.set("CONTENT_TYPE", value);
+      continue;
+    }
+    if (boost::algorithm::iequals(name, "connection")) {
+      explicit_keepalive = boost::algorithm::iequals(value, "keep-alive");
+      explicit_conn_close = boost::algorithm::iequals(value, "close");
+    }
+
+    static const boost::string_ref HTTP_{"HTTP_"};
+
+    /* Build the key in a std::string instead of a variable-length array:
+     * VLAs are a compiler extension, and the size here is dictated by the
+     * client-supplied header name. */
+    std::string key;
+    key.reserve(HTTP_.size() + name.size());
+    key.append(HTTP_.data(), HTTP_.size());
+    for (const char ch : name) {
+      if (ch == '-') {
+        key.push_back('_');
+      } else {
+        /* std::toupper() is undefined for negative arguments other than
+         * EOF, so bytes >= 0x80 must go through unsigned char. */
+        key.push_back(static_cast<char>(
+          std::toupper(static_cast<unsigned char>(ch))));
+      }
+    }
+
+    env.set(key.c_str(), value);
+  }
+
+  perfcounter->inc(l_rgw_qlen);
+  perfcounter->inc(l_rgw_qactive);
+
+  env.set("REMOTE_ADDR", info->remote_addr);
+  env.set("REQUEST_METHOD", info->request_method);
+  env.set("HTTP_VERSION", info->http_version);
+  env.set("REQUEST_URI", info->request_uri); // get the full uri, we anyway handle abs uris later
+  env.set("SCRIPT_URI", info->local_uri);
+  if (info->query_string) {
+    env.set("QUERY_STRING", info->query_string);
+  }
+  if (info->remote_user) {
+    env.set("REMOTE_USER", info->remote_user);
+  }
+
+  /* The port was resolved in the constructor; a non-positive value is
+   * logged as a bug but still exported. */
+  if (port <= 0)
+    lderr(cct) << "init_env: bug: invalid port number" << dendl;
+  char port_buf[16];
+  snprintf(port_buf, sizeof(port_buf), "%d", port);
+  env.set("SERVER_PORT", port_buf);
+  if (info->is_ssl) {
+    env.set("SERVER_PORT_SECURE", port_buf);
+  }
+  return 0;
+}
+
+/* Record @status on the civetweb connection and buffer the HTTP/1.1 status
+ * line "HTTP/1.1 <status> <status_name>\r\n".
+ * Returns the number of bytes accepted by txbuf.
+ *
+ * snprintf() returns the length the text would have had without
+ * truncation, so that value must not be used as the byte count when
+ * @status_name does not fit: the line is cut to the buffer and
+ * re-terminated with CRLF instead of reading past statusbuf. */
+size_t RGWCivetWeb::send_status(int status, const char *status_name)
+{
+  mg_set_http_status(conn, status);
+
+  static constexpr size_t STATUS_BUF_SIZE = 128;
+
+  char statusbuf[STATUS_BUF_SIZE];
+  const int formatted = snprintf(statusbuf, sizeof(statusbuf),
+                                 "HTTP/1.1 %d %s\r\n", status, status_name);
+  if (formatted < 0) {
+    /* Encoding error: there is nothing meaningful to send. */
+    return 0;
+  }
+
+  size_t statuslen = static_cast<size_t>(formatted);
+  if (statuslen >= sizeof(statusbuf)) {
+    /* Truncated: keep what fits and restore the line terminator. */
+    statuslen = sizeof(statusbuf) - 1;
+    statusbuf[statuslen - 2] = '\r';
+    statusbuf[statuslen - 1] = '\n';
+  }
+
+  return txbuf.sputn(statusbuf, statuslen);
+}
+
+/* Buffer the interim "100 CONTINUE" response and flush it right away.
+ * Returns the number of bytes accepted by txbuf. */
+size_t RGWCivetWeb::send_100_continue()
+{
+  static constexpr char CONTINUE_RESPONSE[] = "HTTP/1.1 100 CONTINUE\r\n\r\n";
+
+  const size_t sent = txbuf.sputn(CONTINUE_RESPONSE,
+                                  sizeof(CONTINUE_RESPONSE) - 1);
+  flush();
+  return sent;
+}
+
+/* Buffer one header line in the form "<name>: <value>\r\n".
+ * Returns the total number of bytes accepted by txbuf. */
+size_t RGWCivetWeb::send_header(const boost::string_ref& name,
+                                const boost::string_ref& value)
+{
+  static constexpr char HEADER_SEP[] = ": ";
+  static constexpr char HEADER_END[] = "\r\n";
+
+  /* The pieces are emitted by separate statements so their order in the
+   * buffer is fixed. */
+  const auto emit = [this](const char* data, size_t n) -> size_t {
+    return txbuf.sputn(data, n);
+  };
+
+  size_t sent = emit(name.data(), name.length());
+  sent += emit(HEADER_SEP, sizeof(HEADER_SEP) - 1);
+  sent += emit(value.data(), value.length());
+  sent += emit(HEADER_END, sizeof(HEADER_END) - 1);
+  return sent;
+}
+
+/* Buffer a "Date:" header built from the current time broken down with
+ * gmtime_r(). Returns the number of bytes accepted by txbuf, or 0 when the
+ * time cannot be converted or formatted. */
+size_t RGWCivetWeb::dump_date_header()
+{
+  const time_t now = time(nullptr);
+  struct tm utc;
+  if (gmtime_r(&now, &utc) == nullptr) {
+    return 0;
+  }
+
+  char timestr[TIME_BUF_SIZE];
+  const size_t formatted = strftime(timestr, sizeof(timestr),
+                                    "Date: %a, %d %b %Y %H:%M:%S %Z\r\n",
+                                    &utc);
+  if (formatted == 0) {
+    return 0;
+  }
+
+  return txbuf.sputn(timestr, strlen(timestr));
+}
+
+/* Finish the header section: emit the Date header, echo the client's
+ * explicit Connection preference (keep-alive wins over close), append the
+ * blank line that ends the headers, then flush.
+ * Returns the number of bytes accepted by txbuf. */
+size_t RGWCivetWeb::complete_header()
+{
+  static constexpr char CONN_KEEP_ALIVE[] = "Connection: Keep-Alive\r\n";
+  static constexpr char CONN_CLOSE[] = "Connection: close\r\n";
+  static constexpr char HEADER_END[] = "\r\n";
+
+  size_t sent = dump_date_header();
+
+  if (explicit_keepalive) {
+    sent += txbuf.sputn(CONN_KEEP_ALIVE, sizeof(CONN_KEEP_ALIVE) - 1);
+  } else if (explicit_conn_close) {
+    sent += txbuf.sputn(CONN_CLOSE, sizeof(CONN_CLOSE) - 1);
+  }
+
+  sent += txbuf.sputn(HEADER_END, sizeof(HEADER_END) - 1);
+
+  flush();
+  return sent;
+}
+
+/* Buffer the header line "Content-Length: <len>\r\n".
+ * Returns the number of bytes accepted by txbuf. */
+size_t RGWCivetWeb::send_content_length(uint64_t len)
+{
+  static constexpr size_t CONLEN_BUF_SIZE = 128;
+  char sizebuf[CONLEN_BUF_SIZE];
+
+  const int sizelen = snprintf(sizebuf, sizeof(sizebuf),
+                               "Content-Length: %" PRIu64 "\r\n", len);
+  return txbuf.sputn(sizebuf, sizelen);
+}
diff --git a/src/rgw/rgw_civetweb.h b/src/rgw/rgw_civetweb.h
new file mode 100644
index 00000000..6a6acd58
--- /dev/null
+++ b/src/rgw/rgw_civetweb.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_MONGOOSE_H
+#define CEPH_RGW_MONGOOSE_H
+#define TIME_BUF_SIZE 128
+
+#include "rgw_client_io.h"
+
+
+struct mg_connection;
+
+/* RestfulClient implementation on top of a single civetweb connection.
+ * It is also the BuffererSink of its own txbuf, so buffered response data
+ * ends up in write_data(). */
+class RGWCivetWeb : public rgw::io::RestfulClient,
+ public rgw::io::BuffererSink {
+ RGWEnv env;
+ mg_connection *conn;
+
+ /* Local port of the connection; -1 when the address family is unknown. */
+ int port;
+
+ /* Both are derived from the request's Connection header by init_env(). */
+ bool explicit_keepalive;
+ bool explicit_conn_close;
+ /* Latched once a read reports end of input; later reads return 0. */
+ bool got_eof_on_read;
+
+ /* Output buffer for response data; the constructor binds it to *this. */
+ rgw::io::StaticOutputBufferer<> txbuf;
+
+ /* Low-level I/O on the civetweb connection. */
+ size_t write_data(const char *buf, size_t len) override;
+ size_t read_data(char *buf, size_t len);
+ /* Emits the "Date:" header; used by complete_header(). */
+ size_t dump_date_header();
+
+public:
+ [[nodiscard]] int init_env(CephContext *cct) override;
+
+ size_t send_status(int status, const char *status_name) override;
+ size_t send_100_continue() override;
+ size_t send_header(const boost::string_ref& name,
+ const boost::string_ref& value) override;
+ size_t send_content_length(uint64_t len) override;
+ size_t complete_header() override;
+
+ /* Body reads go straight to the connection. */
+ size_t recv_body(char* buf, size_t max) override {
+ return read_data(buf, max);
+ }
+
+ /* Body writes bypass txbuf and go straight to the connection. */
+ size_t send_body(const char* buf, size_t len) override {
+ return write_data(buf, len);
+ }
+
+ size_t complete_request() override;
+
+ void flush() override;
+
+ RGWEnv& get_env() noexcept override {
+ return env;
+ }
+
+ explicit RGWCivetWeb(mg_connection *_conn);
+};
+
+#endif
diff --git a/src/rgw/rgw_civetweb_frontend.cc b/src/rgw/rgw_civetweb_frontend.cc
new file mode 100644
index 00000000..4e9d1ce7
--- /dev/null
+++ b/src/rgw/rgw_civetweb_frontend.cc
@@ -0,0 +1,153 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <set>
+#include <string>
+
+#include <boost/utility/string_ref.hpp>
+
+#include "rgw_frontend.h"
+#include "rgw_client_io_filters.h"
+#include "rgw_dmclock_sync_scheduler.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace dmc = rgw::dmclock;
+
+/* Store the configuration and process environment, and create a dmclock
+ * SyncScheduler when that scheduler type is configured. Nothing is created
+ * for any other scheduler type. */
+RGWCivetWebFrontend::RGWCivetWebFrontend(RGWProcessEnv& env,
+                                         RGWFrontendConfig *conf,
+                                         dmc::SchedulerCtx& sched_ctx)
+  : conf(conf),
+    ctx(nullptr),
+    env(env)
+{
+  if (dmc::get_scheduler_t(cct()) != dmc::scheduler_t::dmclock) {
+    /* "none" and "throttler" need no scheduler object here. */
+    return;
+  }
+
+  // TODO: keep track of server ready state and use that here civetweb
+  // internally tracks in the ctx the threads used and free, while it is
+  // expected with the current implementation that the threads waiting on the
+  // queue would still show up in the "used" queue, it might be a useful thing
+  // to make decisions on in the future. Also while reconfiguring we should
+  // probably set this to false
+  auto server_ready_f = []() -> bool { return true; };
+
+  scheduler.reset(new dmc::SyncScheduler(cct(),
+                                         std::ref(sched_ctx.get_dmc_client_counters()),
+                                         *sched_ctx.get_dmc_client_config(),
+                                         server_ready_f,
+                                         std::ref(dmc::SyncScheduler::handle_request_cb),
+                                         dmc::AtLimit::Reject));
+}
+
+/* begin_request hook registered with civetweb: recover the frontend from
+ * the request's user_data and let it process the connection. */
+static int civetweb_callback(struct mg_connection* conn)
+{
+  const struct mg_request_info* const info = mg_get_request_info(conn);
+  auto* const frontend = static_cast<RGWCivetWebFrontend *>(info->user_data);
+  return frontend->process(conn);
+}
+
+/* Serve the request arriving on @conn: wrap the connection in the filter
+ * chain (content-length control, chunking, buffering, reordering), run
+ * process_request() and report the outcome to civetweb.
+ * Returns the HTTP status when it is positive, otherwise 1. */
+int RGWCivetWebFrontend::process(struct mg_connection* const conn)
+{
+  /* Hold a read lock over access to env.store for reconfiguration. */
+  RWLock::RLocker lock(env.mutex);
+
+  RGWCivetWeb cw_client(conn);
+  auto real_client_io = rgw::io::add_reordering(
+    rgw::io::add_buffering(dout_context,
+      rgw::io::add_chunking(
+        rgw::io::add_conlen_controlling(
+          &cw_client))));
+  RGWRestfulIO client_io(dout_context, &real_client_io);
+
+  RGWRequest req(env.store->get_new_req_id());
+  int http_ret = 0;
+  //assert (scheduler != nullptr);
+  const int ret = process_request(env.store, env.rest, &req, env.uri_prefix,
+                                  *env.auth_registry, &client_io, env.olog,
+                                  null_yield, scheduler.get() ,&http_ret);
+  if (ret < 0) {
+    /* We don't really care about return code. */
+    dout(20) << "process_request() returned " << ret << dendl;
+  }
+
+  /* A non-positive status is reported as 1: mark as processed. */
+  return http_ret > 0 ? http_ret : 1;
+}
+
+/* Translate the frontend configuration into civetweb options and start the
+ * server. Returns 0 on success or -EIO when mg_start() yields no context. */
+int RGWCivetWebFrontend::run()
+{
+  auto& conf_map = conf->get_config_map();
+
+  set_conf_default(conf_map, "num_threads",
+                   std::to_string(g_conf()->rgw_thread_pool_size));
+  set_conf_default(conf_map, "decode_url", "no");
+  set_conf_default(conf_map, "enable_keep_alive", "yes");
+  set_conf_default(conf_map, "validate_http_method", "no");
+  set_conf_default(conf_map, "canonicalize_url_path", "no");
+  set_conf_default(conf_map, "enable_auth_domain_check", "no");
+  set_conf_default(conf_map, "allow_unicode_in_urls", "yes");
+
+  // support multiple port= entries
+  std::string listening_ports;
+  const auto port_entries = conf_map.equal_range("port");
+  for (auto it = port_entries.first; it != port_entries.second; ++it) {
+    // support port= entries with multiple values
+    std::string ports = it->second;
+    std::replace(ports.begin(), ports.end(), '+', ',');
+    if (!listening_ports.empty()) {
+      listening_ports += ',';
+    }
+    listening_ports += ports;
+  }
+  if (listening_ports.empty()) {
+    listening_ports = "80";
+  }
+  conf_map.emplace("listening_ports", std::move(listening_ports));
+
+  /* Set run_as_user. This will cause civetweb to invoke setuid() and setgid()
+   * based on pw_uid and pw_gid obtained from pw_name. */
+  std::string uid_string = g_ceph_context->get_set_uid_string();
+  if (! uid_string.empty()) {
+    conf_map.emplace("run_as_user", std::move(uid_string));
+  }
+
+  /* Prepare options for CivetWeb. The vector stores pointers into conf_map's
+   * strings, so conf_map must not be modified from here on. */
+  const std::set<boost::string_ref> rgw_opts = { "port", "prefix" };
+
+  std::vector<const char*> options;
+
+  for (const auto& pair : conf_map) {
+    if (rgw_opts.count(pair.first)) {
+      /* CivetWeb doesn't understand configurables of the glue layer between
+       * it and RadosGW. We need to strip them out. Otherwise CivetWeb would
+       * signalise an error. */
+      continue;
+    }
+    options.push_back(pair.first.c_str());
+    options.push_back(pair.second.c_str());
+
+    dout(20) << "civetweb config: " << pair.first
+             << ": " << pair.second << dendl;
+  }
+
+  options.push_back(nullptr);
+  /* Initialize the CivetWeb right now. */
+  struct mg_callbacks cb;
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset((void *)&cb, 0, sizeof(cb));
+  cb.begin_request = civetweb_callback;
+  cb.log_message = rgw_civetweb_log_callback;
+  cb.log_access = rgw_civetweb_log_access_callback;
+  ctx = mg_start(&cb, this, options.data());
+
+  if (! ctx) {
+    return -EIO;
+  }
+  return 0;
+} /* RGWCivetWebFrontend::run */
diff --git a/src/rgw/rgw_civetweb_log.cc b/src/rgw/rgw_civetweb_log.cc
new file mode 100644
index 00000000..d8a89453
--- /dev/null
+++ b/src/rgw/rgw_civetweb_log.cc
@@ -0,0 +1,24 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/config.h"
+#include "rgw_common.h"
+
+#include "civetweb/civetweb.h"
+#include "rgw_crypt_sanitize.h"
+
+#define dout_subsys ceph_subsys_civetweb
+
+
+#define dout_context g_ceph_context
+/* civetweb log_message hook: write @buf, passed through
+ * rgw::crypt_sanitize::log_content, to the log at level 0 together with
+ * the connection's address. Always returns 0. */
+int rgw_civetweb_log_callback(const struct mg_connection *conn, const char *buf) {
+  const void* const conn_addr = conn;
+  dout(0) << "civetweb: " << conn_addr << ": "
+          << rgw::crypt_sanitize::log_content(buf) << dendl;
+  return 0;
+}
+
+/* civetweb log_access hook: write @buf, passed through
+ * rgw::crypt_sanitize::log_content, to the log at level 1 together with
+ * the connection's address. Always returns 0. */
+int rgw_civetweb_log_access_callback(const struct mg_connection *conn, const char *buf) {
+  const void* const conn_addr = conn;
+  dout(1) << "civetweb: " << conn_addr << ": "
+          << rgw::crypt_sanitize::log_content(buf) << dendl;
+  return 0;
+}
+
+
diff --git a/src/rgw/rgw_civetweb_log.h b/src/rgw/rgw_civetweb_log.h
new file mode 100644
index 00000000..2fbd517c
--- /dev/null
+++ b/src/rgw/rgw_civetweb_log.h
@@ -0,0 +1,10 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_CIVETWEB_LOG_H
+#define CEPH_RGW_CIVETWEB_LOG_H
+
+/* Logging hook for civetweb messages about @conn; @buf is the message text.
+ * The implementation logs at level 0 and returns 0. */
+int rgw_civetweb_log_callback(const struct mg_connection *conn, const char *buf);
+/* Logging hook for civetweb access-log lines about @conn; @buf is the line.
+ * The implementation logs at level 1 and returns 0. */
+int rgw_civetweb_log_access_callback(const struct mg_connection *conn, const char *buf);
+
+#endif
diff --git a/src/rgw/rgw_client_io.cc b/src/rgw/rgw_client_io.cc
new file mode 100644
index 00000000..9528ab6f
--- /dev/null
+++ b/src/rgw/rgw_client_io.cc
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "rgw_client_io.h"
+#include "rgw_crypt.h"
+#include "rgw_crypt_sanitize.h"
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw {
+namespace io {
+
+/* Initialise the client through init_env() and, when level-20 rgw logging
+ * is being gathered, dump every environment entry in sanitised form.
+ * Returns 0 on success or the non-zero value reported by init_env(). */
+[[nodiscard]] int BasicClient::init(CephContext *cct) {
+  const int init_error = init_env(cct);
+  if (init_error != 0) {
+    return init_error;
+  }
+
+  /* Skip the whole walk over the environment unless it will be logged. */
+  if (! cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    return init_error;
+  }
+
+  for (const auto& kv : get_env().get_map()) {
+    rgw::crypt_sanitize::env x{kv.first, kv.second};
+    ldout(cct, 20) << kv.first << "=" << (x) << dendl;
+  }
+  return init_error;
+}
+
+} /* namespace io */
+} /* namespace rgw */
diff --git a/src/rgw/rgw_client_io.h b/src/rgw/rgw_client_io.h
new file mode 100644
index 00000000..1f5af676
--- /dev/null
+++ b/src/rgw/rgw_client_io.h
@@ -0,0 +1,439 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_CLIENT_IO_H
+#define CEPH_RGW_CLIENT_IO_H
+
+#include <exception>
+#include <string>
+#include <streambuf>
+#include <istream>
+#include <stdlib.h>
+#include <system_error>
+
+#include <boost/utility/string_ref.hpp>
+
+#include "include/types.h"
+#include "rgw_common.h"
+
+
+class RGWRestfulIO;
+
+namespace rgw {
+namespace io {
+
+using Exception = std::system_error;
+
+/* The minimal and simplest subset of methods that a client of RadosGW can be
+ * interacted with. */
+class BasicClient {
+protected:
+ /* Front-end specific initialisation hook; init() calls it first and
+  * passes a non-zero result straight back to its caller. */
+ virtual int init_env(CephContext *cct) = 0;
+
+public:
+ virtual ~BasicClient() = default;
+
+ /* Initialize the BasicClient and inject CephContext. */
+ int init(CephContext *cct);
+
+ /* Return the RGWEnv describing the environment that a given request lives in.
+ * The method does not throw exceptions. */
+ virtual RGWEnv& get_env() noexcept = 0;
+
+ /* Complete request.
+ * On success returns number of bytes generated for a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno. */
+ virtual size_t complete_request() = 0;
+}; /* rgw::io::BasicClient */
+
+
+/* Interface for querying and toggling the accounting of bytes exchanged
+ * with a direct client of RadosGW. */
+class Accounter {
+public:
+ virtual ~Accounter() = default;
+
+ /* Enable or disable the accounting of both sent and received data. Changing
+ * the state does not affect the counters. */
+ virtual void set_account(bool enabled) = 0;
+
+ /* Return number of bytes sent to a direct client of RadosGW (direct means
+ * eg. a web server instance in the case of using FastCGI front-end) when
+ * the accounting was enabled. */
+ virtual uint64_t get_bytes_sent() const = 0;
+
+ /* Return number of bytes received from a direct client of RadosGW (direct
+ * means eg. a web server instance in the case of using FastCGI front-end)
+ * when the accounting was enabled. */
+ virtual uint64_t get_bytes_received() const = 0;
+}; /* rgw::io::Accounter */
+
+
+/* Interface abstracting restful interactions with clients, usually through
+ * the HTTP protocol. The methods participating in the response generation
+ * process should be called in the specific order:
+ * 1. send_100_continue() - at most once,
+ * 2. send_status() - exactly once,
+ * 3. Any of:
+ * a. send_header(),
+ * b. send_content_length() XOR send_chunked_transfer_encoding()
+ * Please note that only one of those two methods may be used, and
+ * it must be called at most once.
+ * 4. complete_header() - exactly once,
+ * 5. send_body()
+ * 6. complete_request() - exactly once.
+ * There are no restrictions on flush() - it may be called at any moment.
+ *
+ * Receiving data from a client isn't subject to any further call order
+ * restrictions besides those imposed by BasicClient. That is, get_env()
+ * and recv_body() can be mixed. */
+class RestfulClient : public BasicClient {
+ /* Decorators forward init_env(), which is protected in BasicClient, to
+  * the client they wrap. */
+ template<typename T> friend class DecoratedRestfulClient;
+
+public:
+ /* Generate the 100 Continue message.
+ * On success returns number of bytes generated for a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno. */
+ virtual size_t send_100_continue() = 0;
+
+ /* Generate the response's status part taking the HTTP status code as @status
+ * and its name pointed in @status_name.
+ * On success returns number of bytes generated for a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno. */
+ virtual size_t send_status(int status, const char *status_name) = 0;
+
+ /* Generate header. On success returns number of bytes generated for a direct
+ * client of RadosGW. On failure throws rgw::io::Exception containing errno.
+ *
+ * boost::string_ref is being used because of length it internally carries. */
+ virtual size_t send_header(const boost::string_ref& name,
+ const boost::string_ref& value) = 0;
+
+ /* Inform a client about a content length. Takes number of bytes as @len.
+ * On success returns number of bytes generated for a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno.
+ *
+ * CALL LIMITATIONS:
+ * - The method must be called EXACTLY ONCE.
+ * - The method is interchangeable with send_chunked_transfer_encoding(). */
+ virtual size_t send_content_length(uint64_t len) = 0;
+
+ /* Inform a client that the chunked transfer encoding will be used.
+ * On success returns number of bytes generated for a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno.
+ *
+ * CALL LIMITATIONS:
+ * - The method must be called EXACTLY ONCE.
+ * - The method is interchangeable with send_content_length(). */
+ virtual size_t send_chunked_transfer_encoding() {
+ /* This is a null implementation. We don't send anything here, even the HTTP
+ * header. The intended behaviour should be provided through a decorator or
+ * directly by a given front-end. */
+ return 0;
+ }
+
+ /* Generate completion (the CRLF sequence separating headers and body in
+ * the case of HTTP) of headers. On success returns number of generated bytes
+ * for a direct client of RadosGW. On failure throws rgw::io::Exception with
+ * errno. */
+ virtual size_t complete_header() = 0;
+
+ /* Receive no more than @max bytes from a request's body and store it in
+ * buffer pointed by @buf. On success returns number of bytes received from
+ * a direct client of RadosGW that has been stored in @buf. On failure throws
+ * rgw::io::Exception containing errno. */
+ virtual size_t recv_body(char* buf, size_t max) = 0;
+
+ /* Generate a part of response's body by taking exactly @len bytes from
+ * the buffer pointed by @buf. On success returns number of generated bytes
+ * of response's body. On failure throws rgw::io::Exception. */
+ virtual size_t send_body(const char* buf, size_t len) = 0;
+
+ /* Flushes all already generated data to a direct client of RadosGW.
+ * On failure throws rgw::io::Exception containing errno. */
+ virtual void flush() = 0;
+} /* rgw::io::RestfulClient */;
+
+
+/* Abstract decorator over any implementation of rgw::io::RestfulClient
+ * which could be provided both as a pointer-to-object or the object itself. */
+template <typename DecorateeT>
+class DecoratedRestfulClient : public RestfulClient {
+ /* Other instantiations and RGWRestfulIO need access to the non-public
+  * members declared below. */
+ template<typename T> friend class DecoratedRestfulClient;
+ friend RGWRestfulIO;
+
+ /* The decorated class itself, with the pointer (if any) stripped. */
+ typedef typename std::remove_pointer<DecorateeT>::type DerefedDecorateeT;
+
+ static_assert(std::is_base_of<RestfulClient, DerefedDecorateeT>::value,
+ "DecorateeT must be a subclass of rgw::io::RestfulClient");
+
+ /* Either the decorated object itself or a pointer to it. */
+ DecorateeT decoratee;
+
+ /* There is an indirection layer over accessing decoratee to share the same
+ * code base between dynamic and static decorators. The difference is about
+ * what we store internally: pointer to a decorated object versus the whole
+ * object itself. */
+ /* Overload enabled when the decoratee is stored by value. */
+ template <typename T = void,
+ typename std::enable_if<
+ ! std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ DerefedDecorateeT& get_decoratee() {
+ return decoratee;
+ }
+
+protected:
+ /* Overload enabled when the decoratee is stored as a pointer. */
+ template <typename T = void,
+ typename std::enable_if<
+ std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ DerefedDecorateeT& get_decoratee() {
+ return *decoratee;
+ }
+
+ /* Dynamic decorators (those storing a pointer instead of the decorated
+ * object itself) can be reconfigured on-the-fly. HOWEVER: there are no
+ * facilities for orchestrating such changes. Callers must take care of
+ * atomicity and thread-safety. */
+ template <typename T = void,
+ typename std::enable_if<
+ std::is_pointer<DecorateeT>::value, T>::type* = nullptr>
+ void set_decoratee(DerefedDecorateeT& new_dec) {
+ decoratee = &new_dec;
+ }
+
+ int init_env(CephContext *cct) override {
+ return get_decoratee().init_env(cct);
+ }
+
+public:
+ /* Takes the decoratee as an rvalue of DecorateeT. This is not a
+  * forwarding reference, so lvalues have to be passed via std::move(). */
+ explicit DecoratedRestfulClient(DecorateeT&& decoratee)
+ : decoratee(std::forward<DecorateeT>(decoratee)) {
+ }
+
+ /* Every method below simply forwards to the decoratee; concrete filters
+  * override the ones they want to intercept. */
+ size_t send_status(const int status,
+ const char* const status_name) override {
+ return get_decoratee().send_status(status, status_name);
+ }
+
+ size_t send_100_continue() override {
+ return get_decoratee().send_100_continue();
+ }
+
+ size_t send_header(const boost::string_ref& name,
+ const boost::string_ref& value) override {
+ return get_decoratee().send_header(name, value);
+ }
+
+ size_t send_content_length(const uint64_t len) override {
+ return get_decoratee().send_content_length(len);
+ }
+
+ size_t send_chunked_transfer_encoding() override {
+ return get_decoratee().send_chunked_transfer_encoding();
+ }
+
+ size_t complete_header() override {
+ return get_decoratee().complete_header();
+ }
+
+ size_t recv_body(char* const buf, const size_t max) override {
+ return get_decoratee().recv_body(buf, max);
+ }
+
+ size_t send_body(const char* const buf,
+ const size_t len) override {
+ return get_decoratee().send_body(buf, len);
+ }
+
+ void flush() override {
+ return get_decoratee().flush();
+ }
+
+ RGWEnv& get_env() noexcept override {
+ return get_decoratee().get_env();
+ }
+
+ size_t complete_request() override {
+ return get_decoratee().complete_request();
+ }
+} /* rgw::io::DecoratedRestfulClient */;
+
+
+/* Interface that should be provided by a front-end class wanting to use
+ * the low-level buffering offered by i.e. StaticOutputBufferer. */
+class BuffererSink {
+public:
+ virtual ~BuffererSink() = default;
+
+ /* Send exactly @len bytes from the memory location pointed by @buf.
+ * On success returns @len. On failure throws rgw::io::Exception. */
+ virtual size_t write_data(const char *buf, size_t len) = 0;
+};
+
+/* Utility class providing RestfulClient's implementations with facilities
+ * for low-level buffering without relying on dynamic memory allocations.
+ * The buffer is carried entirely on stack. This narrows down applicability
+ * to these situations where buffers are relatively small. This perfectly
+ * fits the needs of composing an HTTP header. Without that a front-end
+ * might need to issue a lot of small IO operations leading to increased
+ * overhead on syscalls and fragmentation of a message if the Nagle's
+ * algorithm won't be able to form a single TCP segment (usually when
+ * running on extremely fast network interfaces like the loopback). */
+template <size_t BufferSizeV = 4096>
+class StaticOutputBufferer : public std::streambuf {
+  static_assert(BufferSizeV >= sizeof(std::streambuf::char_type),
+                "Buffer size must be bigger than a single char_type.");
+
+  using std::streambuf::int_type;
+
+  /* Called when the put area is full. The constructor keeps one slot of
+   * the buffer outside the put area, so @c can still be stored before
+   * everything is handed to the sink.
+   * Per the std::streambuf contract @c may be EOF, meaning "just flush":
+   * then no byte is stored and a non-EOF value is returned on success. */
+  int_type overflow(const int_type c) override {
+    using traits = std::streambuf::traits_type;
+
+    if (! traits::eq_int_type(c, traits::eof())) {
+      *pptr() = traits::to_char_type(c);
+      pbump(1);
+    }
+
+    if (! sync()) {
+      /* No error, the buffer has been successfully synchronized. */
+      return traits::not_eof(c);
+    } else {
+      return traits::eof();
+    }
+  }
+
+  /* Hand all pending bytes to the sink and rewind the put pointer. The
+   * pointer is rewound before the write, as in the original code. */
+  int sync() override {
+    /* Keep the distance signed: negating an unsigned size_t and passing it
+     * to pbump(int) relies on wrap-around conversion. */
+    const auto pending = std::streambuf::pptr() - std::streambuf::pbase();
+    std::streambuf::pbump(static_cast<int>(-pending));
+    sink.write_data(std::streambuf::pbase(), static_cast<size_t>(pending));
+    /* Always return success here. In case of failure write_data() will throw
+     * rgw::io::Exception. */
+    return 0;
+  }
+
+  BuffererSink& sink;
+  std::streambuf::char_type buffer[BufferSizeV];
+
+public:
+  /* Bind the buffer to @sink. The put area covers all but the last slot
+   * of the buffer; overflow() uses that spare slot. */
+  explicit StaticOutputBufferer(BuffererSink& sink)
+    : sink(sink) {
+    constexpr size_t len = sizeof(buffer) - sizeof(std::streambuf::char_type);
+    std::streambuf::setp(buffer, buffer + len);
+  }
+};
+
+} /* namespace io */
+} /* namespace rgw */
+
+
+/* We're doing this nasty thing only because of extensive usage of templates
+ * to implement the static decorator pattern. C++ templates de facto enforce
+ * mixing interfaces with implementation. Additionally, those classes derive
+ * from RGWRestfulIO defined here. I believe that including in the middle of
+ * file is still better than polluting it directly. */
+#include "rgw_client_io_filters.h"
+
+
+/* RGWRestfulIO: high level interface to interact with RESTful clients. What
+ * differentiates it from rgw::io::RestfulClient is providing more specific APIs
+ * like rgw::io::Accounter or the AWS Auth v4 stuff implemented by filters
+ * while hiding the pipelined architecture from clients.
+ *
+ * rgw::io::Accounter came in as a part of rgw::io::AccountingFilter. */
+class RGWRestfulIO : public rgw::io::AccountingFilter<rgw::io::RestfulClient*> {
+ /* Keeps the filters installed through add_filter() alive. */
+ std::vector<std::shared_ptr<DecoratedRestfulClient>> filters;
+
+public:
+ ~RGWRestfulIO() override = default;
+
+ /* std::move() is needed even for a raw pointer: the decorator base takes
+  * its decoratee as DecorateeT&&, which does not bind to an lvalue. */
+ RGWRestfulIO(CephContext *_cx, rgw::io::RestfulClient* engine)
+ : AccountingFilter<rgw::io::RestfulClient*>(_cx, std::move(engine)) {
+ }
+
+ /* Insert @new_filter directly below this object: the filter takes over
+  * the current decoratee and then becomes this object's decoratee. */
+ void add_filter(std::shared_ptr<DecoratedRestfulClient> new_filter) {
+ new_filter->set_decoratee(this->get_decoratee());
+ this->set_decoratee(*new_filter);
+ filters.emplace_back(std::move(new_filter));
+ }
+}; /* RGWRestfulIO */
+
+
+/* Type conversions to work around lack of req_state type hierarchy matching
+ * (e.g.) REST backends (may be replaced w/dynamic typed req_state). */
+/* View the request's client I/O object as a RestfulClient; asserts that
+ * the object really is one. */
+static inline rgw::io::RestfulClient* RESTFUL_IO(struct req_state* s) {
+  auto* const cio = s->cio;
+  ceph_assert(dynamic_cast<rgw::io::RestfulClient*>(cio) != nullptr);
+
+  return static_cast<rgw::io::RestfulClient*>(cio);
+}
+
+/* View the request's client I/O object as an Accounter; asserts that the
+ * object implements that interface. */
+static inline rgw::io::Accounter* ACCOUNTING_IO(struct req_state* s) {
+  auto* const accounter = dynamic_cast<rgw::io::Accounter*>(s->cio);
+  ceph_assert(accounter != nullptr);
+
+  return accounter;
+}
+
+/* View the request's client I/O object as an RGWRestfulIO; asserts that
+ * the object really is one. */
+static inline RGWRestfulIO* AWS_AUTHv4_IO(const req_state* const s) {
+  auto* const cio = s->cio;
+  ceph_assert(dynamic_cast<RGWRestfulIO*>(cio) != nullptr);
+
+  return static_cast<RGWRestfulIO*>(cio);
+}
+
+
+/* Input stream buffer that pulls request-body data from an RGWRestfulIO.
+ * The buffer holds up to putback_size already-consumed bytes (so unget()
+ * and friends keep working across refills) followed by up to window_size
+ * freshly received bytes. */
+class RGWClientIOStreamBuf : public std::streambuf {
+protected:
+  RGWRestfulIO &rio;
+  size_t const window_size;
+  size_t const putback_size;
+  std::vector<char> buffer;
+
+public:
+  /* @ws is the number of bytes requested per refill and @ps the number of
+   * consumed bytes kept for put-back; the buffer is sized for both. */
+  RGWClientIOStreamBuf(RGWRestfulIO &rio, size_t ws, size_t ps = 1)
+    : rio(rio),
+      window_size(ws),
+      putback_size(ps),
+      buffer(ws + ps)
+  {
+    setg(nullptr, nullptr, nullptr);
+  }
+
+  /* Refill the get area. Returns the next character without consuming it,
+   * or EOF when recv_body() delivers nothing or throws. */
+  std::streambuf::int_type underflow() override {
+    if (gptr() < egptr()) {
+      return traits_type::to_int_type(*gptr());
+    }
+
+    char * const base = buffer.data();
+
+    /* Preserve the tail of the previous contents for unget(). On the first
+     * underflow there is nothing to preserve. Later on, no more bytes than
+     * were actually buffered may be moved: taking putback_size
+     * unconditionally reads before the buffer when less data than that has
+     * been seen so far. */
+    size_t keep = 0;
+    if (nullptr != eback()) {
+      const size_t buffered = static_cast<size_t>(egptr() - eback());
+      keep = buffered < putback_size ? buffered : putback_size;
+      std::memmove(base, egptr() - keep, keep);
+    }
+    char * const start = base + keep;
+
+    /* Receive behind the preserved bytes. Reading into base would
+     * overwrite them and leave the get area, which begins at start,
+     * pointing at stale data. */
+    size_t read_len = 0;
+    try {
+      read_len = rio.recv_body(start, window_size);
+    } catch (rgw::io::Exception&) {
+      return traits_type::eof();
+    }
+    if (0 == read_len) {
+      return traits_type::eof();
+    }
+
+    setg(base, start, start + read_len);
+
+    return traits_type::to_int_type(*gptr());
+  }
+};
+
+class RGWClientIOStream : private RGWClientIOStreamBuf, public std::istream {
+/* Inheritance from RGWClientIOStreamBuf is a kind of shadow, indirect
+ * form of composition here. We cannot do that explicitly because istream
+ * ctor is being called prior to construction of any member of this class. */
+
+public:
+ /* Builds the stream buffer with a window of 1 and a put-back area of 2,
+  * then attaches the istream to it. */
+ explicit RGWClientIOStream(RGWRestfulIO &s)
+ : RGWClientIOStreamBuf(s, 1, 2),
+ istream(static_cast<RGWClientIOStreamBuf *>(this)) {
+ }
+};
+
+#endif /* CEPH_RGW_CLIENT_IO_H */
diff --git a/src/rgw/rgw_client_io_filters.h b/src/rgw/rgw_client_io_filters.h
new file mode 100644
index 00000000..9ce83a93
--- /dev/null
+++ b/src/rgw/rgw_client_io_filters.h
@@ -0,0 +1,456 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_CLIENT_IO_DECOIMPL_H
+#define CEPH_RGW_CLIENT_IO_DECOIMPL_H
+
+#include <type_traits>
+
+#include <boost/optional.hpp>
+
+#include "rgw_common.h"
+#include "rgw_client_io.h"
+
+namespace rgw {
+namespace io {
+
+/* Decorator that counts the bytes passing through a RestfulClient.
+ * Totals are updated only while accounting is switched on through
+ * set_account(); every operation is traced at debug level 30 either way. */
+template <typename T>
+class AccountingFilter : public DecoratedRestfulClient<T>,
+                         public Accounter {
+  bool enabled;
+  uint64_t total_sent;
+  uint64_t total_received;
+  CephContext *cct;
+
+  /* Trace one outgoing transfer and, when accounting is on, add it to the
+   * running total. `log_head` is the complete start of the log line, up to
+   * and including "e=". Returns `sent` unchanged. */
+  size_t note_sent(const char* const log_head, const size_t sent) {
+    lsubdout(cct, rgw, 30) << log_head
+        << (enabled ? "1" : "0") << ", sent=" << sent << ", total="
+        << total_sent << dendl;
+    if (enabled) {
+      total_sent += sent;
+    }
+    return sent;
+  }
+
+public:
+  template <typename U>
+  AccountingFilter(CephContext *cct, U&& decoratee)
+    : DecoratedRestfulClient<T>(std::forward<U>(decoratee)),
+      enabled(false),
+      total_sent(0),
+      total_received(0), cct(cct) {
+  }
+
+  size_t send_status(const int status,
+                     const char* const status_name) override {
+    return note_sent("AccountingFilter::send_status: e=",
+        DecoratedRestfulClient<T>::send_status(status, status_name));
+  }
+
+  size_t send_100_continue() override {
+    return note_sent("AccountingFilter::send_100_continue: e=",
+        DecoratedRestfulClient<T>::send_100_continue());
+  }
+
+  size_t send_header(const boost::string_ref& name,
+                     const boost::string_ref& value) override {
+    return note_sent("AccountingFilter::send_header: e=",
+        DecoratedRestfulClient<T>::send_header(name, value));
+  }
+
+  size_t send_content_length(const uint64_t len) override {
+    return note_sent("AccountingFilter::send_content_length: e=",
+        DecoratedRestfulClient<T>::send_content_length(len));
+  }
+
+  size_t send_chunked_transfer_encoding() override {
+    return note_sent("AccountingFilter::send_chunked_transfer_encoding: e=",
+        DecoratedRestfulClient<T>::send_chunked_transfer_encoding());
+  }
+
+  size_t complete_header() override {
+    return note_sent("AccountingFilter::complete_header: e=",
+        DecoratedRestfulClient<T>::complete_header());
+  }
+
+  /* Incoming data is counted separately from everything that is sent. */
+  size_t recv_body(char* buf, size_t max) override {
+    const size_t received = DecoratedRestfulClient<T>::recv_body(buf, max);
+    lsubdout(cct, rgw, 30) << "AccountingFilter::recv_body: e="
+        << (enabled ? "1" : "0") << ", received=" << received << dendl;
+    if (enabled) {
+      total_received += received;
+    }
+    return received;
+  }
+
+  size_t send_body(const char* const buf,
+                   const size_t len) override {
+    return note_sent("AccountingFilter::send_body: e=",
+        DecoratedRestfulClient<T>::send_body(buf, len));
+  }
+
+  size_t complete_request() override {
+    return note_sent("AccountingFilter::complete_request: e=",
+        DecoratedRestfulClient<T>::complete_request());
+  }
+
+  uint64_t get_bytes_sent() const override {
+    return total_sent;
+  }
+
+  uint64_t get_bytes_received() const override {
+    return total_received;
+  }
+
+  /* Switch byte counting on or off; the totals gathered so far are kept. */
+  void set_account(bool enabled) override {
+    this->enabled = enabled;
+    lsubdout(cct, rgw, 30) << "AccountingFilter::set_account: e="
+        << (enabled ? "1" : "0") << dendl;
+  }
+};
+
+
+/* Filter that buffers the outgoing response body in memory when the headers
+ * are completed without a Content-Length (or chunked Transfer-Encoding)
+ * header having been sent. The length is then taken from the buffered data
+ * and everything is emitted from complete_request(). */
+template <typename T>
+class BufferingFilter : public DecoratedRestfulClient<T> {
+ template<typename Td> friend class DecoratedRestfulClient;
+protected:
+ ceph::bufferlist data; // body bytes deferred by send_body()
+
+ bool has_content_length; // set by send_content_length() and send_chunked_transfer_encoding()
+ bool buffer_data; // true while send_body() appends to `data` instead of forwarding
+ CephContext *cct; // used for debug logging
+
+public:
+ template <typename U>
+ BufferingFilter(CephContext *cct, U&& decoratee)
+ : DecoratedRestfulClient<T>(std::forward<U>(decoratee)),
+ has_content_length(false),
+ buffer_data(false), cct(cct) {
+ }
+
+ size_t send_content_length(const uint64_t len) override;
+ size_t send_chunked_transfer_encoding() override;
+ size_t complete_header() override;
+ size_t send_body(const char* buf, size_t len) override;
+ size_t complete_request() override;
+};
+
+/* Forward body bytes to the decoratee, or stash them while buffering is
+ * active. Stashed bytes are reported as 0 sent at this point. */
+template <typename T>
+size_t BufferingFilter<T>::send_body(const char* const buf,
+                                     const size_t len)
+{
+  if (! buffer_data) {
+    return DecoratedRestfulClient<T>::send_body(buf, len);
+  }
+
+  data.append(buf, len);
+  lsubdout(cct, rgw, 30) << "BufferingFilter<T>::send_body: defer count = "
+                         << len << dendl;
+  return 0;
+}
+
+/* Record that the caller supplied a length, then pass the header on. */
+template <typename T>
+size_t BufferingFilter<T>::send_content_length(const uint64_t len)
+{
+  has_content_length = true;
+  const size_t sent = DecoratedRestfulClient<T>::send_content_length(len);
+  return sent;
+}
+
+/* Chunked transfer coding makes a Content-Length unnecessary, so it is
+ * treated the same way as having one: no buffering will take place. */
+template <typename T>
+size_t BufferingFilter<T>::send_chunked_transfer_encoding()
+{
+  has_content_length = true;
+  const size_t sent =
+    DecoratedRestfulClient<T>::send_chunked_transfer_encoding();
+  return sent;
+}
+
+/* Finish the header section. Without a known length the headers are held
+ * back and body buffering starts; complete_request() emits everything. */
+template <typename T>
+size_t BufferingFilter<T>::complete_header()
+{
+  if (has_content_length) {
+    return DecoratedRestfulClient<T>::complete_header();
+  }
+
+  /* We will dump everything in complete_request(). */
+  buffer_data = true;
+  lsubdout(cct, rgw, 30) << "BufferingFilter<T>::complete_header: has_content_length="
+                         << (has_content_length ? "1" : "0") << dendl;
+  return 0;
+}
+
+/* Flush whatever was held back and finish the request. When no length was
+ * announced, the Content-Length computed from the buffered body and the end
+ * of the header section are sent first, followed by the buffered body.
+ * Returns the body bytes sent here plus the decoratee's complete_request()
+ * result; the late header bytes are deliberately left out. */
+template <typename T>
+size_t BufferingFilter<T>::complete_request()
+{
+  size_t body_sent = 0;
+
+  if (! has_content_length) {
+    /* These bytes can only be part of the header, so they are logged but
+     * not added to the returned count. */
+    size_t header_sent =
+      DecoratedRestfulClient<T>::send_content_length(data.length());
+    header_sent += DecoratedRestfulClient<T>::complete_header();
+    lsubdout(cct, rgw, 30) <<
+      "BufferingFilter::complete_request: !has_content_length: IGNORE: sent="
+      << header_sent << dendl;
+  }
+
+  if (buffer_data) {
+    /* Each buffer is sent on its own: asking for data.c_str() would first
+     * copy everything into one continuous memory area. */
+    for (const auto& segment : data.buffers()) {
+      body_sent += DecoratedRestfulClient<T>::send_body(segment.c_str(),
+                                                        segment.length());
+    }
+    data.clear();
+    buffer_data = false;
+    lsubdout(cct, rgw, 30) << "BufferingFilter::complete_request: buffer_data: sent="
+                           << body_sent << dendl;
+  }
+
+  return body_sent + DecoratedRestfulClient<T>::complete_request();
+}
+
+/* Wrap `t` in a BufferingFilter that logs through `cct`. */
+template <typename T> static inline
+BufferingFilter<T> add_buffering(CephContext *cct, T&& t) {
+  using Filter = BufferingFilter<T>;
+  return Filter(cct, std::forward<T>(t));
+}
+
+
+/* Decorator that frames the response body with HTTP/1.1 chunked transfer
+ * coding once send_chunked_transfer_encoding() has been called. Until then
+ * body data is forwarded untouched. */
+template <typename T>
+class ChunkingFilter : public DecoratedRestfulClient<T> {
+  template<typename Td> friend class DecoratedRestfulClient;
+protected:
+  bool chunking_enabled;
+
+public:
+  template <typename U>
+  explicit ChunkingFilter(U&& decoratee)
+    : DecoratedRestfulClient<T>(std::forward<U>(decoratee)),
+      chunking_enabled(false) {
+  }
+
+  /* Announce chunked coding to the client and start framing body data. */
+  size_t send_chunked_transfer_encoding() override {
+    chunking_enabled = true;
+    return DecoratedRestfulClient<T>::send_header("Transfer-Encoding",
+                                                  "chunked");
+  }
+
+  /* Send `len` bytes of body, wrapped into a single chunk when chunking is
+   * enabled. Returns the sum of what the decoratee reported for the size
+   * line, the data and the trailing CRLF. */
+  size_t send_body(const char* buf,
+                   const size_t len) override {
+    if (! chunking_enabled) {
+      return DecoratedRestfulClient<T>::send_body(buf, len);
+    }
+
+    if (0 == len) {
+      /* A chunk of size zero is the "last-chunk" marker. Emitting it here
+       * would end the response before complete_request() is reached, so an
+       * empty write sends nothing at all. */
+      return 0;
+    }
+
+    static constexpr char HEADER_END[] = "\r\n";
+    /* https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 */
+    // TODO: we have no support for sending chunked-encoding
+    // extensions/trailing headers.
+    char chunk_size[32];
+    /* PRIx64 expects a 64-bit argument; size_t may be narrower. */
+    const auto chunk_size_len = snprintf(chunk_size, sizeof(chunk_size),
+                                         "%" PRIx64 "\r\n",
+                                         static_cast<uint64_t>(len));
+    size_t sent = 0;
+
+    sent += DecoratedRestfulClient<T>::send_body(chunk_size, chunk_size_len);
+    sent += DecoratedRestfulClient<T>::send_body(buf, len);
+    sent += DecoratedRestfulClient<T>::send_body(HEADER_END,
+                                                 sizeof(HEADER_END) - 1);
+    return sent;
+  }
+
+  /* Terminate the chunked body (when chunking is on) and finish the
+   * request. */
+  size_t complete_request() override {
+    size_t sent = 0;
+
+    if (chunking_enabled) {
+      static constexpr char CHUNKED_RESP_END[] = "0\r\n\r\n";
+      sent += DecoratedRestfulClient<T>::send_body(CHUNKED_RESP_END,
+                                                   sizeof(CHUNKED_RESP_END) - 1);
+    }
+
+    return sent + DecoratedRestfulClient<T>::complete_request();
+  }
+};
+
+/* Wrap `t` in a ChunkingFilter. */
+template <typename T> static inline
+ChunkingFilter<T> add_chunking(T&& t) {
+  using Filter = ChunkingFilter<T>;
+  return Filter(std::forward<T>(t));
+}
+
+
+/* Filter that suppresses the Content-Length HTTP header for the responses
+ * where RFC 7230 asks for that: 204 No Content and 304 Not Modified. The
+ * rgw_print_prohibited_content_length option turns the suppression off. */
+template <typename T>
+class ConLenControllingFilter : public DecoratedRestfulClient<T> {
+protected:
+  enum class ContentLengthAction {
+    FORWARD,
+    INHIBIT,
+    UNKNOWN
+  } action;
+
+public:
+  template <typename U>
+  explicit ConLenControllingFilter(U&& decoratee)
+    : DecoratedRestfulClient<T>(std::forward<U>(decoratee)),
+      action(ContentLengthAction::UNKNOWN) {
+  }
+
+  /* The status decides what a later send_content_length() will do. */
+  size_t send_status(const int status,
+                     const char* const status_name) override {
+    const bool bodyless_status = (204 == status || 304 == status);
+    /* The configuration is consulted only for the two bodyless statuses. */
+    const bool inhibit =
+      bodyless_status && ! g_conf()->rgw_print_prohibited_content_length;
+    action = inhibit ? ContentLengthAction::INHIBIT
+                     : ContentLengthAction::FORWARD;
+
+    return DecoratedRestfulClient<T>::send_status(status, status_name);
+  }
+
+  /* Forward or drop the header depending on the status seen before; calling
+   * this before send_status() yields -EINVAL. */
+  size_t send_content_length(const uint64_t len) override {
+    if (ContentLengthAction::FORWARD == action) {
+      return DecoratedRestfulClient<T>::send_content_length(len);
+    }
+    if (ContentLengthAction::INHIBIT == action) {
+      return 0;
+    }
+    return -EINVAL;
+  }
+};
+
+/* Wrap `t` in a ConLenControllingFilter. */
+template <typename T> static inline
+ConLenControllingFilter<T> add_conlen_controlling(T&& t) {
+  using Filter = ConLenControllingFilter<T>;
+  return Filter(std::forward<T>(t));
+}
+
+
+/* Filter that compensates for clients of the RGWRestfulIO interface which
+ * emit headers in the wrong order. Headers (and a Content-Length sent before
+ * the status) are held back and replayed from complete_header(). Should be
+ * removed once those clients are fixed. */
+template <typename T>
+class ReorderingFilter : public DecoratedRestfulClient<T> {
+protected:
+  enum class ReorderState {
+    RGW_EARLY_HEADERS,  /* Got headers sent before calling send_status. */
+    RGW_STATUS_SEEN,    /* Status has been seen. */
+    RGW_DATA            /* Header has been completed. */
+  } phase;
+
+  boost::optional<uint64_t> content_length;
+
+  std::vector<std::pair<std::string, std::string>> headers;
+
+  /* Queue the header until the header section is completed; afterwards pass
+   * it straight through. */
+  size_t send_header(const boost::string_ref& name,
+                     const boost::string_ref& value) override {
+    if (ReorderState::RGW_DATA == phase) {
+      return DecoratedRestfulClient<T>::send_header(name, value);
+    }
+
+    if (ReorderState::RGW_EARLY_HEADERS == phase ||
+        ReorderState::RGW_STATUS_SEEN == phase) {
+      headers.emplace_back(std::string(name.data(), name.size()),
+                           std::string(value.data(), value.size()));
+      return 0;
+    }
+
+    return -EIO;
+  }
+
+public:
+  template <typename U>
+  explicit ReorderingFilter(U&& decoratee)
+    : DecoratedRestfulClient<T>(std::forward<U>(decoratee)),
+      phase(ReorderState::RGW_EARLY_HEADERS) {
+  }
+
+  size_t send_status(const int status,
+                     const char* const status_name) override {
+    phase = ReorderState::RGW_STATUS_SEEN;
+
+    return DecoratedRestfulClient<T>::send_status(status, status_name);
+  }
+
+  size_t send_content_length(const uint64_t len) override {
+    if (ReorderState::RGW_EARLY_HEADERS != phase) {
+      return DecoratedRestfulClient<T>::send_content_length(len);
+    }
+
+    /* Oh great, someone tries to send content length before status. */
+    content_length = len;
+    return 0;
+  }
+
+  /* Replay everything that was held back, then complete the headers. */
+  size_t complete_header() override {
+    /* Change state in order to immediately send everything we get. */
+    phase = ReorderState::RGW_DATA;
+
+    size_t replayed = 0;
+
+    /* Send the content length if one was stashed. */
+    if (content_length) {
+      replayed +=
+        DecoratedRestfulClient<T>::send_content_length(*content_length);
+    }
+
+    /* Queued headers were reported as 0 bytes earlier, so count them now. */
+    for (const auto& header : headers) {
+      replayed += DecoratedRestfulClient<T>::send_header(header.first,
+                                                         header.second);
+    }
+    headers.clear();
+
+    return replayed + DecoratedRestfulClient<T>::complete_header();
+  }
+};
+
+/* Wrap `t` in a ReorderingFilter. */
+template <typename T> static inline
+ReorderingFilter<T> add_reordering(T&& t) {
+  using Filter = ReorderingFilter<T>;
+  return Filter(std::forward<T>(t));
+}
+
+} /* namespace io */
+} /* namespace rgw */
+#endif /* CEPH_RGW_CLIENT_IO_DECOIMPL_H */
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
new file mode 100644
index 00000000..567b80ca
--- /dev/null
+++ b/src/rgw/rgw_common.cc
@@ -0,0 +1,1921 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <boost/tokenizer.hpp>
+
+#include "json_spirit/json_spirit.h"
+#include "common/ceph_json.h"
+
+#include "rgw_op.h"
+#include "rgw_common.h"
+#include "rgw_acl.h"
+#include "rgw_string.h"
+#include "rgw_rados.h"
+#include "rgw_http_errors.h"
+#include "rgw_arn.h"
+
+#include "common/ceph_crypto.h"
+#include "common/armor.h"
+#include "common/errno.h"
+#include "common/Clock.h"
+#include "common/Formatter.h"
+#include "common/convenience.h"
+#include "common/strtol.h"
+#include "include/str_list.h"
+#include "rgw_crypt_sanitize.h"
+
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using rgw::ARN;
+using rgw::IAM::Effect;
+using rgw::IAM::op_to_perm;
+using rgw::IAM::Policy;
+
+const uint32_t RGWBucketInfo::NUM_SHARDS_BLIND_BUCKET(UINT32_MAX); // out-of-line definition of the class constant, initialised to UINT32_MAX
+
+rgw_http_errors rgw_http_s3_errors({ /* error number -> {HTTP status, S3 error code};
+   set_req_state_err() falls back to this table for every protocol. */
+ { 0, {200, "" }},
+ { STATUS_CREATED, {201, "Created" }},
+ { STATUS_ACCEPTED, {202, "Accepted" }},
+ { STATUS_NO_CONTENT, {204, "NoContent" }},
+ { STATUS_PARTIAL_CONTENT, {206, "" }},
+ { ERR_PERMANENT_REDIRECT, {301, "PermanentRedirect" }},
+ { ERR_WEBSITE_REDIRECT, {301, "WebsiteRedirect" }},
+ { STATUS_REDIRECT, {303, "" }},
+ { ERR_NOT_MODIFIED, {304, "NotModified" }},
+ { EINVAL, {400, "InvalidArgument" }},
+ { ERR_INVALID_REQUEST, {400, "InvalidRequest" }},
+ { ERR_INVALID_DIGEST, {400, "InvalidDigest" }},
+ { ERR_BAD_DIGEST, {400, "BadDigest" }},
+ { ERR_INVALID_LOCATION_CONSTRAINT, {400, "InvalidLocationConstraint" }},
+ { ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION, {400, "ZonegroupDefaultPlacementMisconfiguration" }},
+ { ERR_INVALID_BUCKET_NAME, {400, "InvalidBucketName" }},
+ { ERR_INVALID_OBJECT_NAME, {400, "InvalidObjectName" }},
+ { ERR_UNRESOLVABLE_EMAIL, {400, "UnresolvableGrantByEmailAddress" }},
+ { ERR_INVALID_PART, {400, "InvalidPart" }},
+ { ERR_INVALID_PART_ORDER, {400, "InvalidPartOrder" }},
+ { ERR_REQUEST_TIMEOUT, {400, "RequestTimeout" }},
+ { ERR_TOO_LARGE, {400, "EntityTooLarge" }},
+ { ERR_TOO_SMALL, {400, "EntityTooSmall" }},
+ { ERR_TOO_MANY_BUCKETS, {400, "TooManyBuckets" }},
+ { ERR_MALFORMED_XML, {400, "MalformedXML" }},
+ { ERR_AMZ_CONTENT_SHA256_MISMATCH, {400, "XAmzContentSHA256Mismatch" }},
+ { ERR_MALFORMED_DOC, {400, "MalformedPolicyDocument"}},
+ { ERR_INVALID_TAG, {400, "InvalidTag"}},
+ { ERR_MALFORMED_ACL_ERROR, {400, "MalformedACLError" }},
+ { ERR_INVALID_CORS_RULES_ERROR, {400, "InvalidRequest" }},
+ { ERR_INVALID_WEBSITE_ROUTING_RULES_ERROR, {400, "InvalidRequest" }},
+ { ERR_INVALID_ENCRYPTION_ALGORITHM, {400, "InvalidEncryptionAlgorithmError" }},
+ { ERR_INVALID_RETENTION_PERIOD,{400, "InvalidRetentionPeriod"}},
+ { ERR_LENGTH_REQUIRED, {411, "MissingContentLength" }},
+ { EACCES, {403, "AccessDenied" }},
+ { EPERM, {403, "AccessDenied" }},
+ { ERR_SIGNATURE_NO_MATCH, {403, "SignatureDoesNotMatch" }},
+ { ERR_INVALID_ACCESS_KEY, {403, "InvalidAccessKeyId" }},
+ { ERR_USER_SUSPENDED, {403, "UserSuspended" }},
+ { ERR_REQUEST_TIME_SKEWED, {403, "RequestTimeTooSkewed" }},
+ { ERR_QUOTA_EXCEEDED, {403, "QuotaExceeded" }},
+ { ERR_MFA_REQUIRED, {403, "AccessDenied" }},
+ { ENOENT, {404, "NoSuchKey" }},
+ { ERR_NO_SUCH_BUCKET, {404, "NoSuchBucket" }},
+ { ERR_NO_SUCH_WEBSITE_CONFIGURATION, {404, "NoSuchWebsiteConfiguration" }},
+ { ERR_NO_SUCH_UPLOAD, {404, "NoSuchUpload" }},
+ { ERR_NOT_FOUND, {404, "Not Found"}},
+ { ERR_NO_SUCH_LC, {404, "NoSuchLifecycleConfiguration"}},
+ { ERR_NO_SUCH_BUCKET_POLICY, {404, "NoSuchBucketPolicy"}},
+ { ERR_NO_SUCH_USER, {404, "NoSuchUser"}},
+ { ERR_NO_ROLE_FOUND, {404, "NoSuchEntity"}},
+ { ERR_NO_CORS_FOUND, {404, "NoSuchCORSConfiguration"}},
+ { ERR_NO_SUCH_SUBUSER, {404, "NoSuchSubUser"}},
+ { ERR_NO_SUCH_ENTITY, {404, "NoSuchEntity"}},
+ { ERR_NO_SUCH_CORS_CONFIGURATION, {404, "NoSuchCORSConfiguration"}},
+ { ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION, {404, "ObjectLockConfigurationNotFoundError"}},
+ { ERR_METHOD_NOT_ALLOWED, {405, "MethodNotAllowed" }},
+ { ETIMEDOUT, {408, "RequestTimeout" }},
+ { EEXIST, {409, "BucketAlreadyExists" }},
+ { ERR_USER_EXIST, {409, "UserAlreadyExists" }},
+ { ERR_EMAIL_EXIST, {409, "EmailExists" }},
+ { ERR_KEY_EXIST, {409, "KeyExists"}},
+ { ERR_TAG_CONFLICT, {409, "OperationAborted"}},
+ { ERR_POSITION_NOT_EQUAL_TO_LENGTH, {409, "PositionNotEqualToLength"}},
+ { ERR_OBJECT_NOT_APPENDABLE, {409, "ObjectNotAppendable"}},
+ { ERR_INVALID_BUCKET_STATE, {409, "InvalidBucketState"}},
+ { ERR_INVALID_SECRET_KEY, {400, "InvalidSecretKey"}},
+ { ERR_INVALID_KEY_TYPE, {400, "InvalidKeyType"}},
+ { ERR_INVALID_CAP, {400, "InvalidCapability"}},
+ { ERR_INVALID_TENANT_NAME, {400, "InvalidTenantName" }},
+ { ENOTEMPTY, {409, "BucketNotEmpty" }},
+ { ERR_PRECONDITION_FAILED, {412, "PreconditionFailed" }},
+ { ERANGE, {416, "InvalidRange" }},
+ { ERR_UNPROCESSABLE_ENTITY, {422, "UnprocessableEntity" }},
+ { ERR_LOCKED, {423, "Locked" }},
+ { ERR_INTERNAL_ERROR, {500, "InternalError" }},
+ { ERR_NOT_IMPLEMENTED, {501, "NotImplemented" }},
+ { ERR_SERVICE_UNAVAILABLE, {503, "ServiceUnavailable"}},
+ { ERR_RATE_LIMITED, {503, "SlowDown"}},
+ { ERR_ZERO_IN_URL, {400, "InvalidRequest" }},
+});
+
+rgw_http_errors rgw_http_swift_errors({ /* Swift-specific mappings; set_req_state_err()
+   searches them first when RGW_REST_SWIFT is set in the protocol flags. */
+ { EACCES, {403, "AccessDenied" }},
+ { EPERM, {401, "AccessDenied" }},
+ { ENAMETOOLONG, {400, "Metadata name too long" }},
+ { ERR_USER_SUSPENDED, {401, "UserSuspended" }},
+ { ERR_INVALID_UTF8, {412, "Invalid UTF8" }},
+ { ERR_BAD_URL, {412, "Bad URL" }},
+ { ERR_NOT_SLO_MANIFEST, {400, "Not an SLO manifest" }},
+ { ERR_QUOTA_EXCEEDED, {413, "QuotaExceeded" }},
+ { ENOTEMPTY, {409, "There was a conflict when trying "
+ "to complete your request." }},
+ /* FIXME(rzarzynski): we need to find a way to apply Swift's error handling
+ * procedures to ERR_ZERO_IN_URL as well. This is a problem because the
+ * validation is performed very early, even before req_state::proto_flags
+ * is set. */
+ { ERR_ZERO_IN_URL, {412, "Invalid UTF8 or contains NULL"}},
+ { ERR_RATE_LIMITED, {498, "Rate Limited"}},
+});
+
+rgw_http_errors rgw_http_sts_errors({ /* searched by set_req_state_err() when RGW_REST_STS is set */
+ { ERR_PACKED_POLICY_TOO_LARGE, {400, "PackedPolicyTooLarge" }},
+ { ERR_INVALID_IDENTITY_TOKEN, {400, "InvalidIdentityToken" }},
+});
+
+rgw_http_errors rgw_http_iam_errors({ /* searched by set_req_state_err() when RGW_REST_IAM is set */
+ { ERR_ROLE_EXISTS, {409, "EntityAlreadyExists"}},
+ { ERR_DELETE_CONFLICT, {409, "DeleteConflict"}},
+});
+
+using namespace ceph::crypto;
+
+/* A freshly constructed rgw_err starts out in the cleared state. */
+rgw_err::rgw_err()
+{
+  this->clear();
+}
+
+void rgw_err::
+clear()
+{
+ http_ret = 200;
+ ret = 0;
+ err_code.clear();
+}
+
+/* True while the HTTP status is still the 200 set by clear(). */
+bool rgw_err::is_clear() const
+{
+  return 200 == http_ret;
+}
+
+bool rgw_err::
+is_err() const
+{
+ return !(http_ret >= 200 && http_ret <= 399);
+}
+
+// The request URI handed over by the frontend is either an abs_path or an
+// absoluteURI. S3 authorization and other processing expect an abs_path, so
+// an absoluteURI starting with "http://", "https://", "ws://" or "wss://"
+// is reduced to its path part here. Anything else, including an absoluteURI
+// that has no path at all, is returned unchanged.
+static string get_abs_path(const string& request_uri) {
+  static const string ABS_PREFIXES[] = {"http://", "https://", "ws://", "wss://"};
+
+  bool is_absolute = false;
+  for (const string& prefix : ABS_PREFIXES) {
+    if (request_uri.compare(0, prefix.size(), prefix) == 0) {
+      is_absolute = true;
+      break;
+    }
+  }
+  if (!is_absolute) { // it is not a valid absolute uri
+    return request_uri;
+  }
+
+  // Every accepted prefix contains "://", so this search cannot fail.
+  const size_t authority_pos = request_uri.find("://") + 3;
+  const size_t path_pos = request_uri.find('/', authority_pos);
+  if (path_pos == string::npos) {
+    return request_uri;
+  }
+  return request_uri.substr(path_pos);
+}
+
+/* Populate the request description from the frontend environment.
+ *
+ * REQUEST_URI is reduced to an abs_path when it does not start with '/',
+ * and split at the first '?' into URI and parameters; without a '?' the
+ * parameters come from QUERY_STRING. A trailing ":<digits>" is removed from
+ * the Host header. */
+req_info::req_info(CephContext *cct, const class RGWEnv *env) : env(env) {
+  method = env->get("REQUEST_METHOD", "");
+  script_uri = env->get("SCRIPT_URI", cct->_conf->rgw_script_uri.c_str());
+  request_uri = env->get("REQUEST_URI", cct->_conf->rgw_request_uri.c_str());
+  if (request_uri[0] != '/') {
+    request_uri = get_abs_path(request_uri);
+  }
+  auto pos = request_uri.find('?');
+  if (pos != string::npos) {
+    request_params = request_uri.substr(pos + 1);
+    request_uri = request_uri.substr(0, pos);
+  } else {
+    request_params = env->get("QUERY_STRING", "");
+  }
+  host = env->get("HTTP_HOST", "");
+
+  // strip off any trailing :port from host (added by CrossFTP and maybe others)
+  const size_t colon_offset = host.find_last_of(':');
+  if (colon_offset != string::npos) {
+    // The header is client-controlled: isdigit() must be given an
+    // unsigned char value, a negative plain char is undefined behaviour.
+    const bool all_digits = std::all_of(
+      host.begin() + colon_offset + 1, host.end(),
+      [](const char c) {
+        return isdigit(static_cast<unsigned char>(c)) != 0;
+      });
+    if (all_digits) {
+      host.resize(colon_offset);
+    }
+  }
+}
+
+/* Copy the request description from `src`. The source's effective URI,
+ * when it has one, becomes this object's request URI. The x-amz-date
+ * entry is not carried over. */
+void req_info::rebuild_from(req_info& src)
+{
+  method = src.method;
+  script_uri = src.script_uri;
+  args = src.args;
+  request_uri = src.effective_uri.empty() ? src.request_uri
+                                          : src.effective_uri;
+  effective_uri.clear();
+  host = src.host;
+
+  x_meta_map = src.x_meta_map;
+  x_meta_map.erase("x-amz-date");
+}
+
+
+/* Set up the per-request state: logging and ACL switches are taken from the
+ * environment and the start time is recorded. */
+req_state::req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u, uint64_t id)
+  : cct(_cct), user(u),
+    info(_cct, e), id(id)
+{
+  this->enable_ops_log = e->get_enable_ops_log();
+  this->enable_usage_log = e->get_enable_usage_log();
+  this->defer_to_bucket_acls = e->get_defer_to_bucket_acls();
+
+  this->time = Clock::now();
+}
+
+req_state::~req_state() { // releases the formatter held by this request state
+ delete formatter;
+}
+
+/* Write the log-line prefix "req <id> <elapsed> " to `out`. The elapsed
+ * time is printed with three fixed decimals; afterwards the stream gets its
+ * previous precision back and is switched to the default float format. */
+std::ostream& req_state::gen_prefix(std::ostream& out) const
+{
+  const auto saved_precision = out.precision();
+
+  out << "req " << id << ' ';
+  out << std::setprecision(3) << std::fixed << time_elapsed(); // '0.123s'
+  out << std::setprecision(saved_precision) << std::defaultfloat << ' ';
+  return out;
+}
+
+bool search_err(rgw_http_errors& errs, int err_no, int& http_ret, string& code)
+{
+ auto r = errs.find(err_no);
+ if (r != errs.end()) {
+ http_ret = r->second.first;
+ code = r->second.second;
+ return true;
+ }
+ return false;
+}
+
+void set_req_state_err(struct rgw_err& err, /* out */
+ int err_no, /* in */
+ const int prot_flags) /* in */
+{
+ if (err_no < 0)
+ err_no = -err_no;
+
+ err.ret = -err_no;
+
+ if (prot_flags & RGW_REST_SWIFT) {
+ if (search_err(rgw_http_swift_errors, err_no, err.http_ret, err.err_code))
+ return;
+ }
+
+ if (prot_flags & RGW_REST_STS) {
+ if (search_err(rgw_http_sts_errors, err_no, err.http_ret, err.err_code))
+ return;
+ }
+
+ if (prot_flags & RGW_REST_IAM) {
+ if (search_err(rgw_http_iam_errors, err_no, err.http_ret, err.err_code))
+ return;
+ }
+
+ //Default to searching in s3 errors
+ if (search_err(rgw_http_s3_errors, err_no, err.http_ret, err.err_code))
+ return;
+ dout(0) << "WARNING: set_req_state_err err_no=" << err_no
+ << " resorting to 500" << dendl;
+
+ err.http_ret = 500;
+ err.err_code = "UnknownError";
+}
+
+void set_req_state_err(struct req_state* s, int err_no, const string& err_msg)
+{
+ if (s) {
+ set_req_state_err(s, err_no);
+ if (s->prot_flags & RGW_REST_SWIFT && !err_msg.empty()) {
+ /* TODO(rzarzynski): there never ever should be a check like this one.
+ * It's here only for the sake of the patch's backportability. Further
+ * commits will move the logic to a per-RGWHandler replacement of
+ * the end_header() function. Alternativaly, we might consider making
+ * that just for the dump(). Please take a look on @cbodley's comments
+ * in PR #10690 (https://github.com/ceph/ceph/pull/10690). */
+ s->err.err_code = err_msg;
+ } else {
+ s->err.message = err_msg;
+ }
+ }
+}
+
+void set_req_state_err(struct req_state* s, int err_no)
+{
+ if (s) {
+ set_req_state_err(s->err, err_no, s->prot_flags);
+ }
+}
+
+void dump(struct req_state* s)
+{
+ if (s->format != RGW_FORMAT_HTML)
+ s->formatter->open_object_section("Error");
+ if (!s->err.err_code.empty())
+ s->formatter->dump_string("Code", s->err.err_code);
+ if (!s->err.message.empty())
+ s->formatter->dump_string("Message", s->err.message);
+ if (!s->bucket_name.empty()) // TODO: connect to expose_bucket
+ s->formatter->dump_string("BucketName", s->bucket_name);
+ if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle
+ s->formatter->dump_string("RequestId", s->trans_id);
+ s->formatter->dump_string("HostId", s->host_id);
+ if (s->format != RGW_FORMAT_HTML)
+ s->formatter->close_section();
+}
+
+struct str_len { // a C string together with its length
+ const char *str;
+ int len;
+};
+
+#define STR_LEN_ENTRY(s) { s, sizeof(s) - 1 } // str_len initialiser for a string literal; the length excludes the terminating NUL
+
+struct str_len meta_prefixes[] = { STR_LEN_ENTRY("HTTP_X_AMZ"), // header-name prefixes recognised by req_info::init_meta_info(); this first entry is the one all others are normalised to
+ STR_LEN_ENTRY("HTTP_X_GOOG"),
+ STR_LEN_ENTRY("HTTP_X_DHO"),
+ STR_LEN_ENTRY("HTTP_X_RGW"),
+ STR_LEN_ENTRY("HTTP_X_OBJECT"),
+ STR_LEN_ENTRY("HTTP_X_CONTAINER"),
+ STR_LEN_ENTRY("HTTP_X_ACCOUNT"),
+ {NULL, 0} }; // sentinel that terminates the list
+
+/* Rebuild x_meta_map from the environment.
+ *
+ * Every variable whose name starts with one of meta_prefixes is stored
+ * under a normalised key: the matched prefix is replaced by "x-amz", the
+ * rest is lower-cased and '_' becomes '-'. Values that end up under the
+ * same key are joined with ','. When `found_bad_meta` is non-null it is set
+ * to true if the part after the prefix passes the "_META_" check below; it
+ * is never reset to false here. */
+void req_info::init_meta_info(bool *found_bad_meta)
+{
+  x_meta_map.clear();
+
+  /* All recognised prefixes are normalised to the first one. */
+  const string canonical_prefix(meta_prefixes[0].str + 5 /* skip HTTP_ */);
+
+  for (const auto& kv: env->get_map()) {
+    const string& header_name = kv.first;
+    const string& val = kv.second;
+    const char * const p = header_name.c_str();
+
+    for (const str_len *entry = meta_prefixes; entry->str != NULL; ++entry) {
+      if (strncmp(p, entry->str, entry->len) != 0) {
+        continue;
+      }
+
+      dout(10) << "meta>> " << p << dendl;
+      const char * const name = p + entry->len; /* skip the prefix */
+      const int name_len = header_name.size() - entry->len;
+
+      /* NOTE(review): only name_len characters are compared, so an empty
+       * name or a leading fragment of "_META_" matches as well. Kept as
+       * is; confirm whether an exact match was intended. */
+      if (found_bad_meta && strncmp(name, "_META_", name_len) == 0)
+        *found_bad_meta = true;
+
+      /* A std::string replaces the former variable-length array, which is
+       * not standard C++ and put a header-sized buffer on the stack. */
+      string name_low = canonical_prefix; // normalize meta prefix
+      name_low.append(name);
+      for (char& c : name_low) {
+        if (c == '_') {
+          c = '-';
+        } else {
+          /* tolower() requires a value representable as unsigned char. */
+          c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
+        }
+      }
+
+      auto it = x_meta_map.find(name_low);
+      if (it != x_meta_map.end()) {
+        string old = it->second;
+        boost::algorithm::trim_right(old);
+        old.append(",");
+        old.append(val);
+        x_meta_map[name_low] = old;
+      } else {
+        x_meta_map[name_low] = val;
+      }
+    }
+  }
+  for (const auto& kv: x_meta_map) {
+    dout(10) << "x>> " << kv.first << ":" << rgw::crypt_sanitize::x_meta_map{kv.first, kv.second} << dendl;
+  }
+}
+
+std::ostream& operator<<(std::ostream& oss, const rgw_err &err)
+{
+ oss << "rgw_err(http_ret=" << err.http_ret << ", err_code='" << err.err_code << "') ";
+ return oss;
+}
+
+/* Strip one pair of enclosing double quotes from `s`. Spaces after the
+ * closing quote are tolerated and dropped together with it. A string that
+ * does not start with '"', or that has no closing quote, is returned
+ * unchanged. */
+string rgw_string_unquote(const string& s)
+{
+  if (s.size() < 2 || s[0] != '"') {
+    return s;
+  }
+
+  // Step back over trailing spaces, never past the second character.
+  size_t end = s.size();
+  while (end > 2 && s[end - 1] == ' ') {
+    --end;
+  }
+
+  if (s[end - 1] != '"') {
+    return s;
+  }
+
+  return s.substr(1, end - 2);
+}
+
+/* True when `s` is non-null and holds nothing but whitespace up to its end
+ * (an empty string qualifies). */
+static bool check_str_end(const char *s)
+{
+  if (!s)
+    return false;
+
+  while (*s) {
+    /* isspace() is only defined for values representable as unsigned
+     * char; a plain char from a request header may be negative. */
+    if (!isspace(static_cast<unsigned char>(*s)))
+      return false;
+    s++;
+  }
+  return true;
+}
+
+/* True when `s`, after optional leading whitespace, starts with "GMT" or
+ * "UTC". Null and empty input yield false. */
+static bool check_gmt_end(const char *s)
+{
+  if (!s || !*s)
+    return false;
+
+  /* isspace() is only defined for values representable as unsigned char;
+   * a plain char from a request header may be negative. */
+  while (isspace(static_cast<unsigned char>(*s))) {
+    ++s;
+  }
+
+  /* check for correct timezone */
+  if ((strncmp(s, "GMT", 3) != 0) &&
+      (strncmp(s, "UTC", 3) != 0)) {
+    return false;
+  }
+
+  return true;
+}
+
+/* Parse an RFC 850 date ("Weekday, DD-Mon-YY HH:MM:SS GMT") into `t`. */
+static bool parse_rfc850(const char *s, struct tm *t)
+{
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset(t, 0, sizeof(*t));
+  const char * const rest = strptime(s, "%A, %d-%b-%y %H:%M:%S ", t);
+  return check_gmt_end(rest);
+}
+
+/* Parse an asctime()-style date ("Wdy Mon DD HH:MM:SS YYYY") into `t`. */
+static bool parse_asctime(const char *s, struct tm *t)
+{
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset(t, 0, sizeof(*t));
+  const char * const rest = strptime(s, "%a %b %d %H:%M:%S %Y", t);
+  return check_str_end(rest);
+}
+
+/* Parse an RFC 1123 date ("Wdy, DD Mon YYYY HH:MM:SS GMT") into `t`. */
+static bool parse_rfc1123(const char *s, struct tm *t)
+{
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset(t, 0, sizeof(*t));
+  const char * const rest = strptime(s, "%a, %d %b %Y %H:%M:%S ", t);
+  return check_gmt_end(rest);
+}
+
+/* Parse the RFC 1123 variant that ends in a numeric zone ("%z") instead of
+ * a zone name. */
+static bool parse_rfc1123_alt(const char *s, struct tm *t)
+{
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset(t, 0, sizeof(*t));
+  const char * const rest = strptime(s, "%a, %d %b %Y %H:%M:%S %z", t);
+  return check_str_end(rest);
+}
+
+/* Try the supported HTTP date formats in turn: RFC 850, asctime, RFC 1123
+ * and RFC 1123 with a numeric zone. The first one that parses wins. */
+bool parse_rfc2616(const char *s, struct tm *t)
+{
+  if (parse_rfc850(s, t)) {
+    return true;
+  }
+  if (parse_asctime(s, t)) {
+    return true;
+  }
+  if (parse_rfc1123(s, t)) {
+    return true;
+  }
+  return parse_rfc1123_alt(s, t);
+}
+
+/* Parse an ISO 8601 timestamp into `t`.
+ *
+ * With `extended_format` the layouts "YYYY-MM-DDTHH:MM:SS" and
+ * "YYYY-MM-DD HH:MM:SS" are accepted, otherwise the basic layout
+ * "YYYYMMDDTHHMMSS". The time may be followed by nothing, by "Z", or by a
+ * fraction of the form ".<digits>Z". When `pns` is non-null and a fraction
+ * is present, it is stored there in nanoseconds; digits beyond the ninth
+ * are dropped. Returns false when the input does not parse. */
+bool parse_iso8601(const char *s, struct tm *t, uint32_t *pns, bool extended_format)
+{
+  // FIPS zeroization audit 20191115: this memset is not security related.
+  memset(t, 0, sizeof(*t));
+  const char *p;
+
+  if (!s)
+    s = "";
+
+  if (extended_format) {
+    p = strptime(s, "%Y-%m-%dT%T", t);
+    if (!p) {
+      p = strptime(s, "%Y-%m-%d %T", t);
+    }
+  } else {
+    p = strptime(s, "%Y%m%dT%H%M%S", t);
+  }
+  if (!p) {
+    dout(0) << "parse_iso8601 failed" << dendl;
+    return false;
+  }
+  const boost::string_view str = rgw_trim_whitespace(boost::string_view(p));
+  int len = str.size();
+
+  if (len == 0 || (len == 1 && str[0] == 'Z'))
+    return true;
+
+  if (str[0] != '.' ||
+      str[len - 1] != 'Z')
+    return false;
+
+  /* The whole fraction is parsed once so that malformed input is rejected
+   * regardless of its length. */
+  uint32_t frac;
+  boost::string_view nsstr = str.substr(1, len - 2);
+  int r = stringtoul(nsstr.to_string(), &frac);
+  if (r < 0)
+    return false;
+
+  if (!pns) {
+    return true;
+  }
+
+  if (nsstr.size() > 9) {
+    /* Only nine digits fit into nanoseconds. The value has to be parsed
+     * again from the shortened string: scaling the number taken from the
+     * full string would yield a result of a second or more. */
+    nsstr = nsstr.substr(0, 9);
+    r = stringtoul(nsstr.to_string(), &frac);
+    if (r < 0)
+      return false;
+  }
+
+  /* Factor that scales an n-digit fraction to nanoseconds, indexed by n. */
+  static const uint64_t mul_table[] = { 0,
+    100000000LL,
+    10000000LL,
+    1000000LL,
+    100000LL,
+    10000LL,
+    1000LL,
+    100LL,
+    10LL,
+    1 };
+
+  *pns = frac * mul_table[nsstr.size()];
+
+  return true;
+}
+
+/* Split `in_str` at the first occurrence of `delim` into a key and a value,
+ * both stripped of surrounding whitespace. Returns -EINVAL when `delim` is
+ * null or does not occur in `in_str`, 0 otherwise. */
+int parse_key_value(string& in_str, const char *delim, string& key, string& val)
+{
+  if (delim == NULL)
+    return -EINVAL;
+
+  auto pos = in_str.find(delim);
+  if (pos == string::npos)
+    return -EINVAL;
+
+  key = rgw_trim_whitespace(in_str.substr(0, pos));
+  /* Skip the whole delimiter, not just its first character. */
+  val = rgw_trim_whitespace(in_str.substr(pos + strlen(delim)));
+
+  return 0;
+}
+
+/* Split `in_str` at the first '=' into a trimmed key and value. */
+int parse_key_value(string& in_str, string& key, string& val)
+{
+  static const char DEFAULT_DELIM[] = "=";
+  return parse_key_value(in_str, DEFAULT_DELIM, key, val);
+}
+
+/* Split `in_str` at the first occurrence of `delim`. Returns the trimmed
+ * key and value as views into `in_str`, or boost::none when `delim` does
+ * not occur. */
+boost::optional<std::pair<boost::string_view, boost::string_view>>
+parse_key_value(const boost::string_view& in_str,
+                const boost::string_view& delim)
+{
+  const size_t pos = in_str.find(delim);
+  if (pos == boost::string_view::npos) {
+    return boost::none;
+  }
+
+  const auto key = rgw_trim_whitespace(in_str.substr(0, pos));
+  /* Skip the whole delimiter, not just its first character. */
+  const auto val = rgw_trim_whitespace(in_str.substr(pos + delim.size()));
+
+  return std::make_pair(key, val);
+}
+
+/* Split `in_str` at the first '='; see the two-argument overload. */
+boost::optional<std::pair<boost::string_view, boost::string_view>>
+parse_key_value(const boost::string_view& in_str)
+{
+  const boost::string_view default_delim("=");
+  return parse_key_value(in_str, default_delim);
+}
+
+/* Parse `time_str` as an HTTP date or, failing that, as an ISO 8601
+ * timestamp, and store the result in `*time`. Returns 0 on success and
+ * -EINVAL when neither format matches. */
+int parse_time(const char *time_str, real_time *time)
+{
+  struct tm tm;
+  uint32_t ns = 0;
+
+  const bool parsed = parse_rfc2616(time_str, &tm) ||
+                      parse_iso8601(time_str, &tm, &ns);
+  if (!parsed) {
+    return -EINVAL;
+  }
+
+  const time_t sec = internal_timegm(&tm);
+  *time = utime_t(sec, ns).to_real_time();
+
+  return 0;
+}
+
+#define TIME_BUF_SIZE 128
+
+/* Format `t` as "YYYY-MM-DDTHH:MM:SS.mmmZ" (UTC, millisecond resolution)
+ * into `dest`, writing at most `buf_size` bytes. `dest` is left untouched
+ * when the time cannot be converted or formatted. */
+void rgw_to_iso8601(const real_time& t, char *dest, int buf_size)
+{
+  utime_t ut(t);
+
+  time_t epoch = ut.sec();
+  struct tm broken_down;
+  if (gmtime_r(&epoch, &broken_down) == NULL) {
+    return;
+  }
+
+  char date_time[TIME_BUF_SIZE];
+  if (strftime(date_time, sizeof(date_time), "%Y-%m-%dT%T", &broken_down) == 0) {
+    return;
+  }
+
+  snprintf(dest, buf_size, "%s.%03dZ", date_time, (int)(ut.usec() / 1000));
+}
+
+/*
+ * Format t as an ISO 8601 timestamp and assign it to *dest.
+ */
+void rgw_to_iso8601(const real_time& t, string *dest)
+{
+  /* start out empty: the formatter may return without writing anything */
+  char buf[TIME_BUF_SIZE] = "";
+  rgw_to_iso8601(t, buf, sizeof(buf));
+  *dest = buf;
+}
+
+
+/* Return whatever utime_t::asctime() writes for t, as a string. */
+string rgw_to_asctime(const utime_t& t)
+{
+  stringstream out;
+  t.asctime(out);
+  return out.str();
+}
+
+/*
+ * Compute the HMAC-SHA1 of msg (msg_len bytes) keyed with key (key_len bytes).
+ *
+ * dest must be at least CEPH_CRYPTO_HMACSHA1_DIGESTSIZE bytes long.
+ */
+void calc_hmac_sha1(const char *key, int key_len,
+                    const char *msg, int msg_len, char *dest)
+{
+  const unsigned char *key_bytes = (const unsigned char *)key;
+  const unsigned char *msg_bytes = (const unsigned char *)msg;
+
+  HMACSHA1 hmac(key_bytes, key_len);
+  hmac.Update(msg_bytes, msg_len);
+  hmac.Final((unsigned char *)dest);
+}
+
+/*
+ * Compute the HMAC-SHA256 of msg (msg_len bytes) keyed with key (key_len
+ * bytes).
+ *
+ * CEPH_CRYPTO_HMACSHA256_DIGESTSIZE bytes are copied to dest.
+ */
+void calc_hmac_sha256(const char *key, int key_len,
+                      const char *msg, int msg_len, char *dest)
+{
+  const unsigned char *key_bytes = (const unsigned char *)key;
+  const unsigned char *msg_bytes = (const unsigned char *)msg;
+
+  HMACSHA256 hmac(key_bytes, key_len);
+  hmac.Update(msg_bytes, msg_len);
+
+  /* finalize into a local buffer first, then hand the digest to the caller */
+  char digest[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE];
+  hmac.Final((unsigned char *)digest);
+  memcpy(dest, digest, CEPH_CRYPTO_HMACSHA256_DIGESTSIZE);
+}
+
+using ceph::crypto::SHA256;
+
+/*
+ * Compute the SHA256 digest of msg in one shot and return it by value.
+ */
+sha256_digest_t calc_hash_sha256(const boost::string_view& msg)
+{
+  const auto *bytes = reinterpret_cast<const unsigned char*>(msg.data());
+
+  SHA256 hasher;
+  hasher.Update(bytes, msg.size());
+
+  sha256_digest_t digest;
+  hasher.Final(digest.v);
+  return digest;
+}
+
+/*
+ * Allocate a fresh SHA256 hasher for incremental use. The returned object is
+ * heap-allocated; calc_hash_sha256_close_stream() deletes it.
+ */
+SHA256* calc_hash_sha256_open_stream()
+{
+  SHA256 *hasher = new SHA256;
+  return hasher;
+}
+
+/* Feed len bytes of msg into an open hasher. */
+void calc_hash_sha256_update_stream(SHA256 *hash, const char *msg, int len)
+{
+  const unsigned char *bytes = (const unsigned char *)msg;
+  hash->Update(bytes, len);
+}
+
+/*
+ * Finalize the hasher *phash, delete it, reset *phash to NULL and return
+ * the digest as a hex string.
+ *
+ * When *phash is NULL a fresh hasher is opened first, so the result is the
+ * digest of no input at all.
+ */
+string calc_hash_sha256_close_stream(SHA256 **phash)
+{
+  SHA256 *hash = *phash;
+  if (!hash) {
+    hash = calc_hash_sha256_open_stream();
+  }
+
+  /* size the digest with the same constant that is used to consume it below
+   * (this is a plain SHA256 digest, not an HMAC) */
+  unsigned char digest[CEPH_CRYPTO_SHA256_DIGESTSIZE];
+  hash->Final(digest);
+
+  char hex_str[(CEPH_CRYPTO_SHA256_DIGESTSIZE * 2) + 1];
+  buf_to_hex(digest, CEPH_CRYPTO_SHA256_DIGESTSIZE, hex_str);
+
+  delete hash;
+  *phash = NULL;
+
+  return std::string(hex_str);
+}
+
+/*
+ * Close the stream in *phash, open a new one in its place and return the
+ * hex digest of the stream that was closed.
+ */
+std::string calc_hash_sha256_restart_stream(SHA256 **phash)
+{
+  std::string hex_digest = calc_hash_sha256_close_stream(phash);
+  *phash = calc_hash_sha256_open_stream();
+  return hex_digest;
+}
+
+/*
+ * Split str at the first '=' into name and val.
+ *
+ * Returns 0 when a '=' was found, 1 when there is none (the whole string
+ * becomes the name and val is emptied).
+ */
+int NameVal::parse()
+{
+  const auto delim_pos = str.find('=');
+  if (delim_pos == string::npos) {
+    name = str;
+    val = "";
+    return 1;
+  }
+
+  name = str.substr(0, delim_pos);
+  val = str.substr(delim_pos + 1);
+  return 0;
+}
+
+/*
+ * Parse the query string held in str: skip an optional leading '?', split
+ * on '&', URL-decode each piece (in query mode, so '+' becomes a space) and
+ * record the resulting name/value pair through append().
+ *
+ * Always returns 0.
+ */
+int RGWHTTPArgs::parse()
+{
+  if (str.empty())
+    return 0;
+
+  size_t pos = (str[0] == '?') ? 1 : 0;
+
+  for (;;) {
+    /* compare against npos explicitly instead of relying on it turning
+     * negative when squeezed into an int */
+    size_t fpos = str.find('&', pos);
+    const bool last = (fpos == string::npos);
+    if (last) {
+      fpos = str.size();
+    }
+
+    NameVal nv(url_decode(str.substr(pos, fpos - pos), true));
+    if (nv.parse() >= 0) {
+      append(nv.get_name(), nv.get_val());
+    }
+
+    if (last) {
+      break;
+    }
+    pos = fpos + 1;
+  }
+
+  return 0;
+}
+
+/*
+ * Record one decoded query parameter.
+ *
+ * The pair goes to sys_val_map when name starts with RGW_SYS_PARAM_PREFIX
+ * and to val_map otherwise. On top of that, recognized names are mirrored
+ * into sub_resources:
+ *  - known sub-resource names keep their value;
+ *  - the "response-*" overrides keep their value and set has_resp_modifier;
+ *  - the admin names are stored with an empty value, and only the first one
+ *    seen is recorded (guarded by admin_subresource_added).
+ */
+void RGWHTTPArgs::append(const string& name, const string& val)
+{
+  static const char * const subresource_names[] = {
+    "acl", "cors", "notification", "location", "logging", "usage",
+    "lifecycle", "delete", "uploads", "partNumber", "uploadId", "versionId",
+    "start-date", "end-date", "versions", "versioning", "website",
+    "requestPayment", "torrent", "tagging", "append", "position",
+  };
+  static const char * const response_modifier_names[] = {
+    "response-content-type", "response-content-language", "response-expires",
+    "response-cache-control", "response-content-disposition",
+    "response-content-encoding",
+  };
+  static const char * const admin_subresource_names[] = {
+    "subuser", "key", "caps", "index", "policy", "quota", "list", "object",
+  };
+
+  const auto listed_in = [&name](const auto& table) {
+    for (const char *candidate : table) {
+      if (name.compare(candidate) == 0)
+        return true;
+    }
+    return false;
+  };
+
+  const bool is_sys_param =
+    (name.compare(0, sizeof(RGW_SYS_PARAM_PREFIX) - 1, RGW_SYS_PARAM_PREFIX) == 0);
+  if (is_sys_param) {
+    sys_val_map[name] = val;
+  } else {
+    val_map[name] = val;
+  }
+
+  if (listed_in(subresource_names)) {
+    sub_resources[name] = val;
+  } else if (name[0] == 'r') { // root of all evil
+    /* anything else starting with 'r' is never treated as an admin name */
+    if (listed_in(response_modifier_names)) {
+      sub_resources[name] = val;
+      has_resp_modifier = true;
+    }
+  } else if (listed_in(admin_subresource_names)) {
+    if (!admin_subresource_added) {
+      sub_resources[name] = "";
+      admin_subresource_added = true;
+    }
+  }
+}
+
+/*
+ * Look name up in val_map. Returns a reference to the stored value, or to
+ * empty_str when the name is absent. If exists is non-NULL it receives
+ * whether the name was found.
+ */
+const string& RGWHTTPArgs::get(const string& name, bool *exists) const
+{
+  const auto iter = val_map.find(name);
+  const bool found = (iter != val_map.end());
+
+  if (exists)
+    *exists = found;
+
+  if (!found)
+    return empty_str;
+  return iter->second;
+}
+
+/*
+ * Like get(), but reports absence through boost::none instead of an
+ * out-parameter.
+ */
+boost::optional<const std::string&>
+RGWHTTPArgs::get_optional(const std::string& name) const
+{
+  bool exists;
+  const std::string& value = get(name, &exists);
+  if (!exists) {
+    return boost::none;
+  }
+  return value;
+}
+
+/*
+ * Read name from val_map as a boolean.
+ *
+ * If exists is non-NULL it receives whether the name is present. When the
+ * name is absent *val is left untouched and 0 is returned. When present, the
+ * value must be "true" or "false" (case-insensitive); anything else yields
+ * -EINVAL and leaves *val untouched.
+ */
+int RGWHTTPArgs::get_bool(const string& name, bool *val, bool *exists)
+{
+  const auto iter = val_map.find(name);
+  const bool found = (iter != val_map.end());
+  if (exists)
+    *exists = found;
+
+  if (!found)
+    return 0;
+
+  const char * const text = iter->second.c_str();
+  if (strcasecmp(text, "true") == 0) {
+    *val = true;
+  } else if (strcasecmp(text, "false") == 0) {
+    *val = false;
+  } else {
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+/* C-string convenience overload; forwards to the std::string variant. */
+int RGWHTTPArgs::get_bool(const char *name, bool *val, bool *exists)
+{
+  const string key(name);
+  return get_bool(key, val, exists);
+}
+
+/*
+ * Read name as a boolean, falling back to def_val when the parameter is
+ * missing or its value cannot be parsed.
+ */
+void RGWHTTPArgs::get_bool(const char *name, bool *val, bool def_val)
+{
+  bool exists = false;
+  const int r = get_bool(name, val, &exists);
+  if (r < 0 || !exists) {
+    *val = def_val;
+  }
+}
+
+/*
+ * Read name as a base-10 integer.
+ *
+ * *val receives def_val when the parameter is missing (return 0) or when
+ * strict_strtol() reports an error (return -EINVAL); otherwise it receives
+ * the parsed value and 0 is returned.
+ */
+int RGWHTTPArgs::get_int(const char *name, int *val, int def_val)
+{
+  bool exists = false;
+  const string& val_str = get(name, &exists);
+  if (!exists) {
+    *val = def_val;
+    return 0;
+  }
+
+  string err;
+  const auto parsed = strict_strtol(val_str.c_str(), 10, &err);
+  if (!err.empty()) {
+    *val = def_val;
+    return -EINVAL;
+  }
+
+  *val = (int)parsed;
+  return 0;
+}
+
+/*
+ * Look name up in sys_val_map and return a copy of its value, or an empty
+ * string when absent. If exists is non-NULL it receives whether the name
+ * was found.
+ */
+string RGWHTTPArgs::sys_get(const string& name, bool * const exists) const
+{
+  const auto iter = sys_val_map.find(name);
+  const bool found = (iter != sys_val_map.end());
+
+  if (exists) {
+    *exists = found;
+  }
+
+  if (!found) {
+    return string();
+  }
+  return iter->second;
+}
+
+/*
+ * Decide whether the request arrived over a secure transport.
+ *
+ * True when the environment contains SERVER_PORT_SECURE. Proxy headers are
+ * consulted only when rgw_trust_forwarded_https is set: then a Forwarded
+ * header containing "proto=https", or an X-Forwarded-Proto header equal to
+ * "https", also counts as secure.
+ */
+bool rgw_transport_is_secure(CephContext *cct, const RGWEnv& env)
+{
+  const auto& m = env.get_map();
+
+  // frontend connected with ssl
+  if (m.count("SERVER_PORT_SECURE")) {
+    return true;
+  }
+
+  // ignore proxy headers unless explicitly enabled
+  if (!cct->_conf->rgw_trust_forwarded_https) {
+    return false;
+  }
+
+  // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded
+  // Forwarded: by=<identifier>; for=<identifier>; host=<host>; proto=<http|https>
+  const auto forwarded = m.find("HTTP_FORWARDED");
+  if (forwarded != m.end() &&
+      forwarded->second.find("proto=https") != std::string::npos) {
+    return true;
+  }
+
+  // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-Proto
+  const auto forwarded_proto = m.find("HTTP_X_FORWARDED_PROTO");
+  return forwarded_proto != m.end() && forwarded_proto->second == "https";
+}
+
+namespace {
+/*
+ * Evaluate an optional policy: Effect::Pass when no policy is set, otherwise
+ * whatever the policy's eval() returns for the given request.
+ */
+Effect eval_or_pass(const boost::optional<Policy>& policy,
+                    const rgw::IAM::Environment& env,
+                    boost::optional<const rgw::auth::Identity&> id,
+                    const uint64_t op,
+                    const ARN& arn) {
+  if (policy) {
+    return policy->eval(env, id, op, arn);
+  }
+  return Effect::Pass;
+}
+
+}
+
+/*
+ * Evaluate every user policy against the request and combine the results:
+ *  - the first Effect::Deny wins immediately;
+ *  - otherwise Effect::Allow if at least one policy allowed;
+ *  - otherwise Effect::Pass (also for an empty policy list).
+ *
+ * Policies are evaluated in place; they are no longer copied into a
+ * temporary boost::optional<Policy> just to be evaluated.
+ */
+Effect eval_user_policies(const vector<Policy>& user_policies,
+                          const rgw::IAM::Environment& env,
+                          boost::optional<const rgw::auth::Identity&> id,
+                          const uint64_t op,
+                          const ARN& arn) {
+  auto usr_policy_res = Effect::Pass;
+  bool allowed = false;
+
+  for (const auto& user_policy : user_policies) {
+    usr_policy_res = user_policy.eval(env, id, op, arn);
+    if (usr_policy_res == Effect::Deny) {
+      return usr_policy_res;
+    }
+    if (usr_policy_res == Effect::Allow) {
+      allowed = true;
+    }
+  }
+
+  /* an earlier Allow survives a trailing Pass */
+  if (allowed && usr_policy_res == Effect::Pass) {
+    return Effect::Allow;
+  }
+  return usr_policy_res;
+}
+
+/*
+ * Check a user-level operation against the user's IAM policies.
+ *
+ * An explicit Deny or Allow from the policies is final. Without a verdict,
+ * only s3CreateBucket and s3ListAllMyBuckets fall back to the ACL-based
+ * check; every other operation is refused.
+ */
+bool verify_user_permission(const DoutPrefixProvider* dpp,
+                            struct req_state * const s,
+                            RGWAccessControlPolicy * const user_acl,
+                            const vector<rgw::IAM::Policy>& user_policies,
+                            const rgw::ARN& res,
+                            const uint64_t op)
+{
+  const auto usr_policy_res =
+    eval_user_policies(user_policies, s->env, boost::none, op, res);
+
+  if (usr_policy_res == Effect::Deny) {
+    return false;
+  }
+  if (usr_policy_res == Effect::Allow) {
+    return true;
+  }
+
+  const bool acl_fallback =
+    (op == rgw::IAM::s3CreateBucket || op == rgw::IAM::s3ListAllMyBuckets);
+  if (!acl_fallback) {
+    return false;
+  }
+
+  auto perm = op_to_perm(op);
+  return verify_user_permission_no_policy(dpp, s, user_acl, perm);
+}
+
+/*
+ * ACL-only user permission check.
+ *
+ * Role identities are always refused. Without a user ACL the request is
+ * accepted. Otherwise perm must be fully covered by the request's
+ * perm_mask and granted by the user ACL.
+ */
+bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s,
+                                      RGWAccessControlPolicy * const user_acl,
+                                      const int perm)
+{
+  if (s->auth.identity->get_identity_type() == TYPE_ROLE)
+    return false;
+
+  /* S3 doesn't support account ACLs. */
+  if (!user_acl)
+    return true;
+
+  const bool mask_covers_perm = ((perm & (int)s->perm_mask) == perm);
+  if (!mask_covers_perm)
+    return false;
+
+  return user_acl->verify_permission(dpp, *s->auth.identity, perm, perm);
+}
+
+/* Convenience overload using the user ACL and user policies held in s. */
+bool verify_user_permission(const DoutPrefixProvider* dpp,
+                            struct req_state * const s,
+                            const rgw::ARN& res,
+                            const uint64_t op)
+{
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  return verify_user_permission(dpp, s, user_acl, s->iam_user_policies, res, op);
+}
+
+/* Convenience overload using the user ACL held in s. */
+bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
+                                      struct req_state * const s,
+                                      const int perm)
+{
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  return verify_user_permission_no_policy(dpp, s, user_acl, perm);
+}
+
+/*
+ * Enforce the bucket's requester-pays setting.
+ *
+ * Passes when the bucket does not require it or when the requester owns the
+ * bucket. Anonymous requesters are refused. Everyone else must send the
+ * value "requester" (case-insensitive), either in the
+ * HTTP_X_AMZ_REQUEST_PAYER environment entry or in the
+ * "x-amz-request-payer" query argument.
+ */
+bool verify_requester_payer_permission(struct req_state *s)
+{
+  if (!s->bucket_info.requester_pays)
+    return true;
+
+  if (s->auth.identity->is_owner_of(s->bucket_info.owner))
+    return true;
+
+  if (s->auth.identity->is_anonymous()) {
+    return false;
+  }
+
+  /* the header takes precedence; the query argument is the fallback */
+  const char *request_payer = s->info.env->get("HTTP_X_AMZ_REQUEST_PAYER");
+  if (!request_payer) {
+    bool exists;
+    request_payer = s->info.args.get("x-amz-request-payer", &exists).c_str();
+    if (!exists) {
+      return false;
+    }
+  }
+
+  return strcasecmp(request_payer, "requester") == 0;
+}
+
+/*
+ * Full bucket permission check for operation op.
+ *
+ * Order of evaluation: requester-pays, user policies (Deny is final),
+ * bucket policy (Allow and Deny are final), a user-policy Allow, and
+ * finally the ACL-based check.
+ */
+bool verify_bucket_permission(const DoutPrefixProvider* dpp,
+                              struct req_state * const s,
+                              const rgw_bucket& bucket,
+                              RGWAccessControlPolicy * const user_acl,
+                              RGWAccessControlPolicy * const bucket_acl,
+                              const boost::optional<Policy>& bucket_policy,
+                              const vector<Policy>& user_policies,
+                              const uint64_t op)
+{
+  if (!verify_requester_payer_permission(s))
+    return false;
+
+  const auto usr_policy_res =
+    eval_user_policies(user_policies, s->env, boost::none, op, ARN(bucket));
+  if (usr_policy_res == Effect::Deny)
+    return false;
+
+  const auto bucket_policy_res =
+    eval_or_pass(bucket_policy, s->env, *s->auth.identity, op, ARN(bucket));
+  if (bucket_policy_res == Effect::Deny)
+    return false;
+
+  // It looks like S3 ACLs only GRANT permissions rather than
+  // denying them, so an Allow from either policy source should be safe.
+  if (bucket_policy_res == Effect::Allow || usr_policy_res == Effect::Allow)
+    return true;
+
+  const auto perm = op_to_perm(op);
+
+  return verify_bucket_permission_no_policy(dpp, s, user_acl, bucket_acl, perm);
+}
+
+/*
+ * ACL-only bucket permission check.
+ *
+ * Refused without a bucket ACL or when perm is not fully covered by the
+ * request's perm_mask. Otherwise granted if the bucket ACL allows it (the
+ * HTTP_REFERER environment entry is passed along), or else if a user ACL
+ * exists and allows it.
+ */
+bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s,
+                                        RGWAccessControlPolicy * const user_acl,
+                                        RGWAccessControlPolicy * const bucket_acl,
+                                        const int perm)
+{
+  if (!bucket_acl)
+    return false;
+
+  if ((perm & (int)s->perm_mask) != perm)
+    return false;
+
+  const bool bucket_grants =
+    bucket_acl->verify_permission(dpp, *s->auth.identity, perm, perm,
+                                  s->info.env->get("HTTP_REFERER"));
+  if (bucket_grants)
+    return true;
+
+  return user_acl &&
+         user_acl->verify_permission(dpp, *s->auth.identity, perm, perm);
+}
+
+/*
+ * Convenience overload: enforce requester-pays, then run the ACL-only check
+ * with the user and bucket ACLs held in s.
+ */
+bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state * const s, const int perm)
+{
+  if (!verify_requester_payer_permission(s))
+    return false;
+
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  RGWAccessControlPolicy * const bucket_acl = s->bucket_acl.get();
+  return verify_bucket_permission_no_policy(dpp, s, user_acl, bucket_acl, perm);
+}
+
+/*
+ * Convenience overload: run the full bucket check with the bucket, ACLs and
+ * policies held in s.
+ */
+bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op)
+{
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  RGWAccessControlPolicy * const bucket_acl = s->bucket_acl.get();
+
+  return verify_bucket_permission(dpp, s, s->bucket, user_acl, bucket_acl,
+                                  s->iam_policy, s->iam_user_policies, op);
+}
+
+// Authorize anyone the bucket policy allows, plus the bucket owner as long
+// as the policy has no opinion (Pass). Everything else, including an
+// explicit Deny for the owner, yields -EACCES. Returns 0 when authorized.
+
+int verify_bucket_owner_or_policy(struct req_state* const s,
+                                  const uint64_t op)
+{
+  const auto e = eval_or_pass(s->iam_policy,
+                              s->env, *s->auth.identity,
+                              op, ARN(s->bucket));
+  if (e == Effect::Allow) {
+    return 0;
+  }
+
+  if (e == Effect::Pass &&
+      s->auth.identity->is_owner_of(s->bucket_owner.get_id())) {
+    return 0;
+  }
+
+  return -EACCES;
+}
+
+
+/*
+ * Run the full bucket permission check, but only when the request's
+ * defer_to_bucket_acls mode equals deferred_check; false otherwise.
+ */
+static inline bool check_deferred_bucket_perms(const DoutPrefixProvider* dpp,
+                                               struct req_state * const s,
+                                               const rgw_bucket& bucket,
+                                               RGWAccessControlPolicy * const user_acl,
+                                               RGWAccessControlPolicy * const bucket_acl,
+                                               const boost::optional<Policy>& bucket_policy,
+                                               const vector<Policy>& user_policies,
+                                               const uint8_t deferred_check,
+                                               const uint64_t op)
+{
+  if (s->defer_to_bucket_acls != deferred_check) {
+    return false;
+  }
+  return verify_bucket_permission(dpp, s, bucket, user_acl, bucket_acl,
+                                  bucket_policy, user_policies, op);
+}
+
+/*
+ * Run the ACL-only bucket permission check, but only when the request's
+ * defer_to_bucket_acls mode equals deferred_check; false otherwise.
+ */
+static inline bool check_deferred_bucket_only_acl(const DoutPrefixProvider* dpp,
+                                                  struct req_state * const s,
+                                                  RGWAccessControlPolicy * const user_acl,
+                                                  RGWAccessControlPolicy * const bucket_acl,
+                                                  const uint8_t deferred_check,
+                                                  const int perm)
+{
+  if (s->defer_to_bucket_acls != deferred_check) {
+    return false;
+  }
+  return verify_bucket_permission_no_policy(dpp, s, user_acl, bucket_acl, perm);
+}
+
+/*
+ * Full object permission check for operation op.
+ *
+ * Order of evaluation:
+ *  1. requester-pays;
+ *  2. user policies (Deny is final);
+ *  3. bucket policy (Allow and Deny are final), then a user-policy Allow;
+ *  4. deferral to the bucket-level check, depending on
+ *     s->defer_to_bucket_acls;
+ *  5. the object ACL;
+ *  6. when rgw_enforce_swift_acls is set, the Swift object permissions
+ *     granted by the bucket ACL, then by the user ACL.
+ *
+ * A missing bucket ACL makes step 6 fail instead of being dereferenced,
+ * matching verify_bucket_permission_no_policy().
+ */
+bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state * const s,
+                              const rgw_obj& obj,
+                              RGWAccessControlPolicy * const user_acl,
+                              RGWAccessControlPolicy * const bucket_acl,
+                              RGWAccessControlPolicy * const object_acl,
+                              const boost::optional<Policy>& bucket_policy,
+                              const vector<Policy>& user_policies,
+                              const uint64_t op)
+{
+  if (!verify_requester_payer_permission(s))
+    return false;
+
+  auto usr_policy_res = eval_user_policies(user_policies, s->env, boost::none, op, ARN(obj));
+  if (usr_policy_res == Effect::Deny)
+    return false;
+
+  auto r = eval_or_pass(bucket_policy, s->env, *s->auth.identity, op, ARN(obj));
+  if (r == Effect::Allow)
+    // It looks like S3 ACLs only GRANT permissions rather than
+    // denying them, so this should be safe.
+    return true;
+  else if (r == Effect::Deny)
+    return false;
+  else if (usr_policy_res == Effect::Allow)
+    return true;
+
+  const auto perm = op_to_perm(op);
+
+  if (check_deferred_bucket_perms(dpp, s, obj.bucket, user_acl, bucket_acl, bucket_policy,
+                                  user_policies, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, op) ||
+      check_deferred_bucket_perms(dpp, s, obj.bucket, user_acl, bucket_acl, bucket_policy,
+                                  user_policies, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, rgw::IAM::s3All)) {
+    return true;
+  }
+
+  if (!object_acl) {
+    return false;
+  }
+
+  bool ret = object_acl->verify_permission(dpp, *s->auth.identity, s->perm_mask, perm);
+  if (ret) {
+    return true;
+  }
+
+  if (!s->cct->_conf->rgw_enforce_swift_acls)
+    return ret;
+
+  if ((perm & (int)s->perm_mask) != perm)
+    return false;
+
+  int swift_perm = 0;
+  if (perm & (RGW_PERM_READ | RGW_PERM_READ_ACP))
+    swift_perm |= RGW_PERM_READ_OBJS;
+  if (perm & RGW_PERM_WRITE)
+    swift_perm |= RGW_PERM_WRITE_OBJS;
+
+  if (!swift_perm)
+    return false;
+
+  /* no bucket ACL, nothing to grant the swift permissions from */
+  if (!bucket_acl)
+    return false;
+
+  /* we already verified the user mask above, so we pass swift_perm as the mask here,
+     otherwise the mask might not cover the swift permissions bits */
+  if (bucket_acl->verify_permission(dpp, *s->auth.identity, swift_perm, swift_perm,
+                                    s->info.env->get("HTTP_REFERER")))
+    return true;
+
+  if (!user_acl)
+    return false;
+
+  return user_acl->verify_permission(dpp, *s->auth.identity, swift_perm, swift_perm);
+}
+
+/*
+ * ACL-only object permission check.
+ *
+ * Order of evaluation:
+ *  1. deferral to the ACL-only bucket check, depending on
+ *     s->defer_to_bucket_acls;
+ *  2. the object ACL;
+ *  3. when rgw_enforce_swift_acls is set, the Swift object permissions
+ *     granted by the bucket ACL, then by the user ACL.
+ *
+ * A missing bucket ACL makes step 3 fail instead of being dereferenced,
+ * matching verify_bucket_permission_no_policy().
+ */
+bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp,
+                                        struct req_state * const s,
+                                        RGWAccessControlPolicy * const user_acl,
+                                        RGWAccessControlPolicy * const bucket_acl,
+                                        RGWAccessControlPolicy * const object_acl,
+                                        const int perm)
+{
+  if (check_deferred_bucket_only_acl(dpp, s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) ||
+      check_deferred_bucket_only_acl(dpp, s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL, RGW_PERM_FULL_CONTROL)) {
+    return true;
+  }
+
+  if (!object_acl) {
+    return false;
+  }
+
+  bool ret = object_acl->verify_permission(dpp, *s->auth.identity, s->perm_mask, perm);
+  if (ret) {
+    return true;
+  }
+
+  if (!s->cct->_conf->rgw_enforce_swift_acls)
+    return ret;
+
+  if ((perm & (int)s->perm_mask) != perm)
+    return false;
+
+  int swift_perm = 0;
+  if (perm & (RGW_PERM_READ | RGW_PERM_READ_ACP))
+    swift_perm |= RGW_PERM_READ_OBJS;
+  if (perm & RGW_PERM_WRITE)
+    swift_perm |= RGW_PERM_WRITE_OBJS;
+
+  if (!swift_perm)
+    return false;
+
+  /* no bucket ACL, nothing to grant the swift permissions from */
+  if (!bucket_acl)
+    return false;
+
+  /* we already verified the user mask above, so we pass swift_perm as the mask here,
+     otherwise the mask might not cover the swift permissions bits */
+  if (bucket_acl->verify_permission(dpp, *s->auth.identity, swift_perm, swift_perm,
+                                    s->info.env->get("HTTP_REFERER")))
+    return true;
+
+  if (!user_acl)
+    return false;
+
+  return user_acl->verify_permission(dpp, *s->auth.identity, swift_perm, swift_perm);
+}
+
+/*
+ * Convenience overload: enforce requester-pays, then run the ACL-only object
+ * check with the user, bucket and object ACLs held in s.
+ */
+bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state *s, int perm)
+{
+  if (!verify_requester_payer_permission(s))
+    return false;
+
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  RGWAccessControlPolicy * const bucket_acl = s->bucket_acl.get();
+  RGWAccessControlPolicy * const object_acl = s->object_acl.get();
+
+  return verify_object_permission_no_policy(dpp, s, user_acl, bucket_acl,
+                                            object_acl, perm);
+}
+
+/*
+ * Convenience overload: run the full object check for the object named by
+ * s->bucket / s->object, with the ACLs and policies held in s.
+ */
+bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op)
+{
+  RGWAccessControlPolicy * const user_acl = s->user_acl.get();
+  RGWAccessControlPolicy * const bucket_acl = s->bucket_acl.get();
+  RGWAccessControlPolicy * const object_acl = s->object_acl.get();
+
+  return verify_object_permission(dpp, s, rgw_obj(s->bucket, s->object),
+                                  user_acl, bucket_acl, object_acl,
+                                  s->iam_policy, s->iam_user_policies, op);
+}
+
+/*
+ * Lookup table mapping an ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its
+ * numeric value. Every other byte maps to -1 (stored in a char).
+ */
+class HexTable
+{
+  char table[256];
+
+public:
+  HexTable() {
+    // FIPS zeroization audit 20191115: this memset is not security related.
+    memset(table, -1, sizeof(table));
+    int i;
+    for (i = '0'; i<='9'; i++)
+      table[i] = i - '0';
+    for (i = 'A'; i<='F'; i++)
+      table[i] = i - 'A' + 0xa;
+    for (i = 'a'; i<='f'; i++)
+      table[i] = i - 'a' + 0xa;
+  }
+
+  char to_num(char c) {
+    // Index through unsigned char: where char is signed, a byte >= 0x80
+    // would otherwise become a negative index and read before the table.
+    return table[static_cast<unsigned char>(c)];
+  }
+};
+
+/*
+ * Convert one hex digit to its value through a HexTable that is built on
+ * first use; returns whatever the table holds for c.
+ */
+static char hex_to_num(char c)
+{
+  static HexTable lookup;
+  const char value = lookup.to_num(c);
+  return value;
+}
+
+/*
+ * Percent-decode src_str.
+ *
+ * - "%XX" becomes the byte with hex value XX.
+ * - '+' becomes a space, but only in query mode. Query mode is on from the
+ *   start when in_query is true and switches on after the first literal '?'.
+ * - A '%' with fewer than two characters after it ends decoding; whatever
+ *   was decoded so far is returned.
+ * - A '%' followed by a non-hex character makes the whole result empty.
+ */
+std::string url_decode(const boost::string_view& src_str, bool in_query)
+{
+  std::string dest_str;
+  dest_str.reserve(src_str.length() + 1);
+
+  for (auto src = std::begin(src_str); src != std::end(src_str); ++src) {
+    if (*src != '%') {
+      if (!in_query || *src != '+') {
+        if (*src == '?') {
+          in_query = true;
+        }
+        dest_str.push_back(*src);
+      } else {
+        dest_str.push_back(' ');
+      }
+    } else {
+      /* 3 == strlen("%%XX") */
+      if (std::distance(src, std::end(src_str)) < 3) {
+        break;
+      }
+
+      src++;
+      /* hex_to_num() reports "not a hex digit" as -1 in a plain char; go
+       * through signed char so the check also works where char is unsigned */
+      const int c1 = static_cast<signed char>(hex_to_num(*src++));
+      const int c2 = static_cast<signed char>(hex_to_num(*src));
+      if (c1 < 0 || c2 < 0) {
+        return std::string();
+      } else {
+        dest_str.push_back(static_cast<char>(c1 << 4 | c2));
+      }
+    }
+  }
+
+  return dest_str;
+}
+
+/*
+ * Append the percent-escape of c to dst: '%' followed by the byte value as
+ * two upper-case hex digits.
+ */
+void rgw_uri_escape_char(char c, string& dst)
+{
+  static const char hex_digits[] = "0123456789ABCDEF";
+  const unsigned char byte = (unsigned char)c;
+
+  dst.push_back('%');
+  dst.push_back(hex_digits[byte >> 4]);
+  dst.push_back(hex_digits[byte & 0x0f]);
+}
+
+/*
+ * Tell whether c has to be percent-escaped: everything up to and including
+ * the space, everything from DEL (0x7f) upwards, and the punctuation
+ * characters listed below.
+ */
+static bool char_needs_url_encoding(char c)
+{
+  if (c <= 0x20 || c >= 0x7f)
+    return true;
+
+  switch (c) {
+    case '"':
+    case '#':
+    case '%':
+    case '&':
+    case '+':
+    case ',':
+    case '/':
+    case ':':
+    case ';':
+    case '<':
+    case '=':
+    case '>':
+    case '?':
+    case '@':
+    case '[':
+    case '\\':
+    case ']':
+    case '^':
+    case '`':
+    case '{':
+    case '}':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/*
+ * Append the URL-encoded form of src to dst. Characters that need encoding
+ * are percent-escaped; '/' is copied through unescaped unless encode_slash
+ * is set.
+ */
+void url_encode(const string& src, string& dst, bool encode_slash)
+{
+  for (const char c : src) {
+    const bool keep_slash = (!encode_slash && c == '/');
+    if (keep_slash || !char_needs_url_encoding(c)) {
+      dst.push_back(c);
+    } else {
+      rgw_uri_escape_char(c, dst);
+    }
+  }
+}
+
+/* Return the URL-encoded form of src as a new string. */
+std::string url_encode(const std::string& src, bool encode_slash)
+{
+  std::string encoded;
+  url_encode(src, encoded, encode_slash);
+  return encoded;
+}
+
+/*
+ * Return a copy of src without leading and trailing whitespace (as
+ * classified by isspace()). An empty or all-whitespace input yields an
+ * empty string.
+ */
+string rgw_trim_whitespace(const string& src)
+{
+  /* isspace() must not be handed a negative value, which a plain char can
+   * be for bytes >= 0x80 */
+  const auto is_space = [](char c) {
+    return isspace(static_cast<unsigned char>(c)) != 0;
+  };
+
+  size_t start = 0;
+  while (start < src.size() && is_space(src[start])) {
+    ++start;
+  }
+
+  size_t end = src.size();  /* one past the last character kept */
+  while (end > start && is_space(src[end - 1])) {
+    --end;
+  }
+
+  return src.substr(start, end - start);
+}
+
+/*
+ * Return a view of src without leading and trailing whitespace. The result
+ * refers to the same underlying buffer as src.
+ */
+boost::string_view rgw_trim_whitespace(const boost::string_view& src)
+{
+  /* std::isspace() must not be handed a negative value, which a plain char
+   * can be for bytes >= 0x80 */
+  const auto is_space = [](char c) {
+    return std::isspace(static_cast<unsigned char>(c)) != 0;
+  };
+
+  boost::string_view res = src;
+
+  while (res.size() > 0 && is_space(res.front())) {
+    res.remove_prefix(1);
+  }
+  while (res.size() > 0 && is_space(res.back())) {
+    res.remove_suffix(1);
+  }
+  return res;
+}
+
+/*
+ * Trim surrounding whitespace from val and, if what remains both starts and
+ * ends with a double quote, strip that one pair of quotes. A lone quote on
+ * either side is left in place.
+ */
+string rgw_trim_quotes(const string& val)
+{
+  string s = rgw_trim_whitespace(val);
+
+  const bool quoted = s.size() >= 2 &&
+                      s[0] == '"' &&
+                      s[s.size() - 1] == '"';
+  if (!quoted) {
+    return s;
+  }
+
+  return s.substr(1, s.size() - 2);
+}
+
+/*
+ * One row of a name -> flag lookup table. Tables built from this type end
+ * with a row whose type_name is NULL.
+ */
+struct rgw_name_to_flag {
+  const char *type_name;  /* textual name; NULL marks the end of a table */
+  uint32_t flag;          /* bit(s) selected by that name */
+};
+
+/*
+ * Translate a list of names in str (tokenized by get_str_list()) into the
+ * OR of the matching flags from mapping, a table ending with a NULL
+ * type_name. Names that are not in the table are ignored.
+ *
+ * Stores the result in *perm and always returns 0.
+ */
+static int parse_list_of_flags(struct rgw_name_to_flag *mapping,
+                               const string& str, uint32_t *perm)
+{
+  list<string> names;
+  get_str_list(str, names);
+
+  uint32_t flags = 0;
+  for (const string& candidate : names) {
+    for (const rgw_name_to_flag *row = mapping; row->type_name; ++row) {
+      if (candidate.compare(row->type_name) == 0)
+        flags |= row->flag;
+    }
+  }
+
+  *perm = flags;
+  return 0;
+}
+
+/* Capability permission names accepted in cap strings; NULL-terminated. */
+static struct rgw_name_to_flag cap_names[] = {
+  { "*",     RGW_CAP_ALL },
+  { "read",  RGW_CAP_READ },
+  { "write", RGW_CAP_WRITE },
+  { NULL,    0 }
+};
+
+/*
+ * Parse a list of capability permission names ("read", "write", "*") into a
+ * RGW_CAP_* bitmask stored in *perm.
+ */
+int RGWUserCaps::parse_cap_perm(const string& str, uint32_t *perm)
+{
+  const int r = parse_list_of_flags(cap_names, str, perm);
+  return r;
+}
+
+/*
+ * Parse one capability of the form "<type>=<perm list>".
+ *
+ * The whitespace-trimmed text before the first '=' is stored in type; when
+ * there is no '=', type keeps the value the caller passed in and the whole
+ * string is parsed as the permission list. An empty permission part yields
+ * a mask of 0.
+ *
+ * Returns 0 and sets *pperm on success, -ERR_INVALID_CAP when type is not a
+ * valid capability type, or the error from parse_cap_perm().
+ */
+int RGWUserCaps::get_cap(const string& cap, string& type, uint32_t *pperm)
+{
+  /* work with size_t and npos instead of narrowing find()'s result to int */
+  const auto pos = cap.find('=');
+  const bool has_delim = (pos != string::npos);
+  if (has_delim) {
+    type = rgw_trim_whitespace(cap.substr(0, pos));
+  }
+
+  if (!is_valid_cap_type(type))
+    return -ERR_INVALID_CAP;
+
+  uint32_t perm = 0;
+  const size_t perm_start = has_delim ? pos + 1 : 0;
+  if (perm_start < cap.size()) {
+    const string cap_perm = cap.substr(perm_start);
+    int r = RGWUserCaps::parse_cap_perm(cap_perm, &perm);
+    if (r < 0)
+      return r;
+  }
+
+  *pperm = perm;
+
+  return 0;
+}
+
+/*
+ * Parse cap ("<type>=<perm list>") and OR its permissions into the entry
+ * for that type, creating the entry if needed. Returns 0 or the error from
+ * get_cap().
+ */
+int RGWUserCaps::add_cap(const string& cap)
+{
+  string type;
+  uint32_t perm;
+
+  const int r = get_cap(cap, type, &perm);
+  if (r < 0) {
+    return r;
+  }
+
+  caps[type] |= perm;
+  return 0;
+}
+
+/*
+ * Parse cap ("<type>=<perm list>") and clear its permissions from the entry
+ * for that type; the entry is erased once no permission is left. Removing
+ * from a type that has no entry is not an error. Returns 0 or the error
+ * from get_cap().
+ */
+int RGWUserCaps::remove_cap(const string& cap)
+{
+  string type;
+  uint32_t perm;
+
+  const int r = get_cap(cap, type, &perm);
+  if (r < 0) {
+    return r;
+  }
+
+  auto iter = caps.find(type);
+  if (iter != caps.end()) {
+    iter->second &= ~perm;
+    if (iter->second == 0) {
+      caps.erase(iter);
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Add every capability in str, a ';'-separated list of "<type>=<perm list>"
+ * items. Stops at and returns the first error; returns 0 when every item
+ * was added. At least one item is always processed, so an empty str is
+ * handed to add_cap() as an empty capability.
+ */
+int RGWUserCaps::add_from_string(const string& str)
+{
+  size_t item_start = 0;
+  do {
+    size_t item_end = str.find(';', item_start);
+    if (item_end == string::npos) {
+      item_end = str.size();
+    }
+
+    const int r = add_cap(str.substr(item_start, item_end - item_start));
+    if (r < 0) {
+      return r;
+    }
+
+    item_start = item_end + 1;
+  } while (item_start < str.size());
+
+  return 0;
+}
+
+/*
+ * Remove every capability in str, a ';'-separated list of
+ * "<type>=<perm list>" items. Stops at and returns the first error; returns
+ * 0 when every item was processed. At least one item is always processed,
+ * so an empty str is handed to remove_cap() as an empty capability.
+ */
+int RGWUserCaps::remove_from_string(const string& str)
+{
+  size_t item_start = 0;
+  do {
+    size_t item_end = str.find(';', item_start);
+    if (item_end == string::npos) {
+      item_end = str.size();
+    }
+
+    const int r = remove_cap(str.substr(item_start, item_end - item_start));
+    if (r < 0) {
+      return r;
+    }
+
+    item_start = item_end + 1;
+  } while (item_start < str.size());
+
+  return 0;
+}
+
+/* Dump the capabilities as an array section named "caps". */
+void RGWUserCaps::dump(Formatter *f) const
+{
+  static const char * const section_name = "caps";
+  dump(f, section_name);
+}
+
+/*
+ * Dump the capabilities as an array section called name. Each entry is an
+ * object with a "type" and a "perm" string; "perm" is the ", "-joined list
+ * of cap_names entries whose bits are all set, or "<none>".
+ */
+void RGWUserCaps::dump(Formatter *f, const char *name) const
+{
+  f->open_array_section(name);
+
+  for (const auto& entry : caps) {
+    f->open_object_section("cap");
+    f->dump_string("type", entry.first);
+
+    uint32_t remaining = entry.second;
+    string perm_str;
+    for (const rgw_name_to_flag *row = cap_names; row->type_name; ++row) {
+      if ((remaining & row->flag) != row->flag)
+        continue;
+
+      if (perm_str.size())
+        perm_str.append(", ");
+      perm_str.append(row->type_name);
+
+      /* bits already reported are not listed again under a narrower name */
+      remaining &= ~row->flag;
+    }
+    if (perm_str.empty())
+      perm_str = "<none>";
+
+    f->dump_string("perm", perm_str);
+    f->close_section();
+  }
+
+  f->close_section();
+}
+
+/*
+ * JSON decoding helper for a single capability entry, an object with a
+ * "type" string and a "perm" string.
+ */
+struct RGWUserCap {
+  string type;
+  uint32_t perm;
+
+  /* Fill type and perm from obj; throws JSONDecoder::err when the
+   * permission list cannot be parsed. */
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("type", type, obj);
+
+    string perm_str;
+    JSONDecoder::decode_json("perm", perm_str, obj);
+
+    const int r = RGWUserCaps::parse_cap_perm(perm_str, &perm);
+    if (r < 0) {
+      throw JSONDecoder::err("failed to parse permissions");
+    }
+  }
+};
+
+/*
+ * Decode a JSON list of capability entries and store each one in caps,
+ * overwriting any existing entry of the same type.
+ */
+void RGWUserCaps::decode_json(JSONObj *obj)
+{
+  list<RGWUserCap> caps_list;
+  decode_json_obj(caps_list, obj);
+
+  for (const RGWUserCap& cap : caps_list) {
+    caps[cap.type] = cap.perm;
+  }
+}
+
+/*
+ * Check that capability type cap grants every bit in perm.
+ * Returns 0 if so, -EPERM if the type is unknown or a bit is missing.
+ */
+int RGWUserCaps::check_cap(const string& cap, uint32_t perm)
+{
+  const auto iter = caps.find(cap);
+
+  const bool granted = (iter != caps.end()) &&
+                       ((iter->second & perm) == perm);
+  return granted ? 0 : -EPERM;
+}
+
+/* Tell whether tp is one of the capability types listed below. */
+bool RGWUserCaps::is_valid_cap_type(const string& tp)
+{
+  static const char *cap_type[] = {
+    "user",
+    "users",
+    "buckets",
+    "metadata",
+    "usage",
+    "zone",
+    "bilog",
+    "mdlog",
+    "datalog",
+    "roles",
+    "user-policy",
+  };
+
+  for (const char *known : cap_type) {
+    if (tp.compare(known) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/*
+ * Parse "<name>[:<ns>]" into name and ns, unescaping through
+ * rgw_unescape_str() with '\\' as escape character and ':' as separator.
+ */
+void rgw_pool::from_str(const string& s)
+{
+  const size_t ns_pos = rgw_unescape_str(s, 0, '\\', ':', &name);
+  if (ns_pos == string::npos) {
+    return;
+  }
+
+  /* ignore return; if pos != string::npos it means that we had a colon
+   * in the middle of ns that wasn't escaped, we're going to stop there
+   */
+  (void)rgw_unescape_str(s, ns_pos, '\\', ':', &ns);
+}
+
+/*
+ * Serialize the pool as "<name>" or, when a namespace is set, as
+ * "<name>:<ns>", with both parts escaped through rgw_escape_str().
+ */
+string rgw_pool::to_str() const
+{
+  string result;
+  rgw_escape_str(name, '\\', ':', &result);
+
+  if (!ns.empty()) {
+    string esc_ns;
+    rgw_escape_str(ns, '\\', ':', &esc_ns);
+    result.append(":");
+    result.append(esc_ns);
+  }
+
+  return result;
+}
+
+/*
+ * Decode an encoded rgw_obj from bl and fill this raw object from it: oid
+ * and loc via get_obj_bucket_and_oid_loc(), pool from the object's explicit
+ * data pool.
+ */
+void rgw_raw_obj::decode_from_rgw_obj(bufferlist::const_iterator& bl)
+{
+  using ceph::decode;
+
+  rgw_obj decoded_obj;
+  decode(decoded_obj, bl);
+
+  get_obj_bucket_and_oid_loc(decoded_obj, oid, loc);
+  pool = decoded_obj.get_explicit_data_pool();
+}
+
+/*
+ * Build "[<tenant><tenant_delim>]<name>[<id_delim><bucket_id>]".
+ *
+ * The tenant part is left out when tenant is empty or tenant_delim is 0;
+ * the id part when bucket_id is empty or id_delim is 0. reserve is extra
+ * capacity to allocate up front for a suffix the caller appends.
+ */
+std::string rgw_bucket::get_key(char tenant_delim, char id_delim, size_t reserve) const
+{
+  const bool with_tenant = !tenant.empty() && tenant_delim;
+  const bool with_id = !bucket_id.empty() && id_delim;
+
+  std::string key;
+  key.reserve(tenant.size() + sizeof(tenant_delim) +
+              name.size() + sizeof(id_delim) + bucket_id.size() + reserve);
+
+  if (with_tenant) {
+    key += tenant;
+    key += tenant_delim;
+  }
+  key += name;
+  if (with_id) {
+    key += id_delim;
+    key += bucket_id;
+  }
+  return key;
+}
+
+/*
+ * Build the bucket key and append "<shard_delim><shard_id>" when shard_id
+ * is non-negative and shard_delim is not 0.
+ */
+std::string rgw_bucket_shard::get_key(char tenant_delim, char id_delim,
+                                      char shard_delim) const
+{
+  static constexpr size_t shard_len{12}; // ":4294967295\0"
+
+  std::string key = bucket.get_key(tenant_delim, id_delim, shard_len);
+
+  const bool with_shard = (shard_id >= 0) && shard_delim;
+  if (with_shard) {
+    key += shard_delim;
+    key += std::to_string(shard_id);
+  }
+  return key;
+}
+
+/* Operation type names accepted in op type lists; NULL-terminated. */
+static struct rgw_name_to_flag op_type_mapping[] = {
+  { "*",      RGW_OP_TYPE_ALL },
+  { "read",   RGW_OP_TYPE_READ },
+  { "write",  RGW_OP_TYPE_WRITE },
+  { "delete", RGW_OP_TYPE_DELETE },
+  { NULL,     0 }
+};
+
+
+/*
+ * Parse a list of operation type names ("read", "write", "delete", "*")
+ * into a RGW_OP_TYPE_* bitmask stored in *perm.
+ */
+int rgw_parse_op_type_list(const string& str, uint32_t *perm)
+{
+  const int r = parse_list_of_flags(op_type_mapping, str, perm);
+  return r;
+}
+
+/*
+ * Match input against a policy pattern using match_wildcards().
+ *
+ * With MATCH_POLICY_ACTION or MATCH_POLICY_ARN the comparison is
+ * case-insensitive. Unless MATCH_POLICY_RESOURCE or MATCH_POLICY_STRING is
+ * set, pattern and input are split at ':' and compared block by block: both
+ * must have the same number of blocks and every pair of blocks must match.
+ * Otherwise the two strings are matched as a whole.
+ */
+bool match_policy(boost::string_view pattern, boost::string_view input,
+                  uint32_t flag)
+{
+  const uint32_t flag2 = flag & (MATCH_POLICY_ACTION|MATCH_POLICY_ARN) ?
+    MATCH_CASE_INSENSITIVE : 0;
+  const bool colonblocks = !(flag & (MATCH_POLICY_RESOURCE |
+                                     MATCH_POLICY_STRING));
+
+  const auto npos = boost::string_view::npos;
+  boost::string_view::size_type last_pos_input = 0, last_pos_pattern = 0;
+  while (true) {
+    auto cur_pos_input = colonblocks ? input.find(":", last_pos_input) : npos;
+    auto cur_pos_pattern =
+      colonblocks ? pattern.find(":", last_pos_pattern) : npos;
+
+    /* substr() takes a length, not an end position: cut each block at its
+     * own colon so it cannot run into the blocks that follow */
+    const auto len_input =
+      (cur_pos_input == npos) ? npos : cur_pos_input - last_pos_input;
+    const auto len_pattern =
+      (cur_pos_pattern == npos) ? npos : cur_pos_pattern - last_pos_pattern;
+
+    auto substr_input = input.substr(last_pos_input, len_input);
+    auto substr_pattern = pattern.substr(last_pos_pattern, len_pattern);
+
+    if (!match_wildcards(substr_pattern, substr_input, flag2))
+      return false;
+
+    if (cur_pos_pattern == npos)
+      return cur_pos_input == npos;
+    if (cur_pos_input == npos)
+      return false;
+
+    last_pos_pattern = cur_pos_pattern + 1;
+    last_pos_input = cur_pos_input + 1;
+  }
+}
+
+/*
+ * make attrs look-like-this
+ * converts underscores to dashes and everything else to lower case
+ *
+ * The result is built in a std::string rather than in a variable-length
+ * stack array, so the input length is not limited by the stack. Unlike the
+ * array-based version, an input containing a NUL byte is no longer cut off
+ * at that byte.
+ */
+string lowercase_dash_http_attr(const string& orig)
+{
+  string result;
+  result.reserve(orig.size());
+
+  for (const char c : orig) {
+    if (c == '_') {
+      result.push_back('-');
+    } else {
+      /* tolower() must not be handed a negative value */
+      result.push_back(static_cast<char>(tolower(static_cast<unsigned char>(c))));
+    }
+  }
+  return result;
+}
+
+/*
+ * make attrs Look-Like-This
+ * converts underscores to dashes; the first character and every character
+ * after a '-' or '_' is upper-cased, all others are lower-cased
+ *
+ * The result is built in a std::string rather than in a variable-length
+ * stack array, so the input length is not limited by the stack. Unlike the
+ * array-based version, an input containing a NUL byte is no longer cut off
+ * at that byte.
+ */
+string camelcase_dash_http_attr(const string& orig)
+{
+  string result;
+  result.reserve(orig.size());
+
+  bool last_sep = true;
+
+  for (const char c : orig) {
+    if (c == '_' || c == '-') {
+      result.push_back('-');
+      last_sep = true;
+      continue;
+    }
+
+    /* toupper()/tolower() must not be handed a negative value */
+    const unsigned char uc = static_cast<unsigned char>(c);
+    result.push_back(static_cast<char>(last_sep ? toupper(uc) : tolower(uc)));
+    last_sep = false;
+  }
+  return result;
+}
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
new file mode 100644
index 00000000..3911ab18
--- /dev/null
+++ b/src/rgw/rgw_common.h
@@ -0,0 +1,2742 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2009 Sage Weil <sage@newdream.net>
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_COMMON_H
+#define CEPH_RGW_COMMON_H
+
+#include <array>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/utility/string_view.hpp>
+
+#include "common/ceph_crypto.h"
+#include "common/random_string.h"
+#include "rgw_acl.h"
+#include "rgw_cors.h"
+#include "rgw_iam_policy.h"
+#include "rgw_quota.h"
+#include "rgw_string.h"
+#include "common/async/yield_context.h"
+#include "rgw_website.h"
+#include "rgw_object_lock.h"
+#include "rgw_tag.h"
+#include "cls/version/cls_version_types.h"
+#include "cls/user/cls_user_types.h"
+#include "cls/rgw/cls_rgw_types.h"
+#include "include/rados/librados.hpp"
+
+namespace ceph {
+ class Formatter;
+}
+
+using ceph::crypto::MD5;
+
+
+#define RGW_ATTR_PREFIX "user.rgw." // prepended to the RGW_ATTR_* names below by string-literal concatenation
+
+#define RGW_HTTP_RGWX_ATTR_PREFIX "RGWX_ATTR_"
+#define RGW_HTTP_RGWX_ATTR_PREFIX_OUT "Rgwx-Attr-"
+
+#define RGW_AMZ_PREFIX "x-amz-"
+#define RGW_AMZ_META_PREFIX RGW_AMZ_PREFIX "meta-" // expands to "x-amz-meta-"
+#define RGW_AMZ_WEBSITE_REDIRECT_LOCATION RGW_AMZ_PREFIX "website-redirect-location"
+#define RGW_AMZ_TAG_COUNT RGW_AMZ_PREFIX "tagging-count"
+
+#define RGW_SYS_PARAM_PREFIX "rgwx-"
+
+#define RGW_ATTR_ACL RGW_ATTR_PREFIX "acl" // each name in this group is RGW_ATTR_PREFIX followed by a fixed suffix
+#define RGW_ATTR_LC RGW_ATTR_PREFIX "lc"
+#define RGW_ATTR_CORS RGW_ATTR_PREFIX "cors"
+#define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag"
+#define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets"
+#define RGW_ATTR_META_PREFIX RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX // RGW_ATTR_PREFIX followed by RGW_AMZ_META_PREFIX
+#define RGW_ATTR_CONTENT_TYPE RGW_ATTR_PREFIX "content_type"
+#define RGW_ATTR_CACHE_CONTROL RGW_ATTR_PREFIX "cache_control"
+#define RGW_ATTR_CONTENT_DISP RGW_ATTR_PREFIX "content_disposition"
+#define RGW_ATTR_CONTENT_ENC RGW_ATTR_PREFIX "content_encoding"
+#define RGW_ATTR_CONTENT_LANG RGW_ATTR_PREFIX "content_language"
+#define RGW_ATTR_EXPIRES RGW_ATTR_PREFIX "expires"
+#define RGW_ATTR_DELETE_AT RGW_ATTR_PREFIX "delete_at"
+#define RGW_ATTR_ID_TAG RGW_ATTR_PREFIX "idtag"
+#define RGW_ATTR_TAIL_TAG RGW_ATTR_PREFIX "tail_tag"
+#define RGW_ATTR_SHADOW_OBJ RGW_ATTR_PREFIX "shadow_name"
+#define RGW_ATTR_MANIFEST RGW_ATTR_PREFIX "manifest"
+#define RGW_ATTR_USER_MANIFEST RGW_ATTR_PREFIX "user_manifest"
+#define RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION RGW_ATTR_PREFIX RGW_AMZ_WEBSITE_REDIRECT_LOCATION
+#define RGW_ATTR_SLO_MANIFEST RGW_ATTR_PREFIX "slo_manifest"
+/* Information whether an object is SLO or not must be exposed to
+ * user through custom HTTP header named X-Static-Large-Object. */
+#define RGW_ATTR_SLO_UINDICATOR RGW_ATTR_META_PREFIX "static-large-object"
+#define RGW_ATTR_X_ROBOTS_TAG RGW_ATTR_PREFIX "x-robots-tag"
+#define RGW_ATTR_STORAGE_CLASS RGW_ATTR_PREFIX "storage_class"
+
+/* S3 Object Lock */
+#define RGW_ATTR_OBJECT_LOCK RGW_ATTR_PREFIX "object-lock"
+#define RGW_ATTR_OBJECT_RETENTION RGW_ATTR_PREFIX "object-retention"
+#define RGW_ATTR_OBJECT_LEGAL_HOLD RGW_ATTR_PREFIX "object-legal-hold"
+
+
+#define RGW_ATTR_PG_VER RGW_ATTR_PREFIX "pg_ver"
+#define RGW_ATTR_SOURCE_ZONE RGW_ATTR_PREFIX "source_zone"
+#define RGW_ATTR_TAGS RGW_ATTR_PREFIX RGW_AMZ_PREFIX "tagging" // RGW_ATTR_PREFIX, then RGW_AMZ_PREFIX, then "tagging"
+
+#define RGW_ATTR_TEMPURL_KEY1 RGW_ATTR_META_PREFIX "temp-url-key"
+#define RGW_ATTR_TEMPURL_KEY2 RGW_ATTR_META_PREFIX "temp-url-key-2"
+
+/* Account/container quota of the Swift API. */
+#define RGW_ATTR_QUOTA_NOBJS RGW_ATTR_META_PREFIX "quota-count"
+#define RGW_ATTR_QUOTA_MSIZE RGW_ATTR_META_PREFIX "quota-bytes"
+
+/* Static Web Site of Swift API. */
+#define RGW_ATTR_WEB_INDEX RGW_ATTR_META_PREFIX "web-index"
+#define RGW_ATTR_WEB_ERROR RGW_ATTR_META_PREFIX "web-error"
+#define RGW_ATTR_WEB_LISTINGS RGW_ATTR_META_PREFIX "web-listings"
+#define RGW_ATTR_WEB_LIST_CSS RGW_ATTR_META_PREFIX "web-listings-css"
+#define RGW_ATTR_SUBDIR_MARKER RGW_ATTR_META_PREFIX "web-directory-type"
+
+#define RGW_ATTR_OLH_PREFIX RGW_ATTR_PREFIX "olh." // sub-prefix shared by the four RGW_ATTR_OLH_* names that follow
+
+#define RGW_ATTR_OLH_INFO RGW_ATTR_OLH_PREFIX "info"
+#define RGW_ATTR_OLH_VER RGW_ATTR_OLH_PREFIX "ver"
+#define RGW_ATTR_OLH_ID_TAG RGW_ATTR_OLH_PREFIX "idtag"
+#define RGW_ATTR_OLH_PENDING_PREFIX RGW_ATTR_OLH_PREFIX "pending."
+
+#define RGW_ATTR_COMPRESSION RGW_ATTR_PREFIX "compression"
+
+#define RGW_ATTR_APPEND_PART_NUM RGW_ATTR_PREFIX "append_part_num"
+
+/* IAM Policy */
+#define RGW_ATTR_IAM_POLICY RGW_ATTR_PREFIX "iam-policy"
+#define RGW_ATTR_USER_POLICY RGW_ATTR_PREFIX "user-policy"
+
+/* RGW File Attributes */
+#define RGW_ATTR_UNIX_KEY1 RGW_ATTR_PREFIX "unix-key1"
+#define RGW_ATTR_UNIX1 RGW_ATTR_PREFIX "unix1"
+
+#define RGW_ATTR_CRYPT_PREFIX RGW_ATTR_PREFIX "crypt." // sub-prefix shared by the four RGW_ATTR_CRYPT_* names that follow
+#define RGW_ATTR_CRYPT_MODE RGW_ATTR_CRYPT_PREFIX "mode"
+#define RGW_ATTR_CRYPT_KEYMD5 RGW_ATTR_CRYPT_PREFIX "keymd5"
+#define RGW_ATTR_CRYPT_KEYID RGW_ATTR_CRYPT_PREFIX "keyid"
+#define RGW_ATTR_CRYPT_KEYSEL RGW_ATTR_CRYPT_PREFIX "keysel"
+
+#define RGW_BUCKETS_OBJ_SUFFIX ".buckets"
+
+#define RGW_FORMAT_PLAIN 0 // RGW_FORMAT_* are consecutive selector values, not bit flags
+#define RGW_FORMAT_XML 1
+#define RGW_FORMAT_JSON 2
+#define RGW_FORMAT_HTML 3
+
+#define RGW_CAP_READ 0x1 // RGW_CAP_* are single-bit flags that can be OR-ed together
+#define RGW_CAP_WRITE 0x2
+#define RGW_CAP_ALL (RGW_CAP_READ | RGW_CAP_WRITE)
+
+#define RGW_REST_SWIFT 0x1 // RGW_REST_* are single-bit flags, one per REST API flavour
+#define RGW_REST_SWIFT_AUTH 0x2
+#define RGW_REST_S3 0x4
+#define RGW_REST_WEBSITE 0x8
+#define RGW_REST_STS 0x10
+#define RGW_REST_IAM 0x20
+
+#define RGW_SUSPENDED_USER_AUID ((uint64_t)-2) // parenthesised so the cast stays one operand wherever the macro expands (e.g. under sizeof)
+
+#define RGW_OP_TYPE_READ 0x01 // RGW_OP_TYPE_* are single-bit flags
+#define RGW_OP_TYPE_WRITE 0x02
+#define RGW_OP_TYPE_DELETE 0x04
+
+#define RGW_OP_TYPE_MODIFY (RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE)
+#define RGW_OP_TYPE_ALL (RGW_OP_TYPE_READ | RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE) // default RGWUserInfo::op_mask
+
+#define RGW_DEFAULT_MAX_BUCKETS 1000 // default RGWUserInfo::max_buckets, also used when decoding encodings older than v10
+
+#define RGW_DEFER_TO_BUCKET_ACLS_RECURSE 1
+#define RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL 2
+
+#define STATUS_CREATED 1900 // STATUS_* codes occupy 1900-1906
+#define STATUS_ACCEPTED 1901
+#define STATUS_NO_CONTENT 1902
+#define STATUS_PARTIAL_CONTENT 1903
+#define STATUS_REDIRECT 1904
+#define STATUS_NO_APPLY 1905
+#define STATUS_APPLIED 1906
+
+#define ERR_INVALID_BUCKET_NAME 2000 // ERR_* codes start at 2000; numbering is contiguous up to 2047
+#define ERR_INVALID_OBJECT_NAME 2001
+#define ERR_NO_SUCH_BUCKET 2002
+#define ERR_METHOD_NOT_ALLOWED 2003
+#define ERR_INVALID_DIGEST 2004
+#define ERR_BAD_DIGEST 2005
+#define ERR_UNRESOLVABLE_EMAIL 2006
+#define ERR_INVALID_PART 2007
+#define ERR_INVALID_PART_ORDER 2008
+#define ERR_NO_SUCH_UPLOAD 2009
+#define ERR_REQUEST_TIMEOUT 2010
+#define ERR_LENGTH_REQUIRED 2011
+#define ERR_REQUEST_TIME_SKEWED 2012
+#define ERR_BUCKET_EXISTS 2013
+#define ERR_BAD_URL 2014
+#define ERR_PRECONDITION_FAILED 2015
+#define ERR_NOT_MODIFIED 2016
+#define ERR_INVALID_UTF8 2017
+#define ERR_UNPROCESSABLE_ENTITY 2018
+#define ERR_TOO_LARGE 2019
+#define ERR_TOO_MANY_BUCKETS 2020
+#define ERR_INVALID_REQUEST 2021
+#define ERR_TOO_SMALL 2022
+#define ERR_NOT_FOUND 2023
+#define ERR_PERMANENT_REDIRECT 2024
+#define ERR_LOCKED 2025
+#define ERR_QUOTA_EXCEEDED 2026
+#define ERR_SIGNATURE_NO_MATCH 2027
+#define ERR_INVALID_ACCESS_KEY 2028
+#define ERR_MALFORMED_XML 2029
+#define ERR_USER_EXIST 2030
+#define ERR_NOT_SLO_MANIFEST 2031
+#define ERR_EMAIL_EXIST 2032
+#define ERR_KEY_EXIST 2033
+#define ERR_INVALID_SECRET_KEY 2034
+#define ERR_INVALID_KEY_TYPE 2035
+#define ERR_INVALID_CAP 2036
+#define ERR_INVALID_TENANT_NAME 2037
+#define ERR_WEBSITE_REDIRECT 2038
+#define ERR_NO_SUCH_WEBSITE_CONFIGURATION 2039
+#define ERR_AMZ_CONTENT_SHA256_MISMATCH 2040
+#define ERR_NO_SUCH_LC 2041
+#define ERR_NO_SUCH_USER 2042
+#define ERR_NO_SUCH_SUBUSER 2043
+#define ERR_MFA_REQUIRED 2044
+#define ERR_NO_SUCH_CORS_CONFIGURATION 2045
+#define ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION 2046
+#define ERR_INVALID_RETENTION_PERIOD 2047
+#define ERR_USER_SUSPENDED 2100 // numbering jumps here: 2100, then 2200-2221
+#define ERR_INTERNAL_ERROR 2200
+#define ERR_NOT_IMPLEMENTED 2201
+#define ERR_SERVICE_UNAVAILABLE 2202
+#define ERR_ROLE_EXISTS 2203
+#define ERR_MALFORMED_DOC 2204
+#define ERR_NO_ROLE_FOUND 2205
+#define ERR_DELETE_CONFLICT 2206
+#define ERR_NO_SUCH_BUCKET_POLICY 2207
+#define ERR_INVALID_LOCATION_CONSTRAINT 2208
+#define ERR_TAG_CONFLICT 2209
+#define ERR_INVALID_TAG 2210
+#define ERR_ZERO_IN_URL 2211
+#define ERR_MALFORMED_ACL_ERROR 2212
+#define ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION 2213
+#define ERR_INVALID_ENCRYPTION_ALGORITHM 2214
+#define ERR_INVALID_CORS_RULES_ERROR 2215
+#define ERR_NO_CORS_FOUND 2216
+#define ERR_INVALID_WEBSITE_ROUTING_RULES_ERROR 2217
+#define ERR_RATE_LIMITED 2218
+#define ERR_POSITION_NOT_EQUAL_TO_LENGTH 2219
+#define ERR_OBJECT_NOT_APPENDABLE 2220
+#define ERR_INVALID_BUCKET_STATE 2221
+
+#define ERR_BUSY_RESHARDING 2300
+#define ERR_NO_SUCH_ENTITY 2301
+
+// STS Errors
+#define ERR_PACKED_POLICY_TOO_LARGE 2400
+#define ERR_INVALID_IDENTITY_TOKEN 2401
+
+#ifndef UINT32_MAX
+#define UINT32_MAX (0xffffffffu) // fallback, only defined when no included header already provides UINT32_MAX
+#endif
+
+struct req_state;
+
+typedef void *RGWAccessHandle;
+
+enum RGWIntentEvent { // unscoped enum: enumerators are visible in the enclosing scope
+ DEL_OBJ = 0,
+ DEL_DIR = 1,
+};
+
+enum HostStyle { // NOTE(review): presumably path-style vs. virtual-hosted-style bucket addressing — confirm against users
+ PathStyle = 0,
+ VirtualStyle = 1,
+};
+
+/** Store error returns for output at a different point in the program */
+struct rgw_err {
+ rgw_err(); // defined out of line, like the other members declared here
+ void clear();
+ bool is_clear() const;
+ bool is_err() const;
+ friend std::ostream& operator<<(std::ostream& oss, const rgw_err &err); // stream output of an rgw_err
+
+ int http_ret; // NOTE(review): presumably the HTTP status to send — confirm against the out-of-view definitions
+ int ret; // NOTE(review): presumably the internal return/error code — confirm
+ std::string err_code;
+ std::string message;
+};
+
+
+
+/* Helper class used for RGWHTTPArgs parsing */
+class NameVal
+{
+ const std::string str; // copy of the constructor argument; const, so never modified afterwards
+ std::string name;
+ std::string val;
+ public:
+ explicit NameVal(const std::string& nv) : str(nv) {} // only stores the text; name and val start out empty
+
+ int parse(); // defined out of view; presumably splits str into name and val — confirm
+
+ std::string& get_name() { return name; } // returns a mutable reference to the member
+ std::string& get_val() { return val; } // returns a mutable reference to the member
+};
+
+/**
+ * Holds the raw argument string of an HTTP request (stored in req_state)
+ * together with the name/value maps derived from it: regular parameters,
+ * system parameters and sub-resources.
+ */
+class RGWHTTPArgs {
+  std::string str, empty_str;
+  std::map<std::string, std::string> val_map;
+  std::map<std::string, std::string> sys_val_map;
+  std::map<std::string, std::string> sub_resources;
+  bool has_resp_modifier = false;
+  bool admin_subresource_added = false;
+ public:
+  RGWHTTPArgs() = default;
+  /** Store `s` and parse it immediately; the result of parse() is ignored */
+  explicit RGWHTTPArgs(const std::string& s) {
+    set(s);
+    parse();
+  }
+
+  /**
+   * Set the arguments; as received.
+   * Resets the response-modifier flag, the regular parameters and the
+   * sub-resources.  NOTE(review): sys_val_map and admin_subresource_added are
+   * not reset here -- confirm whether parse() takes care of them.
+   */
+  void set(const std::string& s) {
+    has_resp_modifier = false;
+    val_map.clear();
+    sub_resources.clear();
+    str = s;
+  }
+  /** parse the received arguments */
+  int parse();
+  void append(const std::string& name, const string& val);
+  /** Get the value for a specific argument parameter */
+  const string& get(const std::string& name, bool *exists = nullptr) const;
+  boost::optional<const std::string&>
+  get_optional(const std::string& name) const;
+  int get_bool(const std::string& name, bool *val, bool *exists);
+  int get_bool(const char *name, bool *val, bool *exists);
+  void get_bool(const char *name, bool *val, bool def_val);
+  int get_int(const char *name, int *val, int def_val);
+
+  /** Get the value for specific system argument parameter */
+  std::string sys_get(const std::string& name, bool *exists = nullptr) const;
+
+  /** see if a parameter is contained in this RGWHTTPArgs */
+  bool exists(const char *name) const {
+    return (val_map.find(name) != std::end(val_map));
+  }
+  /** see if a sub-resource of that name was recorded */
+  bool sub_resource_exists(const char *name) const {
+    return (sub_resources.find(name) != std::end(sub_resources));
+  }
+  /** mutable access to the regular parameter map */
+  std::map<std::string, std::string>& get_params() {
+    return val_map;
+  }
+  const std::map<std::string, std::string>& get_sub_resources() const {
+    return sub_resources;
+  }
+  /** number of regular parameters */
+  unsigned get_num_params() const {
+    return val_map.size();
+  }
+  bool has_response_modifier() const {
+    return has_resp_modifier;
+  }
+  void set_system() { /* make all system params visible */
+    // copy every system parameter into the regular map, overwriting
+    // regular parameters of the same name
+    for (const auto& sys_param : sys_val_map) {
+      val_map[sys_param.first] = sys_param.second;
+    }
+  }
+  /** the argument string exactly as passed to set(); const so it can be
+   *  read through a const RGWHTTPArgs like the other accessors */
+  const std::string& get_str() const {
+    return str;
+  }
+}; // RGWHTTPArgs
+
+const char *rgw_conf_get(const map<string, string, ltstr_nocase>& conf_map, const char *name, const char *def_val);
+int rgw_conf_get_int(const map<string, string, ltstr_nocase>& conf_map, const char *name, int def_val);
+bool rgw_conf_get_bool(const map<string, string, ltstr_nocase>& conf_map, const char *name, bool def_val);
+
+class RGWEnv;
+
+/*
+ * Small bundle of settings owned by RGWEnv (a friend, which exposes them
+ * through its getters).  Until init() runs, the two logging switches are 1
+ * and defer_to_bucket_acls is 0.
+ */
+class RGWConf {
+  friend class RGWEnv;
+  int enable_ops_log = 1;
+  int enable_usage_log = 1;
+  uint8_t defer_to_bucket_acls = 0;
+  void init(CephContext *cct);
+public:
+  RGWConf() = default;
+};
+
+class RGWEnv { // name/value environment (keys ordered by ltstr_nocase) plus an RGWConf
+ std::map<string, string, ltstr_nocase> env_map;
+ RGWConf conf;
+public:
+ void init(CephContext *cct);
+ void init(CephContext *cct, char **envp);
+ void set(std::string name, std::string val);
+ const char *get(const char *name, const char *def_val = nullptr) const;
+ int get_int(const char *name, int def_val = 0) const;
+ bool get_bool(const char *name, bool def_val = 0); // NOTE(review): the only getter not declared const — confirm whether that is intentional
+ size_t get_size(const char *name, size_t def_val = 0) const;
+ bool exists(const char *name) const;
+ bool exists_prefix(const char *prefix) const;
+ void remove(const char *name);
+ const std::map<string, string, ltstr_nocase>& get_map() const { return env_map; } // read-only view of the whole map
+ int get_enable_ops_log() const {
+ return conf.enable_ops_log;
+ }
+
+ int get_enable_usage_log() const {
+ return conf.enable_usage_log;
+ }
+
+ int get_defer_to_bucket_acls() const {
+ return conf.defer_to_bucket_acls; // uint8_t field, widened to int on return
+ }
+};
+
+// return true if the connection is secure. this either means that the
+// connection arrived via ssl, or was forwarded as https by a trusted proxy
+bool rgw_transport_is_secure(CephContext *cct, const RGWEnv& env);
+
+enum http_op { // values are implicit, starting at 0 in declaration order
+ OP_GET,
+ OP_PUT,
+ OP_DELETE,
+ OP_HEAD,
+ OP_POST,
+ OP_COPY,
+ OP_OPTIONS,
+ OP_UNKNOWN, // last enumerator
+};
+
+enum RGWOpType { // values are implicit after the first, so inserting or reordering entries renumbers the rest
+ RGW_OP_UNKNOWN = 0,
+ RGW_OP_GET_OBJ,
+ RGW_OP_LIST_BUCKETS,
+ RGW_OP_STAT_ACCOUNT,
+ RGW_OP_LIST_BUCKET,
+ RGW_OP_GET_BUCKET_LOGGING,
+ RGW_OP_GET_BUCKET_LOCATION,
+ RGW_OP_GET_BUCKET_VERSIONING,
+ RGW_OP_SET_BUCKET_VERSIONING,
+ RGW_OP_GET_BUCKET_WEBSITE,
+ RGW_OP_SET_BUCKET_WEBSITE,
+ RGW_OP_STAT_BUCKET,
+ RGW_OP_CREATE_BUCKET,
+ RGW_OP_DELETE_BUCKET,
+ RGW_OP_PUT_OBJ,
+ RGW_OP_STAT_OBJ,
+ RGW_OP_POST_OBJ,
+ RGW_OP_PUT_METADATA_ACCOUNT,
+ RGW_OP_PUT_METADATA_BUCKET,
+ RGW_OP_PUT_METADATA_OBJECT,
+ RGW_OP_SET_TEMPURL,
+ RGW_OP_DELETE_OBJ,
+ RGW_OP_COPY_OBJ,
+ RGW_OP_GET_ACLS,
+ RGW_OP_PUT_ACLS,
+ RGW_OP_GET_CORS,
+ RGW_OP_PUT_CORS,
+ RGW_OP_DELETE_CORS,
+ RGW_OP_OPTIONS_CORS,
+ RGW_OP_GET_REQUEST_PAYMENT,
+ RGW_OP_SET_REQUEST_PAYMENT,
+ RGW_OP_INIT_MULTIPART,
+ RGW_OP_COMPLETE_MULTIPART,
+ RGW_OP_ABORT_MULTIPART,
+ RGW_OP_LIST_MULTIPART,
+ RGW_OP_LIST_BUCKET_MULTIPARTS,
+ RGW_OP_DELETE_MULTI_OBJ,
+ RGW_OP_BULK_DELETE,
+ RGW_OP_SET_ATTRS,
+ RGW_OP_GET_CROSS_DOMAIN_POLICY,
+ RGW_OP_GET_HEALTH_CHECK,
+ RGW_OP_GET_INFO,
+ RGW_OP_CREATE_ROLE,
+ RGW_OP_DELETE_ROLE,
+ RGW_OP_GET_ROLE,
+ RGW_OP_MODIFY_ROLE,
+ RGW_OP_LIST_ROLES,
+ RGW_OP_PUT_ROLE_POLICY,
+ RGW_OP_GET_ROLE_POLICY,
+ RGW_OP_LIST_ROLE_POLICIES,
+ RGW_OP_DELETE_ROLE_POLICY,
+ RGW_OP_PUT_BUCKET_POLICY,
+ RGW_OP_GET_BUCKET_POLICY,
+ RGW_OP_DELETE_BUCKET_POLICY,
+ RGW_OP_PUT_OBJ_TAGGING,
+ RGW_OP_GET_OBJ_TAGGING,
+ RGW_OP_DELETE_OBJ_TAGGING,
+ RGW_OP_PUT_LC,
+ RGW_OP_GET_LC,
+ RGW_OP_DELETE_LC,
+ RGW_OP_PUT_USER_POLICY,
+ RGW_OP_GET_USER_POLICY,
+ RGW_OP_LIST_USER_POLICIES,
+ RGW_OP_DELETE_USER_POLICY,
+ RGW_OP_PUT_BUCKET_OBJ_LOCK,
+ RGW_OP_GET_BUCKET_OBJ_LOCK,
+ RGW_OP_PUT_OBJ_RETENTION,
+ RGW_OP_GET_OBJ_RETENTION,
+ RGW_OP_PUT_OBJ_LEGAL_HOLD,
+ RGW_OP_GET_OBJ_LEGAL_HOLD,
+ /* rgw specific */
+ RGW_OP_ADMIN_SET_METADATA,
+ RGW_OP_GET_OBJ_LAYOUT,
+ RGW_OP_BULK_UPLOAD,
+ RGW_OP_METADATA_SEARCH,
+ RGW_OP_CONFIG_BUCKET_META_SEARCH,
+ RGW_OP_GET_BUCKET_META_SEARCH,
+ RGW_OP_DEL_BUCKET_META_SEARCH,
+ /* sts specific */
+ RGW_STS_ASSUME_ROLE, // the three STS entries use the RGW_STS_ prefix instead of RGW_OP_
+ RGW_STS_GET_SESSION_TOKEN,
+ RGW_STS_ASSUME_ROLE_WEB_IDENTITY,
+ /* pubsub */
+ RGW_OP_PUBSUB_TOPIC_CREATE,
+ RGW_OP_PUBSUB_TOPICS_LIST,
+ RGW_OP_PUBSUB_TOPIC_GET,
+ RGW_OP_PUBSUB_TOPIC_DELETE,
+ RGW_OP_PUBSUB_SUB_CREATE,
+ RGW_OP_PUBSUB_SUB_GET,
+ RGW_OP_PUBSUB_SUB_DELETE,
+ RGW_OP_PUBSUB_SUB_PULL,
+ RGW_OP_PUBSUB_SUB_ACK,
+ RGW_OP_PUBSUB_NOTIF_CREATE,
+ RGW_OP_PUBSUB_NOTIF_DELETE,
+ RGW_OP_PUBSUB_NOTIF_LIST,
+};
+
+class RGWAccessControlPolicy;
+class JSONObj;
+
+/*
+ * One credential: an access-key id, its secret and an optional sub-user
+ * name.  Encoded as struct version 2 (compat 2) with the fields in the order
+ * id, key, subuser.
+ */
+struct RGWAccessKey {
+  string id; // AccessKey
+  string key; // SecretKey
+  string subuser;
+
+  RGWAccessKey() = default;
+  // takes both strings by value and moves them into place; subuser stays empty
+  RGWAccessKey(std::string _id, std::string _key)
+    : id(std::move(_id)),
+      key(std::move(_key)) {}
+
+  // write id, key and subuser, in that order
+  void encode(bufferlist& bl) const {
+    ENCODE_START(2, 2, bl);
+    encode(id, bl);
+    encode(key, bl);
+    encode(subuser, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  // read the fields back in the order encode() wrote them
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
+    decode(id, bl);
+    decode(key, bl);
+    decode(subuser, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump_plain(Formatter *f) const;
+  void dump(Formatter *f, const string& user, bool swift) const;
+  static void generate_test_instances(list<RGWAccessKey*>& o);
+
+  void decode_json(JSONObj *obj);
+  void decode_json(JSONObj *obj, bool swift);
+};
+WRITE_CLASS_ENCODER(RGWAccessKey)
+
+/*
+ * A named sub-user and its permission mask.  Encoded as struct version 2
+ * (compat 2) with the fields in the order name, perm_mask.
+ */
+struct RGWSubUser {
+  string name;
+  uint32_t perm_mask = 0;   // a fresh sub-user has no permission bits set
+
+  RGWSubUser() = default;
+
+  // write name, then perm_mask
+  void encode(bufferlist& bl) const {
+    ENCODE_START(2, 2, bl);
+    encode(name, bl);
+    encode(perm_mask, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  // read the fields back in the order encode() wrote them
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
+    decode(name, bl);
+    decode(perm_mask, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void dump(Formatter *f, const string& user) const;
+  static void generate_test_instances(list<RGWSubUser*>& o);
+
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWSubUser)
+
+class RGWUserCaps
+{
+ map<string, uint32_t> caps; // NOTE(review): presumably capability type -> RGW_CAP_* bitmask — confirm against the out-of-view definitions
+
+ int get_cap(const string& cap, string& type, uint32_t *perm);
+ int add_cap(const string& cap);
+ int remove_cap(const string& cap);
+public:
+ static int parse_cap_perm(const string& str, uint32_t *perm);
+ int add_from_string(const string& str);
+ int remove_from_string(const string& str);
+
+ void encode(bufferlist& bl) const { // struct version 1, compat 1; the caps map is the only encoded field
+ ENCODE_START(1, 1, bl);
+ encode(caps, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) { // mirror of encode(): reads the caps map
+ DECODE_START(1, bl);
+ decode(caps, bl);
+ DECODE_FINISH(bl);
+ }
+ int check_cap(const string& cap, uint32_t perm);
+ bool is_valid_cap_type(const string& tp);
+ void dump(Formatter *f) const;
+ void dump(Formatter *f, const char *name) const;
+
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWUserCaps)
+
+void encode_json(const char *name, const obj_version& v, Formatter *f);
+void encode_json(const char *name, const RGWUserCaps& val, Formatter *f);
+
+void decode_json_obj(obj_version& v, JSONObj *obj);
+
+
+
+enum RGWIdentityType // explicit values; RGWUserInfo::type is initialised to TYPE_NONE
+{
+ TYPE_NONE=0,
+ TYPE_RGW=1,
+ TYPE_KEYSTONE=2,
+ TYPE_LDAP=3,
+ TYPE_ROLE=4,
+ TYPE_WEB=5,
+};
+
+static string RGW_STORAGE_CLASS_STANDARD = "STANDARD"; // NOTE(review): non-const static in a header gives every including translation unit its own mutable copy — consider const
+
+/*
+ * A placement rule: a placement name plus an optional storage class.  An
+ * empty storage class is treated as RGW_STORAGE_CLASS_STANDARD wherever
+ * rules are compared or queried.  The textual/encoded form is "name" for
+ * the standard class and "name/storage_class" otherwise.
+ */
+struct rgw_placement_rule {
+  std::string name;
+  std::string storage_class;
+
+  rgw_placement_rule() = default;
+  rgw_placement_rule(const string& _n, const string& _sc)
+    : name(_n), storage_class(_sc) {}
+  // copy the name of _r; use _sc as storage class unless it is empty, in
+  // which case _r's storage class is kept
+  rgw_placement_rule(const rgw_placement_rule& _r, const string& _sc)
+    : name(_r.name) {
+    storage_class = _sc.empty() ? _r.storage_class : _sc;
+  }
+
+  // true only when both the name and the storage class are unset
+  bool empty() const {
+    return name.empty() && storage_class.empty();
+  }
+
+  // fill in whichever of name / storage_class is still empty from r
+  void inherit_from(const rgw_placement_rule& r) {
+    if (name.empty())
+      name = r.name;
+    if (storage_class.empty())
+      storage_class = r.storage_class;
+  }
+
+  void clear() {
+    name.clear();
+    storage_class.clear();
+  }
+
+  void init(const string& n, const string& c) {
+    name = n;
+    storage_class = c;
+  }
+
+  // map the empty storage class to RGW_STORAGE_CLASS_STANDARD; any other
+  // value is returned unchanged (by reference to the argument)
+  static const string& get_canonical_storage_class(const string& storage_class) {
+    if (!storage_class.empty()) {
+      return storage_class;
+    }
+    return RGW_STORAGE_CLASS_STANDARD;
+  }
+
+  const string& get_storage_class() const {
+    return get_canonical_storage_class(storage_class);
+  }
+
+  // three-way comparison: by name first, then by canonical storage class
+  int compare(const rgw_placement_rule& r) const {
+    const int by_name = name.compare(r.name);
+    if (by_name != 0) {
+      return by_name;
+    }
+    return get_storage_class().compare(r.get_storage_class());
+  }
+
+  // equal when the names match and the canonical storage classes match
+  bool operator==(const rgw_placement_rule& r) const {
+    if (name != r.name) {
+      return false;
+    }
+    return get_storage_class() == r.get_storage_class();
+  }
+
+  bool operator!=(const rgw_placement_rule& r) const {
+    return !(*this == r);
+  }
+
+  void encode(bufferlist& bl) const {
+    /* no ENCODE_START/END due to backward compatibility */
+    const std::string text = to_str();
+    ceph::encode(text, bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    std::string text;
+    ceph::decode(text, bl);
+    from_str(text);
+  }
+
+  // short form: just the name when the storage class is the standard one
+  std::string to_str() const {
+    if (!standard_storage_class()) {
+      return to_str_explicit();
+    }
+    return name;
+  }
+
+  // always "name/storage_class", even when storage_class is empty
+  std::string to_str_explicit() const {
+    return name + "/" + storage_class;
+  }
+
+  // split at the first '/': the left part is the name, the right part the
+  // storage class; without a '/' the whole string is the name
+  void from_str(const std::string& s) {
+    const size_t slash = s.find("/");
+    if (slash != std::string::npos) {
+      name = s.substr(0, slash);
+      storage_class = s.substr(slash + 1);
+      return;
+    }
+    name = s;
+    storage_class.clear();
+  }
+
+  bool standard_storage_class() const {
+    return storage_class.empty() || storage_class == RGW_STORAGE_CLASS_STANDARD;
+  }
+};
+WRITE_CLASS_ENCODER(rgw_placement_rule)
+
+void encode_json(const char *name, const rgw_placement_rule& val, ceph::Formatter *f);
+void decode_json_obj(rgw_placement_rule& v, JSONObj *obj);
+
+// stream a placement rule in its to_str() form
+inline ostream& operator<<(ostream& out, const rgw_placement_rule& rule) {
+  out << rule.to_str();
+  return out;
+}
+/*
+ * Description of an RGW user: identity, credentials, sub-users,
+ * capabilities, quotas and placement defaults, together with its versioned
+ * binary encoding (currently struct version 21, compat 9).
+ */
+struct RGWUserInfo
+{
+  rgw_user user_id;
+  string display_name;
+  string user_email;
+  map<string, RGWAccessKey> access_keys;
+  map<string, RGWAccessKey> swift_keys;
+  map<string, RGWSubUser> subusers;
+  __u8 suspended;
+  int32_t max_buckets;
+  uint32_t op_mask;
+  RGWUserCaps caps;
+  __u8 admin;
+  __u8 system;
+  rgw_placement_rule default_placement;
+  list<string> placement_tags;
+  RGWQuotaInfo bucket_quota;
+  map<int, string> temp_url_keys;
+  RGWQuotaInfo user_quota;
+  uint32_t type;
+  set<string> mfa_ids;
+  string assumed_role_arn;
+
+  RGWUserInfo()
+    : suspended(0),
+      max_buckets(RGW_DEFAULT_MAX_BUCKETS),
+      op_mask(RGW_OP_TYPE_ALL),
+      admin(0),
+      system(0),
+      type(TYPE_NONE) {
+  }
+
+  // look up an access key by its id; returns nullptr when it is unknown
+  RGWAccessKey* get_key(const string& access_key) {
+    auto k = access_keys.find(access_key);
+    if (k == access_keys.end())
+      return nullptr;
+    return &(k->second);
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(21, 9, bl);
+    encode((uint64_t)0, bl); // old auid
+    // the first access key is also written as a stand-alone id/secret pair;
+    // decode() only uses that pair for encodings older than v6
+    string access_key;
+    string secret_key;
+    if (!access_keys.empty()) {
+      map<string, RGWAccessKey>::const_iterator iter = access_keys.begin();
+      const RGWAccessKey& k = iter->second;
+      access_key = k.id;
+      secret_key = k.key;
+    }
+    encode(access_key, bl);
+    encode(secret_key, bl);
+    encode(display_name, bl);
+    encode(user_email, bl);
+    // same for the first swift key; decode() reads and discards this pair
+    string swift_name;
+    string swift_key;
+    if (!swift_keys.empty()) {
+      map<string, RGWAccessKey>::const_iterator iter = swift_keys.begin();
+      const RGWAccessKey& k = iter->second;
+      swift_name = k.id;
+      swift_key = k.key;
+    }
+    encode(swift_name, bl);
+    encode(swift_key, bl);
+    encode(user_id.id, bl);
+    encode(access_keys, bl);
+    encode(subusers, bl);
+    encode(suspended, bl);
+    encode(swift_keys, bl);
+    encode(max_buckets, bl);
+    encode(caps, bl);
+    encode(op_mask, bl);
+    encode(system, bl);
+    encode(default_placement, bl);
+    encode(placement_tags, bl);
+    encode(bucket_quota, bl);
+    encode(temp_url_keys, bl);
+    encode(user_quota, bl);
+    encode(user_id.tenant, bl);
+    encode(admin, bl);
+    encode(type, bl);
+    encode(mfa_ids, bl);
+    encode(assumed_role_arn, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    // 21 matches ENCODE_START above: this decoder understands every field up
+    // to and including assumed_role_arn (added in v21)
+    DECODE_START_LEGACY_COMPAT_LEN_32(21, 9, 9, bl);
+    if (struct_v >= 2) {
+      uint64_t old_auid;
+      decode(old_auid, bl);
+    }
+    string access_key;
+    string secret_key;
+    decode(access_key, bl);
+    decode(secret_key, bl);
+    if (struct_v < 6) {
+      // before v6 there was no key map, only this single id/secret pair
+      RGWAccessKey k;
+      k.id = access_key;
+      k.key = secret_key;
+      access_keys[access_key] = k;
+    }
+    decode(display_name, bl);
+    decode(user_email, bl);
+    /* We populate swift_keys map later nowadays, but we have to decode. */
+    string swift_name;
+    string swift_key;
+    if (struct_v >= 3) decode(swift_name, bl);
+    if (struct_v >= 4) decode(swift_key, bl);
+    if (struct_v >= 5)
+      decode(user_id.id, bl);
+    else
+      user_id.id = access_key;
+    if (struct_v >= 6) {
+      decode(access_keys, bl);
+      decode(subusers, bl);
+    }
+    suspended = 0;
+    if (struct_v >= 7) {
+      decode(suspended, bl);
+    }
+    if (struct_v >= 8) {
+      decode(swift_keys, bl);
+    }
+    if (struct_v >= 10) {
+      decode(max_buckets, bl);
+    } else {
+      max_buckets = RGW_DEFAULT_MAX_BUCKETS;
+    }
+    if (struct_v >= 11) {
+      decode(caps, bl);
+    }
+    if (struct_v >= 12) {
+      decode(op_mask, bl);
+    } else {
+      op_mask = RGW_OP_TYPE_ALL;
+    }
+    if (struct_v >= 13) {
+      decode(system, bl);
+      decode(default_placement, bl);
+      decode(placement_tags, bl); /* tags of allowed placement rules */
+    }
+    if (struct_v >= 14) {
+      decode(bucket_quota, bl);
+    }
+    if (struct_v >= 15) {
+      decode(temp_url_keys, bl);
+    }
+    if (struct_v >= 16) {
+      decode(user_quota, bl);
+    }
+    if (struct_v >= 17) {
+      decode(user_id.tenant, bl);
+    } else {
+      user_id.tenant.clear();
+    }
+    if (struct_v >= 18) {
+      decode(admin, bl);
+    }
+    if (struct_v >= 19) {
+      decode(type, bl);
+    }
+    if (struct_v >= 20) {
+      decode(mfa_ids, bl);
+    }
+    if (struct_v >= 21) {
+      decode(assumed_role_arn, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<RGWUserInfo*>& o);
+
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWUserInfo)
+
+/*
+ * A pool reference: pool name plus a namespace inside it.  Ordering and
+ * equality compare the name first, then the namespace.
+ */
+struct rgw_pool {
+  std::string name;
+  std::string ns;
+
+  rgw_pool() = default;
+  rgw_pool(const rgw_pool& _p) = default;
+  rgw_pool(rgw_pool&&) = default;
+  // implicit on purpose: initialises both fields through from_str()
+  rgw_pool(const string& _s) {
+    from_str(_s);
+  }
+  rgw_pool(const string& _name, const string& _ns) : name(_name), ns(_ns) {}
+
+  string to_str() const;
+  void from_str(const string& s);
+
+  void init(const string& _s) {
+    from_str(_s);
+  }
+
+  // a pool without a name counts as empty, whatever the namespace is
+  bool empty() const {
+    return name.empty();
+  }
+
+  // three-way comparison: name first, namespace as the tie breaker
+  int compare(const rgw_pool& p) const {
+    const int by_name = name.compare(p.name);
+    return by_name != 0 ? by_name : ns.compare(p.ns);
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(10, 10, bl);
+    encode(name, bl);
+    encode(ns, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode_from_bucket(bufferlist::const_iterator& bl);
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(10, 3, 3, bl);
+
+    decode(name, bl);
+
+    /*
+     * note that rgw_pool can be used where rgw_bucket was used before
+     * therefore we inherit rgw_bucket's old versions. However, we only
+     * need the first field from rgw_bucket. unless we add more fields
+     * in which case we'll need to look at struct_v, and check the actual
+     * version. Anything older than 10 needs to be treated as old rgw_bucket,
+     * so the namespace is only read from version 10 on.
+     */
+    if (struct_v >= 10) {
+      decode(ns, bl);
+    }
+
+    DECODE_FINISH(bl);
+  }
+
+  rgw_pool& operator=(const rgw_pool&) = default;
+
+  bool operator==(const rgw_pool& p) const {
+    return compare(p) == 0;
+  }
+  bool operator!=(const rgw_pool& p) const {
+    return !(*this == p);
+  }
+  bool operator<(const rgw_pool& p) const {
+    return compare(p) < 0;
+  }
+};
+WRITE_CLASS_ENCODER(rgw_pool)
+
+/*
+ * The three pools a bucket's content may be placed in explicitly: object
+ * data, "extra" data and the bucket index.
+ */
+struct rgw_data_placement_target {
+  rgw_pool data_pool;
+  rgw_pool data_extra_pool;
+  rgw_pool index_pool;
+
+  rgw_data_placement_target() = default;
+  rgw_data_placement_target(const rgw_data_placement_target&) = default;
+  rgw_data_placement_target(rgw_data_placement_target&&) = default;
+
+  rgw_data_placement_target(const rgw_pool& data_pool,
+                            const rgw_pool& data_extra_pool,
+                            const rgw_pool& index_pool)
+    : data_pool(data_pool),
+      data_extra_pool(data_extra_pool),
+      index_pool(index_pool) {
+  }
+
+  rgw_data_placement_target&
+  operator=(const rgw_data_placement_target&) = default;
+
+  // the extra-data pool, falling back to data_pool when none is configured
+  const rgw_pool& get_data_extra_pool() const {
+    if (data_extra_pool.empty()) {
+      return data_pool;
+    }
+    return data_extra_pool;
+  }
+
+  // three-way comparison in the order data_pool, data_extra_pool, index_pool;
+  // const so it can be called on const objects, like rgw_pool::compare()
+  int compare(const rgw_data_placement_target& t) const {
+    int c = data_pool.compare(t.data_pool);
+    if (c != 0) {
+      return c;
+    }
+    c = data_extra_pool.compare(t.data_extra_pool);
+    if (c != 0) {
+      return c;
+    }
+    return index_pool.compare(t.index_pool);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+
+// stream a pool in its to_str() form
+inline ostream& operator<<(ostream& out, const rgw_pool& p) {
+  return out << p.to_str();
+}
+
+/*
+ * A raw object reference: the pool it lives in, its object id and a
+ * locator string.  Ordered and compared by (pool, oid, loc).
+ */
+struct rgw_raw_obj {
+  rgw_pool pool;
+  std::string oid;
+  std::string loc;
+
+  rgw_raw_obj() {}
+  rgw_raw_obj(const rgw_pool& _pool, const std::string& _oid) {
+    init(_pool, _oid);
+  }
+  rgw_raw_obj(const rgw_pool& _pool, const std::string& _oid, const string& _loc)
+    : loc(_loc) {
+    init(_pool, _oid);
+  }
+
+  // set pool and oid; loc is left untouched
+  void init(const rgw_pool& _pool, const std::string& _oid) {
+    pool = _pool;
+    oid = _oid;
+  }
+
+  // an object without an oid counts as empty
+  bool empty() const {
+    return oid.empty();
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(6, 6, bl);
+    encode(pool, bl);
+    encode(oid, bl);
+    encode(loc, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode_from_rgw_obj(bufferlist::const_iterator& bl);
+
+  void decode(bufferlist::const_iterator& bl) {
+    // remember where this object starts so that a legacy encoding can be
+    // re-read from the beginning
+    const unsigned start_off = bl.get_off();
+    DECODE_START(6, bl);
+    if (struct_v < 6) {
+      /*
+       * this object was encoded as rgw_obj, prior to rgw_raw_obj been split out of it,
+       * let's decode it as rgw_obj and convert it
+       */
+      bl.seek(start_off);
+      decode_from_rgw_obj(bl);
+      return;
+    }
+    decode(pool, bl);
+    decode(oid, bl);
+    decode(loc, bl);
+    DECODE_FINISH(bl);
+  }
+
+  // lexicographic order over (pool, oid, loc)
+  bool operator<(const rgw_raw_obj& o) const {
+    const int by_pool = pool.compare(o.pool);
+    if (by_pool != 0) {
+      return by_pool < 0;
+    }
+    const int by_oid = oid.compare(o.oid);
+    if (by_oid != 0) {
+      return by_oid < 0;
+    }
+    return loc.compare(o.loc) < 0;
+  }
+
+  bool operator==(const rgw_raw_obj& o) const {
+    return (pool == o.pool && oid == o.oid && loc == o.loc);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(rgw_raw_obj)
+
+// stream a raw object as "<pool>:<oid>"; the locator is not printed
+inline ostream& operator<<(ostream& out, const rgw_raw_obj& o) {
+  return out << o.pool << ":" << o.oid;
+}
+
+/*
+ * Identifies a bucket: tenant, name, marker and bucket instance id, plus an
+ * optional explicit placement (data / extra-data / index pools).  Encoded
+ * as struct version 10; decode() still reads the older layouts.
+ */
+struct rgw_bucket {
+  std::string tenant;
+  std::string name;
+  std::string marker;
+  std::string bucket_id;
+  rgw_data_placement_target explicit_placement;
+
+  std::string oid; /*
+                    * runtime in-memory only info. If not empty, points to the bucket instance object
+                    */
+
+  rgw_bucket() { }
+  // build from a user (supplies the tenant) and a cls_user_bucket entry
+  explicit rgw_bucket(const rgw_user& u, const cls_user_bucket& b) :
+    tenant(u.tenant),
+    name(b.name),
+    marker(b.marker),
+    bucket_id(b.bucket_id),
+    explicit_placement(b.explicit_placement.data_pool,
+                       b.explicit_placement.data_extra_pool,
+                       b.explicit_placement.index_pool) {}
+  rgw_bucket(const rgw_bucket&) = default;
+  rgw_bucket(rgw_bucket&&) = default;
+
+  // copy name, marker, id and the explicit placement pools (as strings)
+  // into *b; the tenant is not part of cls_user_bucket and is not copied
+  void convert(cls_user_bucket *b) const {
+    b->name = name;
+    b->marker = marker;
+    b->bucket_id = bucket_id;
+    b->explicit_placement.data_pool = explicit_placement.data_pool.to_str();
+    b->explicit_placement.data_extra_pool = explicit_placement.data_extra_pool.to_str();
+    b->explicit_placement.index_pool = explicit_placement.index_pool.to_str();
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(10, 10, bl);
+    encode(name, bl);
+    encode(marker, bl);
+    encode(bucket_id, bl);
+    encode(tenant, bl);
+    // the explicit placement is only written when a data pool is set
+    bool encode_explicit = !explicit_placement.data_pool.empty();
+    encode(encode_explicit, bl);
+    if (encode_explicit) {
+      encode(explicit_placement.data_pool, bl);
+      encode(explicit_placement.data_extra_pool, bl);
+      encode(explicit_placement.index_pool, bl);
+    }
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(10, 3, 3, bl);
+    decode(name, bl);
+    if (struct_v < 10) {
+      // legacy layout: the data pool name follows the bucket name
+      decode(explicit_placement.data_pool.name, bl);
+    }
+    if (struct_v >= 2) {
+      decode(marker, bl);
+      if (struct_v <= 3) {
+        // legacy layout: the bucket id is a number.  std::to_string() keeps
+        // all digits; a 16-byte snprintf buffer would cut off any id of 16
+        // or more digits.
+        uint64_t id;
+        decode(id, bl);
+        bucket_id = std::to_string(id);
+      } else {
+        decode(bucket_id, bl);
+      }
+    }
+    if (struct_v < 10) {
+      if (struct_v >= 5) {
+        decode(explicit_placement.index_pool.name, bl);
+      } else {
+        // no separate index pool in the oldest layouts
+        explicit_placement.index_pool = explicit_placement.data_pool;
+      }
+      if (struct_v >= 7) {
+        decode(explicit_placement.data_extra_pool.name, bl);
+      }
+    }
+    if (struct_v >= 8) {
+      decode(tenant, bl);
+    }
+    if (struct_v >= 10) {
+      bool decode_explicit = false;
+      decode(decode_explicit, bl);
+      if (decode_explicit) {
+        decode(explicit_placement.data_pool, bl);
+        decode(explicit_placement.data_extra_pool, bl);
+        decode(explicit_placement.index_pool, bl);
+      }
+    }
+    DECODE_FINISH(bl);
+  }
+
+  // switch to a new bucket instance id; the cached instance object name
+  // (oid) is cleared
+  void update_bucket_id(const string& new_bucket_id) {
+    bucket_id = new_bucket_id;
+    oid.clear();
+  }
+
+  // format a key for the bucket/instance. pass delim=0 to skip a field
+  std::string get_key(char tenant_delim = '/',
+                      char id_delim = ':',
+                      size_t reserve = 0) const;
+
+  const rgw_pool& get_data_extra_pool() const {
+    return explicit_placement.get_data_extra_pool();
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+  static void generate_test_instances(list<rgw_bucket*>& o);
+
+  rgw_bucket& operator=(const rgw_bucket&) = default;
+
+  // ordering uses tenant and name only.  NOTE(review): operator== also
+  // compares bucket_id, so two buckets can be unequal yet neither is less
+  // than the other -- confirm this is intended for ordered containers.
+  bool operator<(const rgw_bucket& b) const {
+    if (tenant == b.tenant) {
+      return name < b.name;
+    } else {
+      return tenant < b.tenant;
+    }
+  }
+
+  bool operator==(const rgw_bucket& b) const {
+    return (tenant == b.tenant) && (name == b.name) &&
+           (bucket_id == b.bucket_id);
+  }
+};
+WRITE_CLASS_ENCODER(rgw_bucket)
+
+/// Stream a bucket as "<name>[<marker>]".
+inline ostream& operator<<(ostream& out, const rgw_bucket &b) {
+  return out << b.name << "[" << b.marker << "]";
+}
+
+/// A bucket paired with the id of one of its shards (-1 when default-built).
+struct rgw_bucket_shard {
+  rgw_bucket bucket;
+  int shard_id{-1};
+
+  rgw_bucket_shard() {}
+  rgw_bucket_shard(const rgw_bucket& _b, int _sid) : bucket(_b), shard_id(_sid) {}
+
+  std::string get_key(char tenant_delim = '/', char id_delim = ':',
+                      char shard_delim = ':') const;
+
+  /// Order by bucket first; shard_id only breaks ties between equivalent buckets.
+  bool operator<(const rgw_bucket_shard& b) const {
+    if (bucket < b.bucket) {
+      return true;
+    }
+    const bool other_bucket_first = (b.bucket < bucket);
+    return other_bucket_first ? false : (shard_id < b.shard_id);
+  }
+};
+
+
+/// Keeps the object version that was read and the version to be written.
+struct RGWObjVersionTracker {
+  obj_version read_version;
+  obj_version write_version;
+
+  /// Always hands out the read version slot.
+  obj_version *version_for_read() {
+    return &read_version;
+  }
+
+  /// Write version, or a null pointer while its `ver` is still 0.
+  obj_version *version_for_write() {
+    return (write_version.ver != 0) ? &write_version : nullptr;
+  }
+
+  /// Read version to check against, or a null pointer while its `ver` is 0.
+  obj_version *version_for_check() {
+    return (read_version.ver != 0) ? &read_version : nullptr;
+  }
+
+  void prepare_op_for_read(librados::ObjectReadOperation *op);
+  void prepare_op_for_write(librados::ObjectWriteOperation *op);
+
+  void apply_write();
+
+  /// Reset both versions to default-constructed values.
+  void clear() {
+    write_version = obj_version();
+    read_version = obj_version();
+  }
+
+  void generate_new_write_ver(CephContext *cct);
+};
+
+/// Stream an object version as "<tag>:<ver>".
+inline ostream& operator<<(ostream& out, const obj_version &v)
+{
+  return out << v.tag << ":" << v.ver;
+}
+
+/// Stream a tracker as "{r=<read_version>,w=<write_version>}".
+inline ostream& operator<<(ostream& out, const RGWObjVersionTracker &ot)
+{
+  return out << "{r=" << ot.read_version << ",w=" << ot.write_version << "}";
+}
+
+/// Bit flags for a bucket; each enumerator occupies its own bit.
+enum RGWBucketFlags {
+  BUCKET_SUSPENDED          = 1 << 0,  // 0x1
+  BUCKET_VERSIONED          = 1 << 1,  // 0x2
+  BUCKET_VERSIONS_SUSPENDED = 1 << 2,  // 0x4
+  BUCKET_DATASYNC_DISABLED  = 1 << 3,  // 0x8
+  BUCKET_MFA_ENABLED        = 1 << 4,  // 0x10
+  BUCKET_OBJ_LOCK_ENABLED   = 1 << 5,  // 0x20
+};
+
+/// Kind of bucket index; the numeric values are spelled out explicitly.
+enum RGWBucketIndexType {
+  RGWBIType_Normal    = 0x0,
+  RGWBIType_Indexless = 0x1,
+};
+
+/// Stream the index type as "Normal", "Indexless", or "Unknown" for any
+/// other value.
+inline ostream& operator<<(ostream& out, const RGWBucketIndexType &index_type)
+{
+  const char *label = "Unknown";
+  if (index_type == RGWBIType_Normal) {
+    label = "Normal";
+  } else if (index_type == RGWBIType_Indexless) {
+    label = "Indexless";
+  }
+  return out << label;
+}
+
+// Bucket metadata record: bucket identity, owner, flag bits, placement,
+// quota, index sharding, website / swift-versioning settings, resharding
+// state and object-lock configuration, with versioned encode()/decode().
+struct RGWBucketInfo {
+ enum BIShardsHashType {
+ MOD = 0
+ };
+
+ rgw_bucket bucket;
+ rgw_user owner;
+ // bitmask of RGWBucketFlags values (see the flag accessors further down)
+ uint32_t flags;
+ string zonegroup;
+ ceph::real_time creation_time;
+ rgw_placement_rule placement_rule;
+ bool has_instance_obj;
+ RGWObjVersionTracker objv_tracker; /* we don't need to serialize this, for runtime tracking */
+ obj_version ep_objv; /* entry point object version, for runtime tracking only */
+ RGWQuotaInfo quota;
+
+ // Represents the number of bucket index object shards:
+ // - value of 0 indicates there is no sharding (this is by default before this
+ // feature is implemented).
+ // - value of UINT32_T::MAX indicates this is a blind bucket.
+ uint32_t num_shards;
+
+ // Represents the bucket index shard hash type.
+ uint8_t bucket_index_shard_hash_type;
+
+ // Represents the shard number for blind bucket.
+ const static uint32_t NUM_SHARDS_BLIND_BUCKET;
+
+ bool requester_pays;
+
+ bool has_website;
+ RGWBucketWebsiteConf website_conf;
+
+ RGWBucketIndexType index_type = RGWBIType_Normal;
+
+ bool swift_versioning;
+ string swift_ver_location;
+
+ map<string, uint32_t> mdsearch_config;
+
+
+
+ /* resharding */
+ uint8_t reshard_status;
+ string new_bucket_instance_id;
+
+ RGWObjectLock obj_lock;
+
+ // Serialize as struct version 20. Fields are appended in the order in
+ // which decode() expects them; optional parts (website_conf,
+ // swift_ver_location, obj_lock) are only written when their flag is set.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(20, 4, bl);
+ encode(bucket, bl);
+ encode(owner.id, bl);
+ encode(flags, bl);
+ encode(zonegroup, bl);
+ // whole-second creation time; decode() only uses it for struct_v < 17,
+ // the full creation_time is encoded again further down
+ uint64_t ct = real_clock::to_time_t(creation_time);
+ encode(ct, bl);
+ encode(placement_rule, bl);
+ encode(has_instance_obj, bl);
+ encode(quota, bl);
+ encode(num_shards, bl);
+ encode(bucket_index_shard_hash_type, bl);
+ encode(requester_pays, bl);
+ encode(owner.tenant, bl);
+ encode(has_website, bl);
+ if (has_website) {
+ encode(website_conf, bl);
+ }
+ encode((uint32_t)index_type, bl);
+ encode(swift_versioning, bl);
+ if (swift_versioning) {
+ encode(swift_ver_location, bl);
+ }
+ encode(creation_time, bl);
+ encode(mdsearch_config, bl);
+ encode(reshard_status, bl);
+ encode(new_bucket_instance_id, bl);
+ // obj_lock is present in the stream only when BUCKET_OBJ_LOCK_ENABLED is
+ // set in flags; decode() applies the same test
+ if (obj_lock_enabled()) {
+ encode(obj_lock, bl);
+ }
+ ENCODE_FINISH(bl);
+ }
+ // Deserialize any supported struct version. Each field is read only when
+ // struct_v is recent enough to contain it. Except for website_conf,
+ // index_type, swift_versioning and swift_ver_location, which are reset
+ // explicitly, fields absent from the stream keep their previous values.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN_32(20, 4, 4, bl);
+ decode(bucket, bl);
+ if (struct_v >= 2) {
+ string s;
+ decode(s, bl);
+ owner.from_str(s);
+ }
+ if (struct_v >= 3)
+ decode(flags, bl);
+ if (struct_v >= 5)
+ decode(zonegroup, bl);
+ if (struct_v >= 6) {
+ // always consume the legacy seconds value, but only use it when the
+ // full creation_time (struct_v >= 17) is not in the stream
+ uint64_t ct;
+ decode(ct, bl);
+ if (struct_v < 17)
+ creation_time = ceph::real_clock::from_time_t((time_t)ct);
+ }
+ if (struct_v >= 7)
+ decode(placement_rule, bl);
+ if (struct_v >= 8)
+ decode(has_instance_obj, bl);
+ if (struct_v >= 9)
+ decode(quota, bl);
+ if (struct_v >= 10)
+ decode(num_shards, bl);
+ if (struct_v >= 11)
+ decode(bucket_index_shard_hash_type, bl);
+ if (struct_v >= 12)
+ decode(requester_pays, bl);
+ if (struct_v >= 13)
+ decode(owner.tenant, bl);
+ if (struct_v >= 14) {
+ decode(has_website, bl);
+ if (has_website) {
+ decode(website_conf, bl);
+ } else {
+ website_conf = RGWBucketWebsiteConf();
+ }
+ }
+ if (struct_v >= 15) {
+ uint32_t it;
+ decode(it, bl);
+ index_type = (RGWBucketIndexType)it;
+ } else {
+ index_type = RGWBIType_Normal;
+ }
+ // reset first so that streams older than v16 end up with swift
+ // versioning disabled
+ swift_versioning = false;
+ swift_ver_location.clear();
+ if (struct_v >= 16) {
+ decode(swift_versioning, bl);
+ if (swift_versioning) {
+ decode(swift_ver_location, bl);
+ }
+ }
+ if (struct_v >= 17) {
+ decode(creation_time, bl);
+ }
+ if (struct_v >= 18) {
+ decode(mdsearch_config, bl);
+ }
+ if (struct_v >= 19) {
+ decode(reshard_status, bl);
+ decode(new_bucket_instance_id, bl);
+ }
+ // relies on flags having been decoded above
+ if (struct_v >= 20 && obj_lock_enabled()) {
+ decode(obj_lock, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWBucketInfo*>& o);
+
+ void decode_json(JSONObj *obj);
+
+ // Flag accessors; each one tests bits of `flags`.
+ bool versioned() const { return (flags & BUCKET_VERSIONED) != 0; }
+ int versioning_status() const { return flags & (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED | BUCKET_MFA_ENABLED); }
+ // true only when VERSIONED is set and VERSIONS_SUSPENDED is not
+ bool versioning_enabled() const { return (versioning_status() & (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED)) == BUCKET_VERSIONED; }
+ bool mfa_enabled() const { return (versioning_status() & BUCKET_MFA_ENABLED) != 0; }
+ // note the inverted sense: true while BUCKET_DATASYNC_DISABLED is NOT set
+ bool datasync_flag_enabled() const { return (flags & BUCKET_DATASYNC_DISABLED) == 0; }
+ bool obj_lock_enabled() const { return (flags & BUCKET_OBJ_LOCK_ENABLED) != 0; }
+
+ bool has_swift_versioning() const {
+ /* A bucket may be versioned through one mechanism only. */
+ return swift_versioning && !versioned();
+ }
+
+ RGWBucketInfo() : flags(0), has_instance_obj(false), num_shards(0), bucket_index_shard_hash_type(MOD), requester_pays(false),
+ has_website(false), swift_versioning(false), reshard_status(0) {}
+};
+WRITE_CLASS_ENCODER(RGWBucketInfo)
+
+// Bucket entry point record: the bucket it refers to, the owner, creation
+// time and a `linked` flag. Streams older than struct version 8 hold a full
+// RGWBucketInfo instead, which decode() keeps in old_bucket_info.
+struct RGWBucketEntryPoint
+{
+ rgw_bucket bucket;
+ rgw_user owner;
+ ceph::real_time creation_time;
+ bool linked;
+
+ // set by decode(): true when the stream was a pre-v8 entry and
+ // old_bucket_info was filled in instead of the fields above
+ bool has_bucket_info;
+ RGWBucketInfo old_bucket_info;
+
+ RGWBucketEntryPoint() : linked(false), has_bucket_info(false) {}
+
+ // Serialize as struct version 10. owner.id and a whole-second ctime are
+ // written first, followed by the full owner and the full creation_time;
+ // decode() reads the latter two only for struct_v >= 9 / >= 10.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(10, 8, bl);
+ encode(bucket, bl);
+ encode(owner.id, bl);
+ encode(linked, bl);
+ uint64_t ctime = (uint64_t)real_clock::to_time_t(creation_time);
+ encode(ctime, bl);
+ encode(owner, bl);
+ encode(creation_time, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ // keep a copy of the starting position so a legacy entry can be
+ // re-decoded from the beginning as an RGWBucketInfo
+ auto orig_iter = bl;
+ DECODE_START_LEGACY_COMPAT_LEN_32(10, 4, 4, bl);
+ if (struct_v < 8) {
+ /* ouch, old entry, contains the bucket info itself */
+ old_bucket_info.decode(orig_iter);
+ has_bucket_info = true;
+ // NOTE(review): this returns without DECODE_FINISH and only the copied
+ // iterator was advanced over the payload; confirm callers do not keep
+ // reading from `bl` afterwards.
+ return;
+ }
+ has_bucket_info = false;
+ decode(bucket, bl);
+ decode(owner.id, bl);
+ decode(linked, bl);
+ // always consume the legacy seconds value; it is only used when the full
+ // creation_time (struct_v >= 10) is not in the stream
+ uint64_t ctime;
+ decode(ctime, bl);
+ if (struct_v < 10) {
+ creation_time = real_clock::from_time_t((time_t)ctime);
+ }
+ if (struct_v >= 9) {
+ decode(owner, bl);
+ }
+ if (struct_v >= 10) {
+ decode(creation_time, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWBucketEntryPoint)
+
+/// Usage counters for one object category; everything starts at zero and the
+/// category starts as RGWObjCategory::None.
+struct RGWStorageStats
+{
+  RGWObjCategory category{RGWObjCategory::None};
+  uint64_t size{0};
+  uint64_t size_rounded{0};
+  uint64_t size_utilized{0}; //< size after compression, encryption
+  uint64_t num_objects{0};
+
+  RGWStorageStats() {}
+
+  void dump(Formatter *f) const;
+};
+
+class RGWEnv;
+
+/* Namespaced forward declarations. */
+namespace rgw {
+ namespace auth {
+ namespace s3 {
+ class AWSBrowserUploadAbstractor;
+ }
+ class Completer;
+ }
+ namespace io {
+ class BasicClient;
+ }
+}
+
+using meta_map_t = boost::container::flat_map <std::string, std::string>;
+
+// Per-request information: a pointer to the request environment, the parsed
+// HTTP arguments, a metadata map and a set of host / URI strings.
+// NOTE(review): how each string is populated is decided by the constructor,
+// rebuild_from() and init_meta_info(), all defined elsewhere; the member names
+// are the only documentation visible here.
+struct req_info {
+ // not owned: the struct holds only a const pointer
+ const RGWEnv *env;
+ RGWHTTPArgs args;
+ meta_map_t x_meta_map;
+
+ string host;
+ const char *method;
+ string script_uri;
+ string request_uri;
+ string request_uri_aws4;
+ string effective_uri;
+ string request_params;
+ string domain;
+ string storage_class;
+
+ req_info(CephContext *cct, const RGWEnv *env);
+ void rebuild_from(req_info& src);
+ // presumably reports through *found_bad_meta whether invalid metadata was
+ // seen (verify against the definition)
+ void init_meta_info(bool *found_bad_meta);
+};
+
+typedef cls_rgw_obj_key rgw_obj_index_key;
+
+/**
+ * Key of an object: user-visible name, version instance and namespace (ns).
+ *
+ * Mangled forms produced and parsed here:
+ *  - no namespace (index key) / no namespace and no encoded instance (oid):
+ *    the name itself, with one extra leading '_' when it starts with '_';
+ *  - otherwise "_<ns>_<name>" for index keys and "_<ns>[:<instance>]_<name>"
+ *    for oids.
+ *
+ * operator== and operator< look at name and instance only, not at ns.
+ */
+struct rgw_obj_key {
+  string name;
+  string instance;
+  string ns;
+
+  rgw_obj_key() {}
+  // cppcheck-suppress noExplicitConstructor
+  rgw_obj_key(const string& n) : name(n) {}
+  rgw_obj_key(const string& n, const string& i) : name(n), instance(i) {}
+  rgw_obj_key(const string& n, const string& i, const string& _ns) : name(n), instance(i), ns(_ns) {}
+
+  /// Build from a bucket index key: its name is split into ns + name and the
+  /// instance is taken over unchanged.
+  rgw_obj_key(const rgw_obj_index_key& k) {
+    parse_index_key(k.name, &name, &ns);
+    instance = k.instance;
+  }
+
+// Since bucket index entries are stored in sequence, and the elements
+// with namespaces can be between those without, we need a way to skip
+// past namespaced elements; this returns a marker that will do so.
+//
+// Consider the following sequence: ASP, _BAT_cat, __DOG, _eel_FOX,
+// goat; the 2nd and 4th entries are namespaced, but the 3rd is not,
+// it's just an entry that begins with an underscore, which will be
+// quoted with another underscore putting it between two potential
+// namespaced blocks
+  static const rgw_obj_index_key& after_namespace_marker(const std::string& after) {
+    // this is just before "__", so will allow finding non-namespaced
+    // entries that begin with an underscore (and therefore are entered
+    // as starting with "__").
+    static const rgw_obj_index_key result1(std::string("_^") + char(255));
+
+    // this is just before entries that do not begin with an
+    // underscore and will allow skipping past the second namespace
+    // block
+    static const rgw_obj_index_key result2(std::string("_") + char(255));
+
+    if (after < result1.name) {
+      return result1;
+    } else {
+      return result2;
+    }
+  }
+
+  /// Split a bucket index key name into *name and *ns (see the struct
+  /// comment for the mangled forms). A key that starts with a single '_' but
+  /// has no second '_' is returned unchanged as the name.
+  static void parse_index_key(const string& key, string *name, string *ns) {
+    if (key[0] != '_') {
+      *name = key;
+      ns->clear();
+      return;
+    }
+    if (key[1] == '_') {
+      // escaped leading underscore: drop the extra one
+      *name = key.substr(1);
+      ns->clear();
+      return;
+    }
+    ssize_t pos = key.find('_', 1);
+    if (pos < 0) {
+      /* shouldn't happen, just use key */
+      *name = key;
+      ns->clear();
+      return;
+    }
+
+    *name = key.substr(pos + 1);
+    *ns = key.substr(1, pos -1);
+  }
+
+  void set(const string& n) {
+    name = n;
+    instance.clear();
+    ns.clear();
+  }
+
+  void set(const string& n, const string& i) {
+    name = n;
+    instance = i;
+    ns.clear();
+  }
+
+  void set(const string& n, const string& i, const string& _ns) {
+    name = n;
+    instance = i;
+    ns = _ns;
+  }
+
+  /// Set from a bucket index key using parse_raw_oid(); returns false (and
+  /// leaves `instance` as parse_raw_oid() left it) when the name is malformed.
+  bool set(const rgw_obj_index_key& index_key) {
+    if (!parse_raw_oid(index_key.name, this)) {
+      return false;
+    }
+    instance = index_key.instance;
+    return true;
+  }
+
+  void set_instance(const string& i) {
+    instance = i;
+  }
+
+  const string& get_instance() const {
+    return instance;
+  }
+
+  void set_ns(const std::string& _ns) {
+    ns = _ns;
+  }
+
+  const std::string& get_ns() const {
+    return ns;
+  }
+
+  /// Name under which this key is stored in the bucket index.
+  string get_index_key_name() const {
+    if (ns.empty()) {
+      if (name.size() < 1 || name[0] != '_') {
+        return name;
+      }
+      // escape a leading underscore with another one
+      return string("_") + name;
+    }
+
+    // "_<ns>_<name>", built with std::string rather than a variable-length
+    // stack array + snprintf: no compiler extension, no stack use that
+    // grows with ns, no truncation at an embedded NUL.
+    string key_name;
+    key_name.reserve(ns.size() + name.size() + 2);
+    key_name.append("_");
+    key_name.append(ns);
+    key_name.append("_");
+    key_name.append(name);
+    return key_name;
+  }
+
+  void get_index_key(rgw_obj_index_key *key) const {
+    key->name = get_index_key_name();
+    key->instance = instance;
+  }
+
+  string get_loc() const {
+    /*
+     * For backward compatibility. Older versions used to have object locator on all objects,
+     * however, the name was the effective object locator. This had the same effect as not
+     * having object locator at all for most objects but the ones that started with underscore as
+     * these were escaped.
+     */
+    if (name[0] == '_' && ns.empty()) {
+      return name;
+    }
+
+    return string();
+  }
+
+  bool empty() const {
+    return name.empty();
+  }
+
+  bool have_null_instance() const {
+    return instance == "null";
+  }
+
+  bool have_instance() const {
+    return !instance.empty();
+  }
+
+  /// The instance goes into the oid only when it is set and is not "null".
+  bool need_to_encode_instance() const {
+    return have_instance() && !have_null_instance();
+  }
+
+  /// Mangled oid for this key (see the struct comment for the format).
+  string get_oid() const {
+    if (ns.empty() && !need_to_encode_instance()) {
+      if (name.size() < 1 || name[0] != '_') {
+        return name;
+      }
+      return string("_") + name;
+    }
+
+    string oid = "_";
+    oid.append(ns);
+    if (need_to_encode_instance()) {
+      oid.append(string(":") + instance);
+    }
+    oid.append("_");
+    oid.append(name);
+    return oid;
+  }
+
+  bool operator==(const rgw_obj_key& k) const {
+    return (name.compare(k.name) == 0) &&
+           (instance.compare(k.instance) == 0);
+  }
+
+  bool operator<(const rgw_obj_key& k) const {
+    int r = name.compare(k.name);
+    if (r == 0) {
+      r = instance.compare(k.instance);
+    }
+    return (r < 0);
+  }
+
+  bool operator<=(const rgw_obj_key& k) const {
+    return !(k < *this);
+  }
+
+  /// Split "<ns>:<instance>" in place: ns keeps the part before the first
+  /// ':' and instance receives the rest (cleared when there is no ':').
+  static void parse_ns_field(string& ns, string& instance) {
+    int pos = ns.find(':');
+    if (pos >= 0) {
+      instance = ns.substr(pos + 1);
+      ns = ns.substr(0, pos);
+    } else {
+      instance.clear();
+    }
+  }
+
+  // takes an oid and parses out the namespace (ns), name, and
+  // instance
+  static bool parse_raw_oid(const string& oid, rgw_obj_key *key) {
+    key->instance.clear();
+    key->ns.clear();
+    if (oid[0] != '_') {
+      key->name = oid;
+      return true;
+    }
+
+    if (oid.size() >= 2 && oid[1] == '_') {
+      key->name = oid.substr(1);
+      return true;
+    }
+
+    if (oid.size() < 3) // for namespace, min size would be 3: _x_
+      return false;
+
+    size_t pos = oid.find('_', 2); // oid must match ^_[^_].+$
+    if (pos == string::npos)
+      return false;
+
+    key->ns = oid.substr(1, pos - 1);
+    parse_ns_field(key->ns, key->instance);
+
+    key->name = oid.substr(pos + 1);
+    return true;
+  }
+
+  /**
+   * Translate a namespace-mangled object name to the user-facing name
+   * existing in the given namespace.
+   *
+   * If the object is part of the given namespace, it returns true
+   * and cuts down the name to the unmangled version. If it is not
+   * part of the given namespace, it returns false.
+   */
+  static bool oid_to_key_in_ns(const string& oid, rgw_obj_key *key, const string& ns) {
+    bool ret = parse_raw_oid(oid, key);
+    if (!ret) {
+      return ret;
+    }
+
+    return (ns == key->ns);
+  }
+
+  /**
+   * Given a mangled object name and an empty namespace string, this
+   * function extracts the namespace into the string and sets the object
+   * name to be the unmangled version.
+   *
+   * It returns true after successfully doing so, or
+   * false if it fails.
+   */
+  static bool strip_namespace_from_name(string& name, string& ns, string& instance) {
+    ns.clear();
+    instance.clear();
+    if (name[0] != '_') {
+      return true;
+    }
+
+    size_t pos = name.find('_', 1);
+    if (pos == string::npos) {
+      return false;
+    }
+
+    if (name[1] == '_') {
+      name = name.substr(1);
+      return true;
+    }
+
+    // a '.' ahead of the closing '_' is rejected
+    size_t period_pos = name.find('.');
+    if (period_pos < pos) {
+      return false;
+    }
+
+    ns = name.substr(1, pos-1);
+    name = name.substr(pos+1, string::npos);
+
+    parse_ns_field(ns, instance);
+    return true;
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(2, 1, bl);
+    encode(name, bl);
+    encode(instance, bl);
+    encode(ns, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(2, bl);
+    decode(name, bl);
+    decode(instance, bl);
+    if (struct_v >= 2) {
+      // ns is only read for struct_v >= 2
+      decode(ns, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+
+  /// Human-readable form: "<name>" or "<name>[<instance>]".
+  string to_str() const {
+    if (instance.empty()) {
+      return name;
+    }
+    // plain concatenation instead of a variable-length array + snprintf
+    return name + "[" + instance + "]";
+  }
+};
+WRITE_CLASS_ENCODER(rgw_obj_key)
+
+/// Stream an object key using its to_str() form.
+inline ostream& operator<<(ostream& out, const rgw_obj_key &o) {
+  out << o.to_str();
+  return out;
+}
+
+/// Stream an index key as "<name>", followed by "[<instance>]" when an
+/// instance is set.
+inline ostream& operator<<(ostream& out, const rgw_obj_index_key &o) {
+  out << o.name;
+  if (!o.instance.empty()) {
+    out << "[" << o.instance << "]";
+  }
+  return out;
+}
+
+/// Bucket strings captured while a request is being initialised.
+struct req_init_state {
+  /* Keeps [[tenant]:]bucket until we parse the token. */
+  std::string url_bucket;
+  std::string src_bucket;
+};
+
+#include "rgw_auth.h"
+
+class RGWObjectCtx;
+class RGWSysObjectCtx;
+
+/** Store all the state necessary to complete and respond to an HTTP request.
+ *
+ * Most members carry an in-class initializer. Those without one
+ * (enable_ops_log, enable_usage_log, defer_to_bucket_acls, user, id) are
+ * presumably set by the constructor, which is defined elsewhere. TODO confirm.
+ */
+struct req_state : DoutPrefixProvider {
+ CephContext *cct;
+ rgw::io::BasicClient *cio{nullptr};
+ http_op op{OP_UNKNOWN};
+ RGWOpType op_type{};
+ bool content_started{false};
+ int format{0};
+ ceph::Formatter *formatter{nullptr};
+ string decoded_uri;
+ string relative_uri;
+ const char *length{nullptr};
+ int64_t content_length{0};
+ map<string, string> generic_attrs;
+ rgw_err err;
+ bool expect_cont{false};
+ uint64_t obj_size{0};
+ bool enable_ops_log;
+ bool enable_usage_log;
+ uint8_t defer_to_bucket_acls;
+ uint32_t perm_mask{0};
+
+ /* Set once when url_bucket is parsed and not violated thereafter. */
+ string account_name;
+
+ string bucket_tenant;
+ string bucket_name;
+
+ rgw_bucket bucket;
+ rgw_obj_key object;
+ string src_tenant_name;
+ string src_bucket_name;
+ rgw_obj_key src_object;
+ ACLOwner bucket_owner;
+ ACLOwner owner;
+
+ string zonegroup_name;
+ string zonegroup_endpoint;
+ string bucket_instance_id;
+ int bucket_instance_shard_id{-1};
+ string redirect_zone_endpoint;
+
+ string redirect;
+
+ RGWBucketInfo bucket_info;
+ real_time bucket_mtime;
+ std::map<std::string, ceph::bufferlist> bucket_attrs;
+ bool bucket_exists{false};
+ rgw_placement_rule dest_placement;
+
+ bool has_bad_meta{false};
+
+ RGWUserInfo *user;
+
+ struct {
+ /* TODO(rzarzynski): switch out to the static_ptr for both members. */
+
+ /* Object having the knowledge about an authenticated identity and allowing
+ * to apply it during the authorization phase (verify_permission() methods
+ * of a given RGWOp). Thus, it binds the authentication and authorization
+ * steps through a well-defined interface. For more details, see rgw_auth.h. */
+ std::unique_ptr<rgw::auth::Identity> identity;
+
+ std::shared_ptr<rgw::auth::Completer> completer;
+
+ /* A container for credentials of the S3's browser upload. It's necessary
+ * because: 1) the ::authenticate() method of auth engines and strategies
+ * take req_state only; 2) auth strategies live much longer than RGWOps -
+ * there is no way to pass additional data dependencies through ctors. */
+ class {
+ /* Writer. */
+ friend class RGWPostObj_ObjStore_S3;
+ /* Reader. */
+ friend class rgw::auth::s3::AWSBrowserUploadAbstractor;
+
+ /* All members are private to this unnamed class; only the two friends
+ * declared above can access them. */
+ std::string access_key;
+ std::string signature;
+ std::string x_amz_algorithm;
+ std::string x_amz_credential;
+ std::string x_amz_date;
+ std::string x_amz_security_token;
+ ceph::bufferlist encoded_policy;
+ } s3_postobj_creds;
+ } auth;
+
+ std::unique_ptr<RGWAccessControlPolicy> user_acl;
+ std::unique_ptr<RGWAccessControlPolicy> bucket_acl;
+ std::unique_ptr<RGWAccessControlPolicy> object_acl;
+
+ rgw::IAM::Environment env;
+ boost::optional<rgw::IAM::Policy> iam_policy;
+ vector<rgw::IAM::Policy> iam_user_policies;
+
+ /* Is the request made by a user marked as a system one?
+ * Being system user means we also have the admin status. */
+ bool system_request{false};
+
+ string canned_acl;
+ bool has_acl_header{false};
+ bool local_source{false}; /* source is local */
+
+ int prot_flags{0};
+
+ /* Content-Disposition override for TempURL of Swift API. */
+ struct {
+ string override;
+ string fallback;
+ } content_disp;
+
+ string host_id;
+
+ req_info info;
+ req_init_state init_state;
+
+ using Clock = ceph::coarse_real_clock;
+ Clock::time_point time;
+
+ /* Clock time elapsed since the `time` member. */
+ Clock::duration time_elapsed() const { return Clock::now() - time; }
+
+ RGWObjectCtx *obj_ctx{nullptr};
+ RGWSysObjectCtx *sysobj_ctx{nullptr};
+ string dialect;
+ string req_id;
+ string trans_id;
+ uint64_t id;
+
+ RGWObjTags tagset;
+
+ bool mfa_verified{false};
+
+ /// optional coroutine context
+ optional_yield yield{null_yield};
+
+ req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u, uint64_t id);
+ ~req_state();
+
+ /* True when `err` reports an error. */
+ bool is_err() const { return err.is_err(); }
+
+ // implements DoutPrefixProvider
+ std::ostream& gen_prefix(std::ostream& out) const override;
+ CephContext* get_cct() const override { return cct; }
+ unsigned get_subsys() const override { return ceph_subsys_rgw; }
+};
+
+void set_req_state_err(struct req_state*, int);
+void set_req_state_err(struct req_state*, int, const string&);
+void set_req_state_err(struct rgw_err&, int, const int);
+void dump(struct req_state*);
+
+/** Store basic data on bucket: identity, size, rounded size, creation time,
+ * object count and placement rule. */
+struct RGWBucketEnt {
+ rgw_bucket bucket;
+ size_t size;
+ size_t size_rounded;
+ ceph::real_time creation_time;
+ uint64_t count;
+
+ /* The placement_rule is necessary to calculate per-storage-policy statistics
+ * of the Swift API. Although the info is available in RGWBucketInfo, we need
+ * to duplicate it here to not affect the performance of buckets listing. */
+ rgw_placement_rule placement_rule;
+
+ RGWBucketEnt()
+ : size(0),
+ size_rounded(0),
+ count(0) {
+ }
+ RGWBucketEnt(const RGWBucketEnt&) = default;
+ RGWBucketEnt(RGWBucketEnt&&) = default;
+ /* Build from a user and a cls_user_bucket_entry. placement_rule is not
+ * taken from the entry and stays default-constructed. */
+ explicit RGWBucketEnt(const rgw_user& u, cls_user_bucket_entry&& e)
+ : bucket(u, std::move(e.bucket)),
+ size(e.size),
+ size_rounded(e.size_rounded),
+ creation_time(e.creation_time),
+ count(e.count) {
+ }
+
+ RGWBucketEnt& operator=(const RGWBucketEnt&) = default;
+
+ /* Copy everything except placement_rule into *b. */
+ void convert(cls_user_bucket_entry *b) const {
+ bucket.convert(&b->bucket);
+ b->size = size;
+ b->size_rounded = size_rounded;
+ b->creation_time = creation_time;
+ b->count = count;
+ }
+
+ /* Serialize as struct version 7. An empty string and a 32-bit seconds
+ * value are still written at the front; decode() ignores the string and
+ * only uses the seconds value for struct_v < 6. */
+ void encode(bufferlist& bl) const {
+ ENCODE_START(7, 5, bl);
+ uint64_t s = size;
+ __u32 mt = ceph::real_clock::to_time_t(creation_time);
+ string empty_str; // originally had the bucket name here, but we encode bucket later
+ encode(empty_str, bl);
+ encode(s, bl);
+ encode(mt, bl);
+ encode(count, bl);
+ encode(bucket, bl);
+ s = size_rounded;
+ encode(s, bl);
+ encode(creation_time, bl);
+ encode(placement_rule, bl);
+ ENCODE_FINISH(bl);
+ }
+ /* Deserialize any supported struct version; each field is read only when
+ * struct_v is recent enough to contain it. */
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
+ __u32 mt;
+ uint64_t s;
+ string empty_str; // backward compatibility
+ decode(empty_str, bl);
+ decode(s, bl);
+ decode(mt, bl);
+ size = s;
+ if (struct_v < 6) {
+ creation_time = ceph::real_clock::from_time_t(mt);
+ }
+ if (struct_v >= 2)
+ decode(count, bl);
+ if (struct_v >= 3)
+ decode(bucket, bl);
+ // when the rounded size is not in the stream (struct_v < 4), `s` still
+ // holds the plain size, so size_rounded ends up equal to size
+ if (struct_v >= 4)
+ decode(s, bl);
+ size_rounded = s;
+ if (struct_v >= 6)
+ decode(creation_time, bl);
+ if (struct_v >= 7)
+ decode(placement_rule, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWBucketEnt*>& o);
+};
+WRITE_CLASS_ENCODER(RGWBucketEnt)
+
+/**
+ * An object addressed by the bucket it lives in and its key.
+ *
+ * Only `bucket` and `key` are serialized; `in_extra_data` and
+ * `index_hash_source` are not part of encode()/decode().
+ */
+struct rgw_obj {
+  rgw_bucket bucket;
+  rgw_obj_key key;
+
+  bool in_extra_data{false}; /* in-memory only member, does not serialize */
+
+  // Represents the hash index source for this object once it is set (non-empty)
+  std::string index_hash_source;
+
+  rgw_obj() {}
+  rgw_obj(const rgw_bucket& b, const std::string& name) : bucket(b), key(name) {}
+  rgw_obj(const rgw_bucket& b, const rgw_obj_key& k) : bucket(b), key(k) {}
+  rgw_obj(const rgw_bucket& b, const rgw_obj_index_key& k) : bucket(b), key(k) {}
+
+  /// Point at `name` in bucket `b`; instance and namespace are cleared.
+  void init(const rgw_bucket& b, const std::string& name) {
+    bucket = b;
+    key.set(name);
+  }
+  /// Point at `name` with instance `i` and namespace `n` in bucket `b`.
+  void init(const rgw_bucket& b, const std::string& name, const string& i, const string& n) {
+    bucket = b;
+    key.set(name, i, n);
+  }
+  /// Point at `name` in namespace `n` of bucket `b`; the instance is cleared.
+  void init_ns(const rgw_bucket& b, const std::string& name, const string& n) {
+    bucket = b;
+    key.name = name;
+    key.instance.clear();
+    key.ns = n;
+  }
+
+  bool empty() const {
+    return key.empty();
+  }
+
+  void set_key(const rgw_obj_key& k) {
+    key = k;
+  }
+
+  string get_oid() const {
+    return key.get_oid();
+  }
+
+  /// String to hash for index placement: index_hash_source when it is set,
+  /// otherwise the key name.
+  const string& get_hash_object() const {
+    return index_hash_source.empty() ? key.name : index_hash_source;
+  }
+
+  void set_in_extra_data(bool val) {
+    in_extra_data = val;
+  }
+
+  bool is_in_extra_data() const {
+    return in_extra_data;
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(6, 6, bl);
+    encode(bucket, bl);
+    encode(key.ns, bl);
+    encode(key.name, bl);
+    encode(key.instance, bl);
+//    encode(placement_id, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  /// Deserialize any supported struct version. Streams older than v6 use a
+  /// different layout with a mangled name that is unmangled here; the
+  /// statement order follows that layout.
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, bl);
+    if (struct_v < 6) {
+      string s;
+      decode(bucket.name, bl); /* bucket.name */
+      decode(s, bl); /* loc */
+      decode(key.ns, bl);
+      decode(key.name, bl);
+      if (struct_v >= 2)
+        decode(bucket, bl);
+      if (struct_v >= 4)
+        decode(key.instance, bl);
+      if (key.ns.empty() && key.instance.empty()) {
+        // plain object: only a leading escape underscore has to be dropped
+        if (key.name[0] == '_') {
+          key.name = key.name.substr(1);
+        }
+      } else {
+        if (struct_v >= 5) {
+          // v5 carries the unmangled name as an extra field
+          decode(key.name, bl);
+        } else {
+          // older: cut the name out after the first '_' past position 0
+          ssize_t pos = key.name.find('_', 1);
+          if (pos < 0) {
+            throw buffer::error();
+          }
+          key.name = key.name.substr(pos + 1);
+        }
+      }
+    } else {
+      decode(bucket, bl);
+      decode(key.ns, bl);
+      decode(key.name, bl);
+      decode(key.instance, bl);
+//      decode(placement_id, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<rgw_obj*>& o);
+
+  bool operator==(const rgw_obj& o) const {
+    return (key == o.key) &&
+           (bucket == o.bucket);
+  }
+
+  /// Order by key name, then bucket id, then namespace, then instance.
+  bool operator<(const rgw_obj& o) const {
+    int r = key.name.compare(o.key.name);
+    if (r == 0) {
+      r = bucket.bucket_id.compare(o.bucket.bucket_id); /* not comparing bucket.name, if bucket_id is equal so will be bucket.name */
+      if (r == 0) {
+        r = key.ns.compare(o.key.ns);
+        if (r == 0) {
+          r = key.instance.compare(o.key.instance);
+        }
+      }
+    }
+
+    return (r < 0);
+  }
+
+  /// Explicit pool holding this object: the data-extra pool when the object
+  /// is flagged in_extra_data and such a pool is configured, otherwise the
+  /// data pool. Const-qualified because it only reads state, so it can also
+  /// be called through a const rgw_obj.
+  const rgw_pool& get_explicit_data_pool() const {
+    if (!in_extra_data || bucket.explicit_placement.data_extra_pool.empty()) {
+      return bucket.explicit_placement.data_pool;
+    }
+    return bucket.explicit_placement.data_extra_pool;
+  }
+};
+WRITE_CLASS_ENCODER(rgw_obj)
+
+/// Cache entry handle: a locator string and a generation counter that
+/// starts at 0.
+struct rgw_cache_entry_info {
+  string cache_locator;
+  uint64_t gen{0};
+
+  rgw_cache_entry_info() {}
+};
+
+/// Stream an object as "<bucket name>:<oid>".
+inline ostream& operator<<(ostream& out, const rgw_obj &o) {
+  out << o.bucket.name << ":" << o.get_oid();
+  return out;
+}
+
+/**
+ * Write the lowercase hexadecimal form of buf[0..len) into str.
+ *
+ * @param buf  input bytes
+ * @param len  number of input bytes
+ * @param str  output; must have room for 2 * len + 1 chars. It is always
+ *             NUL-terminated, including when len is 0.
+ *
+ * Uses a digit table instead of one unbounded sprintf() call per byte; the
+ * output is the same as with "%02x".
+ */
+static inline void buf_to_hex(const unsigned char* const buf,
+                              const size_t len,
+                              char* const str)
+{
+  static const char hex_digits[] = "0123456789abcdef";
+  for (size_t i = 0; i < len; i++) {
+    str[i * 2] = hex_digits[buf[i] >> 4];
+    str[i * 2 + 1] = hex_digits[buf[i] & 0x0f];
+  }
+  str[len * 2] = '\0';
+}
+
+/// Hex-encode a fixed-size byte array into a NUL-terminated char array of
+/// 2 * N + 1 elements by delegating to the pointer-based buf_to_hex().
+template<size_t N> static inline std::array<char, N * 2 + 1>
+buf_to_hex(const std::array<unsigned char, N>& buf)
+{
+  static_assert(N > 0, "The input array must be at least one element long");
+
+  std::array<char, N * 2 + 1> rendered;
+  buf_to_hex(buf.data(), buf.size(), rendered.data());
+  return rendered;
+}
+
+/**
+ * Value of one hexadecimal digit.
+ *
+ * @param c  character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
+ * @return   0..15, or -EINVAL when c is not a hexadecimal digit
+ *
+ * The ranges are compared directly instead of going through toupper():
+ * passing a negative char (any byte >= 0x80 where char is signed) to
+ * toupper() is undefined behaviour.
+ */
+static inline int hexdigit(char c)
+{
+  if (c >= '0' && c <= '9')
+    return (c - '0');
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 0xa;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 0xa;
+  return -EINVAL;
+}
+
+/**
+ * Decode a NUL-terminated hex string into at most len bytes of buf.
+ *
+ * @return the number of bytes written; -EINVAL when the input needs more
+ *         than len bytes or has an odd number of digits; the (negative)
+ *         hexdigit() result when a character is not a hex digit.
+ */
+static inline int hex_to_buf(const char *hex, char *buf, int len)
+{
+  int written = 0;
+  for (const char *cur = hex; *cur; cur += 2) {
+    if (written >= len)
+      return -EINVAL;
+    buf[written] = 0;
+
+    const int hi = hexdigit(cur[0]);
+    if (hi < 0)
+      return hi;
+    buf[written] = hi << 4;
+
+    // a lone trailing digit is an error
+    if (!cur[1])
+      return -EINVAL;
+    const int lo = hexdigit(cur[1]);
+    if (lo < 0)
+      return lo;
+    buf[written] += lo;
+
+    ++written;
+  }
+  return written;
+}
+
+/**
+ * Interpret a string as a boolean.
+ *
+ * @return def_val when s is null; otherwise 1 when s equals "true", "on",
+ *         "yes" or "1" (case-insensitively), and 0 for anything else.
+ */
+static inline int rgw_str_to_bool(const char *s, int def_val)
+{
+  if (s == nullptr) {
+    return def_val;
+  }
+
+  static const char* const truthy[] = { "true", "on", "yes", "1" };
+  for (const char* word : truthy) {
+    if (strcasecmp(s, word) == 0) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/**
+ * Set dest to src + "_" + a random alphanumeric string.
+ *
+ * @param cct   context handed to gen_rand_alphanumeric()
+ * @param src   prefix copied into dest
+ * @param dest  output string (overwritten)
+ * @param len   size passed to gen_rand_alphanumeric(); negative values are
+ *              treated as 0
+ *
+ * The scratch buffer is a zero-filled std::string rather than a
+ * variable-length stack array: VLAs are a compiler extension in C++, a
+ * negative length is undefined for them, and the zero fill guarantees the
+ * buffer is NUL-terminated whatever the generator writes.
+ * NOTE(review): assumes gen_rand_alphanumeric() writes at most `len` bytes
+ * into the buffer, as the original `len + 1` sizing did. TODO confirm.
+ */
+static inline void append_rand_alpha(CephContext *cct, const string& src, string& dest, int len)
+{
+  dest = src;
+  const int rand_len = (len > 0) ? len : 0;
+  string buf(static_cast<size_t>(rand_len) + 1, '\0');
+  gen_rand_alphanumeric(cct, &buf[0], rand_len);
+  dest.append("_");
+  dest.append(buf.c_str());
+}
+
+/// Stable "rgw.*" label for an object category; "unknown" for any value
+/// outside the handled enumerators.
+static inline const char *rgw_obj_category_name(RGWObjCategory category)
+{
+  const char *label = "unknown";
+  switch (category) {
+  case RGWObjCategory::None:
+    label = "rgw.none";
+    break;
+  case RGWObjCategory::Main:
+    label = "rgw.main";
+    break;
+  case RGWObjCategory::Shadow:
+    label = "rgw.shadow";
+    break;
+  case RGWObjCategory::MultiMeta:
+    label = "rgw.multimeta";
+    break;
+  }
+  return label;
+}
+
+/// Number of KiB needed to hold `bytes`, rounded up.
+static inline uint64_t rgw_rounded_kb(uint64_t bytes)
+{
+  const uint64_t padded = bytes + 1023;
+  return padded >> 10;
+}
+
+/// Round `bytes` up to the next multiple of 4096.
+static inline uint64_t rgw_rounded_objsize(uint64_t bytes)
+{
+  const uint64_t low_bits = 4095;
+  return (bytes + low_bits) & ~low_bits;
+}
+
+/// Size in KiB after rounding `bytes` up to the next multiple of 4096.
+static inline uint64_t rgw_rounded_objsize_kb(uint64_t bytes)
+{
+  const uint64_t low_bits = 4095;
+  const uint64_t rounded = (bytes + low_bits) & ~low_bits;
+  return rounded / 1024;
+}
+
+/* implement combining step, S3 header canonicalization; k is a
+ * valid header and in lc form.
+ *
+ * A new key is stored as-is. For an existing key the stored value is
+ * right-trimmed and the new value is appended after a ",". */
+static inline void add_amz_meta_header(
+  meta_map_t& x_meta_map,
+  const std::string& k,
+  const std::string& v)
+{
+  auto found = x_meta_map.find(k);
+  if (found == x_meta_map.end()) {
+    x_meta_map[k] = v;
+    return;
+  }
+
+  // build the combined value on a copy, then store it back
+  std::string combined = found->second;
+  boost::algorithm::trim_right(combined);
+  combined.append(",");
+  combined.append(v);
+  x_meta_map[k] = combined;
+} /* add_amz_meta_header */
+
+/* String, CSV and key/value parsing helpers.  Only the prototypes live in
+ * this header; the definitions are not part of it. */
+extern string rgw_string_unquote(const string& s);
+extern void parse_csv_string(const string& ival, vector<string>& ovals);
+/* Overloads taking std::string report through an int and the key/val
+ * out-parameters; note that in_str is taken by non-const reference. */
+extern int parse_key_value(string& in_str, string& key, string& val);
+extern int parse_key_value(string& in_str, const char *delim, string& key, string& val);
+
+/* string_view overloads: the pair is returned inside a boost::optional.
+ * NOTE(review): the returned views presumably point into in_str, so in_str
+ * would have to outlive them -- confirm against the definition. */
+extern boost::optional<std::pair<boost::string_view, boost::string_view>>
+parse_key_value(const boost::string_view& in_str,
+ const boost::string_view& delim);
+extern boost::optional<std::pair<boost::string_view, boost::string_view>>
+parse_key_value(const boost::string_view& in_str);
+
+
+/** time parsing */
+extern int parse_time(const char *time_str, real_time *time);
+extern bool parse_rfc2616(const char *s, struct tm *t);
+/* pns is optional (defaults to NULL); extended_format defaults to true */
+extern bool parse_iso8601(const char *s, struct tm *t, uint32_t *pns = NULL, bool extended_format = true);
+/* trimming helpers, available for both std::string and boost::string_view */
+extern string rgw_trim_whitespace(const string& src);
+extern boost::string_view rgw_trim_whitespace(const boost::string_view& src);
+extern string rgw_trim_quotes(const string& val);
+
+/* time formatting: into a caller-supplied buffer of buf_size bytes, or into
+ * a std::string */
+extern void rgw_to_iso8601(const real_time& t, char *dest, int buf_size);
+extern void rgw_to_iso8601(const real_time& t, string *dest);
+extern std::string rgw_to_asctime(const utime_t& t);
+
+/** Check if the req_state's user has the necessary permissions
+ * to do the requested action */
+/* Prototypes only; the definitions are not part of this header.  Each check
+ * family below comes in several forms: overloads that receive the ACLs and
+ * IAM policies explicitly, overloads that receive only the req_state, and
+ * "_no_policy" variants that take an int `perm` instead of a uint64_t `op`
+ * and no IAM policy arguments. */
+rgw::IAM::Effect eval_user_policies(const vector<rgw::IAM::Policy>& user_policies,
+ const rgw::IAM::Environment& env,
+ boost::optional<const rgw::auth::Identity&> id,
+ const uint64_t op,
+ const rgw::ARN& arn);
+/* user-level checks */
+bool verify_user_permission(const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ const vector<rgw::IAM::Policy>& user_policies,
+ const rgw::ARN& res,
+ const uint64_t op);
+bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ const int perm);
+bool verify_user_permission(const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ const rgw::ARN& res,
+ const uint64_t op);
+bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ int perm);
+/* bucket-level checks */
+bool verify_bucket_permission(
+ const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ const rgw_bucket& bucket,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ const boost::optional<rgw::IAM::Policy>& bucket_policy,
+ const vector<rgw::IAM::Policy>& user_policies,
+ const uint64_t op);
+bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct req_state * const s, const uint64_t op);
+bool verify_bucket_permission_no_policy(
+ const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ const int perm);
+bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ const int perm);
+/* unlike the bool checks above, this one returns an int and takes no
+ * DoutPrefixProvider */
+int verify_bucket_owner_or_policy(struct req_state* const s,
+ const uint64_t op);
+/* object-level checks */
+extern bool verify_object_permission(
+ const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ const rgw_obj& obj,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ RGWAccessControlPolicy * const object_acl,
+ const boost::optional<rgw::IAM::Policy>& bucket_policy,
+ const vector<rgw::IAM::Policy>& user_policies,
+ const uint64_t op);
+extern bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s, uint64_t op);
+extern bool verify_object_permission_no_policy(
+ const DoutPrefixProvider* dpp,
+ struct req_state * const s,
+ RGWAccessControlPolicy * const user_acl,
+ RGWAccessControlPolicy * const bucket_acl,
+ RGWAccessControlPolicy * const object_acl,
+ int perm);
+extern bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state *s,
+ int perm);
+/** Convert an input URL into a sane object name
+ * by converting %-escaped strings into characters, etc*/
+/* NOTE(review): the description above reads like it belongs to url_decode()
+ * rather than to the prototype that directly follows it -- confirm and move
+ * if so. */
+extern void rgw_uri_escape_char(char c, string& dst);
+/* in_query defaults to false */
+extern std::string url_decode(const boost::string_view& src_str,
+ bool in_query = false);
+/* encode_slash defaults to true in both forms; one form writes into dst,
+ * the other returns the encoded string */
+extern void url_encode(const std::string& src, string& dst,
+ bool encode_slash = true);
+extern std::string url_encode(const std::string& src, bool encode_slash = true);
+/* destination should be CEPH_CRYPTO_HMACSHA1_DIGESTSIZE bytes long */
+extern void calc_hmac_sha1(const char *key, int key_len,
+ const char *msg, int msg_len, char *dest);
+
+/* Convenience overload: HMAC-SHA1 of msg under key, with the digest returned
+ * by value instead of written through a caller-supplied buffer. */
+static inline sha1_digest_t
+calc_hmac_sha1(const boost::string_view& key, const boost::string_view& msg) {
+  sha1_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha1(key.data(), key.size(), msg.data(), msg.size(), out);
+  return digest;
+}
+
+/* destination should be CEPH_CRYPTO_HMACSHA256_DIGESTSIZE bytes long */
+extern void calc_hmac_sha256(const char *key, int key_len,
+                             const char *msg, int msg_len,
+                             char *dest);
+
+/* Raw-pointer convenience overload: same computation as the primitive
+ * above, with the digest returned by value. */
+static inline sha256_digest_t
+calc_hmac_sha256(const char *key, const int key_len,
+                 const char *msg, const int msg_len) {
+  sha256_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha256(key, key_len, msg, msg_len, out);
+  return digest;
+}
+
+/* string_view overload: both key and message are passed as views. */
+static inline sha256_digest_t
+calc_hmac_sha256(const boost::string_view& key, const boost::string_view& msg) {
+  sha256_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha256(key.data(), key.size(), msg.data(), msg.size(), out);
+  return digest;
+}
+
+/* Overload keyed by a previous SHA-256 digest: all sha256_digest_t::SIZE
+ * bytes of key.v are used as the HMAC key. */
+static inline sha256_digest_t
+calc_hmac_sha256(const sha256_digest_t &key,
+                 const boost::string_view& msg) {
+  const char *const key_bytes = reinterpret_cast<const char*>(key.v);
+
+  sha256_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha256(key_bytes, sha256_digest_t::SIZE,
+                   msg.data(), msg.size(), out);
+  return digest;
+}
+
+/* Overload keyed by a byte vector; the whole vector is the HMAC key. */
+static inline sha256_digest_t
+calc_hmac_sha256(const std::vector<unsigned char>& key,
+                 const boost::string_view& msg) {
+  const char *const key_bytes = reinterpret_cast<const char*>(key.data());
+
+  sha256_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha256(key_bytes, key.size(), msg.data(), msg.size(), out);
+  return digest;
+}
+
+/* Overload keyed by a fixed-size byte array; all KeyLenN bytes are the key. */
+template<size_t KeyLenN>
+static inline sha256_digest_t
+calc_hmac_sha256(const std::array<unsigned char, KeyLenN>& key,
+                 const boost::string_view& msg) {
+  const char *const key_bytes = reinterpret_cast<const char*>(key.data());
+
+  sha256_digest_t digest;
+  char *const out = reinterpret_cast<char*>(digest.v);
+  calc_hmac_sha256(key_bytes, key.size(), msg.data(), msg.size(), out);
+  return digest;
+}
+
+/* one-shot SHA-256 of a message */
+extern sha256_digest_t calc_hash_sha256(const boost::string_view& msg);
+
+/* Streaming SHA-256: open a hash object, feed it data, then close or restart
+ * it.  close/restart take a pointer to the hash pointer.
+ * NOTE(review): presumably they free and/or reset *phash -- confirm against
+ * the definitions before relying on ownership. */
+extern ceph::crypto::SHA256* calc_hash_sha256_open_stream();
+extern void calc_hash_sha256_update_stream(ceph::crypto::SHA256* hash,
+ const char* msg,
+ int len);
+extern std::string calc_hash_sha256_close_stream(ceph::crypto::SHA256** phash);
+extern std::string calc_hash_sha256_restart_stream(ceph::crypto::SHA256** phash);
+
+extern int rgw_parse_op_type_list(const string& str, uint32_t *perm);
+
+/* Values for the `flag` argument of match_policy().  Each is a distinct
+ * single bit. */
+static constexpr uint32_t MATCH_POLICY_ACTION = 0x01;
+static constexpr uint32_t MATCH_POLICY_RESOURCE = 0x02;
+static constexpr uint32_t MATCH_POLICY_ARN = 0x04;
+static constexpr uint32_t MATCH_POLICY_STRING = 0x08;
+
+extern bool match_policy(boost::string_view pattern, boost::string_view input,
+ uint32_t flag);
+
+/* HTTP attribute name re-casing helpers */
+extern string camelcase_dash_http_attr(const string& orig);
+extern string lowercase_dash_http_attr(const string& orig);
+
+/* setup / teardown of the saved curl handles */
+void rgw_setup_saved_curl_handles();
+void rgw_release_all_curl_handles();
+
+/* Copy s into *dest, inserting esc_char in front of every occurrence of
+ * esc_char or special_char.
+ *
+ * The output is assembled in a heap-backed std::string.  The previous
+ * implementation used a variable-length stack array of 2 * s.size() + 1
+ * bytes, which is a compiler extension in C++ and overflows the stack for
+ * long inputs.
+ *
+ * As before, the result ends at the first NUL byte of the escaped text, if
+ * there is one. */
+static inline void rgw_escape_str(const std::string& s, char esc_char,
+                                  char special_char, std::string *dest)
+{
+  std::string escaped;
+  escaped.reserve(s.size() * 2);
+
+  for (const char c : s) {
+    if (c == esc_char || c == special_char) {
+      escaped.push_back(esc_char);
+    }
+    escaped.push_back(c);
+  }
+
+  // preserve the C-string semantics of the old buffer-based assignment
+  const std::string::size_type nul = escaped.find('\0');
+  if (nul != std::string::npos) {
+    escaped.erase(nul);
+  }
+  dest->swap(escaped);
+}
+
+/* Reverse of rgw_escape_str() for one field: starting at offset ofs of s,
+ * copy characters into *dest until an unescaped special_char is met.
+ *
+ * An esc_char that is not itself escaped is dropped and makes the following
+ * character literal.  Returns the offset just past the terminating
+ * special_char, or string::npos converted to ssize_t when the end of s was
+ * reached without finding one.
+ *
+ * The output is assembled in a heap-backed std::string.  The previous
+ * implementation used a variable-length stack array of s.size() + 1 bytes,
+ * which is a compiler extension in C++ and overflows the stack for long
+ * inputs.  As before, the result ends at the first NUL byte, if any. */
+static inline ssize_t rgw_unescape_str(const std::string& s, ssize_t ofs,
+                                       char esc_char, char special_char,
+                                       std::string *dest)
+{
+  std::string unescaped;
+  bool esc = false;
+  ssize_t next = static_cast<ssize_t>(std::string::npos);
+
+  // a negative ofs converts to a huge index, so the loop body never runs
+  for (size_t i = static_cast<size_t>(ofs); i < s.size(); i++) {
+    const char c = s[i];
+    if (!esc && c == esc_char) {
+      esc = true;
+      continue;
+    }
+    if (!esc && c == special_char) {
+      next = static_cast<ssize_t>(i) + 1;
+      break;
+    }
+    unescaped.push_back(c);
+    esc = false;
+  }
+
+  // preserve the C-string semantics of the old buffer-based assignment
+  const std::string::size_type nul = unescaped.find('\0');
+  if (nul != std::string::npos) {
+    unescaped.erase(nul);
+  }
+  dest->swap(unescaped);
+  return next;
+}
+
+/* Return the contents of a buffer list as a string with every trailing NUL
+ * byte removed. */
+static inline string rgw_bl_str(ceph::buffer::list& raw)
+{
+  const size_t raw_len = raw.length();
+  string text(raw.c_str(), raw_len);
+
+  // keep everything up to and including the last non-NUL byte
+  const size_t last = text.find_last_not_of('\0');
+  text.resize(last == string::npos ? 0 : last + 1);
+  return text;
+}
+
+/* Decode t from the start of bl.  Returns 0 on success and -EIO when the
+ * decoder throws buffer::error. */
+template <typename T>
+int decode_bl(bufferlist& bl, T& t)
+{
+  auto pos = bl.cbegin();
+  try {
+    decode(t, pos);
+    return 0;
+  } catch (buffer::error&) {
+    return -EIO;
+  }
+}
+
+#endif
diff --git a/src/rgw/rgw_compression.cc b/src/rgw/rgw_compression.cc
new file mode 100644
index 00000000..b70f51ad
--- /dev/null
+++ b/src/rgw/rgw_compression.cc
@@ -0,0 +1,201 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_compression.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Decode a compression attribute into cs_info.  Returns -EIO when decoding
+// throws or when the decoded info lists no blocks; otherwise returns 0 and
+// sets need_decompress to whether the compression type differs from "none".
+int rgw_compression_info_from_attr(const bufferlist& attr,
+                                   bool& need_decompress,
+                                   RGWCompressionInfo& cs_info)
+{
+  auto pos = attr.cbegin();
+  try {
+    decode(cs_info, pos);
+  } catch (buffer::error&) {
+    return -EIO;
+  }
+
+  if (cs_info.blocks.empty()) {
+    return -EIO;
+  }
+
+  need_decompress = (cs_info.compression_type != "none");
+  return 0;
+}
+
+// Look up RGW_ATTR_COMPRESSION in attrs.  When it is absent, need_decompress
+// is cleared, cs_info is left untouched and 0 is returned; otherwise the
+// attribute value is handed to rgw_compression_info_from_attr().
+int rgw_compression_info_from_attrset(const map<string, bufferlist>& attrs,
+                                      bool& need_decompress,
+                                      RGWCompressionInfo& cs_info)
+{
+  const auto found = attrs.find(RGW_ATTR_COMPRESSION);
+  if (found != attrs.end()) {
+    return rgw_compression_info_from_attr(found->second, need_decompress, cs_info);
+  }
+
+  need_decompress = false;
+  return 0;
+}
+
+//------------RGWPutObj_Compress---------------
+
+// Compress one incoming part and pass the result down the pipe.
+//
+// Compression is attempted for the part at logical_offset 0, and for later
+// parts only while `compressed` is still set from the previous part.  If the
+// first part fails to compress it is forwarded unmodified and `compressed`
+// is cleared; a failure on a later part returns -EIO.  Every successfully
+// compressed part appends a compression_block recording its logical offset,
+// its offset in the compressed stream and its compressed length.
+//
+// An empty `in` skips all of this and forwards an empty buffer.  The value
+// returned is whatever Pipe::process() returns, except for the -EIO case.
+int RGWPutObj_Compress::process(bufferlist&& in, uint64_t logical_offset)
+{
+ bufferlist out;
+ if (in.length() > 0) {
+ // compression stuff
+ if ((logical_offset > 0 && compressed) || // if previous part was compressed
+ (logical_offset == 0)) { // or it's the first part
+ ldout(cct, 10) << "Compression for rgw is enabled, compress part " << in.length() << dendl;
+ int cr = compressor->compress(in, out);
+ if (cr < 0) {
+ if (logical_offset > 0) {
+ // earlier parts were already stored compressed, so there is no fallback
+ lderr(cct) << "Compression failed with exit code " << cr
+ << " for next part, compression process failed" << dendl;
+ return -EIO;
+ }
+ compressed = false;
+ ldout(cct, 5) << "Compression failed with exit code " << cr
+ << " for first part, storing uncompressed" << dendl;
+ out.claim(in);
+ } else {
+ compressed = true;
+
+ compression_block newbl;
+ size_t bs = blocks.size();
+ newbl.old_ofs = logical_offset;
+ // the new block starts where the previous compressed block ended
+ newbl.new_ofs = bs > 0 ? blocks[bs-1].len + blocks[bs-1].new_ofs : 0;
+ newbl.len = out.length();
+ blocks.push_back(newbl);
+ }
+ } else {
+ // not the first part and the object is being stored uncompressed
+ compressed = false;
+ out.claim(in);
+ }
+ // end of compression stuff
+ }
+ // note: the offset handed on is the logical (uncompressed) one
+ return Pipe::process(std::move(out), logical_offset);
+}
+
+//----------------RGWGetObj_Decompress---------------------
+// Build a decompressing read filter in front of `next`.  The compressor is
+// created from cs_info's compression_type; a failure to create it is only
+// logged here (handle_data() checks for it again on every call).
+RGWGetObj_Decompress::RGWGetObj_Decompress(CephContext* cct_,
+                                           RGWCompressionInfo* cs_info_,
+                                           bool partial_content_,
+                                           RGWGetObj_Filter* next)
+  : RGWGetObj_Filter(next),
+    cct(cct_),
+    cs_info(cs_info_),
+    partial_content(partial_content_),
+    q_ofs(0),
+    q_len(0),
+    cur_ofs(0)
+{
+  compressor = Compressor::create(cct, cs_info->compression_type);
+  if (compressor.get() == nullptr) {
+    lderr(cct) << "Cannot load compressor of type " << cs_info->compression_type << dendl;
+  }
+}
+
+// Decompress a span of stored data and forward the requested plain bytes.
+//
+// The span [bl_ofs, bl_ofs + bl_len) of bl is copied and appended to any
+// bytes left in `waiting` by a previous call.  Blocks from first_block to
+// last_block are then processed in order.  A block that is not fully
+// present yet has its available bytes stashed in `waiting` for the next
+// call.  A complete block is decompressed into out_bl, and out_bl is handed
+// to `next` in pieces of at most rgw_max_chunk_size bytes.  q_ofs is the
+// number of leading plain bytes still to skip and q_len the number of plain
+// bytes still to deliver.
+//
+// Returns -EIO when no compressor is available, the compressor's error code
+// when decompression fails, a negative value from next->handle_data(), or
+// otherwise the result of the last next->handle_data() call (0 if none).
+int RGWGetObj_Decompress::handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len)
+{
+  // the two values used to be printed back to back with nothing between them
+  ldout(cct, 10) << "Compression for rgw is enabled, decompress part "
+      << "bl_ofs=" << bl_ofs << ", bl_len=" << bl_len << dendl;
+
+  if (!compressor.get()) {
+    // if compressor isn't available - error, because cannot return decompressed data?
+    lderr(cct) << "Cannot load compressor of type " << cs_info->compression_type << dendl;
+    return -EIO;
+  }
+  bufferlist out_bl, in_bl, temp_in_bl;
+  bl.copy(bl_ofs, bl_len, temp_in_bl);
+  bl_ofs = 0;
+  int r = 0;
+  if (waiting.length() != 0) {
+    // prepend the incomplete block tail kept from the previous call
+    in_bl.append(waiting);
+    in_bl.append(temp_in_bl);
+    waiting.clear();
+  } else {
+    in_bl.claim(temp_in_bl);
+  }
+  bl_len = in_bl.length();
+
+  while (first_block <= last_block) {
+    bufferlist tmp;
+    // position of this block inside in_bl; cur_ofs is in_bl's stored offset
+    off_t ofs_in_bl = first_block->new_ofs - cur_ofs;
+    if (ofs_in_bl + (off_t)first_block->len > bl_len) {
+      // not complete block, put it to waiting
+      unsigned tail = bl_len - ofs_in_bl;
+      in_bl.copy(ofs_in_bl, tail, waiting);
+      // cancels part of the "cur_ofs += bl_len" below, so that the stashed
+      // tail is accounted for again on the next call
+      cur_ofs -= tail;
+      break;
+    }
+    in_bl.copy(ofs_in_bl, first_block->len, tmp);
+    int cr = compressor->decompress(tmp, out_bl);
+    if (cr < 0) {
+      lderr(cct) << "Decompression failed with exit code " << cr << dendl;
+      return cr;
+    }
+    ++first_block;
+    // drain full chunks as soon as enough plain data has accumulated
+    while (out_bl.length() - q_ofs >= cct->_conf->rgw_max_chunk_size)
+    {
+      off_t ch_len = std::min<off_t>(cct->_conf->rgw_max_chunk_size, q_len);
+      q_len -= ch_len;
+      r = next->handle_data(out_bl, q_ofs, ch_len);
+      if (r < 0) {
+        lderr(cct) << "handle_data failed with exit code " << r << dendl;
+        return r;
+      }
+      out_bl.splice(0, q_ofs + ch_len);
+      q_ofs = 0;
+    }
+  }
+
+  cur_ofs += bl_len;
+  // forward whatever is left, limited to the bytes still requested
+  off_t ch_len = std::min<off_t>(out_bl.length() - q_ofs, q_len);
+  if (ch_len > 0) {
+    r = next->handle_data(out_bl, q_ofs, ch_len);
+    if (r < 0) {
+      lderr(cct) << "handle_data failed with exit code " << r << dendl;
+      return r;
+    }
+    out_bl.splice(0, q_ofs + ch_len);
+    q_len -= ch_len;
+    q_ofs = 0;
+  }
+  return r;
+}
+
+// Translate a requested range of the plain object into the range of stored
+// (compressed) data that has to be read, and reset the per-read state.
+//
+// On entry ofs/end describe the plain range (end is inclusive, see the
+// "end + 1 - ofs" below).  On exit they describe the stored range covering
+// first_block through last_block, further adjusted by next->fixup_range(),
+// whose result is returned.
+//
+// NOTE(review): the code dereferences first_block/last_block and computes
+// blocks.end() - 1 without checking for an empty block list; it relies on
+// cs_info->blocks being non-empty.
+int RGWGetObj_Decompress::fixup_range(off_t& ofs, off_t& end)
+{
+ if (partial_content) {
+ // if user set range, we need to calculate it in decompressed data
+ first_block = cs_info->blocks.begin(); last_block = cs_info->blocks.begin();
+ if (cs_info->blocks.size() > 1) {
+ vector<compression_block>::iterator fb, lb;
+ // not bad to use auto for lambda, I think
+ auto cmp_u = [] (off_t ofs, const compression_block& e) { return (uint64_t)ofs < e.old_ofs; };
+ auto cmp_l = [] (const compression_block& e, off_t ofs) { return e.old_ofs <= (uint64_t)ofs; };
+ // fb: first block (searching from the second one) whose old_ofs is past
+ // ofs, so the block just before it is the one used as first_block
+ fb = upper_bound(cs_info->blocks.begin()+1,
+ cs_info->blocks.end(),
+ ofs,
+ cmp_u);
+ first_block = fb - 1;
+ // lb: first block at or after fb whose old_ofs is past end; the block
+ // just before it is used as last_block
+ lb = lower_bound(fb,
+ cs_info->blocks.end(),
+ end,
+ cmp_l);
+ last_block = lb - 1;
+ }
+ } else {
+ // no range requested: all blocks are needed
+ first_block = cs_info->blocks.begin(); last_block = cs_info->blocks.end() - 1;
+ }
+
+ // plain bytes to skip at the start of first_block, and plain bytes wanted
+ q_ofs = ofs - first_block->old_ofs;
+ q_len = end + 1 - ofs;
+
+ // stored range spanning first_block .. last_block (end inclusive)
+ ofs = first_block->new_ofs;
+ end = last_block->new_ofs + last_block->len - 1;
+
+ cur_ofs = ofs;
+ waiting.clear();
+
+ return next->fixup_range(ofs, end);
+}
diff --git a/src/rgw/rgw_compression.h b/src/rgw/rgw_compression.h
new file mode 100644
index 00000000..67a1e0cc
--- /dev/null
+++ b/src/rgw/rgw_compression.h
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_COMPRESSION_H
+#define CEPH_RGW_COMPRESSION_H
+
+#include <vector>
+
+#include "compressor/Compressor.h"
+#include "rgw_putobj.h"
+#include "rgw_op.h"
+
+// Read compression info from a single attribute value, or from an attribute
+// map.  Both return an int status and report through need_decompress and
+// cs_info.
+int rgw_compression_info_from_attr(const bufferlist& attr,
+ bool& need_decompress,
+ RGWCompressionInfo& cs_info);
+int rgw_compression_info_from_attrset(const map<string, bufferlist>& attrs,
+ bool& need_decompress,
+ RGWCompressionInfo& cs_info);
+
+// Read-path filter that sits in front of another RGWGetObj_Filter and
+// overrides handle_data() and fixup_range().
+class RGWGetObj_Decompress : public RGWGetObj_Filter
+{
+ CephContext* cct;
+ CompressorRef compressor;
+ // not owned; the constructor only stores the pointer it is given
+ RGWCompressionInfo* cs_info;
+ bool partial_content;
+ // NOTE(review): the constructor does not initialise these two; in the code
+ // shown they are first assigned by fixup_range()
+ vector<compression_block>::iterator first_block, last_block;
+ off_t q_ofs, q_len;
+ uint64_t cur_ofs;
+ // buffered bytes carried from one handle_data() call to the next
+ bufferlist waiting;
+public:
+ RGWGetObj_Decompress(CephContext* cct_,
+ RGWCompressionInfo* cs_info_,
+ bool partial_content_,
+ RGWGetObj_Filter* next);
+ ~RGWGetObj_Decompress() override {}
+
+ int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override;
+ int fixup_range(off_t& ofs, off_t& end) override;
+
+};
+
+// Write-path pipe stage that overrides process() and keeps track of the
+// compression blocks it has produced.
+class RGWPutObj_Compress : public rgw::putobj::Pipe
+{
+ CephContext* cct;
+ // starts out false; updated by process()
+ bool compressed{false};
+ CompressorRef compressor;
+ std::vector<compression_block> blocks;
+public:
+ RGWPutObj_Compress(CephContext* cct_, CompressorRef compressor,
+ rgw::putobj::DataProcessor *next)
+ : Pipe(next), cct(cct_), compressor(compressor) {}
+
+ int process(bufferlist&& data, uint64_t logical_offset) override;
+
+ bool is_compressed() { return compressed; }
+ // returns a mutable reference to the internal block list
+ vector<compression_block>& get_compression_blocks() { return blocks; }
+
+}; /* RGWPutObj_Compress */
+
+#endif /* CEPH_RGW_COMPRESSION_H */
diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc
new file mode 100644
index 00000000..1ccefc2d
--- /dev/null
+++ b/src/rgw/rgw_coroutine.cc
@@ -0,0 +1,1058 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_json.h"
+#include "rgw_coroutine.h"
+
+// re-include our assert to clobber the system one; fix dout:
+#include "include/ceph_assert.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+#define dout_context g_ceph_context
+
+
+/* Timer callback created by wait_interval(): when finish() runs it hands the
+ * stored opaque key back to the manager through _wakeup(). */
+class RGWCompletionManager::WaitContext : public Context {
+  RGWCompletionManager *manager;
+  void *opaque;
+
+public:
+  WaitContext(RGWCompletionManager *_cm, void *_opaque)
+    : manager(_cm),
+      opaque(_opaque)
+  {}
+
+  /* the completion code is not used */
+  void finish(int /* r */) override {
+    manager->_wakeup(opaque);
+  }
+};
+
+/* Set up the manager's lock and a timer that shares that lock, then start
+ * the timer. */
+RGWCompletionManager::RGWCompletionManager(CephContext *_cct)
+  : cct(_cct),
+    lock("RGWCompletionManager::lock"),
+    timer(cct, lock)
+{
+  timer.init();
+}
+
+/* Cancel every pending timer event and shut the timer down, both while
+ * holding the manager lock. */
+RGWCompletionManager::~RGWCompletionManager()
+{
+  Mutex::Locker guard(lock);
+  timer.cancel_all_events();
+  timer.shutdown();
+}
+
+/* Locked entry point for _complete(). */
+void RGWCompletionManager::complete(RGWAioCompletionNotifier *cn, const rgw_io_id& io_id, void *user_info)
+{
+  Mutex::Locker guard(lock);
+  _complete(cn, io_id, user_info);
+}
+
+/* Remember a notifier in `cns` so go_down() can reach it; a null pointer is
+ * ignored.  The lock is taken either way. */
+void RGWCompletionManager::register_completion_notifier(RGWAioCompletionNotifier *cn)
+{
+  Mutex::Locker guard(lock);
+  if (cn == nullptr) {
+    return;
+  }
+  cns.insert(cn);
+}
+
+/* Forget a notifier previously added to `cns`; a null pointer is ignored.
+ * The lock is taken either way. */
+void RGWCompletionManager::unregister_completion_notifier(RGWAioCompletionNotifier *cn)
+{
+  Mutex::Locker guard(lock);
+  if (cn == nullptr) {
+    return;
+  }
+  cns.erase(cn);
+}
+
+/* Queue a completion and signal `cond`.  Does not take the lock itself: the
+ * callers visible in this file (complete(), and _wakeup() reached through
+ * wakeup()) hold it.
+ *
+ * cn, when non-null, is dropped from the set of registered notifiers.
+ *
+ * NOTE(review): nothing in the code shown here ever inserts into
+ * complete_reqs_set (get_next()/try_get_next() only erase from it), so the
+ * duplicate check below cannot trigger unless the set is populated
+ * elsewhere.  Be careful before "fixing" this by inserting io_id: _wakeup()
+ * passes the same rgw_io_id{0, -1} for every waiter, so real de-duplication
+ * on io_id alone would swallow wakeups for different stacks. */
+void RGWCompletionManager::_complete(RGWAioCompletionNotifier *cn, const rgw_io_id& io_id, void *user_info)
+{
+ if (cn) {
+ cns.erase(cn);
+ }
+
+ if (complete_reqs_set.find(io_id) != complete_reqs_set.end()) {
+ /* already have completion for this io_id, don't allow multiple completions for it */
+ return;
+ }
+ complete_reqs.push_back(io_completion{io_id, user_info});
+ cond.Signal();
+}
+
+/* Blocking fetch of the oldest queued completion into *io.  Waits on `cond`
+ * while the queue is empty; returns -ECANCELED if the queue is empty and the
+ * manager is going down, 0 once a completion has been handed out. */
+int RGWCompletionManager::get_next(io_completion *io)
+{
+  Mutex::Locker guard(lock);
+
+  for (;;) {
+    if (!complete_reqs.empty()) {
+      break;
+    }
+    if (going_down) {
+      return -ECANCELED;
+    }
+    cond.Wait(lock);
+  }
+
+  *io = complete_reqs.front();
+  complete_reqs_set.erase(io->io_id);
+  complete_reqs.pop_front();
+  return 0;
+}
+
+/* Non-blocking variant of get_next(): returns false right away when nothing
+ * is queued, otherwise moves the oldest completion into *io and returns
+ * true. */
+bool RGWCompletionManager::try_get_next(io_completion *io)
+{
+  Mutex::Locker guard(lock);
+
+  const bool have_completion = !complete_reqs.empty();
+  if (have_completion) {
+    *io = complete_reqs.front();
+    complete_reqs_set.erase(io->io_id);
+    complete_reqs.pop_front();
+  }
+  return have_completion;
+}
+
+/* Begin shutdown: unregister every known notifier, raise going_down and
+ * signal `cond` so a thread blocked in get_next() can notice the flag. */
+void RGWCompletionManager::go_down()
+{
+  Mutex::Locker guard(lock);
+
+  for (auto notifier : cns) {
+    notifier->unregister();
+  }
+
+  going_down = true;
+  cond.Signal();
+}
+
+/* Register `opaque` as a waiter carrying user_info and arm a timer event
+ * that calls _wakeup(opaque) after `interval`.  Asserts that `opaque` is not
+ * already waiting. */
+void RGWCompletionManager::wait_interval(void *opaque, const utime_t& interval, void *user_info)
+{
+  Mutex::Locker guard(lock);
+
+  ceph_assert(waiters.count(opaque) == 0);
+  waiters[opaque] = user_info;
+
+  WaitContext *on_timeout = new WaitContext(this, opaque);
+  timer.add_event_after(interval, on_timeout);
+}
+
+/* Locked entry point for _wakeup(). */
+void RGWCompletionManager::wakeup(void *opaque)
+{
+  Mutex::Locker guard(lock);
+  _wakeup(opaque);
+}
+
+/* If `opaque` is a registered waiter, remove it and queue a completion that
+ * carries the waiter's stored user pointer.  Unknown keys are ignored. */
+void RGWCompletionManager::_wakeup(void *opaque)
+{
+  auto waiter = waiters.find(opaque);
+  if (waiter == waiters.end()) {
+    return;
+  }
+
+  void *user_id = waiter->second;
+  waiters.erase(waiter);
+  _complete(NULL, rgw_io_id{0, -1} /* no IO id */, user_id);
+}
+
+/* Drop one reference on every stack still listed in spawned.entries. */
+RGWCoroutine::~RGWCoroutine() {
+  for (auto child : spawned.entries) {
+    child->put();
+  }
+}
+
+/* Forward to the stack this coroutine is running on. */
+void RGWCoroutine::init_new_io(RGWIOProvider *io_provider)
+{
+ stack->init_new_io(io_provider);
+}
+
+/* Set or clear the io-blocked flag on the owning stack. */
+void RGWCoroutine::set_io_blocked(bool flag) {
+ stack->set_io_blocked(flag);
+}
+
+/* Set or clear the sleeping flag on the owning stack. */
+void RGWCoroutine::set_sleeping(bool flag) {
+ stack->set_sleeping(flag);
+}
+
+/* Convenience overload: block on io_id with the channel mask set to -1. */
+int RGWCoroutine::io_block(int ret, int64_t io_id) {
+ return io_block(ret, rgw_io_id{io_id, -1});
+}
+
+/* Mark the stack as blocked on io_id, unless a finish for that id was
+ * already recorded and can be consumed, in which case nothing is blocked and
+ * 0 is returned.  When blocking, `ret` is passed back to the caller. */
+int RGWCoroutine::io_block(int ret, const rgw_io_id& io_id) {
+  const bool already_finished = stack->consume_io_finish(io_id);
+  if (!already_finished) {
+    set_io_blocked(true);
+    stack->set_io_blocked_id(io_id);
+    return ret;
+  }
+  return 0;
+}
+
+/* Report completion of io_id through the owning stack. */
+void RGWCoroutine::io_complete(const rgw_io_id& io_id) {
+ stack->io_complete(io_id);
+}
+
+/* Emit this history entry as the two fields "timestamp" and "status". */
+void RGWCoroutine::StatusItem::dump(Formatter *f) const {
+ ::encode_json("timestamp", timestamp, f);
+ ::encode_json("status", status, f);
+}
+
+/* Start a new status message.  The text accumulated so far is moved into the
+ * history (unless no status had been set before, i.e. the timestamp is still
+ * zero), at most one oldest entry is dropped when the history exceeds
+ * max_history, the timestamp is refreshed, and the now-empty stream is
+ * returned for the caller to write the new status into. */
+stringstream& RGWCoroutine::Status::set_status()
+{
+  RWLock::WLocker guard(lock);
+
+  const string previous = status.str();
+  status.str(string());
+
+  const bool had_status = !timestamp.is_zero();
+  if (had_status) {
+    history.push_back(StatusItem(timestamp, previous));
+  }
+  if (history.size() > static_cast<size_t>(max_history)) {
+    history.pop_front();
+  }
+
+  timestamp = ceph_clock_now();
+  return status;
+}
+
+/* Pre-increment max_io_id and return the new value. */
+int64_t RGWCoroutinesManager::get_next_io_id()
+{
+ return (int64_t)++max_io_id;
+}
+
+/* Create a stack with all state flags cleared.  A non-null `start` becomes
+ * the first op; `pos` is left at the beginning of `ops`. */
+RGWCoroutinesStack::RGWCoroutinesStack(CephContext *_cct, RGWCoroutinesManager *_ops_mgr, RGWCoroutine *start)
+  : cct(_cct),
+    ops_mgr(_ops_mgr),
+    done_flag(false),
+    error_flag(false),
+    blocked_flag(false),
+    sleep_flag(false),
+    interval_wait_flag(false),
+    is_scheduled(false),
+    is_waiting_for_child(false),
+    retcode(0),
+    run_count(0),
+    env(NULL),
+    parent(NULL)
+{
+  if (start != nullptr) {
+    ops.push_back(start);
+  }
+  pos = ops.begin();
+}
+
+/* Drop one reference on every op still on the stack and on every stack
+ * still listed in spawned.entries. */
+RGWCoroutinesStack::~RGWCoroutinesStack()
+{
+  for (auto pending_op : ops) {
+    pending_op->put();
+  }
+
+  for (auto child : spawned.entries) {
+    child->put();
+  }
+}
+
+/* Run one step of the op at `pos`.
+ *
+ * When the op reports done, the stack is unwound by one level, the op's
+ * reference is dropped, and the stack itself becomes done once `pos`
+ * reaches ops.end(); in that case the op's return code is stored as the
+ * stack's retcode.  The value returned is then what unwind() returned.
+ *
+ * When the op is not done its return value must be non-negative (asserted)
+ * and 0 is returned. */
+int RGWCoroutinesStack::operate(RGWCoroutinesEnv *_env)
+{
+ env = _env;
+ RGWCoroutine *op = *pos;
+ op->stack = this;
+ ldout(cct, 20) << *op << ": operate()" << dendl;
+ int r = op->operate_wrapper();
+ if (r < 0) {
+ ldout(cct, 20) << *op << ": operate() returned r=" << r << dendl;
+ }
+
+ error_flag = op->is_error();
+
+ if (op->is_done()) {
+ int op_retcode = r;
+ // unwind() moves `pos`; `op` itself stays valid until the put() below
+ r = unwind(op_retcode);
+ op->put();
+ done_flag = (pos == ops.end());
+ // a finished stack is never left marked as blocked
+ blocked_flag &= !done_flag;
+ if (done_flag) {
+ retcode = op_retcode;
+ }
+ return r;
+ }
+
+ /* should r ever be negative at this point? */
+ ceph_assert(r >= 0);
+
+ return 0;
+}
+
+/* Error text of the op at `pos`, or an empty string when `pos` is at the
+ * end of the op list. */
+string RGWCoroutinesStack::error_str()
+{
+  if (pos == ops.end()) {
+    return string();
+  }
+  return (*pos)->error_str();
+}
+
+/* Push next_op on top of the stack and make it the current op.  A null
+ * next_op is ignored.  When `pos` was at the end before the push (nothing
+ * was current), the current op becomes the first element. */
+void RGWCoroutinesStack::call(RGWCoroutine *next_op) {
+  if (next_op == nullptr) {
+    return;
+  }
+
+  ops.push_back(next_op);
+
+  if (pos == ops.end()) {
+    pos = ops.begin();
+  } else {
+    ++pos;
+  }
+}
+
+/* Ask the manager to schedule this stack; uses the manager's locking
+ * schedule() entry point. */
+void RGWCoroutinesStack::schedule()
+{
+ env->manager->schedule(env, this);
+}
+
+/* Same as schedule(), but through the manager's _schedule(), which asserts
+ * that the manager lock is already write-locked. */
+void RGWCoroutinesStack::_schedule()
+{
+ env->manager->_schedule(env, this);
+}
+
+/* Start `op` on a freshly allocated stack and schedule it.
+ *
+ * The new stack is recorded in source_op's spawned list, or in this stack's
+ * own list when source_op is null, and gets this stack as its parent.  With
+ * `wait` set, this stack is additionally marked as blocked by the new one.
+ * Returns the new stack, or NULL when op is null. */
+RGWCoroutinesStack *RGWCoroutinesStack::spawn(RGWCoroutine *source_op, RGWCoroutine *op, bool wait)
+{
+ if (!op) {
+ return NULL;
+ }
+
+ rgw_spawned_stacks *s = (source_op ? &source_op->spawned : &spawned);
+
+ RGWCoroutinesStack *stack = env->manager->allocate_stack();
+ s->add_pending(stack);
+ stack->parent = this;
+
+ stack->get(); /* we'll need to collect the stack */
+ stack->call(op);
+
+ env->manager->schedule(env, stack);
+
+ if (wait) {
+ set_blocked_by(stack);
+ }
+
+ return stack;
+}
+
+/* Spawn with no source op: the new stack is tracked in this stack's own
+ * spawned list. */
+RGWCoroutinesStack *RGWCoroutinesStack::spawn(RGWCoroutine *op, bool wait)
+{
+ return spawn(NULL, op, wait);
+}
+
+/* Put this stack to sleep for `interval`: register it with the completion
+ * manager (using the stack pointer both as key and as user data) and mark it
+ * as io-blocked and interval-waiting.  Always returns 0. */
+int RGWCoroutinesStack::wait(const utime_t& interval)
+{
+  void *const self = (void *)this;
+  env->manager->get_completion_mgr()->wait_interval(self, interval, self);
+
+  set_io_blocked(true);
+  set_interval_wait(true);
+  return 0;
+}
+
+/* Ask the completion manager to wake the waiter keyed by this stack. */
+void RGWCoroutinesStack::wakeup()
+{
+  env->manager->get_completion_mgr()->wakeup((void *)this);
+}
+
+/* Queue a completion for io_id with this stack as user data and no
+ * notifier attached. */
+void RGWCoroutinesStack::io_complete(const rgw_io_id& io_id)
+{
+  env->manager->get_completion_mgr()->complete(nullptr, io_id, (void *)this);
+}
+
+/* Pop the current op off the stack after it has finished.
+ *
+ * When the current op is the bottom one, the stack inherits the op's
+ * spawned stacks, the op list is cleared and `retcode` is returned.
+ * Otherwise the op is removed, the op below it becomes current, receives
+ * `retcode` through set_retcode() and inherits the spawned stacks, and 0 is
+ * returned.
+ *
+ * The popped op is not released here; the callers in this file call put()
+ * on it afterwards. */
+int RGWCoroutinesStack::unwind(int retcode)
+{
+ rgw_spawned_stacks *src_spawned = &(*pos)->spawned;
+
+ if (pos == ops.begin()) {
+ ldout(cct, 15) << "stack " << (void *)this << " end" << dendl;
+ spawned.inherit(src_spawned);
+ ops.clear();
+ pos = ops.end();
+ return retcode;
+ }
+
+ --pos;
+ ops.pop_back();
+ RGWCoroutine *op = *pos;
+ op->set_retcode(retcode);
+ op->spawned.inherit(src_spawned);
+ return 0;
+}
+
+/* Tear the stack down: unwind every remaining op with -ECANCELED, dropping
+ * each op's reference, then drop one reference on the stack itself. */
+void RGWCoroutinesStack::cancel()
+{
+ while (!ops.empty()) {
+ // grab the op first: unwind() moves `pos` / clears `ops`
+ RGWCoroutine *op = *pos;
+ unwind(-ECANCELED);
+ op->put();
+ }
+ put();
+}
+
+/* Reap finished child stacks from op's spawned list (or from this stack's
+ * own list when op is null).
+ *
+ * Stacks that are still running, and skip_stack, are kept.  Every other
+ * stack has its reference dropped and is removed from the list.  The scan
+ * stops at the first reaped stack whose status is negative: *ret is set to
+ * that status, the remaining entries are kept unexamined, and the return
+ * value tells the caller whether any such entries remain.  Otherwise *ret
+ * is 0 and false is returned. */
+bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
+{
+ bool need_retry = false;
+ rgw_spawned_stacks *s = (op ? &op->spawned : &spawned);
+ *ret = 0;
+ // entries that survive this pass
+ vector<RGWCoroutinesStack *> new_list;
+
+ for (vector<RGWCoroutinesStack *>::iterator iter = s->entries.begin(); iter != s->entries.end(); ++iter) {
+ RGWCoroutinesStack *stack = *iter;
+ if (stack == skip_stack || !stack->is_done()) {
+ new_list.push_back(stack);
+ if (!stack->is_done()) {
+ ldout(cct, 20) << "collect(): s=" << (void *)this << " stack=" << (void *)stack << " is still running" << dendl;
+ } else if (stack == skip_stack) {
+ ldout(cct, 20) << "collect(): s=" << (void *)this << " stack=" << (void *)stack << " explicitly skipping stack" << dendl;
+ }
+ continue;
+ }
+ int r = stack->get_ret_status();
+ stack->put();
+ if (r < 0) {
+ *ret = r;
+ ldout(cct, 20) << "collect(): s=" << (void *)this << " stack=" << (void *)stack << " encountered error (r=" << r << "), skipping next stacks" << dendl;
+ // keep everything after the failed stack; ++iter also positions iter
+ // for the "anything left?" test on the next line
+ new_list.insert(new_list.end(), ++iter, s->entries.end());
+ need_retry = (iter != s->entries.end());
+ break;
+ }
+
+ ldout(cct, 20) << "collect(): s=" << (void *)this << " stack=" << (void *)stack << " is complete" << dendl;
+ }
+
+ s->entries.swap(new_list);
+ return need_retry;
+}
+
+/* Reap at most one finished child stack from op's spawned list (or from
+ * this stack's own list when op is null).
+ *
+ * The first finished stack found has its reference dropped and is removed
+ * from the list.  *ret is set to its status if that is negative and to 0
+ * otherwise.  *collected_stack, when requested, receives the stack pointer.
+ *
+ * NOTE(review): the pointer is handed out after put() has been called on
+ * it; whether it may still be dereferenced depends on other references
+ * being held -- treat it as an identity only unless confirmed. */
+bool RGWCoroutinesStack::collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesStack **collected_stack) /* returns true if found a stack to collect */
+{
+ rgw_spawned_stacks *s = (op ? &op->spawned : &spawned);
+ *ret = 0;
+
+ if (collected_stack) {
+ *collected_stack = NULL;
+ }
+
+ for (vector<RGWCoroutinesStack *>::iterator iter = s->entries.begin(); iter != s->entries.end(); ++iter) {
+ RGWCoroutinesStack *stack = *iter;
+ if (!stack->is_done()) {
+ continue;
+ }
+ int r = stack->get_ret_status();
+ if (r < 0) {
+ *ret = r;
+ }
+
+ if (collected_stack) {
+ *collected_stack = stack;
+ }
+ stack->put();
+
+ // safe to erase while iterating because we return immediately
+ s->entries.erase(iter);
+ return true;
+ }
+
+ return false;
+}
+
+/* Collect from this stack's own spawned list (no source op). */
+bool RGWCoroutinesStack::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
+{
+ return collect(NULL, ret, skip_stack);
+}
+
+/* librados completion callback: `arg` is the RGWAioCompletionNotifier that
+ * was passed to aio_create_completion(); its cb() is invoked.  The
+ * completion handle itself is not used. */
+static void _aio_completion_notifier_cb(librados::completion_t cb, void *arg)
+{
+ (static_cast<RGWAioCompletionNotifier *>(arg))->cb();
+}
+
+/* Create a notifier that starts out registered, and a librados completion
+ * whose callback argument is this object.  Only one of the two callback
+ * slots of aio_create_completion() is filled in; the other is NULL. */
+RGWAioCompletionNotifier::RGWAioCompletionNotifier(RGWCompletionManager *_mgr, const rgw_io_id& _io_id, void *_user_data) : completion_mgr(_mgr),
+ io_id(_io_id),
+ user_data(_user_data), lock("RGWAioCompletionNotifier"), registered(true) {
+ c = librados::Rados::aio_create_completion((void *)this, NULL,
+ _aio_completion_notifier_cb);
+}
+
+/* Ask the owning manager for a completion notifier bound to this stack. */
+RGWAioCompletionNotifier *RGWCoroutinesStack::create_completion_notifier()
+{
+ return ops_mgr->create_completion_notifier(this);
+}
+
+/* Completion manager of the owning coroutines manager. */
+RGWCompletionManager *RGWCoroutinesStack::get_completion_mgr()
+{
+ return ops_mgr->get_completion_mgr();
+}
+
+/* Release one stack that is waiting on this one.  Takes the first entry of
+ * blocking_stacks, removes the link in both directions and stores the stack
+ * in *s.  Returns false (leaving *s untouched) when no stack is waiting. */
+bool RGWCoroutinesStack::unblock_stack(RGWCoroutinesStack **s)
+{
+  if (blocking_stacks.empty()) {
+    return false;
+  }
+
+  auto first = blocking_stacks.begin();
+  RGWCoroutinesStack *released = *first;
+  *s = released;
+
+  blocking_stacks.erase(first);
+  released->blocked_by_stack.erase(this);
+  return true;
+}
+
+/* Log the error text of a failed stack.  Nothing is logged for a null stack
+ * or when the stack has no error text.
+ *
+ * The text is fetched once and that copy is logged; previously error_str()
+ * was called a second time just to build the log line. */
+void RGWCoroutinesManager::report_error(RGWCoroutinesStack *op)
+{
+  if (!op) {
+    return;
+  }
+
+  const string err = op->error_str();
+  if (err.empty()) {
+    return;
+  }
+  lderr(cct) << "ERROR: failed operation: " << err << dendl;
+}
+
+/* Dump the stack as JSON: its address as "stack", its run count, and an
+ * "ops" array holding every op currently on the stack. */
+void RGWCoroutinesStack::dump(Formatter *f) const {
+  stringstream addr;
+  addr << (void *)this;
+  ::encode_json("stack", addr.str(), f);
+
+  ::encode_json("run_count", run_count, f);
+
+  f->open_array_section("ops");
+  for (auto& entry : ops) {
+    encode_json("op", *entry, f);
+  }
+  f->close_section();
+}
+
+/* Bind an IO provider to this stack: the stack pointer becomes the
+ * provider's user info, and the provider is assigned an io id through the
+ * manager's id provider. */
+void RGWCoroutinesStack::init_new_io(RGWIOProvider *io_provider)
+{
+ io_provider->set_io_user_info((void *)this);
+ io_provider->assign_io(env->manager->get_io_id_provider());
+}
+
+/* Decide whether a completion for io_id may unblock this stack.  When it
+ * may not, the completion is remembered in io_finish_ids (merging its
+ * channel bits into an existing entry for the same id) so a later
+ * consume_io_finish() can pick it up, and false is returned. */
+bool RGWCoroutinesStack::try_io_unblock(const rgw_io_id& io_id)
+{
+  if (can_io_unblock(io_id)) {
+    return true;
+  }
+
+  auto result = io_finish_ids.emplace(io_id.id, io_id);
+  if (!result.second) {
+    /* could not insert, entry already existed, add channel to completion mask */
+    result.first->second.channels |= io_id.channels;
+  }
+  return false;
+}
+
+/* Consume a finish that was recorded for io_id.id by try_io_unblock().
+ *
+ * Returns true when at least one of the channels in io_id.channels had been
+ * recorded as finished.  The consumed channels are removed from the record
+ * and the record is dropped once no channel is left.
+ *
+ * Previously the reduced mask was only computed in a local variable and
+ * never written back, so when other channels remained the consumed ones
+ * stayed set and could be consumed again. */
+bool RGWCoroutinesStack::consume_io_finish(const rgw_io_id& io_id)
+{
+  auto iter = io_finish_ids.find(io_id.id);
+  if (iter == io_finish_ids.end()) {
+    return false;
+  }
+
+  const int finished = iter->second.channels;
+  const int consumed = finished & io_id.channels;
+  const int remaining = finished & ~consumed;
+
+  if (remaining == 0) {
+    io_finish_ids.erase(iter);
+  } else {
+    // keep only the channels nobody has consumed yet
+    iter->second.channels = remaining;
+  }
+  return consumed != 0;
+}
+
+
+/* React to one completion taken from the completion manager.  Must be
+ * called with the manager lock write-locked (asserted).
+ *
+ * io.user_info is interpreted as the stack the completion belongs to.
+ * Completions for stacks that are not part of context_stacks, or that
+ * try_io_unblock() refuses, are ignored.  Otherwise the stack's io-blocked
+ * and interval-wait state is cleared (decrementing *blocked_count if it was
+ * io-blocked).  The stack is then queued on scheduled_stacks unless it is
+ * already scheduled, or -- if it is already done -- removed from
+ * context_stacks and released. */
+void RGWCoroutinesManager::handle_unblocked_stack(set<RGWCoroutinesStack *>& context_stacks, list<RGWCoroutinesStack *>& scheduled_stacks,
+ RGWCompletionManager::io_completion& io, int *blocked_count)
+{
+ ceph_assert(lock.is_wlocked());
+ RGWCoroutinesStack *stack = static_cast<RGWCoroutinesStack *>(io.user_info);
+ // only the pointer value is used for this lookup; the stack is not
+ // dereferenced unless it is still known to this run context
+ if (context_stacks.find(stack) == context_stacks.end()) {
+ return;
+ }
+ if (!stack->try_io_unblock(io.io_id)) {
+ return;
+ }
+ if (stack->is_io_blocked()) {
+ --(*blocked_count);
+ stack->set_io_blocked(false);
+ }
+ stack->set_interval_wait(false);
+ if (!stack->is_done()) {
+ if (!stack->is_scheduled) {
+ scheduled_stacks.push_back(stack);
+ stack->set_is_scheduled(true);
+ }
+ } else {
+ context_stacks.erase(stack);
+ stack->put();
+ }
+}
+
+/* Locked entry point for _schedule(): takes the manager lock for writing. */
+void RGWCoroutinesManager::schedule(RGWCoroutinesEnv *env, RGWCoroutinesStack *stack)
+{
+  RWLock::WLocker guard(lock);
+  _schedule(env, stack);
+}
+
+/* Queue `stack` on the environment's scheduled list unless it is already
+ * scheduled, and make sure it is a member of the environment's run context.
+ * The manager lock must be write-locked (asserted). */
+void RGWCoroutinesManager::_schedule(RGWCoroutinesEnv *env, RGWCoroutinesStack *stack)
+{
+  ceph_assert(lock.is_wlocked());
+
+  const bool already_queued = stack->is_scheduled;
+  if (!already_queued) {
+    env->scheduled_stacks->push_back(stack);
+    stack->set_is_scheduled(true);
+  }
+
+  run_contexts[env->run_context].insert(stack);
+}
+
+/* Forward to the coroutine's own set_sleeping(). */
+void RGWCoroutinesManager::set_sleeping(RGWCoroutine *cr, bool flag)
+{
+ cr->set_sleeping(flag);
+}
+
+/* Forward to the coroutine's own io_complete(). */
+void RGWCoroutinesManager::io_complete(RGWCoroutine *cr, const rgw_io_id& io_id)
+{
+ cr->io_complete(io_id);
+}
+
+// Drive the given stacks inside a fresh run context until no stack of that
+// context is scheduled or io-blocked any more, or until going_down is set.
+//
+// The manager lock is held for write while scheduler state is touched and is
+// dropped around stack->operate() and the blocking completion_mgr->get_next()
+// calls.
+//
+// Returns the last value stored in `ret`: the result of the most recent
+// stack->operate() / completion_mgr->get_next() call, or -ECANCELED when the
+// wait for completions was interrupted by going_down.
+int RGWCoroutinesManager::run(list<RGWCoroutinesStack *>& stacks)
+{
+ int ret = 0;
+ int blocked_count = 0;
+ int interval_wait_count = 0;
+ bool canceled = false; // set on going_down
+ RGWCoroutinesEnv env;
+ bool op_not_blocked;
+
+ uint64_t run_context = ++run_context_count;
+
+ lock.get_write();
+ set<RGWCoroutinesStack *>& context_stacks = run_contexts[run_context];
+ list<RGWCoroutinesStack *> scheduled_stacks;
+ // every initial stack is both tracked by the context and queued to run
+ for (auto& st : stacks) {
+ context_stacks.insert(st);
+ scheduled_stacks.push_back(st);
+ st->set_is_scheduled(true);
+ }
+ env.run_context = run_context;
+ env.manager = this;
+ env.scheduled_stacks = &scheduled_stacks;
+
+ for (list<RGWCoroutinesStack *>::iterator iter = scheduled_stacks.begin(); iter != scheduled_stacks.end() && !going_down;) {
+ RGWCompletionManager::io_completion io;
+ RGWCoroutinesStack *stack = *iter;
+ // step past the front element before popping it, so iter stays valid
+ ++iter;
+ scheduled_stacks.pop_front();
+
+ if (context_stacks.find(stack) == context_stacks.end()) {
+ /* stack was probably scheduled more than once due to IO, but has since completed */
+ goto next;
+ }
+ env.stack = stack;
+
+ // run the stack without holding the manager lock
+ lock.unlock();
+
+ ret = stack->operate(&env);
+
+ lock.get_write();
+
+ stack->set_is_scheduled(false);
+ if (ret < 0) {
+ ldout(cct, 20) << "stack->operate() returned ret=" << ret << dendl;
+ }
+
+ if (stack->is_error()) {
+ report_error(stack);
+ }
+
+ op_not_blocked = false;
+
+ // classify the stack after this step: io-blocked, otherwise blocked,
+ // done, or runnable again
+ if (stack->is_io_blocked()) {
+ ldout(cct, 20) << __func__ << ":" << " stack=" << (void *)stack << " is io blocked" << dendl;
+ if (stack->is_interval_waiting()) {
+ interval_wait_count++;
+ }
+ blocked_count++;
+ } else if (stack->is_blocked()) {
+ /* do nothing, we'll re-add the stack when the blocking stack is done,
+ * or when we're awakened
+ */
+ ldout(cct, 20) << __func__ << ":" << " stack=" << (void *)stack << " is_blocked_by_stack()=" << stack->is_blocked_by_stack()
+ << " is_sleeping=" << stack->is_sleeping() << " waiting_for_child()=" << stack->waiting_for_child() << dendl;
+ } else if (stack->is_done()) {
+ ldout(cct, 20) << __func__ << ":" << " stack=" << (void *)stack << " is done" << dendl;
+ RGWCoroutinesStack *s;
+ // release every stack that was blocked on the finished one
+ while (stack->unblock_stack(&s)) {
+ if (!s->is_blocked_by_stack() && !s->is_done()) {
+ if (s->is_io_blocked()) {
+ // NOTE(review): this tests the finished `stack`, not the released
+ // stack `s` that is being counted as io-blocked -- looks like `s`
+ // was intended; confirm before changing.
+ if (stack->is_interval_waiting()) {
+ interval_wait_count++;
+ }
+ blocked_count++;
+ } else {
+ s->_schedule();
+ }
+ }
+ }
+ if (stack->parent && stack->parent->waiting_for_child()) {
+ stack->parent->set_wait_for_child(false);
+ stack->parent->_schedule();
+ }
+ context_stacks.erase(stack);
+ stack->put();
+ stack = NULL;
+ } else {
+ // still runnable: count the consecutive run and queue it again
+ op_not_blocked = true;
+ stack->run_count++;
+ stack->_schedule();
+ }
+
+ if (!op_not_blocked && stack) {
+ stack->run_count = 0;
+ }
+
+ // consume whatever completions try_get_next() hands out right now
+ while (completion_mgr->try_get_next(&io)) {
+ handle_unblocked_stack(context_stacks, scheduled_stacks, io, &blocked_count);
+ }
+
+ /*
+ * only account blocked operations that are not in interval_wait, these are stacks that
+ * were put on a wait without any real IO operations. While we mark these as io_blocked,
+ * these aren't really waiting for IOs
+ */
+ while (blocked_count - interval_wait_count >= ops_window) {
+ lock.unlock();
+ ret = completion_mgr->get_next(&io);
+ lock.get_write();
+ if (ret < 0) {
+ ldout(cct, 5) << "completion_mgr.get_next() returned ret=" << ret << dendl;
+ }
+ handle_unblocked_stack(context_stacks, scheduled_stacks, io, &blocked_count);
+ }
+
+next:
+ // nothing runnable but IO is still outstanding: wait for completions
+ while (scheduled_stacks.empty() && blocked_count > 0) {
+ lock.unlock();
+ ret = completion_mgr->get_next(&io);
+ lock.get_write();
+ if (ret < 0) {
+ ldout(cct, 5) << "completion_mgr.get_next() returned ret=" << ret << dendl;
+ }
+ if (going_down) {
+ ldout(cct, 5) << __func__ << "(): was stopped, exiting" << dendl;
+ ret = -ECANCELED;
+ canceled = true;
+ break;
+ }
+ handle_unblocked_stack(context_stacks, scheduled_stacks, io, &blocked_count);
+ iter = scheduled_stacks.begin();
+ }
+ if (canceled) {
+ break;
+ }
+
+ // wrap around to whatever was queued while this stack was processed
+ if (iter == scheduled_stacks.end()) {
+ iter = scheduled_stacks.begin();
+ }
+ }
+
+ // stacks left over without a shutdown request can never make progress
+ if (!context_stacks.empty() && !going_down) {
+ JSONFormatter formatter(true);
+ formatter.open_array_section("context_stacks");
+ for (auto& s : context_stacks) {
+ ::encode_json("entry", *s, &formatter);
+ }
+ formatter.close_section();
+ lderr(cct) << __func__ << "(): ERROR: deadlock detected, dumping remaining coroutines:\n";
+ formatter.flush(*_dout);
+ *_dout << dendl;
+ // the enclosing condition just failed this predicate, so this fires
+ // unless going_down was set in the meantime
+ ceph_assert(context_stacks.empty() || going_down); // assert on deadlock
+ }
+
+ for (auto stack : context_stacks) {
+ ldout(cct, 20) << "clearing stack on run() exit: stack=" << (void *)stack << " nref=" << stack->get_nref() << dendl;
+ stack->cancel();
+ }
+ run_contexts.erase(run_context);
+ lock.unlock();
+
+ return ret;
+}
+
+// Convenience wrapper: run a single coroutine on a freshly allocated stack.
+// A null `op` is a no-op and yields 0. Otherwise returns the negative result
+// of run(stacks) if that failed, else the coroutine's own return status.
+// A reference on `op` is held for the duration of the call.
+int RGWCoroutinesManager::run(RGWCoroutine *op)
+{
+  if (op == nullptr) {
+    return 0;
+  }
+
+  RGWCoroutinesStack *root = allocate_stack();
+  op->get();
+  root->call(op);
+
+  list<RGWCoroutinesStack *> to_run;
+  to_run.push_back(root);
+
+  int result = run(to_run);
+  if (result >= 0) {
+    result = op->get_ret_status();
+  } else {
+    ldout(cct, 20) << "run(stacks) returned r=" << result << dendl;
+  }
+  op->put();
+
+  return result;
+}
+
+// Allocate a completion notifier bound to `stack` under a newly drawn io id
+// and register it with the completion manager. The caller receives the raw
+// pointer returned by new.
+RGWAioCompletionNotifier *RGWCoroutinesManager::create_completion_notifier(RGWCoroutinesStack *stack)
+{
+  const rgw_io_id new_id{get_next_io_id(), -1};
+  RGWAioCompletionNotifier *notifier =
+    new RGWAioCompletionNotifier(completion_mgr, new_id, (void *)stack);
+  completion_mgr->register_completion_notifier(notifier);
+  return notifier;
+}
+
+// Emit every run context (id plus the stacks it tracks) as a "run_contexts"
+// array. Takes the manager lock for read while iterating.
+void RGWCoroutinesManager::dump(Formatter *f) const {
+  RWLock::RLocker rl(lock);
+
+  f->open_array_section("run_contexts");
+  for (const auto& ctx : run_contexts) {
+    f->open_object_section("context");
+    ::encode_json("id", ctx.first, f);
+    f->open_array_section("entries");
+    for (RGWCoroutinesStack *st : ctx.second) {
+      ::encode_json("entry", *st, f);
+    }
+    f->close_section(); // entries
+    f->close_section(); // context
+  }
+  f->close_section(); // run_contexts
+}
+
+// Create a new, empty coroutine stack owned by this manager's context.
+// The stack is heap-allocated; run() drops a reference on it via put() once
+// the stack is done.
+RGWCoroutinesStack *RGWCoroutinesManager::allocate_stack() {
+ return new RGWCoroutinesStack(cct, this);
+}
+
+// Identifier used for this manager: the configured `id` when one is set,
+// otherwise the manager's address formatted as a pointer.
+string RGWCoroutinesManager::get_id()
+{
+  if (id.empty()) {
+    stringstream addr;
+    addr << (void *)this;
+    return addr.str();
+  }
+  return id;
+}
+
+// Track `mgr`; the registry takes a reference on itself for every manager
+// that was not already registered.
+void RGWCoroutinesManagerRegistry::add(RGWCoroutinesManager *mgr)
+{
+  RWLock::WLocker wl(lock);
+  const bool newly_added = managers.insert(mgr).second;
+  if (newly_added) {
+    get();
+  }
+}
+
+// Stop tracking `mgr` and drop the self-reference taken by add(). Removing a
+// manager that is not registered does nothing.
+void RGWCoroutinesManagerRegistry::remove(RGWCoroutinesManager *mgr)
+{
+  RWLock::WLocker wl(lock);
+  const bool was_registered = managers.erase(mgr) > 0;
+  if (was_registered) {
+    put();
+  }
+}
+
+// Unhook the admin socket command, if one was installed through
+// hook_to_admin_command().
+RGWCoroutinesManagerRegistry::~RGWCoroutinesManagerRegistry()
+{
+  AdminSocket *const sock = cct->get_admin_socket();
+  if (admin_command.empty()) {
+    return;
+  }
+  sock->unregister_command(admin_command);
+}
+
+// Install this registry as the handler of admin socket command `command`,
+// replacing any command it was previously hooked to.
+//
+// Returns 0 on success or the negative error from register_command().
+//
+// admin_command is only updated once registration has succeeded. Previously
+// it was set before registering, so after a failed registration the
+// destructor (or the next hook_to_admin_command() call) would unregister,
+// by name, a command this object never managed to register.
+int RGWCoroutinesManagerRegistry::hook_to_admin_command(const string& command)
+{
+  AdminSocket *admin_socket = cct->get_admin_socket();
+  if (!admin_command.empty()) {
+    admin_socket->unregister_command(admin_command);
+    // nothing is hooked any more until the new registration succeeds
+    admin_command.clear();
+  }
+  int r = admin_socket->register_command(command, command, this,
+                                         "dump current coroutines stack state");
+  if (r < 0) {
+    lderr(cct) << "ERROR: fail to register admin socket command (r=" << r << ")" << dendl;
+    return r;
+  }
+  admin_command = command;
+  return 0;
+}
+
+// Admin socket entry point: serialize the whole registry as JSON under the
+// key "cr_managers" and append it to `out`. The command, cmdmap and format
+// arguments are not consulted. Always returns true.
+bool RGWCoroutinesManagerRegistry::call(std::string_view command,
+                                        const cmdmap_t& cmdmap,
+                                        std::string_view format,
+                                        bufferlist& out) {
+  RWLock::RLocker rl(lock);
+  stringstream json;
+  JSONFormatter fmt;
+  ::encode_json("cr_managers", *this, &fmt);
+  fmt.flush(json);
+  out.append(json);
+  return true;
+}
+
+// Dump every registered manager as an entry of a "coroutine_managers" array.
+// Does not lock by itself; call() takes the read lock before encoding.
+void RGWCoroutinesManagerRegistry::dump(Formatter *f) const {
+  f->open_array_section("coroutine_managers");
+  for (RGWCoroutinesManager *mgr : managers) {
+    ::encode_json("entry", *mgr, f);
+  }
+  f->close_section();
+}
+
+// Run `op` on the stack this coroutine lives on. A null `op` is treated as
+// an immediately successful call: retcode is reset to 0 because the caller
+// expects call() to leave a return code behind.
+void RGWCoroutine::call(RGWCoroutine *op)
+{
+  if (!op) {
+    // the call()er expects this to set a retcode
+    retcode = 0;
+    return;
+  }
+  stack->call(op);
+}
+
+// Start `op` on a separate stack, with this coroutine as the spawning source;
+// `wait` is passed through to the stack. Returns the stack's spawn() result.
+RGWCoroutinesStack *RGWCoroutine::spawn(RGWCoroutine *op, bool wait)
+{
+ return stack->spawn(this, op, wait);
+}
+
+// Collect results of this coroutine's spawned stacks via the owning stack;
+// `skip_stack` is forwarded unchanged.
+bool RGWCoroutine::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
+{
+ return stack->collect(this, ret, skip_stack);
+}
+
+// Collect a single spawned stack via the owning stack; the collected stack,
+// if any, is reported through `collected_stack`.
+bool RGWCoroutine::collect_next(int *ret, RGWCoroutinesStack **collected_stack) /* returns true if found a stack to collect */
+{
+ return stack->collect_next(this, ret, collected_stack);
+}
+
+// Ask the owning stack to wait for `interval`; returns the stack's result.
+int RGWCoroutine::wait(const utime_t& interval)
+{
+ return stack->wait(interval);
+}
+
+// Mark the owning stack as waiting for a child, but only when that makes
+// sense: there is at least one spawned stack and none of them has finished
+// yet (a finished child can be collected right away instead).
+void RGWCoroutine::wait_for_child()
+{
+  if (spawned.entries.empty()) {
+    return;
+  }
+  for (RGWCoroutinesStack *child : spawned.entries) {
+    if (child->is_done()) {
+      return;
+    }
+  }
+  stack->set_wait_for_child(true);
+}
+
+// Default textual name of the coroutine: the implementation-defined
+// typeid name of its dynamic type. Virtual, so subclasses may override it.
+string RGWCoroutine::to_str() const
+{
+ return typeid(*this).name();
+}
+
+// Stream a short tag for a coroutine: owning stack address, coroutine
+// address and the typeid name of its dynamic type.
+ostream& operator<<(ostream& out, const RGWCoroutine& cr)
+{
+  out << "cr:s=" << (void *)cr.get_stack();
+  out << ":op=" << (void *)&cr;
+  out << ":" << typeid(cr).name();
+  return out;
+}
+
+// Wait for and collect spawned children until at most `num_cr_left` of them
+// remain. When `skip_stack` is given and num_cr_left is 0, one stack is
+// allowed to remain (the skipped one is never collected).
+//
+// This is a resumable routine built on the drain_cr stackless coroutine:
+// each call continues after the previous yield. It returns false while it
+// has yielded to wait for a child and true once the loop has finished, which
+// is what the drain_all*() macros poll for via yield_until_true().
+// Errors reported by collect() are logged and recorded with log_error(), but
+// do not stop the draining.
+bool RGWCoroutine::drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack)
+{
+ bool done = false;
+ ceph_assert(num_cr_left >= 0);
+ if (num_cr_left == 0 && skip_stack) {
+ num_cr_left = 1;
+ }
+ reenter(&drain_cr) {
+ while (num_spawned() > (size_t)num_cr_left) {
+ yield wait_for_child();
+ int ret;
+ while (collect(&ret, skip_stack)) {
+ if (ret < 0) {
+ ldout(cct, 10) << "collect() returned ret=" << ret << dendl;
+ /* we should have reported this error */
+ log_error() << "ERROR: collect() returned error (ret=" << ret << ")";
+ }
+ }
+ }
+ done = true;
+ }
+ return done;
+}
+
+// Wake up the stack this coroutine runs on.
+void RGWCoroutine::wakeup()
+{
+ stack->wakeup();
+}
+
+// Environment of the owning stack (see RGWCoroutinesStack::get_env()).
+RGWCoroutinesEnv *RGWCoroutine::get_env() const
+{
+ return stack->get_env();
+}
+
+// Dump this coroutine: optional description, type name, addresses of the
+// spawned stacks, status history and the current status with its timestamp.
+// Empty parts are omitted.
+void RGWCoroutine::dump(Formatter *f) const {
+  const string desc = description.str();
+  if (!desc.empty()) {
+    encode_json("description", desc, f);
+  }
+  encode_json("type", to_str(), f);
+  if (!spawned.entries.empty()) {
+    f->open_array_section("spawned");
+    for (RGWCoroutinesStack *child : spawned.entries) {
+      // stacks are identified by their address only
+      char addr[32];
+      snprintf(addr, sizeof(addr), "%p", (void *)child);
+      encode_json("stack", string(addr), f);
+    }
+    f->close_section();
+  }
+  if (!status.history.empty()) {
+    encode_json("history", status.history, f);
+  }
+
+  const string current = status.status.str();
+  if (!current.empty()) {
+    f->open_object_section("status");
+    encode_json("status", current, f);
+    encode_json("timestamp", status.timestamp, f);
+    f->close_section();
+  }
+}
+
+// Make sure request_cleanup() ran at least once. Note that a virtual call
+// made from a destructor resolves to this class's own request_cleanup(),
+// not to an override in a derived class.
+RGWSimpleCoroutine::~RGWSimpleCoroutine()
+{
+  if (called_cleanup) {
+    return;
+  }
+  request_cleanup();
+}
+
+// Run request_cleanup() and remember that it ran, so the destructor does not
+// invoke it again.
+void RGWSimpleCoroutine::call_cleanup()
+{
+ called_cleanup = true;
+ request_cleanup();
+}
+
+// State machine of a simple request coroutine, resumed once per phase:
+// init -> send request -> request complete -> all complete. Each
+// `yield return` hands that phase's result to the caller and resumes at the
+// next phase on the following call. After the last phase all spawned
+// children are drained, cleanup is invoked and the coroutine is marked done
+// with status `ret` (always 0 here). The state_*() helpers switch the
+// coroutine to the error state themselves when a phase fails.
+int RGWSimpleCoroutine::operate()
+{
+ int ret = 0;
+ reenter(this) {
+ yield return state_init();
+ yield return state_send_request();
+ yield return state_request_complete();
+ yield return state_all_complete();
+ drain_all();
+ call_cleanup();
+ return set_state(RGWCoroutine_Done, ret);
+ }
+ return 0;
+}
+
+// Phase 1: run init(). On failure, clean up and move to the error state,
+// returning the error; otherwise return 0.
+int RGWSimpleCoroutine::state_init()
+{
+  const int r = init();
+  if (r >= 0) {
+    return 0;
+  }
+  call_cleanup();
+  return set_state(RGWCoroutine_Error, r);
+}
+
+// Phase 2: issue the request via send_request(). On failure, clean up and
+// move to the error state; on success block on IO until the request
+// completes.
+int RGWSimpleCoroutine::state_send_request()
+{
+  const int r = send_request();
+  if (r >= 0) {
+    return io_block(0);
+  }
+  call_cleanup();
+  return set_state(RGWCoroutine_Error, r);
+}
+
+// Phase 3: let request_complete() process the finished request. On failure,
+// clean up and move to the error state; otherwise return 0.
+int RGWSimpleCoroutine::state_request_complete()
+{
+  const int r = request_complete();
+  if (r >= 0) {
+    return 0;
+  }
+  call_cleanup();
+  return set_state(RGWCoroutine_Error, r);
+}
+
+// Phase 4: run finish(). On failure, clean up and move to the error state;
+// otherwise return 0.
+int RGWSimpleCoroutine::state_all_complete()
+{
+  const int r = finish();
+  if (r >= 0) {
+    return 0;
+  }
+  call_cleanup();
+  return set_state(RGWCoroutine_Error, r);
+}
+
+
diff --git a/src/rgw/rgw_coroutine.h b/src/rgw/rgw_coroutine.h
new file mode 100644
index 00000000..e8173b3f
--- /dev/null
+++ b/src/rgw/rgw_coroutine.h
@@ -0,0 +1,674 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_COROUTINE_H
+#define CEPH_RGW_COROUTINE_H
+
+#ifdef _ASSERT_H
+#define NEED_ASSERT_H
+#pragma push_macro("_ASSERT_H")
+#endif
+
+#include <boost/asio.hpp>
+#include <boost/intrusive_ptr.hpp>
+
+#ifdef NEED_ASSERT_H
+#pragma pop_macro("_ASSERT_H")
+#endif
+
+#include "include/utime.h"
+#include "common/RefCountedObj.h"
+#include "common/debug.h"
+#include "common/Timer.h"
+#include "common/admin_socket.h"
+
+#include "rgw_common.h"
+#include <boost/asio/coroutine.hpp>
+
+#include <atomic>
+
+#define RGW_ASYNC_OPS_MGR_WINDOW 100
+
+class RGWCoroutinesStack;
+class RGWCoroutinesManager;
+class RGWAioCompletionNotifier;
+
+// Collects IO completions and hands them to the coroutines manager, which
+// pulls them with get_next() / try_get_next(). Only declarations are visible
+// here; the behaviour notes below are limited to what the declarations and
+// their callers show.
+class RGWCompletionManager : public RefCountedObject {
+ friend class RGWCoroutinesManager;
+
+ CephContext *cct;
+
+ // one finished IO: its id plus the opaque pointer supplied when the
+ // notifier was created (the coroutines manager passes the stack)
+ struct io_completion {
+ rgw_io_id io_id;
+ void *user_info;
+ };
+ list<io_completion> complete_reqs;
+ set<rgw_io_id> complete_reqs_set;
+ using NotifierRef = boost::intrusive_ptr<RGWAioCompletionNotifier>;
+ // notifiers registered through register_completion_notifier()
+ set<NotifierRef> cns;
+
+ Mutex lock;
+ Cond cond;
+
+ SafeTimer timer;
+
+ std::atomic<bool> going_down = { false };
+
+ map<void *, void *> waiters;
+
+ class WaitContext;
+
+protected:
+ void _wakeup(void *opaque);
+ void _complete(RGWAioCompletionNotifier *cn, const rgw_io_id& io_id, void *user_info);
+public:
+ explicit RGWCompletionManager(CephContext *_cct);
+ ~RGWCompletionManager() override;
+
+ void complete(RGWAioCompletionNotifier *cn, const rgw_io_id& io_id, void *user_info);
+ // fetch the next completion; the coroutines manager drops its own lock
+ // around get_next(), presumably because it can block -- TODO confirm
+ int get_next(io_completion *io);
+ // non-waiting variant as used by the scheduler loop; returns whether a
+ // completion was stored in *io
+ bool try_get_next(io_completion *io);
+
+ void go_down();
+
+ /*
+ * wait for interval length to complete user_info
+ */
+ void wait_interval(void *opaque, const utime_t& interval, void *user_info);
+ void wakeup(void *opaque);
+
+ void register_completion_notifier(RGWAioCompletionNotifier *cn);
+ void unregister_completion_notifier(RGWAioCompletionNotifier *cn);
+};
+
+/* a single use librados aio completion notifier that hooks into the RGWCompletionManager */
+class RGWAioCompletionNotifier : public RefCountedObject {
+ librados::AioCompletion *c;
+ RGWCompletionManager *completion_mgr;
+ rgw_io_id io_id;
+ void *user_data;
+ // protects `registered`
+ Mutex lock;
+ // true while the notifier is still expected to report to completion_mgr
+ bool registered;
+
+public:
+ RGWAioCompletionNotifier(RGWCompletionManager *_mgr, const rgw_io_id& _io_id, void *_user_data);
+ // Releases the aio completion and, if the notifier is still registered,
+ // unregisters it from the completion manager. A temporary reference on the
+ // manager is held across the unregister call, which runs outside `lock`.
+ ~RGWAioCompletionNotifier() override {
+ c->release();
+ lock.Lock();
+ bool need_unregister = registered;
+ if (registered) {
+ completion_mgr->get();
+ }
+ registered = false;
+ lock.Unlock();
+ if (need_unregister) {
+ completion_mgr->unregister_completion_notifier(this);
+ completion_mgr->put();
+ }
+ }
+
+ librados::AioCompletion *completion() {
+ return c;
+ }
+
+ // Mark the notifier as no longer registered; a later cb() then only drops
+ // the reference and does not report the completion.
+ void unregister() {
+ Mutex::Locker l(lock);
+ if (!registered) {
+ return;
+ }
+ registered = false;
+ }
+
+ // Completion callback. Reports the completion to the manager at most once
+ // (the registered flag is cleared under the lock before reporting) and
+ // always drops one reference on this notifier.
+ void cb() {
+ lock.Lock();
+ if (!registered) {
+ lock.Unlock();
+ put();
+ return;
+ }
+ // keep the manager alive while calling into it without holding `lock`
+ completion_mgr->get();
+ registered = false;
+ lock.Unlock();
+ completion_mgr->complete(this, io_id, user_data);
+ completion_mgr->put();
+ put();
+ }
+};
+
+// completion notifier with opaque payload (ie a reference-counted pointer)
+// T is stored by value (moved in) and lives exactly as long as the notifier;
+// the class adds no behaviour beyond holding it.
+template <typename T>
+class RGWAioCompletionNotifierWith : public RGWAioCompletionNotifier {
+ T value;
+public:
+ RGWAioCompletionNotifierWith(RGWCompletionManager *mgr,
+ const rgw_io_id& io_id, void *user_data,
+ T value)
+ : RGWAioCompletionNotifier(mgr, io_id, user_data), value(std::move(value))
+ {}
+};
+
+// Per-run() scheduling environment handed to every stack->operate() call.
+struct RGWCoroutinesEnv {
+ uint64_t run_context; // id of the run() invocation this env belongs to
+ RGWCoroutinesManager *manager; // manager executing the run
+ list<RGWCoroutinesStack *> *scheduled_stacks; // run queue local to that run()
+ RGWCoroutinesStack *stack; // stack currently being operated
+
+ RGWCoroutinesEnv() : run_context(0), manager(NULL), scheduled_stacks(NULL), stack(NULL) {}
+};
+
+// Coroutine life-cycle states as stored in RGWCoroutine::state.
+// Both Error and Done count as finished (see RGWCoroutine::is_done()).
+enum RGWCoroutineState {
+ RGWCoroutine_Error = -2,
+ RGWCoroutine_Done = -1,
+ RGWCoroutine_Run = 0,
+};
+
+// Ordered collection of the stacks spawned by a coroutine or stack.
+struct rgw_spawned_stacks {
+  vector<RGWCoroutinesStack *> entries;
+
+  rgw_spawned_stacks() {}
+
+  // Append a spawned stack that still has to be collected.
+  void add_pending(RGWCoroutinesStack *s) {
+    entries.push_back(s);
+  }
+
+  // Take over every entry of `source`, preserving order, and leave `source`
+  // empty.
+  void inherit(rgw_spawned_stacks *source) {
+    for (RGWCoroutinesStack *moved : source->entries) {
+      add_pending(moved);
+    }
+    source->entries.clear();
+  }
+};
+
+
+
+// Base class of all RGW coroutines: a ref-counted, stackless
+// (boost::asio::coroutine) unit of work that is executed on an
+// RGWCoroutinesStack through operate().
+class RGWCoroutine : public RefCountedObject, public boost::asio::coroutine {
+ friend class RGWCoroutinesStack;
+
+ // one past status message together with the time it was recorded
+ struct StatusItem {
+ utime_t timestamp;
+ string status;
+
+ StatusItem(utime_t& t, const string& s) : timestamp(t), status(s) {}
+
+ void dump(Formatter *f) const;
+ };
+
+#define MAX_COROUTINE_HISTORY 10
+
+ // current status text plus a history of earlier ones; max_history is
+ // initialised to MAX_COROUTINE_HISTORY
+ struct Status {
+ CephContext *cct;
+ RWLock lock;
+ int max_history;
+
+ utime_t timestamp;
+ stringstream status;
+
+ explicit Status(CephContext *_cct) : cct(_cct), lock("RGWCoroutine::Status::lock"), max_history(MAX_COROUTINE_HISTORY) {}
+
+ deque<StatusItem> history;
+
+ stringstream& set_status();
+ } status;
+
+ stringstream description;
+
+protected:
+ // scratch flag used by the yield_until_true() macro
+ bool _yield_ret;
+ // coroutine state used by drain_children() / the drain_all*() macros
+ boost::asio::coroutine drain_cr;
+
+ CephContext *cct;
+
+ // stack this coroutine currently runs on (NULL until it is placed on one)
+ RGWCoroutinesStack *stack;
+ int retcode;
+ // one of RGWCoroutineState
+ int state;
+
+ rgw_spawned_stacks spawned;
+
+ stringstream error_stream;
+
+ // Record state and return code in one go; returns `ret` so callers can
+ // write `return set_state(...)`.
+ int set_state(int s, int ret = 0) {
+ retcode = ret;
+ state = s;
+ return ret;
+ }
+ int set_cr_error(int ret) {
+ return set_state(RGWCoroutine_Error, ret);
+ }
+ int set_cr_done() {
+ return set_state(RGWCoroutine_Done, 0);
+ }
+ void set_io_blocked(bool flag);
+
+ void reset_description() {
+ description.str(string());
+ }
+
+ // returns the description stream for the caller to append to
+ stringstream& set_description() {
+ return description;
+ }
+ stringstream& set_status() {
+ return status.set_status();
+ }
+
+ stringstream& set_status(const string& s) {
+ stringstream& status = set_status();
+ status << s;
+ return status;
+ }
+
+ virtual int operate_wrapper() {
+ return operate();
+ }
+public:
+ RGWCoroutine(CephContext *_cct) : status(_cct), _yield_ret(false), cct(_cct), stack(NULL), retcode(0), state(RGWCoroutine_Run) {}
+ ~RGWCoroutine() override;
+
+ // the coroutine body; implemented by subclasses
+ virtual int operate() = 0;
+
+ bool is_done() { return (state == RGWCoroutine_Done || state == RGWCoroutine_Error); }
+ bool is_error() { return (state == RGWCoroutine_Error); }
+
+ stringstream& log_error() { return error_stream; }
+ string error_str() {
+ return error_stream.str();
+ }
+
+ void set_retcode(int r) {
+ retcode = r;
+ }
+
+ int get_ret_status() {
+ return retcode;
+ }
+
+ void call(RGWCoroutine *op); /* call at the same stack we're in */
+ RGWCoroutinesStack *spawn(RGWCoroutine *op, bool wait); /* execute on a different stack */
+ bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
+ bool collect_next(int *ret, RGWCoroutinesStack **collected_stack = NULL); /* returns true if found a stack to collect */
+
+ int wait(const utime_t& interval);
+ bool drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack = NULL); /* returns true once draining has finished, false while it needs to be called again */
+ void wakeup();
+ void set_sleeping(bool flag); /* put in sleep, or wakeup from sleep */
+
+ size_t num_spawned() {
+ return spawned.entries.size();
+ }
+
+ void wait_for_child();
+
+ virtual string to_str() const;
+
+ RGWCoroutinesStack *get_stack() const {
+ return stack;
+ }
+
+ RGWCoroutinesEnv *get_env() const;
+
+ void dump(Formatter *f) const;
+
+ void init_new_io(RGWIOProvider *io_provider); /* only links the default io id */
+
+ // io_block() without an explicit id blocks on io id -1
+ int io_block(int ret = 0) {
+ return io_block(ret, -1);
+ }
+ int io_block(int ret, int64_t io_id);
+ int io_block(int ret, const rgw_io_id& io_id);
+ // io_complete() without an id reports a default-constructed rgw_io_id
+ void io_complete() {
+ io_complete(rgw_io_id{});
+ }
+ void io_complete(const rgw_io_id& io_id);
+};
+
+ostream& operator<<(ostream& out, const RGWCoroutine& cr);
+
+// Helper macros for use inside a coroutine's reenter() body.
+//
+// yield_until_true(x): evaluate x once per resume, yielding after each
+// evaluation, until it returns true; _yield_ret is reset afterwards.
+#define yield_until_true(x) \
+do { \
+ do { \
+ yield _yield_ret = x; \
+ } while (!_yield_ret); \
+ _yield_ret = false; \
+} while (0)
+
+// drain_all(): restart drain_cr and wait until no spawned child is left.
+// NOTE: this and the two macros below expand to two statements, so they must
+// not be used as the unbraced body of an if/else/loop.
+#define drain_all() \
+ drain_cr = boost::asio::coroutine(); \
+ yield_until_true(drain_children(0))
+
+// drain_all_but(n): as drain_all(), but leave at most n children running.
+#define drain_all_but(n) \
+ drain_cr = boost::asio::coroutine(); \
+ yield_until_true(drain_children(n))
+
+// drain_all_but_stack(stack): drain every child except the given stack.
+#define drain_all_but_stack(stack) \
+ drain_cr = boost::asio::coroutine(); \
+ yield_until_true(drain_children(1, stack))
+
+// Coroutine that consumes items of type T queued for it through receive().
+template <class T>
+class RGWConsumerCR : public RGWCoroutine {
+  // items received and not yet consumed, oldest first
+  list<T> product;
+
+public:
+  explicit RGWConsumerCR(CephContext *_cct) : RGWCoroutine(_cct) {}
+
+  // True when at least one item is waiting to be consumed.
+  bool has_product() {
+    return !product.empty();
+  }
+
+  // Go to sleep unless there is already something to consume.
+  void wait_for_product() {
+    if (has_product()) {
+      return;
+    }
+    set_sleeping(true);
+  }
+
+  // Pop the oldest item into *p. Returns false (leaving *p untouched) when
+  // nothing is queued.
+  bool consume(T *p) {
+    if (!has_product()) {
+      return false;
+    }
+    *p = product.front();
+    product.pop_front();
+    return true;
+  }
+
+  void receive(const T& p, bool wakeup = true);
+  void receive(list<T>& l, bool wakeup = true);
+};
+
+// A stack of coroutines that is scheduled as one unit by
+// RGWCoroutinesManager. Besides the call chain (`ops`) it carries the flags
+// the scheduler inspects after every operate() step.
+class RGWCoroutinesStack : public RefCountedObject {
+ friend class RGWCoroutine;
+ friend class RGWCoroutinesManager;
+
+ CephContext *cct;
+
+ RGWCoroutinesManager *ops_mgr;
+
+ list<RGWCoroutine *> ops;
+ list<RGWCoroutine *>::iterator pos;
+
+ rgw_spawned_stacks spawned;
+
+ // stacks this one waits for / stacks waiting for this one; the two sets
+ // are kept in sync by set_blocked_by()
+ set<RGWCoroutinesStack *> blocked_by_stack;
+ set<RGWCoroutinesStack *> blocking_stacks;
+
+ map<int64_t, rgw_io_id> io_finish_ids;
+ rgw_io_id io_blocked_id;
+
+ bool done_flag;
+ bool error_flag;
+ bool blocked_flag;
+ bool sleep_flag;
+ bool interval_wait_flag;
+
+ bool is_scheduled;
+
+ bool is_waiting_for_child;
+
+ int retcode;
+
+ // consecutive non-blocking runs; maintained by RGWCoroutinesManager::run()
+ uint64_t run_count;
+
+protected:
+ RGWCoroutinesEnv *env;
+ RGWCoroutinesStack *parent;
+
+ RGWCoroutinesStack *spawn(RGWCoroutine *source_op, RGWCoroutine *next_op, bool wait);
+ bool collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
+ bool collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesStack **collected_stack); /* returns true if found a stack to collect */
+public:
+ RGWCoroutinesStack(CephContext *_cct, RGWCoroutinesManager *_ops_mgr, RGWCoroutine *start = NULL);
+ ~RGWCoroutinesStack() override;
+
+ int operate(RGWCoroutinesEnv *env);
+
+ bool is_done() {
+ return done_flag;
+ }
+ bool is_error() {
+ return error_flag;
+ }
+ bool is_blocked_by_stack() {
+ return !blocked_by_stack.empty();
+ }
+ void set_io_blocked(bool flag) {
+ blocked_flag = flag;
+ }
+ void set_io_blocked_id(const rgw_io_id& io_id) {
+ io_blocked_id = io_id;
+ }
+ // a finished stack is never reported as io-blocked
+ bool is_io_blocked() {
+ return blocked_flag && !done_flag;
+ }
+ // a negative blocked id matches any completion; otherwise the ids must
+ // intersect
+ bool can_io_unblock(const rgw_io_id& io_id) {
+ return ((io_blocked_id.id < 0) ||
+ io_blocked_id.intersects(io_id));
+ }
+ bool try_io_unblock(const rgw_io_id& io_id);
+ bool consume_io_finish(const rgw_io_id& io_id);
+ void set_interval_wait(bool flag) {
+ interval_wait_flag = flag;
+ }
+ bool is_interval_waiting() {
+ return interval_wait_flag;
+ }
+ // Update the sleep flag; a sleeping -> awake transition re-schedules the
+ // stack.
+ void set_sleeping(bool flag) {
+ bool wakeup = sleep_flag & !flag;
+ sleep_flag = flag;
+ if (wakeup) {
+ schedule();
+ }
+ }
+ bool is_sleeping() {
+ return sleep_flag;
+ }
+ void set_is_scheduled(bool flag) {
+ is_scheduled = flag;
+ }
+
+ // blocked for any reason: another stack, sleep, IO, or waiting for a child
+ bool is_blocked() {
+ return is_blocked_by_stack() || is_sleeping() ||
+ is_io_blocked() || waiting_for_child() ;
+ }
+
+ // NOTE(review): by analogy with RGWCoroutinesManager::schedule()/_schedule(),
+ // the underscore variant presumably expects the manager lock to be held
+ // already -- confirm against the implementation.
+ void schedule();
+ void _schedule();
+
+ int get_ret_status() {
+ return retcode;
+ }
+
+ string error_str();
+
+ void call(RGWCoroutine *next_op);
+ RGWCoroutinesStack *spawn(RGWCoroutine *next_op, bool wait);
+ int unwind(int retcode);
+
+ int wait(const utime_t& interval);
+ void wakeup();
+ // io_complete() without an id reports a default-constructed rgw_io_id
+ void io_complete() {
+ io_complete(rgw_io_id{});
+ }
+ void io_complete(const rgw_io_id& io_id);
+
+ bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
+
+ void cancel();
+
+ RGWAioCompletionNotifier *create_completion_notifier();
+ template <typename T>
+ RGWAioCompletionNotifier *create_completion_notifier(T value);
+ RGWCompletionManager *get_completion_mgr();
+
+ // Record that this stack waits for `s`, updating both sides of the
+ // relation.
+ void set_blocked_by(RGWCoroutinesStack *s) {
+ blocked_by_stack.insert(s);
+ s->blocking_stacks.insert(this);
+ }
+
+ void set_wait_for_child(bool flag) {
+ is_waiting_for_child = flag;
+ }
+
+ bool waiting_for_child() {
+ return is_waiting_for_child;
+ }
+
+ bool unblock_stack(RGWCoroutinesStack **s);
+
+ RGWCoroutinesEnv *get_env() const { return env; }
+
+ void dump(Formatter *f) const;
+
+ void init_new_io(RGWIOProvider *io_provider);
+};
+
+// Move every element of `l` to the end of the product queue (leaving `l`
+// empty) and, unless `wakeup` is false, wake the coroutine up.
+template <class T>
+void RGWConsumerCR<T>::receive(list<T>& l, bool wakeup)
+{
+  product.splice(product.end(), l);
+  if (!wakeup) {
+    return;
+  }
+  set_sleeping(false);
+}
+
+
+// Queue a copy of `p` and, unless `wakeup` is false, wake the coroutine up.
+template <class T>
+void RGWConsumerCR<T>::receive(const T& p, bool wakeup)
+{
+  product.push_back(p);
+  if (!wakeup) {
+    return;
+  }
+  set_sleeping(false);
+}
+
+// Keeps track of the live coroutine managers and can expose their state as
+// an admin socket command (it is its own AdminSocketHook).
+class RGWCoroutinesManagerRegistry : public RefCountedObject, public AdminSocketHook {
+ CephContext *cct;
+
+ set<RGWCoroutinesManager *> managers;
+ // protects `managers`
+ RWLock lock;
+
+ // name of the admin socket command currently hooked; empty when none
+ string admin_command;
+
+public:
+ explicit RGWCoroutinesManagerRegistry(CephContext *_cct) : cct(_cct), lock("RGWCoroutinesRegistry::lock") {}
+ ~RGWCoroutinesManagerRegistry() override;
+
+ // add()/remove() take and drop a reference on the registry for each
+ // tracked manager
+ void add(RGWCoroutinesManager *mgr);
+ void remove(RGWCoroutinesManager *mgr);
+
+ int hook_to_admin_command(const string& command);
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format, bufferlist& out) override;
+
+ void dump(Formatter *f) const;
+};
+
+// Scheduler for coroutine stacks. Each run() call gets its own run context;
+// IO completions are delivered through the owned RGWCompletionManager.
+class RGWCoroutinesManager {
+ CephContext *cct;
+ // set once by stop(); makes run() wind down
+ std::atomic<bool> going_down = { false };
+
+ // source of run context ids and the stacks tracked per context
+ std::atomic<int64_t> run_context_count = { 0 };
+ map<uint64_t, set<RGWCoroutinesStack *> > run_contexts;
+
+ std::atomic<int64_t> max_io_id = { 0 };
+
+ // protects the scheduling state (run_contexts and the per-run queues)
+ RWLock lock;
+
+ RGWIOIDProvider io_id_provider;
+
+ void handle_unblocked_stack(set<RGWCoroutinesStack *>& context_stacks, list<RGWCoroutinesStack *>& scheduled_stacks,
+ RGWCompletionManager::io_completion& io, int *waiting_count);
+protected:
+ RGWCompletionManager *completion_mgr;
+ RGWCoroutinesManagerRegistry *cr_registry;
+
+ // run() waits for completions once this many stacks are blocked on real
+ // IO; defaults to RGW_ASYNC_OPS_MGR_WINDOW
+ int ops_window;
+
+ string id;
+
+ void put_completion_notifier(RGWAioCompletionNotifier *cn);
+public:
+ // Creates the completion manager and, when a registry is given, registers
+ // this manager with it.
+ RGWCoroutinesManager(CephContext *_cct, RGWCoroutinesManagerRegistry *_cr_registry) : cct(_cct), lock("RGWCoroutinesManager::lock"),
+ cr_registry(_cr_registry), ops_window(RGW_ASYNC_OPS_MGR_WINDOW) {
+ completion_mgr = new RGWCompletionManager(cct);
+ if (cr_registry) {
+ cr_registry->add(this);
+ }
+ }
+ // Stops the manager, drops the reference on the completion manager and
+ // unregisters from the registry.
+ virtual ~RGWCoroutinesManager() {
+ stop();
+ completion_mgr->put();
+ if (cr_registry) {
+ cr_registry->remove(this);
+ }
+ }
+
+ int run(list<RGWCoroutinesStack *>& ops);
+ int run(RGWCoroutine *op);
+ // Idempotent: only the first call flips going_down and tells the
+ // completion manager to go down.
+ void stop() {
+ bool expected = false;
+ if (going_down.compare_exchange_strong(expected, true)) {
+ completion_mgr->go_down();
+ }
+ }
+
+ virtual void report_error(RGWCoroutinesStack *op);
+
+ RGWAioCompletionNotifier *create_completion_notifier(RGWCoroutinesStack *stack);
+ template <typename T>
+ RGWAioCompletionNotifier *create_completion_notifier(RGWCoroutinesStack *stack, T value);
+ RGWCompletionManager *get_completion_mgr() { return completion_mgr; }
+
+ // _schedule() asserts that `lock` is already write-locked by the caller
+ void schedule(RGWCoroutinesEnv *env, RGWCoroutinesStack *stack);
+ void _schedule(RGWCoroutinesEnv *env, RGWCoroutinesStack *stack);
+ RGWCoroutinesStack *allocate_stack();
+
+ int64_t get_next_io_id();
+
+ void set_sleeping(RGWCoroutine *cr, bool flag);
+ void io_complete(RGWCoroutine *cr, const rgw_io_id& io_id);
+
+ virtual string get_id();
+ void dump(Formatter *f) const;
+
+ RGWIOIDProvider& get_io_id_provider() {
+ return io_id_provider;
+ }
+};
+
+// Same as the non-template create_completion_notifier(), but the notifier
+// additionally owns `value` (moved in) for as long as it lives.
+template <typename T>
+RGWAioCompletionNotifier *RGWCoroutinesManager::create_completion_notifier(RGWCoroutinesStack *stack, T value)
+{
+  const rgw_io_id new_id{get_next_io_id(), -1};
+  RGWAioCompletionNotifier *notifier =
+    new RGWAioCompletionNotifierWith<T>(completion_mgr, new_id, (void *)stack, std::move(value));
+  completion_mgr->register_completion_notifier(notifier);
+  return notifier;
+}
+
+// Create a completion notifier for this stack that carries `value`;
+// delegates to the owning manager.
+template <typename T>
+RGWAioCompletionNotifier *RGWCoroutinesStack::create_completion_notifier(T value)
+{
+ return ops_mgr->create_completion_notifier(this, std::move(value));
+}
+
+// Coroutine skeleton for a single asynchronous request. operate() drives the
+// phases init() -> send_request() -> request_complete() -> finish();
+// subclasses implement the hooks below. A negative return from any hook
+// triggers cleanup and puts the coroutine into the error state.
+class RGWSimpleCoroutine : public RGWCoroutine {
+ // set once call_cleanup() has run request_cleanup()
+ bool called_cleanup;
+
+ int operate() override;
+
+ int state_init();
+ int state_send_request();
+ int state_request_complete();
+ int state_all_complete();
+
+ void call_cleanup();
+
+public:
+ RGWSimpleCoroutine(CephContext *_cct) : RGWCoroutine(_cct), called_cleanup(false) {}
+ ~RGWSimpleCoroutine() override;
+
+ virtual int init() { return 0; }
+ virtual int send_request() = 0;
+ virtual int request_complete() = 0;
+ virtual int finish() { return 0; }
+ // invoked after the last phase or on the first failing phase
+ virtual void request_cleanup() {}
+};
+
+#endif
diff --git a/src/rgw/rgw_cors.cc b/src/rgw/rgw_cors.cc
new file mode 100644
index 00000000..bfe83d64
--- /dev/null
+++ b/src/rgw/rgw_cors.cc
@@ -0,0 +1,194 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include <boost/algorithm/string.hpp>
+
+#include "include/types.h"
+#include "common/debug.h"
+#include "include/str_list.h"
+#include "common/Formatter.h"
+
+#include "rgw_cors.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+// Log (at debug level 10) how many origins this rule allows, followed by
+// each origin on its own line.
+void RGWCORSRule::dump_origins() {
+  const unsigned num_origins = allowed_origins.size();
+  dout(10) << "Allowed origins : " << num_origins << dendl;
+  for (const auto& origin : allowed_origins) {
+    dout(10) << origin << "," << dendl;
+  }
+}
+
+// Remove `origin` from the allowed origins if it is listed (exact match).
+// *rule_empty is set to true only when the removal left the rule without
+// any origin, and to false otherwise. Does nothing when rule_empty is null.
+void RGWCORSRule::erase_origin_if_present(string& origin, bool *rule_empty) {
+  if (rule_empty == nullptr) {
+    return;
+  }
+  *rule_empty = false;
+
+  set<string>::iterator found = allowed_origins.find(origin);
+  if (found == allowed_origins.end()) {
+    return;
+  }
+  dout(10) << "Found origin " << origin << ", set size:" <<
+      allowed_origins.size() << dendl;
+  allowed_origins.erase(found);
+  *rule_empty = allowed_origins.empty();
+}
+
+/*
+ * make attrs look-like-this
+ * does not convert underscores or dashes
+ *
+ * Per CORS specification, section 3:
+ * ===
+ * "Converting a string to ASCII lowercase" means replacing all characters in the
+ * range U+0041 LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z with
+ * the corresponding characters in the range U+0061 LATIN SMALL LETTER A to
+ * U+007A LATIN SMALL LETTER Z).
+ * ===
+ *
+ * @todo When UTF-8 is allowed in HTTP headers, this function will need to change
+ */
+// Return a copy of `orig` with every character passed through tolower().
+//
+// The previous implementation copied into a variable-length stack array
+// (a compiler extension whose size was driven by the header length) and
+// passed plain `char` to tolower(), which is undefined for negative values.
+// It also cut the result at the first embedded NUL; the full length is now
+// preserved.
+std::string lowercase_http_attr(const std::string& orig)
+{
+  std::string lowered(orig);
+  for (char& ch : lowered) {
+    // tolower() requires a value representable as unsigned char (or EOF)
+    ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
+  }
+  return lowered;
+}
+
+
+// Return true when `h` is matched by the set `s`: the set contains the
+// catch-all "*", contains `h` verbatim, or contains a pattern with a single
+// wildcard ("prefix*", "*suffix" or "prefix*suffix") that `h` satisfies.
+//
+// Patterns are split on '*', space and tab. Two defects of the previous
+// version are fixed here:
+//  - patterns whose split produces fewer tokens than expected (e.g. "**",
+//    " *" or "a**") made the code call front() on an empty list; such
+//    patterns are now skipped,
+//  - prefix and suffix were allowed to overlap inside `h` (pattern "ab*ba"
+//    matched "aba"); the suffix must now start at or after the end of the
+//    prefix, as the debug message already claimed.
+static bool is_string_in_set(set<string>& s, string h) {
+  if ((s.find("*") != s.end()) ||
+      (s.find(h) != s.end())) {
+    return true;
+  }
+  /* The header can be Content-*-type, or Content-* */
+  for (set<string>::iterator it = s.begin();
+       it != s.end(); ++it) {
+    size_t off;
+    if ((off = (*it).find("*")) != string::npos) {
+      list<string> ssplit;
+      unsigned flen = 0;
+
+      get_str_list((*it), "* \t", ssplit);
+      if (off != 0) {
+        /* something precedes the wildcard: it must be a prefix of h */
+        if (ssplit.empty())
+          continue;
+        string sl = ssplit.front();
+        flen = sl.length();
+        dout(10) << "Finding " << sl << ", in " << h << ", at offset 0" << dendl;
+        if (!boost::algorithm::starts_with(h, sl))
+          continue;
+        ssplit.pop_front();
+      }
+      if (off != ((*it).length() - 1)) {
+        /* something follows the wildcard: it must be a suffix of h */
+        if (ssplit.empty())
+          continue;
+        string sl = ssplit.front();
+        dout(10) << "Finding " << sl << ", in " << h
+                 << ", at offset not less than " << flen << dendl;
+        if (h.size() < flen + sl.size() ||
+            h.compare((h.size() - sl.size()), sl.size(), sl) != 0)
+          continue;
+        ssplit.pop_front();
+      }
+      /* leftover tokens mean more than one wildcard/separator: no match */
+      if (!ssplit.empty())
+        continue;
+      return true;
+    }
+  }
+  return false;
+}
+
+// True when the rule lists the catch-all origin "*".
+bool RGWCORSRule::has_wildcard_origin() {
+  return allowed_origins.find("*") != allowed_origins.end();
+}
+
+// True when origin `o` (a NUL-terminated string) is matched by one of the
+// rule's allowed origins, wildcard patterns included.
+bool RGWCORSRule::is_origin_present(const char *o) {
+  return is_string_in_set(allowed_origins, string(o));
+}
+
+// True when the header name h[0..len) is allowed by this rule. Matching is
+// done on lowercased names; the lowercased copy of the allowed headers is
+// built on first use and then reused.
+bool RGWCORSRule::is_header_allowed(const char *h, size_t len) {
+  string hdr(h, len);
+  if (lowercase_allowed_hdrs.empty()) {
+    for (const auto& allowed : allowed_hdrs) {
+      lowercase_allowed_hdrs.insert(lowercase_http_attr(allowed));
+    }
+  }
+  const string wanted = lowercase_http_attr(hdr);
+  return is_string_in_set(lowercase_allowed_hdrs, wanted);
+}
+
+// Build the comma-separated list of exposable headers in `s` (any previous
+// content is discarded).
+void RGWCORSRule::format_exp_headers(string& s) {
+  s.clear();
+  for (const auto& header : exposable_hdrs) {
+    if (!s.empty()) {
+      s.append(",");
+    }
+    // these values are sent to clients in a 'Access-Control-Expose-Headers'
+    // response header, so we escape '\n' and '\r' to avoid header injection
+    const std::string lf_escaped = boost::replace_all_copy(header, "\n", "\\n");
+    boost::replace_all_copy(std::back_inserter(s), lf_escaped, "\r", "\\r");
+  }
+}
+
+// Return the first rule whose allowed origins match `origin`, or NULL when
+// no rule matches.
+RGWCORSRule * RGWCORSConfiguration::host_name_rule(const char *origin) {
+  for (RGWCORSRule& rule : rules) {
+    if (rule.is_origin_present(origin)) {
+      return &rule;
+    }
+  }
+  return NULL;
+}
+
+// Remove `origin` from the rules, in order. The first rule that ends up with
+// no origins left is erased from the configuration, and processing stops
+// there.
+void RGWCORSConfiguration::erase_host_name_rule(string& origin) {
+  bool rule_empty;
+  unsigned rule_num = 0;
+  /*Erase the host name from that rule*/
+  dout(10) << "Num of rules : " << rules.size() << dendl;
+  for (auto it = rules.begin(); it != rules.end(); ++it, ++rule_num) {
+    it->erase_origin_if_present(origin, &rule_empty);
+    dout(10) << "Origin:" << origin << ", rule num:"
+      << rule_num << ", emptying now:" << rule_empty << dendl;
+    if (!rule_empty) {
+      continue;
+    }
+    rules.erase(it);
+    break;
+  }
+}
+
+// Log (at debug level 10) the number of rules and the origins of each rule,
+// numbering the rules from 1.
+void RGWCORSConfiguration::dump() {
+  const unsigned num_rules = rules.size();
+  dout(10) << "Number of rules: " << num_rules << dendl;
+  unsigned rule_no = 1;
+  for (RGWCORSRule& rule : rules) {
+    dout(10) << " <<<<<<< Rule " << rule_no << " >>>>>>> " << dendl;
+    rule.dump_origins();
+    ++rule_no;
+  }
+}
diff --git a/src/rgw/rgw_cors.h b/src/rgw/rgw_cors.h
new file mode 100644
index 00000000..62e34d45
--- /dev/null
+++ b/src/rgw/rgw_cors.h
@@ -0,0 +1,136 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_CORS_H
+#define CEPH_RGW_CORS_H
+
+#include <map>
+#include <string>
+#include <include/types.h>
+
+#define RGW_CORS_GET 0x1
+#define RGW_CORS_PUT 0x2
+#define RGW_CORS_HEAD 0x4
+#define RGW_CORS_POST 0x8
+#define RGW_CORS_DELETE 0x10
+#define RGW_CORS_COPY 0x20
+#define RGW_CORS_ALL (RGW_CORS_GET | \
+ RGW_CORS_PUT | \
+ RGW_CORS_HEAD | \
+ RGW_CORS_POST | \
+ RGW_CORS_DELETE | \
+ RGW_CORS_COPY)
+
+#define CORS_MAX_AGE_INVALID ((uint32_t)-1)
+
+/*
+ * One CORS rule: a max-age, a bitmask of RGW_CORS_* methods, an optional id,
+ * and the sets/lists of allowed headers, allowed origins and exposable
+ * headers. Instances are serialised with encode()/decode().
+ */
+class RGWCORSRule
+{
+protected:
+ /* CORS_MAX_AGE_INVALID until a value is supplied (see default ctor) */
+ uint32_t max_age;
+ /* bitwise OR of the RGW_CORS_* flags */
+ uint8_t allowed_methods;
+ std::string id;
+ std::set<string> allowed_hdrs; /* If you change this, you need to discard lowercase_allowed_hdrs */
+ std::set<string> lowercase_allowed_hdrs; /* Not built until needed in RGWCORSRule::is_header_allowed */
+ std::set<string> allowed_origins;
+ std::list<string> exposable_hdrs;
+
+public:
+ /* Empty rule: no methods allowed, max-age unset. */
+ RGWCORSRule() : max_age(CORS_MAX_AGE_INVALID),allowed_methods(0) {}
+ /*
+  * o: allowed origins, h: allowed headers, e: exposable headers,
+  * f: RGW_CORS_* method bitmask, a: max-age. The containers are copied.
+  */
+ RGWCORSRule(std::set<string>& o, std::set<string>& h,
+ std::list<string>& e, uint8_t f, uint32_t a)
+ :max_age(a),
+ allowed_methods(f),
+ allowed_hdrs(h),
+ allowed_origins(o),
+ exposable_hdrs(e) {}
+ virtual ~RGWCORSRule() {}
+
+ std::string& get_id() { return id; }
+ uint32_t get_max_age() { return max_age; }
+ uint8_t get_allowed_methods() { return allowed_methods; }
+
+ /*
+  * Serialise the rule. The field order here mirrors decode() below;
+  * lowercase_allowed_hdrs is not written.
+  */
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(max_age, bl);
+ encode(allowed_methods, bl);
+ encode(id, bl);
+ encode(allowed_hdrs, bl);
+ encode(allowed_origins, bl);
+ encode(exposable_hdrs, bl);
+ ENCODE_FINISH(bl);
+ }
+ /* Read the fields back in the same order encode() wrote them. */
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(max_age, bl);
+ decode(allowed_methods, bl);
+ decode(id, bl);
+ decode(allowed_hdrs, bl);
+ decode(allowed_origins, bl);
+ decode(exposable_hdrs, bl);
+ DECODE_FINISH(bl);
+ }
+ bool has_wildcard_origin();
+ bool is_origin_present(const char *o);
+ /* Overwrites s with the comma-separated exposable headers. */
+ void format_exp_headers(std::string& s);
+ void erase_origin_if_present(std::string& origin, bool *rule_empty);
+ void dump_origins();
+ void dump(Formatter *f) const;
+ bool is_header_allowed(const char *hdr, size_t len);
+};
+WRITE_CLASS_ENCODER(RGWCORSRule)
+
+/*
+ * An ordered list of RGWCORSRule objects together with helpers to look up,
+ * add, remove and serialise rules.
+ */
+class RGWCORSConfiguration
+{
+ protected:
+ std::list<RGWCORSRule> rules;
+ public:
+ RGWCORSConfiguration() {}
+ ~RGWCORSConfiguration() {}
+
+ /* Serialise the rule list; decode() reads the same single field. */
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(rules, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(rules, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ /* Mutable access to the underlying list. */
+ std::list<RGWCORSRule>& get_rules() {
+ return rules;
+ }
+ bool is_empty() {
+ return rules.empty();
+ }
+ void get_origins_list(const char *origin, std::list<string>& origins);
+ /* First rule for which is_origin_present(origin) holds, or NULL. */
+ RGWCORSRule * host_name_rule(const char *origin);
+ void erase_host_name_rule(std::string& origin);
+ void dump();
+ /* Prepend a copy of r, so it is consulted before existing rules. */
+ void stack_rule(RGWCORSRule& r) {
+ rules.push_front(r);
+ }
+};
+WRITE_CLASS_ENCODER(RGWCORSConfiguration)
+
+/*
+ * Validate an origin/header pattern: it must be non-empty and contain at
+ * most one '*'. Returns 0 when acceptable and -1 otherwise. The argument is
+ * only read, so it is taken by const reference.
+ */
+static inline int validate_name_string(const std::string& o) {
+  if (o.empty())
+    return -1;
+  // first and last occurrence differ only when there are two or more '*'
+  if (o.find('*') != o.rfind('*'))
+    return -1;
+  return 0;
+}
+#endif /*CEPH_RGW_CORS_H*/
diff --git a/src/rgw/rgw_cors_s3.cc b/src/rgw/rgw_cors_s3.cc
new file mode 100644
index 00000000..fe7bd438
--- /dev/null
+++ b/src/rgw/rgw_cors_s3.cc
@@ -0,0 +1,245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <string.h>
+#include <limits.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "rgw_cors_s3.h"
+#include "rgw_user.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+
+/*
+ * Write this rule to f as a <CORSRule> section: optional ID, one
+ * AllowedMethod per set flag, the allowed origins and headers, the optional
+ * MaxAgeSeconds and finally the expose headers.
+ */
+void RGWCORSRule_S3::to_xml(XMLFormatter& f) {
+  // Method flags listed in the order their elements are emitted.
+  static const struct {
+    uint8_t flag;
+    const char *name;
+  } method_names[] = {
+    { RGW_CORS_GET,    "GET" },
+    { RGW_CORS_PUT,    "PUT" },
+    { RGW_CORS_DELETE, "DELETE" },
+    { RGW_CORS_HEAD,   "HEAD" },
+    { RGW_CORS_POST,   "POST" },
+    { RGW_CORS_COPY,   "COPY" },
+  };
+
+  f.open_object_section("CORSRule");
+
+  /* ID is only written when one was set */
+  if (!id.empty()) {
+    f.dump_string("ID", id);
+  }
+
+  for (const auto& m : method_names) {
+    if (allowed_methods & m.flag) {
+      f.dump_string("AllowedMethod", m.name);
+    }
+  }
+
+  for (const string& origin : allowed_origins) {
+    f.dump_string("AllowedOrigin", origin);
+  }
+
+  for (const string& hdr : allowed_hdrs) {
+    f.dump_string("AllowedHeader", hdr);
+  }
+
+  /* MaxAgeSeconds is omitted while it still holds the "unset" marker */
+  if (max_age != CORS_MAX_AGE_INVALID) {
+    f.dump_unsigned("MaxAgeSeconds", max_age);
+  }
+
+  for (const string& hdr : exposable_hdrs) {
+    f.dump_string("ExposeHeader", hdr);
+  }
+
+  f.close_section();
+}
+
+/*
+ * Populate this rule from the child elements collected during parsing.
+ * Returns false (leaving the rule partially filled) when a method name is
+ * unknown, the ID is longer than 255 characters, no AllowedOrigin is
+ * present, an origin or allowed header fails validate_name_string(), or
+ * MaxAgeSeconds has trailing non-numeric characters.
+ */
+bool RGWCORSRule_S3::xml_end(const char *el) {
+  static const struct {
+    const char *name;
+    uint8_t flag;
+  } known_methods[] = {
+    { "GET",    RGW_CORS_GET },
+    { "POST",   RGW_CORS_POST },
+    { "DELETE", RGW_CORS_DELETE },
+    { "HEAD",   RGW_CORS_HEAD },
+    { "PUT",    RGW_CORS_PUT },
+    { "COPY",   RGW_CORS_COPY },
+  };
+
+  /* AllowedMethod: names are matched case-insensitively */
+  XMLObjIter iter = find("AllowedMethod");
+  XMLObj *obj;
+  for (obj = iter.get_next(); obj; obj = iter.get_next()) {
+    const char *s = obj->get_data().c_str();
+    dout(10) << "RGWCORSRule::xml_end, el : " << el << ", data : " << s << dendl;
+    bool recognised = false;
+    for (const auto& m : known_methods) {
+      if (strcasecmp(s, m.name) == 0) {
+        allowed_methods |= m.flag;
+        recognised = true;
+        break;
+      }
+    }
+    if (!recognised) {
+      return false;
+    }
+  }
+
+  /* ID: optional, at most 255 characters */
+  if (XMLObj *xml_id = find_first("ID")) {
+    string data = xml_id->get_data();
+    if (data.length() > 255) {
+      dout(0) << "RGWCORSRule has id of length greater than 255" << dendl;
+      return false;
+    }
+    dout(10) << "RGWCORRule id : " << data << dendl;
+    id = data;
+  }
+
+  /* AllowedOrigin: at least one is mandatory */
+  iter = find("AllowedOrigin");
+  obj = iter.get_next();
+  if (!obj) {
+    dout(0) << "RGWCORSRule does not have even one AllowedOrigin" << dendl;
+    return false;
+  }
+  do {
+    dout(10) << "RGWCORSRule - origin : " << obj->get_data() << dendl;
+    string host = obj->get_data();
+    if (validate_name_string(host) != 0) {
+      return false;
+    }
+    allowed_origins.insert(allowed_origins.end(), host);
+  } while ((obj = iter.get_next()));
+
+  /* MaxAgeSeconds: only the first element is looked at */
+  iter = find("MaxAgeSeconds");
+  obj = iter.get_next();
+  if (obj) {
+    char *end = NULL;
+    const unsigned long long ull = strtoull(obj->get_data().c_str(), &end, 10);
+    if (*end != '\0') {
+      dout(0) << "RGWCORSRule's MaxAgeSeconds " << obj->get_data() << " is an invalid integer" << dendl;
+      return false;
+    }
+    // values that do not fit in 32 bits are stored as "unset"
+    max_age = (ull >= 0x100000000ull) ? CORS_MAX_AGE_INVALID : (uint32_t)ull;
+    dout(10) << "RGWCORSRule : max_age : " << max_age << dendl;
+  }
+
+  /* ExposeHeader: stored as given, no validation */
+  iter = find("ExposeHeader");
+  for (obj = iter.get_next(); obj; obj = iter.get_next()) {
+    dout(10) << "RGWCORSRule - exp_hdr : " << obj->get_data() << dendl;
+    exposable_hdrs.push_back(obj->get_data());
+  }
+
+  /* AllowedHeader: each value must pass validate_name_string() */
+  iter = find("AllowedHeader");
+  for (obj = iter.get_next(); obj; obj = iter.get_next()) {
+    dout(10) << "RGWCORSRule - allowed_hdr : " << obj->get_data() << dendl;
+    string s = obj->get_data();
+    if (validate_name_string(s) != 0) {
+      return false;
+    }
+    allowed_hdrs.insert(allowed_hdrs.end(), s);
+  }
+  return true;
+}
+
+/*
+ * Serialise all rules into a namespaced <CORSConfiguration> document and
+ * flush it to out.
+ */
+void RGWCORSConfiguration_S3::to_xml(ostream& out) {
+  XMLFormatter f;
+  f.open_object_section_in_ns("CORSConfiguration", XMLNS_AWS_S3);
+  for (RGWCORSRule& rule : rules) {
+    // each stored rule is viewed as its S3 flavour to reach to_xml()
+    RGWCORSRule_S3& s3_rule = static_cast<RGWCORSRule_S3 &>(rule);
+    s3_rule.to_xml(f);
+  }
+  f.close_section();
+  f.flush(out);
+}
+
+/*
+ * Collect every parsed CORSRule child into rules (appending copies).
+ * Returns false when the configuration contains no CORSRule at all.
+ */
+bool RGWCORSConfiguration_S3::xml_end(const char *el) {
+  XMLObjIter iter = find("CORSRule");
+  RGWCORSRule_S3 *rule = static_cast<RGWCORSRule_S3 *>(iter.get_next());
+  if (!rule) {
+    dout(0) << "CORSConfiguration should have atleast one CORSRule" << dendl;
+    return false;
+  }
+  while (rule) {
+    rules.push_back(*rule);
+    rule = static_cast<RGWCORSRule_S3 *>(iter.get_next());
+  }
+  return true;
+}
+
+/* Parser node for an <ID> element; adds nothing to XMLObj. */
+class CORSRuleID_S3 : public XMLObj {
+ public:
+  CORSRuleID_S3() = default;
+  ~CORSRuleID_S3() override = default;
+};
+
+/* Parser node for an <AllowedOrigin> element; adds nothing to XMLObj. */
+class CORSRuleAllowedOrigin_S3 : public XMLObj {
+ public:
+  CORSRuleAllowedOrigin_S3() = default;
+  ~CORSRuleAllowedOrigin_S3() override = default;
+};
+
+/* Parser node for an <AllowedMethod> element; adds nothing to XMLObj. */
+class CORSRuleAllowedMethod_S3 : public XMLObj {
+ public:
+  CORSRuleAllowedMethod_S3() = default;
+  ~CORSRuleAllowedMethod_S3() override = default;
+};
+
+/* Parser node for an <AllowedHeader> element; adds nothing to XMLObj. */
+class CORSRuleAllowedHeader_S3 : public XMLObj {
+ public:
+  CORSRuleAllowedHeader_S3() = default;
+  ~CORSRuleAllowedHeader_S3() override = default;
+};
+
+/* Parser node for a <MaxAgeSeconds> element; adds nothing to XMLObj. */
+class CORSRuleMaxAgeSeconds_S3 : public XMLObj {
+ public:
+  CORSRuleMaxAgeSeconds_S3() = default;
+  ~CORSRuleMaxAgeSeconds_S3() override = default;
+};
+
+/* Parser node for an <ExposeHeader> element; adds nothing to XMLObj. */
+class CORSRuleExposeHeader_S3 : public XMLObj {
+ public:
+  CORSRuleExposeHeader_S3() = default;
+  ~CORSRuleExposeHeader_S3() override = default;
+};
+
+/*
+ * Allocate the parser node matching element name el (exact, case-sensitive
+ * comparison). Returns NULL for element names this parser does not know.
+ */
+XMLObj *RGWCORSXMLParser_S3::alloc_obj(const char *el) {
+  const auto is = [el](const char *name) { return strcmp(el, name) == 0; };
+
+  if (is("CORSConfiguration"))
+    return new RGWCORSConfiguration_S3;
+  if (is("CORSRule"))
+    return new RGWCORSRule_S3;
+  if (is("ID"))
+    return new CORSRuleID_S3;
+  if (is("AllowedOrigin"))
+    return new CORSRuleAllowedOrigin_S3;
+  if (is("AllowedMethod"))
+    return new CORSRuleAllowedMethod_S3;
+  if (is("AllowedHeader"))
+    return new CORSRuleAllowedHeader_S3;
+  if (is("MaxAgeSeconds"))
+    return new CORSRuleMaxAgeSeconds_S3;
+  if (is("ExposeHeader"))
+    return new CORSRuleExposeHeader_S3;
+  return NULL;
+}
+
diff --git a/src/rgw/rgw_cors_s3.h b/src/rgw/rgw_cors_s3.h
new file mode 100644
index 00000000..9097e5f1
--- /dev/null
+++ b/src/rgw/rgw_cors_s3.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_CORS_S3_H
+#define CEPH_RGW_CORS_S3_H
+
+#include <map>
+#include <string>
+#include <iosfwd>
+
+#include <include/types.h>
+#include <common/Formatter.h>
+#include "rgw_xml.h"
+#include "rgw_cors.h"
+
+/*
+ * S3 XML flavour of RGWCORSRule: it is both a rule and an XMLObj parser
+ * node, so it can fill itself from parsed XML and write itself back out.
+ */
+class RGWCORSRule_S3 : public RGWCORSRule, public XMLObj
+{
+ public:
+ RGWCORSRule_S3() {}
+ ~RGWCORSRule_S3() override {}
+
+ /* Fill the rule from its parsed child elements; false on invalid input. */
+ bool xml_end(const char *el) override;
+ /* Emit the rule as a <CORSRule> section on f. */
+ void to_xml(XMLFormatter& f);
+};
+
+/*
+ * S3 XML flavour of RGWCORSConfiguration: a rule list that is also an
+ * XMLObj parser node.
+ */
+class RGWCORSConfiguration_S3 : public RGWCORSConfiguration, public XMLObj
+{
+ public:
+ RGWCORSConfiguration_S3() {}
+ ~RGWCORSConfiguration_S3() override {}
+
+ /* Gather the parsed CORSRule children; false when there are none. */
+ bool xml_end(const char *el) override;
+ /* Write the whole configuration as XML to out. */
+ void to_xml(ostream& out);
+};
+
+/*
+ * XML parser for S3 CORS documents; alloc_obj() supplies the node type for
+ * each element name.
+ */
+class RGWCORSXMLParser_S3 : public RGWXMLParser
+{
+ /* context handed to the constructor and kept for later use */
+ CephContext *cct;
+
+ XMLObj *alloc_obj(const char *el) override;
+public:
+ explicit RGWCORSXMLParser_S3(CephContext *_cct) : cct(_cct) {}
+};
+#endif /*CEPH_RGW_CORS_S3_H*/
diff --git a/src/rgw/rgw_cors_swift.h b/src/rgw/rgw_cors_swift.h
new file mode 100644
index 00000000..da5a2afc
--- /dev/null
+++ b/src/rgw/rgw_cors_swift.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_CORS_SWIFT3_H
+#define CEPH_RGW_CORS_SWIFT3_H
+
+#include <map>
+#include <string>
+#include <vector>
+#include <include/types.h>
+#include <include/str_list.h>
+
+#include "rgw_cors.h"
+
+/*
+ * Swift flavour of the CORS configuration: builds a single rule from the
+ * string values supplied by the caller.
+ */
+class RGWCORSConfiguration_SWIFT : public RGWCORSConfiguration
+{
+ public:
+  RGWCORSConfiguration_SWIFT() {}
+  ~RGWCORSConfiguration_SWIFT() {}
+  /*
+   * Build one rule allowing every method (RGW_CORS_ALL) and push it to the
+   * front of the rule list.
+   *
+   * allow_origins  - list of origins; required, each entry must pass
+   *                  validate_name_string()
+   * allow_headers  - optional list of allowed headers, validated likewise
+   * expose_headers - optional list of headers to expose (not validated)
+   * max_age        - optional decimal max-age
+   *
+   * Returns 0 on success and -EINVAL when allow_origins is NULL or yields no
+   * entries, or when an origin/header entry is invalid.
+   */
+  int create_update(const char *allow_origins, const char *allow_headers,
+                    const char *expose_headers, const char *max_age) {
+    set<string> o, h, oc;
+    list<string> e;
+    unsigned long a = CORS_MAX_AGE_INVALID;
+    uint8_t flags = RGW_CORS_ALL;
+
+    // constructing a std::string from a null pointer is undefined behaviour
+    if (!allow_origins)
+      return -EINVAL;
+    string ao = allow_origins;
+    get_str_set(ao, oc);
+    if (oc.empty())
+      return -EINVAL;
+    for (set<string>::iterator it = oc.begin(); it != oc.end(); ++it) {
+      string host = *it;
+      if (validate_name_string(host) != 0)
+        return -EINVAL;
+      o.insert(o.end(), host);
+    }
+    if (allow_headers) {
+      string ah = allow_headers;
+      get_str_set(ah, h);
+      for (set<string>::iterator it = h.begin();
+           it != h.end(); ++it) {
+        string s = (*it);
+        if (validate_name_string(s) != 0)
+          return -EINVAL;
+      }
+    }
+
+    if (expose_headers) {
+      string eh = expose_headers;
+      get_str_list(eh, e);
+    }
+    if (max_age) {
+      // Text that is not a number is still accepted; strtoul() then
+      // yields 0.
+      a = strtoul(max_age, NULL, 10);
+      // The rule stores max-age in 32 bits. Anything that does not fit
+      // (including strtoul's overflow result) is recorded as "unset" rather
+      // than being silently truncated.
+      if (a >= CORS_MAX_AGE_INVALID)
+        a = CORS_MAX_AGE_INVALID;
+    }
+
+    RGWCORSRule rule(o, h, e, flags, a);
+    stack_rule(rule);
+    return 0;
+  }
+};
+#endif /*CEPH_RGW_CORS_SWIFT3_H*/
diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc
new file mode 100644
index 00000000..66d05e08
--- /dev/null
+++ b/src/rgw/rgw_cr_rados.cc
@@ -0,0 +1,916 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_coroutine.h"
+#include "rgw_cr_rados.h"
+#include "rgw_sync_counters.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_zone_utils.h"
+#include "services/svc_sys_obj.h"
+
+#include "cls/lock/cls_lock_client.h"
+#include "cls/rgw/cls_rgw_client.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Append req to the processor's queue unless the processor is going down.
+ * req->get() is called before queuing; returns whether req was queued.
+ */
+bool RGWAsyncRadosProcessor::RGWWQ::_enqueue(RGWAsyncRadosRequest *req) {
+  const bool accepting = !processor->is_going_down();
+  if (accepting) {
+    req->get();
+    processor->m_req_queue.push_back(req);
+    dout(20) << "enqueued request req=" << hex << req << dec << dendl;
+    _dump_queue();
+  }
+  return accepting;
+}
+
+/* True when the processor's request queue holds no entries. */
+bool RGWAsyncRadosProcessor::RGWWQ::_empty() {
+ return processor->m_req_queue.empty();
+}
+
+/* Pop and return the oldest queued request, or NULL when the queue is empty. */
+RGWAsyncRadosRequest *RGWAsyncRadosProcessor::RGWWQ::_dequeue() {
+  auto& pending = processor->m_req_queue;
+  if (pending.empty()) {
+    return NULL;
+  }
+  RGWAsyncRadosRequest *req = pending.front();
+  pending.pop_front();
+  dout(20) << "dequeued request req=" << hex << req << dec << dendl;
+  _dump_queue();
+  return req;
+}
+
+/*
+ * Run one dequeued request through the processor, then return the throttle
+ * unit taken by RGWAsyncRadosProcessor::queue(). handle is not used here.
+ */
+void RGWAsyncRadosProcessor::RGWWQ::_process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) {
+ processor->handle_request(req);
+ processor->req_throttle.put(1);
+}
+
+/*
+ * Log the queued request pointers at rgw debug level 20. Does nothing when
+ * that level is not being gathered.
+ */
+void RGWAsyncRadosProcessor::RGWWQ::_dump_queue() {
+  if (!g_conf()->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    return;
+  }
+  const auto& pending = processor->m_req_queue;
+  if (pending.empty()) {
+    dout(20) << "RGWWQ: empty" << dendl;
+    return;
+  }
+  dout(20) << "RGWWQ:" << dendl;
+  for (RGWAsyncRadosRequest *queued : pending) {
+    dout(20) << "req: " << hex << queued << dec << dendl;
+  }
+}
+
+/*
+ * Set up the thread pool (num_threads workers), a request throttle sized at
+ * twice the thread count, and the work queue with the rgw op thread
+ * timeouts from configuration. Threads are not started here; see start().
+ */
+RGWAsyncRadosProcessor::RGWAsyncRadosProcessor(RGWRados *_store, int num_threads)
+ : store(_store), m_tp(store->ctx(), "RGWAsyncRadosProcessor::m_tp", "rados_async", num_threads),
+ req_throttle(store->ctx(), "rgw_async_rados_ops", num_threads * 2),
+ req_wq(this, g_conf()->rgw_op_thread_timeout,
+ g_conf()->rgw_op_thread_suicide_timeout, &m_tp) {
+}
+
+/* Start the worker thread pool. */
+void RGWAsyncRadosProcessor::start() {
+ m_tp.start();
+}
+
+/*
+ * Shut the processor down: flag it as going down, drain the work queue,
+ * stop the thread pool and drop the reference held on every request that
+ * is still queued.
+ */
+void RGWAsyncRadosProcessor::stop() {
+  going_down = true;
+  m_tp.drain(&req_wq);
+  m_tp.stop();
+  for (RGWAsyncRadosRequest *req : m_req_queue) {
+    req->put();
+  }
+  // Forget the released pointers so nothing can dereference them later and
+  // a second stop() cannot put() the same requests again.
+  m_req_queue.clear();
+}
+
+/* Execute req, then drop the reference taken when it was enqueued. */
+void RGWAsyncRadosProcessor::handle_request(RGWAsyncRadosRequest *req) {
+ req->send_request();
+ req->put();
+}
+
+/*
+ * Take one throttle unit, then hand req to the work queue. The unit is
+ * returned in RGWWQ::_process() once the request has been handled.
+ */
+void RGWAsyncRadosProcessor::queue(RGWAsyncRadosRequest *req) {
+ req_throttle.get(1);
+ req_wq.queue(req);
+}
+
+/*
+ * Read the system object into bl, honouring objv_tracker. Attributes are
+ * fetched into attrs only when want_attrs is set. Returns the read result.
+ */
+int RGWAsyncGetSystemObj::_send_request()
+{
+  map<string, bufferlist> *pattrs = nullptr;
+  if (want_attrs) {
+    pattrs = &attrs;
+  }
+
+  auto sysobj = obj_ctx.get_obj(obj);
+  return sysobj.rop()
+               .set_objv_tracker(&objv_tracker)
+               .set_attrs(pattrs)
+               .set_raw_attrs(raw_attrs)
+               .read(&bl);
+}
+
+/*
+ * Capture everything needed to read _obj later. When _objv_tracker is
+ * given, its current value is copied; the caller's tracker is not updated
+ * by this object.
+ */
+RGWAsyncGetSystemObj::RGWAsyncGetSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ bool want_attrs, bool raw_attrs)
+ : RGWAsyncRadosRequest(caller, cn), obj_ctx(_svc),
+ obj(_obj), want_attrs(want_attrs), raw_attrs(raw_attrs)
+{
+ if (_objv_tracker) {
+ objv_tracker = *_objv_tracker;
+ }
+}
+
+/*
+ * Queue an asynchronous attribute read of obj (want_attrs = true, no
+ * version tracker). Always returns 0; the outcome is reported by
+ * request_complete().
+ */
+int RGWSimpleRadosReadAttrsCR::send_request()
+{
+ req = new RGWAsyncGetSystemObj(this, stack->create_completion_notifier(),
+ svc, nullptr, obj, true, raw_attrs);
+ async_rados->queue(req);
+ return 0;
+}
+
+/*
+ * Move the fetched attributes into *pattrs (when the caller supplied one)
+ * and return the request's status. The attributes are moved regardless of
+ * that status.
+ */
+int RGWSimpleRadosReadAttrsCR::request_complete()
+{
+ if (pattrs) {
+ *pattrs = std::move(req->attrs);
+ }
+ return req->get_ret_status();
+}
+
+/*
+ * Write bl as the data of the system object, using objv_tracker and the
+ * exclusive flag given at construction. Returns the write result.
+ */
+int RGWAsyncPutSystemObj::_send_request()
+{
+ auto obj_ctx = svc->init_obj_ctx();
+ auto sysobj = obj_ctx.get_obj(obj);
+ return sysobj.wop()
+ .set_objv_tracker(&objv_tracker)
+ .set_exclusive(exclusive)
+ .write_data(bl);
+}
+
+/*
+ * Capture the target object, exclusive flag and payload (moved in from the
+ * by-value _bl). A supplied _objv_tracker is copied, not referenced.
+ */
+RGWAsyncPutSystemObj::RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn,
+ RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ bool _exclusive, bufferlist _bl)
+ : RGWAsyncRadosRequest(caller, cn), svc(_svc),
+ obj(_obj), exclusive(_exclusive), bl(std::move(_bl))
+{
+ if (_objv_tracker) {
+ objv_tracker = *_objv_tracker;
+ }
+}
+
+/*
+ * Write attrs onto the system object (non-exclusive), using objv_tracker.
+ * Returns the write result.
+ */
+int RGWAsyncPutSystemObjAttrs::_send_request()
+{
+ auto obj_ctx = svc->init_obj_ctx();
+ auto sysobj = obj_ctx.get_obj(obj);
+ return sysobj.wop()
+ .set_objv_tracker(&objv_tracker)
+ .set_exclusive(false)
+ .set_attrs(attrs)
+ .write_attrs();
+}
+
+/*
+ * Capture the target object and the attribute map (moved in from the
+ * by-value _attrs). A supplied _objv_tracker is copied, not referenced.
+ */
+RGWAsyncPutSystemObjAttrs::RGWAsyncPutSystemObjAttrs(RGWCoroutine *caller, RGWAioCompletionNotifier *cn,
+ RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ map<string, bufferlist> _attrs)
+ : RGWAsyncRadosRequest(caller, cn), svc(_svc),
+ obj(_obj), attrs(std::move(_attrs))
+{
+ if (_objv_tracker) {
+ objv_tracker = *_objv_tracker;
+ }
+}
+
+
+/*
+ * Create an appender that batches string keys for omap object _obj.
+ * _window_size is the number of entries collected before a flush; all
+ * counters start at zero.
+ */
+RGWOmapAppend::RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, const rgw_raw_obj& _obj,
+ uint64_t _window_size)
+ : RGWConsumerCR<string>(_store->ctx()), async_rados(_async_rados),
+ store(_store), obj(_obj), going_down(false), num_pending_entries(0), window_size(_window_size), total_entries(0)
+{
+}
+
+/*
+ * Take an exclusive, renewable cls lock named lock_name on obj for
+ * duration_secs seconds, identified by cookie. Returns a negative error
+ * when the object reference cannot be resolved, else the lock result.
+ */
+int RGWAsyncLockSystemObj::_send_request()
+{
+  rgw_rados_ref ref;
+  const int ret = store->get_raw_obj_ref(obj, &ref);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << ret << dendl;
+    return ret;
+  }
+
+  utime_t lock_ttl(duration_secs, 0);
+
+  rados::cls::lock::Lock lock(lock_name);
+  lock.set_duration(lock_ttl);
+  lock.set_cookie(cookie);
+  lock.set_may_renew(true);
+  return lock.lock_exclusive(&ref.ioctx, ref.obj.oid);
+}
+
+/*
+ * Capture the lock parameters for a later _send_request().
+ * _objv_tracker is accepted but not used by this constructor.
+ */
+RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ const string& _name, const string& _cookie, uint32_t _duration_secs) : RGWAsyncRadosRequest(caller, cn), store(_store),
+ obj(_obj),
+ lock_name(_name),
+ cookie(_cookie),
+ duration_secs(_duration_secs)
+{
+}
+
+/*
+ * Release the cls lock named lock_name on obj using cookie. Returns a
+ * negative error when the object reference cannot be resolved, else the
+ * unlock result.
+ */
+int RGWAsyncUnlockSystemObj::_send_request()
+{
+  rgw_rados_ref ref;
+  const int ret = store->get_raw_obj_ref(obj, &ref);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << ret << dendl;
+    return ret;
+  }
+
+  rados::cls::lock::Lock lock(lock_name);
+  lock.set_cookie(cookie);
+  return lock.unlock(&ref.ioctx, ref.obj.oid);
+}
+
+/*
+ * Capture the unlock parameters for a later _send_request().
+ * _objv_tracker is accepted but not used by this constructor.
+ */
+RGWAsyncUnlockSystemObj::RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ const string& _name, const string& _cookie) : RGWAsyncRadosRequest(caller, cn), store(_store),
+ obj(_obj),
+ lock_name(_name), cookie(_cookie)
+{
+}
+
+/*
+ * Coroutine that sets _entries as omap keys on _obj. The entries are copied.
+ * The description is built as "set omap keys dest=<obj> keys=[k1, k2, ...]".
+ */
+RGWRadosSetOmapKeysCR::RGWRadosSetOmapKeysCR(RGWRados *_store,
+                      const rgw_raw_obj& _obj,
+                      map<string, bufferlist>& _entries) : RGWSimpleCoroutine(_store->ctx()),
+                                                store(_store),
+                                                entries(_entries),
+                                                obj(_obj), cn(NULL)
+{
+  stringstream& s = set_description();
+  // Open the bracket only; the key list and the closing bracket follow.
+  // (Streaming s.str() back into s here produced a garbled description.)
+  s << "set omap keys dest=" << obj << " keys=[";
+  for (auto i = entries.begin(); i != entries.end(); ++i) {
+    if (i != entries.begin()) {
+      s << ", ";
+    }
+    s << i->first;
+  }
+  s << "]";
+}
+
+/*
+ * Resolve the object reference and submit an asynchronous omap_set of
+ * entries. Returns a negative error if the reference lookup fails,
+ * otherwise the result of submitting the operation.
+ */
+int RGWRadosSetOmapKeysCR::send_request()
+{
+ int r = store->get_raw_obj_ref(obj, &ref);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
+ return r;
+ }
+
+ set_status() << "sending request";
+
+ librados::ObjectWriteOperation op;
+ op.omap_set(entries);
+
+ // the notifier's completion is what request_complete() inspects later
+ cn = stack->create_completion_notifier();
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
+}
+
+/* Record and return the completion's return value. */
+int RGWRadosSetOmapKeysCR::request_complete()
+{
+ int r = cn->completion()->get_return_value();
+
+ set_status() << "request complete; ret=" << r;
+
+ return r;
+}
+
+/*
+ * Coroutine that lists up to _max_entries omap keys of _obj starting after
+ * _marker. _result receives the output and must be non-null (asserted).
+ */
+RGWRadosGetOmapKeysCR::RGWRadosGetOmapKeysCR(RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const string& _marker,
+ int _max_entries,
+ ResultPtr _result)
+ : RGWSimpleCoroutine(_store->ctx()), store(_store), obj(_obj),
+ marker(_marker), max_entries(_max_entries),
+ result(std::move(_result))
+{
+ ceph_assert(result); // must be allocated
+ set_description() << "get omap keys dest=" << obj << " marker=" << marker;
+}
+
+/*
+ * Resolve the object reference (stored in the result object) and submit an
+ * asynchronous omap_get_keys2 that fills result->entries and result->more.
+ */
+int RGWRadosGetOmapKeysCR::send_request() {
+ int r = store->get_raw_obj_ref(obj, &result->ref);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
+ return r;
+ }
+
+ set_status() << "send request";
+
+ librados::ObjectReadOperation op;
+ op.omap_get_keys2(marker, max_entries, &result->entries, &result->more, nullptr);
+
+ // result is handed to the notifier along with the request
+ cn = stack->create_completion_notifier(result);
+ return result->ref.ioctx.aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
+}
+
+/* Record and return the completion's return value. */
+int RGWRadosGetOmapKeysCR::request_complete()
+{
+ int r = cn->completion()->get_return_value();
+
+ set_status() << "request complete; ret=" << r;
+
+ return r;
+}
+
+/* Coroutine that removes the omap keys _keys (copied) from _obj. */
+RGWRadosRemoveOmapKeysCR::RGWRadosRemoveOmapKeysCR(RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const set<string>& _keys) : RGWSimpleCoroutine(_store->ctx()),
+ store(_store),
+ keys(_keys),
+ obj(_obj), cn(NULL)
+{
+ set_description() << "remove omap keys dest=" << obj << " keys=" << keys;
+}
+
+/*
+ * Resolve the object reference and submit an asynchronous omap_rm_keys of
+ * keys. Returns a negative error if the reference lookup fails, otherwise
+ * the result of submitting the operation.
+ */
+int RGWRadosRemoveOmapKeysCR::send_request() {
+ int r = store->get_raw_obj_ref(obj, &ref);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl;
+ return r;
+ }
+
+ set_status() << "send request";
+
+ librados::ObjectWriteOperation op;
+ op.omap_rm_keys(keys);
+
+ cn = stack->create_completion_notifier();
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
+}
+
+/* Record and return the completion's return value. */
+int RGWRadosRemoveOmapKeysCR::request_complete()
+{
+ int r = cn->completion()->get_return_value();
+
+ set_status() << "request complete; ret=" << r;
+
+ return r;
+}
+
+/* Coroutine that removes the raw object obj. */
+RGWRadosRemoveCR::RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj)
+ : RGWSimpleCoroutine(store->ctx()), store(store), obj(obj)
+{
+ set_description() << "remove dest=" << obj;
+}
+
+/*
+ * Open an ioctx on the object's pool, set its locator key and submit an
+ * asynchronous remove. Returns a negative error if the pool cannot be
+ * opened, otherwise the result of submitting the operation.
+ */
+int RGWRadosRemoveCR::send_request()
+{
+ auto rados = store->get_rados_handle();
+ int r = rados->ioctx_create(obj.pool.name.c_str(), ioctx);
+ if (r < 0) {
+ lderr(cct) << "ERROR: failed to open pool (" << obj.pool.name << ") ret=" << r << dendl;
+ return r;
+ }
+ ioctx.locator_set_key(obj.loc);
+
+ set_status() << "send request";
+
+ librados::ObjectWriteOperation op;
+ op.remove();
+
+ cn = stack->create_completion_notifier();
+ return ioctx.aio_operate(obj.oid, cn->completion(), &op);
+}
+
+/* Record and return the completion's return value. */
+int RGWRadosRemoveCR::request_complete()
+{
+ int r = cn->completion()->get_return_value();
+
+ set_status() << "request complete; ret=" << r;
+
+ return r;
+}
+
+/*
+ * Coroutine that takes lock _lock_name on _obj with cookie _cookie for
+ * _duration (passed on as the lock request's duration in seconds).
+ */
+RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const string& _lock_name,
+ const string& _cookie,
+ uint32_t _duration) : RGWSimpleCoroutine(_store->ctx()),
+ async_rados(_async_rados),
+ store(_store),
+ lock_name(_lock_name),
+ cookie(_cookie),
+ duration(_duration),
+ obj(_obj),
+ req(NULL)
+{
+ set_description() << "rados lock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie << " duration=" << duration;
+}
+
+/* Finish and forget the outstanding request, if there is one. */
+void RGWSimpleRadosLockCR::request_cleanup()
+{
+  if (!req) {
+    return;
+  }
+  req->finish();
+  req = NULL;
+}
+
+/*
+ * Create the asynchronous lock request and queue it on the processor.
+ * Always returns 0; the lock outcome is reported by request_complete().
+ */
+int RGWSimpleRadosLockCR::send_request()
+{
+ set_status() << "sending request";
+ req = new RGWAsyncLockSystemObj(this, stack->create_completion_notifier(),
+ store, NULL, obj, lock_name, cookie, duration);
+ async_rados->queue(req);
+ return 0;
+}
+
+/* Record and return the status of the finished lock request. */
+int RGWSimpleRadosLockCR::request_complete()
+{
+  const int ret = req->get_ret_status();
+  set_status() << "request complete; ret=" << ret;
+  return ret;
+}
+
+/* Coroutine that releases lock _lock_name on _obj held with _cookie. */
+RGWSimpleRadosUnlockCR::RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const string& _lock_name,
+ const string& _cookie) : RGWSimpleCoroutine(_store->ctx()),
+ async_rados(_async_rados),
+ store(_store),
+ lock_name(_lock_name),
+ cookie(_cookie),
+ obj(_obj),
+ req(NULL)
+{
+ set_description() << "rados unlock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie;
+}
+
+/* Finish and forget the outstanding request, if there is one. */
+void RGWSimpleRadosUnlockCR::request_cleanup()
+{
+  if (!req) {
+    return;
+  }
+  req->finish();
+  req = NULL;
+}
+
+/*
+ * Create the asynchronous unlock request and queue it on the processor.
+ * Always returns 0; the outcome is reported by request_complete().
+ */
+int RGWSimpleRadosUnlockCR::send_request()
+{
+ set_status() << "sending request";
+
+ req = new RGWAsyncUnlockSystemObj(this, stack->create_completion_notifier(),
+ store, NULL, obj, lock_name, cookie);
+ async_rados->queue(req);
+ return 0;
+}
+
+/* Record and return the status of the finished unlock request. */
+int RGWSimpleRadosUnlockCR::request_complete()
+{
+  const int ret = req->get_ret_status();
+  set_status() << "request complete; ret=" << ret;
+  return ret;
+}
+
+/*
+ * Coroutine body: repeatedly wait for produced entries, collect them into
+ * `entries`, and write them to the omap object once window_size entries
+ * have accumulated or the appender is going down. Ends in the Done state
+ * after going_down is set and no product is left, or in the Error state if
+ * a write fails. reenter/yield come from boost/asio/yield.hpp.
+ */
+int RGWOmapAppend::operate() {
+ reenter(this) {
+ for (;;) {
+ // exit only when shutdown was requested and nothing is left to consume
+ if (!has_product() && going_down) {
+ set_status() << "going down";
+ break;
+ }
+ set_status() << "waiting for product";
+ yield wait_for_product();
+ yield {
+ string entry;
+ // pull entries until the window is full or the product runs out
+ while (consume(&entry)) {
+ set_status() << "adding entry: " << entry;
+ entries[entry] = bufferlist();
+ if (entries.size() >= window_size) {
+ break;
+ }
+ }
+ // a partial window is flushed only during shutdown
+ if (entries.size() >= window_size || going_down) {
+ set_status() << "flushing to omap";
+ call(new RGWRadosSetOmapKeysCR(store, obj, entries));
+ entries.clear();
+ }
+ }
+ if (get_ret_status() < 0) {
+ ldout(cct, 0) << "ERROR: failed to store entries in omap" << dendl;
+ return set_state(RGWCoroutine_Error);
+ }
+ }
+ /* done with coroutine */
+ return set_state(RGWCoroutine_Done);
+ }
+ return 0;
+}
+
+/* Hand the locally buffered entries to receive() and reset the counter. */
+void RGWOmapAppend::flush_pending() {
+ receive(pending_entries);
+ num_pending_entries = 0;
+}
+
+/*
+ * Buffer s for a later write. Once window_size entries are pending they are
+ * handed over via flush_pending(). Returns false, without buffering, when
+ * the coroutine is already done.
+ */
+bool RGWOmapAppend::append(const string& s) {
+  if (is_done()) {
+    return false;
+  }
+  ++total_entries;
+  pending_entries.push_back(s);
+  ++num_pending_entries;
+  const bool window_full = num_pending_entries >= (int)window_size;
+  if (window_full) {
+    flush_pending();
+  }
+  return true;
+}
+
+/*
+ * Request shutdown: mark the appender as going down, hand over whatever is
+ * still buffered and clear the sleeping flag. Returns true while the
+ * coroutine has not yet reached the done state.
+ */
+bool RGWOmapAppend::finish() {
+ going_down = true;
+ flush_pending();
+ set_sleeping(false);
+ return (!is_done());
+}
+
+/*
+ * Load the bucket instance identified by oid into bucket_info.
+ * Returns 0 on success or the negative error from the lookup.
+ */
+int RGWAsyncGetBucketInstanceInfo::_send_request()
+{
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+  const int ret = store->get_bucket_instance_from_oid(obj_ctx, oid, bucket_info, NULL, NULL);
+  if (ret >= 0) {
+    return 0;
+  }
+  ldout(store->ctx(), 0) << "ERROR: failed to get bucket instance info for "
+      << oid << dendl;
+  return ret;
+}
+
+/*
+ * Coroutine that trims the bucket index log of one shard. Both markers are
+ * passed through BucketIndexShardsManager::get_shard_marker() before being
+ * stored, and the bucket shard is initialised for shard_id.
+ */
+RGWRadosBILogTrimCR::RGWRadosBILogTrimCR(RGWRados *store,
+ const RGWBucketInfo& bucket_info,
+ int shard_id,
+ const std::string& start_marker,
+ const std::string& end_marker)
+ : RGWSimpleCoroutine(store->ctx()), bs(store),
+ start_marker(BucketIndexShardsManager::get_shard_marker(start_marker)),
+ end_marker(BucketIndexShardsManager::get_shard_marker(end_marker))
+{
+ // NOTE(review): the return value of bs.init() is not checked here
+ bs.init(bucket_info, shard_id);
+}
+
+/*
+ * Encode a bi-log trim op for the stored marker range and submit it
+ * asynchronously against the bucket index shard object.
+ */
+int RGWRadosBILogTrimCR::send_request()
+{
+ bufferlist in;
+ cls_rgw_bi_log_trim_op call;
+ // the member markers are moved from, so they are consumed by this call
+ call.start_marker = std::move(start_marker);
+ call.end_marker = std::move(end_marker);
+ encode(call, in);
+
+ librados::ObjectWriteOperation op;
+ op.exec(RGW_CLASS, RGW_BI_LOG_TRIM, in);
+
+ cn = stack->create_completion_notifier();
+ return bs.index_ctx.aio_operate(bs.bucket_obj, cn->completion(), &op);
+}
+
+/* Record and return the completion's return value. */
+int RGWRadosBILogTrimCR::request_complete()
+{
+ int r = cn->completion()->get_return_value();
+ set_status() << "request complete; ret=" << r;
+ return r;
+}
+
+/*
+ * Fetch object `key` of the bucket from source_zone into the same bucket,
+ * under dest_key when one was given and under `key` otherwise. Updates the
+ * sync counters (when present) and returns the fetch result.
+ */
+int RGWAsyncFetchRemoteObj::_send_request()
+{
+  RGWObjectCtx obj_ctx(store);
+
+  // passed to fetch_remote_obj() as an empty user id
+  string user_id;
+  // starts empty; handed to fetch_remote_obj() by reference
+  map<string, bufferlist> attrs;
+
+  rgw_obj src_obj(bucket_info.bucket, key);
+
+  rgw_obj dest_obj(bucket_info.bucket, dest_key.value_or(key));
+
+  std::optional<uint64_t> bytes_transferred;
+  int r = store->fetch_remote_obj(obj_ctx,
+                       user_id,
+                       NULL, /* req_info */
+                       source_zone,
+                       dest_obj,
+                       src_obj,
+                       bucket_info, /* dest */
+                       bucket_info, /* source */
+                       dest_placement_rule,
+                       NULL, /* real_time* src_mtime, */
+                       NULL, /* real_time* mtime, */
+                       NULL, /* const real_time* mod_ptr, */
+                       NULL, /* const real_time* unmod_ptr, */
+                       false, /* high precision time */
+                       NULL, /* const char *if_match, */
+                       NULL, /* const char *if_nomatch, */
+                       RGWRados::ATTRSMOD_NONE,
+                       copy_if_newer,
+                       attrs,
+                       RGWObjCategory::Main,
+                       versioned_epoch,
+                       real_time(), /* delete_at */
+                       NULL, /* string *ptag, */
+                       NULL, /* string *petag, */
+                       NULL, /* void (*progress_cb)(off_t, void *), */
+                       NULL, /* void *progress_data*); */
+                       &zones_trace,
+                       &bytes_transferred);
+
+  if (r < 0) {
+    ldout(store->ctx(), 0) << "store->fetch_remote_obj() returned r=" << r << dendl;
+    if (counters) {
+      counters->inc(sync_counters::l_fetch_err, 1);
+    }
+  } else if (counters) {
+    // a byte count is reported only when data was actually transferred
+    if (bytes_transferred) {
+      counters->inc(sync_counters::l_fetch, *bytes_transferred);
+    } else {
+      counters->inc(sync_counters::l_fetch_not_modified);
+    }
+  }
+  return r;
+}
+
+/*
+ * Stat object `key` of the bucket in source_zone, filling the caller's
+ * pmtime, psize, pattrs, pheaders and petag outputs. Returns the stat
+ * result.
+ */
+int RGWAsyncStatRemoteObj::_send_request()
+{
+  RGWObjectCtx obj_ctx(store);
+
+  // passed to stat_remote_obj() as an empty user id
+  string user_id;
+
+  rgw_obj src_obj(bucket_info.bucket, key);
+
+  int r = store->stat_remote_obj(obj_ctx,
+                       user_id,
+                       nullptr, /* req_info */
+                       source_zone,
+                       src_obj,
+                       bucket_info, /* source */
+                       pmtime, /* real_time* src_mtime, */
+                       psize, /* uint64_t * */
+                       nullptr, /* const real_time* mod_ptr, */
+                       nullptr, /* const real_time* unmod_ptr, */
+                       true, /* high precision time */
+                       nullptr, /* const char *if_match, */
+                       nullptr, /* const char *if_nomatch, */
+                       pattrs,
+                       pheaders,
+                       nullptr,
+                       nullptr, /* string *ptag, */
+                       petag); /* string *petag, */
+
+  if (r < 0) {
+    // name the call that actually failed
+    ldout(store->ctx(), 0) << "store->stat_remote_obj() returned r=" << r << dendl;
+  }
+  return r;
+}
+
+
+int RGWAsyncRemoveObj::_send_request() // deletes object (bucket_info.bucket, key) locally; returns the result of the failing step or of delete_obj()
+{
+ RGWObjectCtx obj_ctx(store);
+
+ rgw_obj obj(bucket_info.bucket, key);
+
+ ldout(store->ctx(), 0) << __func__ << "(): deleting obj=" << obj << dendl;
+
+ obj_ctx.set_atomic(obj);
+
+ RGWObjState *state;
+
+ int ret = store->get_obj_state(&obj_ctx, bucket_info, obj, &state);
+ if (ret < 0) {
+ ldout(store->ctx(), 20) << __func__ << "(): get_obj_state() obj=" << obj << " returned ret=" << ret << dendl;
+ return ret;
+ }
+
+ /* has there been any racing object write? */
+ if (del_if_older && (state->mtime > timestamp)) {
+ ldout(store->ctx(), 20) << __func__ << "(): skipping object removal obj=" << obj << " (obj mtime=" << state->mtime << ", request timestamp=" << timestamp << ")" << dendl;
+ return 0; // not an error: the stored object is newer than the request, so it is kept
+ }
+
+ RGWAccessControlPolicy policy;
+
+ /* decode policy */
+ map<string, bufferlist>::iterator iter = state->attrset.find(RGW_ATTR_ACL);
+ if (iter != state->attrset.end()) { // without an ACL attr the default-constructed policy is used
+ auto bliter = iter->second.cbegin();
+ try {
+ policy.decode(bliter);
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+ return -EIO;
+ }
+ }
+
+ RGWRados::Object del_target(store, bucket_info, obj_ctx, obj);
+ RGWRados::Object::Delete del_op(&del_target);
+
+ del_op.params.bucket_owner = bucket_info.owner;
+ del_op.params.obj_owner = policy.get_owner(); // starts from the ACL owner; id and display name are overwritten further down
+ if (del_if_older) {
+ del_op.params.unmod_since = timestamp;
+ }
+ if (versioned) {
+ del_op.params.versioning_status = BUCKET_VERSIONED;
+ }
+ del_op.params.olh_epoch = versioned_epoch;
+ del_op.params.marker_version_id = marker_version_id;
+ del_op.params.obj_owner.set_id(owner); // request-supplied owner replaces the id taken from the ACL
+ del_op.params.obj_owner.set_name(owner_display_name);
+ del_op.params.mtime = timestamp;
+ del_op.params.high_precision_time = true;
+ del_op.params.zones_trace = &zones_trace;
+
+ ret = del_op.delete_obj();
+ if (ret < 0) {
+ ldout(store->ctx(), 20) << __func__ << "(): delete_obj() obj=" << obj << " returned ret=" << ret << dendl;
+ }
+ return ret;
+}
+
+int RGWContinuousLeaseCR::operate() // coroutine body: takes the lock and keeps re-taking it until going_down, then unlocks
+{
+ if (aborted) { // checked on every entry, before resuming the coroutine state
+ caller->set_sleeping(false);
+ return set_cr_done();
+ }
+ reenter(this) {
+ while (!going_down) {
+ yield call(new RGWSimpleRadosLockCR(async_rados, store, obj, lock_name, cookie, interval)); // lock requested with a duration of `interval`
+
+ caller->set_sleeping(false); /* will only be relevant when we return, that's why we can do it early */
+ if (retcode < 0) {
+ set_locked(false);
+ ldout(store->ctx(), 20) << *this << ": couldn't lock " << obj << ":" << lock_name << ": retcode=" << retcode << dendl;
+ return set_state(RGWCoroutine_Error, retcode);
+ }
+ set_locked(true);
+ yield wait(utime_t(interval / 2, 0)); // renew after half the lock duration; NOTE(review): presumably seconds -- confirm
+ }
+ set_locked(false); /* moot at this point anyway */
+ yield call(new RGWSimpleRadosUnlockCR(async_rados, store, obj, lock_name, cookie));
+ return set_state(RGWCoroutine_Done);
+ }
+ return 0;
+}
+
+/*
+ * Coroutine that adds a single entry to the time log object `_oid`.
+ * The entry is copied into `entries`, which send_request() later submits.
+ */
+RGWRadosTimelogAddCR::RGWRadosTimelogAddCR(RGWRados *_store, const string& _oid,
+                      const cls_log_entry& entry) : RGWSimpleCoroutine(_store->ctx()),
+                                                store(_store),
+                                                oid(_oid), cn(NULL)
+{
+  stringstream& s = set_description();
+  /* keep a space before "entry=" so the oid does not run into the next field */
+  s << "timelog add entry oid=" << oid << " entry={id=" << entry.id << ", section=" << entry.section << ", name=" << entry.name << "}";
+  entries.push_back(entry);
+}
+
+/* Submit the queued entries to the time log; completion is reported through `cn`. */
+int RGWRadosTimelogAddCR::send_request()
+{
+  set_status() << "sending request";
+
+  cn = stack->create_completion_notifier();
+  const int ret = store->time_log_add(oid, entries, cn->completion(), true);
+  return ret;
+}
+
+/* Record and return the result of the completed time log add. */
+int RGWRadosTimelogAddCR::request_complete()
+{
+  const int ret = cn->completion()->get_return_value();
+
+  set_status() << "request complete; ret=" << ret;
+
+  return ret;
+}
+
+/*
+ * Coroutine that trims the time log object `oid` over the given time range
+ * and marker range. The arguments are only stored here; the trim itself is
+ * issued by send_request().
+ */
+RGWRadosTimelogTrimCR::RGWRadosTimelogTrimCR(RGWRados *store,
+                                             const std::string& oid,
+                                             const real_time& start_time,
+                                             const real_time& end_time,
+                                             const std::string& from_marker,
+                                             const std::string& to_marker)
+  : RGWSimpleCoroutine(store->ctx()), store(store), oid(oid),
+    start_time(start_time), end_time(end_time),
+    from_marker(from_marker), to_marker(to_marker)
+{
+  stringstream& desc = set_description();
+  desc << "timelog trim oid=" << oid;
+  desc << " start_time=" << start_time << " end_time=" << end_time;
+  desc << " from_marker=" << from_marker << " to_marker=" << to_marker;
+}
+
+/* Issue the time log trim; completion is reported through `cn`. */
+int RGWRadosTimelogTrimCR::send_request()
+{
+  set_status() << "sending request";
+
+  cn = stack->create_completion_notifier();
+  const int ret = store->time_log_trim(oid, start_time, end_time,
+                                       from_marker, to_marker,
+                                       cn->completion());
+  return ret;
+}
+
+/* Record and return the result of the completed time log trim. */
+int RGWRadosTimelogTrimCR::request_complete()
+{
+  const int ret = cn->completion()->get_return_value();
+
+  set_status() << "request complete; ret=" << ret;
+
+  return ret;
+}
+
+
+RGWSyncLogTrimCR::RGWSyncLogTrimCR(RGWRados *store, const std::string& oid, // marker-only trim: delegates to RGWRadosTimelogTrimCR
+ const std::string& to_marker,
+ std::string *last_trim_marker)
+ : RGWRadosTimelogTrimCR(store, oid, real_time{}, real_time{}, // default-constructed time bounds
+ std::string{}, to_marker), // empty from_marker
+ cct(store->ctx()), last_trim_marker(last_trim_marker) // last_trim_marker is dereferenced in request_complete()
+{
+}
+
+/*
+ * Translate the trim result: -ENODATA is mapped to success (0) and may
+ * advance *last_trim_marker; every other result is returned unchanged.
+ */
+int RGWSyncLogTrimCR::request_complete()
+{
+  const int ret = RGWRadosTimelogTrimCR::request_complete();
+  if (ret == -ENODATA) {
+    // nothing left to trim, update last_trim_marker
+    if (*last_trim_marker < to_marker && to_marker != max_marker) {
+      *last_trim_marker = to_marker;
+    }
+    return 0;
+  }
+  return ret;
+}
+
+
+/*
+ * Map `obj` to its raw object using the bucket's placement rule, then stat
+ * it; results go to psize / pmtime / pepoch.
+ */
+int RGWAsyncStatObj::_send_request()
+{
+  rgw_raw_obj raw;
+  store->obj_to_raw(bucket_info.placement_rule, obj, &raw);
+  const int ret = store->raw_obj_stat(raw, psize, pmtime, pepoch,
+                                      nullptr, nullptr, objv_tracker);
+  return ret;
+}
+
+RGWStatObjCR::RGWStatObjCR(RGWAsyncRadosProcessor *async_rados, RGWRados *store, // only stores its arguments; the stat is queued by send_request()
+ const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize,
+ real_time* pmtime, uint64_t *pepoch,
+ RGWObjVersionTracker *objv_tracker)
+ : RGWSimpleCoroutine(store->ctx()), store(store), async_rados(async_rados),
+ bucket_info(_bucket_info), obj(obj), psize(psize), pmtime(pmtime), pepoch(pepoch), // output pointers are kept as-is and handed to RGWAsyncStatObj
+ objv_tracker(objv_tracker)
+{
+}
+
+/* Release the in-flight request, if any, and forget it. */
+void RGWStatObjCR::request_cleanup()
+{
+  if (!req) {
+    return;
+  }
+  req->finish();
+  req = nullptr;
+}
+
+/* Create the async stat request and queue it on the async rados processor. */
+int RGWStatObjCR::send_request()
+{
+  auto notifier = stack->create_completion_notifier();
+  req = new RGWAsyncStatObj(this, notifier, store, bucket_info, obj,
+                            psize, pmtime, pepoch, objv_tracker);
+  async_rados->queue(req);
+  return 0;
+}
+
+/* Report the status stored by the finished async request. */
+int RGWStatObjCR::request_complete()
+{
+  const int status = req->get_ret_status();
+  return status;
+}
+
+RGWRadosNotifyCR::RGWRadosNotifyCR(RGWRados *store, const rgw_raw_obj& obj, // coroutine that sends a notify on `obj`; see send_request()
+ bufferlist& request, uint64_t timeout_ms,
+ bufferlist *response) // passed unchanged to aio_notify() in send_request()
+ : RGWSimpleCoroutine(store->ctx()), store(store), obj(obj),
+ request(request), timeout_ms(timeout_ms), response(response)
+{
+ set_description() << "notify dest=" << obj;
+}
+
+/*
+ * Resolve `obj` to a rados ref and issue an asynchronous notify on it.
+ * Returns the ref lookup error when it fails, otherwise the result of
+ * aio_notify().
+ */
+int RGWRadosNotifyCR::send_request()
+{
+  const int ref_ret = store->get_raw_obj_ref(obj, &ref);
+  if (ref_ret < 0) {
+    lderr(store->ctx()) << "ERROR: failed to get ref for (" << obj << ") ret=" << ref_ret << dendl;
+    return ref_ret;
+  }
+
+  set_status() << "sending request";
+
+  cn = stack->create_completion_notifier();
+  const int notify_ret = ref.ioctx.aio_notify(ref.obj.oid, cn->completion(),
+                                              request, timeout_ms, response);
+  return notify_ret;
+}
+
+/* Record and return the result of the completed notify. */
+int RGWRadosNotifyCR::request_complete()
+{
+  const int ret = cn->completion()->get_return_value();
+
+  set_status() << "request complete; ret=" << ret;
+
+  return ret;
+}
diff --git a/src/rgw/rgw_cr_rados.h b/src/rgw/rgw_cr_rados.h
new file mode 100644
index 00000000..70b52f35
--- /dev/null
+++ b/src/rgw/rgw_cr_rados.h
@@ -0,0 +1,1351 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_CR_RADOS_H
+#define CEPH_RGW_CR_RADOS_H
+
+#include <boost/intrusive_ptr.hpp>
+#include "include/ceph_assert.h"
+#include "rgw_coroutine.h"
+#include "rgw_rados.h"
+#include "common/WorkQueue.h"
+#include "common/Throttle.h"
+
+#include <atomic>
+
+#include "services/svc_sys_obj.h"
+
+class RGWAsyncRadosRequest : public RefCountedObject { // base for requests queued on RGWAsyncRadosProcessor; subclasses implement _send_request()
+ RGWCoroutine *caller;
+ RGWAioCompletionNotifier *notifier; // reset to nullptr once fired (send_request) or dropped (finish)
+
+ int retcode; // result of _send_request(), read through get_ret_status()
+
+ Mutex lock; // guards `notifier` between send_request() and finish()
+
+protected:
+ virtual int _send_request() = 0;
+public:
+ RGWAsyncRadosRequest(RGWCoroutine *_caller, RGWAioCompletionNotifier *_cn) : caller(_caller), notifier(_cn), retcode(0),
+ lock("RGWAsyncRadosRequest::lock") {
+ }
+ ~RGWAsyncRadosRequest() override {
+ if (notifier) { // only still set if neither send_request() nor finish() consumed it
+ notifier->put();
+ }
+ }
+
+ void send_request() { // runs the request and fires the notifier at most once
+ get(); // hold a ref for the duration of the call; released by the put() below
+ retcode = _send_request();
+ {
+ Mutex::Locker l(lock);
+ if (notifier) {
+ notifier->cb(); // drops its own ref
+ notifier = nullptr;
+ }
+ }
+ put();
+ }
+
+ int get_ret_status() { return retcode; }
+
+ void finish() { // called by the owning coroutine's request_cleanup(); drops one ref on this request
+ {
+ Mutex::Locker l(lock);
+ if (notifier) {
+ // we won't call notifier->cb() to drop its ref, so drop it here
+ notifier->put();
+ notifier = nullptr;
+ }
+ }
+ put();
+ }
+};
+
+
+class RGWAsyncRadosProcessor { // owns a thread pool and work queue for RGWAsyncRadosRequest objects; most methods are defined out of line
+ deque<RGWAsyncRadosRequest *> m_req_queue;
+ std::atomic<bool> going_down = { false }; // exposed through is_going_down()
+protected:
+ RGWRados *store;
+ ThreadPool m_tp;
+ Throttle req_throttle;
+
+ struct RGWWQ : public ThreadPool::WorkQueue<RGWAsyncRadosRequest> { // work queue adapter; keeps a back pointer to the processor
+ RGWAsyncRadosProcessor *processor;
+ RGWWQ(RGWAsyncRadosProcessor *p, time_t timeout, time_t suicide_timeout, ThreadPool *tp)
+ : ThreadPool::WorkQueue<RGWAsyncRadosRequest>("RGWWQ", timeout, suicide_timeout, tp), processor(p) {}
+
+ bool _enqueue(RGWAsyncRadosRequest *req) override;
+ void _dequeue(RGWAsyncRadosRequest *req) override { // removing a specific request is not supported
+ ceph_abort();
+ }
+ bool _empty() override;
+ RGWAsyncRadosRequest *_dequeue() override;
+ using ThreadPool::WorkQueue<RGWAsyncRadosRequest>::_process;
+ void _process(RGWAsyncRadosRequest *req, ThreadPool::TPHandle& handle) override;
+ void _dump_queue();
+ void _clear() override { // nothing to clear: the queue is expected to be empty already
+ ceph_assert(processor->m_req_queue.empty());
+ }
+ } req_wq;
+
+public:
+ RGWAsyncRadosProcessor(RGWRados *_store, int num_threads);
+ ~RGWAsyncRadosProcessor() {}
+ void start();
+ void stop();
+ void handle_request(RGWAsyncRadosRequest *req);
+ void queue(RGWAsyncRadosRequest *req);
+
+ bool is_going_down() {
+ return going_down;
+ }
+};
+
+/// Coroutine that runs an operation described by the parameter type P on the
+/// async rados processor and reports only a status code. The operation itself
+/// is Request::_send_request(), which is declared here and defined elsewhere.
+template <class P>
+class RGWSimpleWriteOnlyAsyncCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+
+  P params;
+
+  /// the queued request; carries its own copy of the parameters
+  class Request : public RGWAsyncRadosRequest {
+    RGWRados *store;
+    P params;
+  protected:
+    int _send_request() override;
+  public:
+    Request(RGWCoroutine *caller,
+            RGWAioCompletionNotifier *cn,
+            RGWRados *store,
+            const P& _params)
+      : RGWAsyncRadosRequest(caller, cn),
+        store(store),
+        params(_params) {}
+  } *req{nullptr};
+
+public:
+  RGWSimpleWriteOnlyAsyncCR(RGWAsyncRadosProcessor *_async_rados,
+                            RGWRados *_store,
+                            const P& _params)
+    : RGWSimpleCoroutine(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      params(_params) {}
+
+  ~RGWSimpleWriteOnlyAsyncCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// create the request and hand it to the async processor
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new Request(this, notifier, store, params);
+
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// status stored by the finished request
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+
+/// Coroutine that runs an operation described by the parameter type P on the
+/// async rados processor, with a result object of type R shared between the
+/// coroutine and the request. The operation itself is
+/// Request::_send_request(), which is declared here and defined elsewhere.
+template <class P, class R>
+class RGWSimpleAsyncCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+
+  P params;
+  std::shared_ptr<R> result;
+
+  /// the queued request; holds its own copy of the parameters and a
+  /// reference to the shared result
+  class Request : public RGWAsyncRadosRequest {
+    RGWRados *store;
+    P params;
+    std::shared_ptr<R> result;
+  protected:
+    int _send_request() override;
+  public:
+    Request(RGWCoroutine *caller,
+            RGWAioCompletionNotifier *cn,
+            RGWRados *_store,
+            const P& _params,
+            std::shared_ptr<R>& _result)
+      : RGWAsyncRadosRequest(caller, cn),
+        store(_store),
+        params(_params),
+        result(_result) {}
+  } *req{nullptr};
+
+public:
+  RGWSimpleAsyncCR(RGWAsyncRadosProcessor *_async_rados,
+                   RGWRados *_store,
+                   const P& _params,
+                   std::shared_ptr<R>& _result)
+    : RGWSimpleCoroutine(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      params(_params),
+      result(_result) {}
+
+  ~RGWSimpleAsyncCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// create the request and hand it to the async processor
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new Request(this, notifier, store, params, result);
+
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// status stored by the finished request
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+
+class RGWAsyncGetSystemObj : public RGWAsyncRadosRequest { // async request for reading a system object; constructor and _send_request() are defined out of line
+ RGWSysObjectCtx obj_ctx;
+ RGWObjVersionTracker objv_tracker;
+ rgw_raw_obj obj;
+ const bool want_attrs; // NOTE(review): presumably selects whether attrs are fetched -- confirm in _send_request()
+ const bool raw_attrs;
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncGetSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ bool want_attrs, bool raw_attrs);
+
+ bufferlist bl; // public: decoded by RGWSimpleRadosReadCR<T>::request_complete()
+ map<string, bufferlist> attrs; // public result member
+};
+
+class RGWAsyncPutSystemObj : public RGWAsyncRadosRequest { // async request for writing a system object; constructor and _send_request() are defined out of line
+ RGWSI_SysObj *svc;
+ rgw_raw_obj obj;
+ bool exclusive;
+ bufferlist bl; // payload, taken by value in the constructor
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncPutSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ bool _exclusive, bufferlist _bl);
+
+ RGWObjVersionTracker objv_tracker; // public: copied back by RGWSimpleRadosWriteCR::request_complete()
+};
+
+class RGWAsyncPutSystemObjAttrs : public RGWAsyncRadosRequest { // async request for writing system object attrs; constructor and _send_request() are defined out of line
+ RGWSI_SysObj *svc;
+ rgw_raw_obj obj;
+ map<string, bufferlist> attrs; // taken by value in the constructor
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncPutSystemObjAttrs(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWSI_SysObj *_svc,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj,
+ map<string, bufferlist> _attrs);
+
+ RGWObjVersionTracker objv_tracker; // public: copied back by RGWSimpleRadosWriteAttrsCR::request_complete()
+};
+
+class RGWAsyncLockSystemObj : public RGWAsyncRadosRequest { // async request for locking a system object; constructor and _send_request() are defined out of line
+ RGWRados *store;
+ rgw_raw_obj obj;
+ string lock_name;
+ string cookie;
+ uint32_t duration_secs; // lock duration; seconds, per the name
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, // NOTE(review): no member stores _objv_tracker -- confirm whether it is used
+ const string& _name, const string& _cookie, uint32_t _duration_secs);
+};
+
+class RGWAsyncUnlockSystemObj : public RGWAsyncRadosRequest { // async request for unlocking a system object; constructor and _send_request() are defined out of line
+ RGWRados *store;
+ rgw_raw_obj obj;
+ string lock_name;
+ string cookie;
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ RGWObjVersionTracker *_objv_tracker, const rgw_raw_obj& _obj, // NOTE(review): no member stores _objv_tracker -- confirm whether it is used
+ const string& _name, const string& _cookie);
+};
+
+/// Coroutine that reads a system object through RGWAsyncGetSystemObj and
+/// stores the decoded value in *result (see request_complete()).
+template <class T>
+class RGWSimpleRadosReadCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+
+  rgw_raw_obj obj;
+  T *result;
+  /// on ENOENT, call handle_data() with an empty object instead of failing
+  const bool empty_on_enoent;
+  RGWObjVersionTracker *objv_tracker;
+  RGWAsyncGetSystemObj *req{nullptr};
+
+public:
+  RGWSimpleRadosReadCR(RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc,
+                       const rgw_raw_obj& _obj,
+                       T *_result, bool empty_on_enoent = true,
+                       RGWObjVersionTracker *objv_tracker = nullptr)
+    : RGWSimpleCoroutine(_svc->ctx()),
+      async_rados(_async_rados),
+      svc(_svc),
+      obj(_obj),
+      result(_result),
+      empty_on_enoent(empty_on_enoent),
+      objv_tracker(objv_tracker) {}
+
+  ~RGWSimpleRadosReadCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  int send_request() override;
+  int request_complete() override;
+
+  /// hook called by request_complete() with the value stored in *result;
+  /// the default implementation does nothing and returns 0
+  virtual int handle_data(T& data) {
+    return 0;
+  }
+};
+
+/// Queue an RGWAsyncGetSystemObj for `obj`, with both attr flags set to false.
+template <class T>
+int RGWSimpleRadosReadCR<T>::send_request()
+{
+  auto notifier = stack->create_completion_notifier();
+  req = new RGWAsyncGetSystemObj(this, notifier, svc, objv_tracker, obj,
+                                 false, false);
+  async_rados->queue(req);
+  return 0;
+}
+
+/// Turn the finished read into *result and pass it to handle_data().
+///
+/// - -ENOENT with empty_on_enoent set: *result becomes T() and handle_data()
+///   is called.
+/// - any other negative status is returned as-is.
+/// - a successful read with an empty buffer yields T(); otherwise the buffer
+///   is decoded, and a decode failure returns -EIO.
+template <class T>
+int RGWSimpleRadosReadCR<T>::request_complete()
+{
+  const int ret = req->get_ret_status();
+  retcode = ret;
+
+  if (ret == -ENOENT && empty_on_enoent) {
+    *result = T();
+    return handle_data(*result);
+  }
+  if (ret < 0) {
+    return ret;
+  }
+
+  try {
+    auto iter = req->bl.cbegin();
+    if (!iter.end()) {
+      decode(*result, iter);
+    } else {
+      // allow successful reads with empty buffers. ReadSyncStatus coroutines
+      // depend on this to be able to read without locking, because the
+      // cls lock from InitSyncStatus will create an empty object if it didn't
+      // exist
+      *result = T();
+    }
+  } catch (buffer::error& err) {
+    return -EIO;
+  }
+
+  return handle_data(*result);
+}
+
+/// Coroutine for reading the attrs of a system object into *pattrs;
+/// send_request() and request_complete() are defined out of line.
+class RGWSimpleRadosReadAttrsCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+
+  rgw_raw_obj obj;
+  map<string, bufferlist> *pattrs;
+  bool raw_attrs;
+  RGWAsyncGetSystemObj *req;
+
+public:
+  RGWSimpleRadosReadAttrsCR(RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc,
+                            const rgw_raw_obj& _obj,
+                            map<string, bufferlist> *_pattrs, bool _raw_attrs)
+    : RGWSimpleCoroutine(_svc->ctx()),
+      async_rados(_async_rados),
+      svc(_svc),
+      obj(_obj),
+      pattrs(_pattrs),
+      raw_attrs(_raw_attrs),
+      req(nullptr) {}
+
+  ~RGWSimpleRadosReadAttrsCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  int send_request() override;
+  int request_complete() override;
+};
+
+/// Coroutine that writes a value of type T to a system object. The value is
+/// encoded into `bl` at construction time and written with an
+/// RGWAsyncPutSystemObj (exclusive flag false).
+template <class T>
+class RGWSimpleRadosWriteCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+  bufferlist bl;
+  rgw_raw_obj obj;
+  RGWObjVersionTracker *objv_tracker;
+  RGWAsyncPutSystemObj *req{nullptr};
+
+public:
+  RGWSimpleRadosWriteCR(RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc,
+                        const rgw_raw_obj& _obj,
+                        const T& _data, RGWObjVersionTracker *objv_tracker = nullptr)
+    : RGWSimpleCoroutine(_svc->ctx()),
+      async_rados(_async_rados),
+      svc(_svc),
+      obj(_obj),
+      objv_tracker(objv_tracker) {
+    encode(_data, bl);
+  }
+
+  ~RGWSimpleRadosWriteCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// queue the write; the encoded buffer is moved into the request
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncPutSystemObj(this, notifier, svc, objv_tracker, obj,
+                                   false, std::move(bl));
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// copy the request's version tracker back to the caller (when one was
+  /// given) and return the request status
+  int request_complete() override {
+    if (objv_tracker != nullptr) { // copy the updated version
+      *objv_tracker = req->objv_tracker;
+    }
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+/// Coroutine that writes a set of attrs to a system object through an
+/// RGWAsyncPutSystemObjAttrs request.
+class RGWSimpleRadosWriteAttrsCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+  RGWObjVersionTracker *objv_tracker;
+
+  rgw_raw_obj obj;
+  map<string, bufferlist> attrs;
+  RGWAsyncPutSystemObjAttrs *req = nullptr;
+
+public:
+  RGWSimpleRadosWriteAttrsCR(RGWAsyncRadosProcessor *_async_rados,
+                             RGWSI_SysObj *_svc, const rgw_raw_obj& _obj,
+                             map<string, bufferlist> _attrs,
+                             RGWObjVersionTracker *objv_tracker = nullptr)
+    : RGWSimpleCoroutine(_svc->ctx()),
+      async_rados(_async_rados),
+      svc(_svc),
+      objv_tracker(objv_tracker),
+      obj(_obj),
+      attrs(std::move(_attrs)) {
+  }
+
+  ~RGWSimpleRadosWriteAttrsCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// queue the write; the attrs map is moved into the request
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncPutSystemObjAttrs(this, notifier, svc, objv_tracker,
+                                        obj, std::move(attrs));
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// copy the request's version tracker back to the caller (when one was
+  /// given) and return the request status
+  int request_complete() override {
+    if (objv_tracker != nullptr) { // copy the updated version
+      *objv_tracker = req->objv_tracker;
+    }
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+class RGWRadosSetOmapKeysCR : public RGWSimpleCoroutine { // coroutine for setting omap keys on `obj`; all methods are defined out of line
+ RGWRados *store;
+ map<string, bufferlist> entries; // NOTE(review): presumably the key/value pairs to set -- confirm in send_request()
+
+ rgw_rados_ref ref;
+
+ rgw_raw_obj obj;
+
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn; // ref-counted handle to the completion notifier
+
+public:
+ RGWRadosSetOmapKeysCR(RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ map<string, bufferlist>& _entries);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+class RGWRadosGetOmapKeysCR : public RGWSimpleCoroutine { // coroutine for listing omap keys of `obj`; all methods are defined out of line
+ public:
+ struct Result { // output of the listing, shared with the caller through ResultPtr
+ rgw_rados_ref ref;
+ std::set<std::string> entries;
+ bool more = false; // NOTE(review): presumably set when the listing was truncated -- confirm in send_request()
+ };
+ using ResultPtr = std::shared_ptr<Result>;
+
+ RGWRadosGetOmapKeysCR(RGWRados *_store, const rgw_raw_obj& _obj,
+ const string& _marker, int _max_entries,
+ ResultPtr result);
+
+ int send_request() override;
+ int request_complete() override;
+
+ private:
+ RGWRados *store;
+ rgw_raw_obj obj;
+ string marker;
+ int max_entries;
+ ResultPtr result;
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn; // ref-counted handle to the completion notifier
+};
+
+class RGWRadosRemoveOmapKeysCR : public RGWSimpleCoroutine { // coroutine for removing omap keys from `obj`; all methods are defined out of line
+ RGWRados *store;
+
+ rgw_rados_ref ref;
+
+ set<string> keys; // NOTE(review): presumably the keys to remove -- confirm in send_request()
+
+ rgw_raw_obj obj;
+
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn; // ref-counted handle to the completion notifier
+
+public:
+ RGWRadosRemoveOmapKeysCR(RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const set<string>& _keys);
+
+ int send_request() override;
+
+ int request_complete() override;
+};
+
+class RGWRadosRemoveCR : public RGWSimpleCoroutine { // coroutine for removing raw object `obj`; all methods are defined out of line
+ RGWRados *store;
+ librados::IoCtx ioctx;
+ const rgw_raw_obj obj;
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn; // ref-counted handle to the completion notifier
+
+public:
+ RGWRadosRemoveCR(RGWRados *store, const rgw_raw_obj& obj);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+class RGWSimpleRadosLockCR : public RGWSimpleCoroutine { // coroutine that takes lock `lock_name` on `obj` via RGWAsyncLockSystemObj
+ RGWAsyncRadosProcessor *async_rados;
+ RGWRados *store;
+ string lock_name;
+ string cookie;
+ uint32_t duration;
+
+ rgw_raw_obj obj;
+
+ RGWAsyncLockSystemObj *req; // no in-class initializer; NOTE(review): presumably set to null by the out-of-line constructor -- confirm
+
+public:
+ RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const string& _lock_name,
+ const string& _cookie,
+ uint32_t _duration);
+ ~RGWSimpleRadosLockCR() override {
+ request_cleanup();
+ }
+ void request_cleanup() override;
+
+ int send_request() override;
+ int request_complete() override;
+
+ static std::string gen_random_cookie(CephContext* cct) { // returns a freshly generated alphanumeric cookie string
+#define COOKIE_LEN 16 // never #undef'd, so the macro stays visible to everything that includes this header
+ char buf[COOKIE_LEN + 1];
+ gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1); // NOTE(review): buf is returned as a C string, so this relies on gen_rand_alphanumeric() NUL-terminating within the given size -- confirm
+ return buf;
+ }
+};
+
+class RGWSimpleRadosUnlockCR : public RGWSimpleCoroutine { // coroutine that releases lock `lock_name` on `obj` via RGWAsyncUnlockSystemObj
+ RGWAsyncRadosProcessor *async_rados;
+ RGWRados *store;
+ string lock_name;
+ string cookie;
+
+ rgw_raw_obj obj;
+
+ RGWAsyncUnlockSystemObj *req; // no in-class initializer; NOTE(review): presumably set to null by the out-of-line constructor -- confirm
+
+public:
+ RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ const string& _lock_name,
+ const string& _cookie);
+ ~RGWSimpleRadosUnlockCR() override {
+ request_cleanup();
+ }
+ void request_cleanup() override;
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+#define OMAP_APPEND_MAX_ENTRIES_DEFAULT 100
+
+class RGWOmapAppend : public RGWConsumerCR<string> { // consumer coroutine tied to one raw object; operate/flush_pending/append/finish are defined out of line
+ RGWAsyncRadosProcessor *async_rados;
+ RGWRados *store;
+
+ rgw_raw_obj obj;
+
+ bool going_down;
+
+ int num_pending_entries;
+ list<string> pending_entries;
+
+ map<string, bufferlist> entries;
+
+ uint64_t window_size; // defaults to OMAP_APPEND_MAX_ENTRIES_DEFAULT in the constructor
+ uint64_t total_entries; // exposed through get_total_entries()
+public:
+ RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+ const rgw_raw_obj& _obj,
+ uint64_t _window_size = OMAP_APPEND_MAX_ENTRIES_DEFAULT);
+ int operate() override;
+ void flush_pending();
+ bool append(const string& s);
+ bool finish();
+
+ uint64_t get_total_entries() {
+ return total_entries;
+ }
+
+ const rgw_raw_obj& get_obj() { // reference into this object; valid only while the coroutine is alive
+ return obj;
+ }
+};
+
+class RGWAsyncWait : public RGWAsyncRadosRequest { // async request that blocks on a caller-supplied condition variable for up to `interval`
+ CephContext *cct;
+ Mutex *lock; // not owned; supplied by the caller together with `cond`
+ Cond *cond;
+ utime_t interval; // built from _secs with a zero second component
+protected:
+ int _send_request() override { // returns whatever Cond::WaitInterval() returns
+ Mutex::Locker l(*lock);
+ return cond->WaitInterval(*lock, interval);
+ }
+public:
+ RGWAsyncWait(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, CephContext *_cct,
+ Mutex *_lock, Cond *_cond, int _secs) : RGWAsyncRadosRequest(caller, cn),
+ cct(_cct),
+ lock(_lock), cond(_cond), interval(_secs, 0) {}
+
+ void wakeup() { // signals the condition under the lock so a pending wait can return early
+ Mutex::Locker l(*lock);
+ cond->Signal();
+ }
+};
+
+/// Coroutine that waits on a caller-supplied condition variable for up to
+/// `secs` by queueing an RGWAsyncWait on the async processor.
+class RGWWaitCR : public RGWSimpleCoroutine {
+  CephContext *cct;
+  RGWAsyncRadosProcessor *async_rados;
+  Mutex *lock;
+  Cond *cond;
+  int secs;
+
+  RGWAsyncWait *req;
+
+public:
+  RGWWaitCR(RGWAsyncRadosProcessor *_async_rados, CephContext *_cct,
+            Mutex *_lock, Cond *_cond,
+            int _secs)
+    : RGWSimpleCoroutine(_cct),
+      cct(_cct),
+      async_rados(_async_rados),
+      lock(_lock),
+      cond(_cond),
+      secs(_secs),
+      req(nullptr) {
+  }
+
+  ~RGWWaitCR() override {
+    request_cleanup();
+  }
+
+  /// wake the pending wait (if any) before releasing the request
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    wakeup();
+    req->finish();
+    req = nullptr;
+  }
+
+  /// create the wait request and hand it to the async processor
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncWait(this, notifier, cct, lock, cond, secs);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// status stored by the finished wait request
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return status;
+  }
+
+  /// signal the request's condition; requires a request to be in flight
+  void wakeup() {
+    req->wakeup();
+  }
+};
+
+/// Creates and owns one RGWOmapAppend coroutine per shard and spawns each of
+/// them on `op`. Shard i appends to the raw object "<oid_prefix>.<i>" in `pool`.
+///
+/// A reference is taken on every shard at construction and dropped in the
+/// destructor.
+class RGWShardedOmapCRManager {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+  RGWCoroutine *op;
+
+  int num_shards;
+
+  vector<RGWOmapAppend *> shards;
+public:
+  RGWShardedOmapCRManager(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, RGWCoroutine *_op, int _num_shards, const rgw_pool& pool, const string& oid_prefix)
+    : async_rados(_async_rados),
+      store(_store), op(_op), num_shards(_num_shards) {
+    shards.reserve(num_shards);
+    for (int i = 0; i < num_shards; ++i) {
+      // build the oid as a std::string: a runtime-sized char array is a
+      // compiler extension, not standard C++
+      const std::string shard_oid = oid_prefix + "." + std::to_string(i);
+      RGWOmapAppend *shard = new RGWOmapAppend(async_rados, store, rgw_raw_obj(pool, shard_oid));
+      shard->get();
+      shards.push_back(shard);
+      op->spawn(shard, false);
+    }
+  }
+
+  ~RGWShardedOmapCRManager() {
+    for (auto shard : shards) {
+      shard->put();
+    }
+  }
+
+  /// append `entry` to shard `shard_id` (not range-checked)
+  bool append(const string& entry, int shard_id) {
+    return shards[shard_id]->append(entry);
+  }
+
+  /// finish every shard; returns true only if each shard's finish()
+  /// succeeded and the shard is not in an error state. All shards are
+  /// visited even after a failure.
+  bool finish() {
+    bool success = true;
+    for (RGWOmapAppend *shard : shards) {
+      success &= (shard->finish() && (!shard->is_error()));
+    }
+    return success;
+  }
+
+  /// total entries reported by shard `shard_id` (not range-checked)
+  uint64_t get_total_entries(int shard_id) {
+    return shards[shard_id]->get_total_entries();
+  }
+};
+
+class RGWAsyncGetBucketInstanceInfo : public RGWAsyncRadosRequest { // async request that loads bucket instance info for `oid`; _send_request() is defined out of line
+ RGWRados *store;
+ const std::string oid;
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncGetBucketInstanceInfo(RGWCoroutine *caller, RGWAioCompletionNotifier *cn,
+ RGWRados *_store, const std::string& oid)
+ : RGWAsyncRadosRequest(caller, cn), store(_store), oid(oid) {}
+
+ RGWBucketInfo bucket_info; // public result; moved out by RGWGetBucketInstanceInfoCR::request_complete()
+};
+
+/// Coroutine that loads bucket instance info through
+/// RGWAsyncGetBucketInstanceInfo. The object id is
+/// RGW_BUCKET_INSTANCE_MD_PREFIX followed by either a metadata key or the
+/// bucket's key (built with ':' as the delimiter argument).
+class RGWGetBucketInstanceInfoCR : public RGWSimpleCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+  const std::string oid;
+  RGWBucketInfo *bucket_info;
+
+  RGWAsyncGetBucketInstanceInfo *req{nullptr};
+
+public:
+  // metadata key constructor
+  RGWGetBucketInstanceInfoCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                             const std::string& meta_key, RGWBucketInfo *_bucket_info)
+    : RGWSimpleCoroutine(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      oid(RGW_BUCKET_INSTANCE_MD_PREFIX + meta_key),
+      bucket_info(_bucket_info) {}
+
+  // rgw_bucket constructor
+  RGWGetBucketInstanceInfoCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                             const rgw_bucket& bucket, RGWBucketInfo *_bucket_info)
+    : RGWSimpleCoroutine(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      oid(RGW_BUCKET_INSTANCE_MD_PREFIX + bucket.get_key(':')),
+      bucket_info(_bucket_info) {}
+
+  ~RGWGetBucketInstanceInfoCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// create the request and hand it to the async processor
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncGetBucketInstanceInfo(this, notifier, store, oid);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// move the loaded info to the caller (when an output pointer was given)
+  /// and return the request status; the move happens regardless of status
+  int request_complete() override {
+    if (bucket_info != nullptr) {
+      *bucket_info = std::move(req->bucket_info);
+    }
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+class RGWRadosBILogTrimCR : public RGWSimpleCoroutine { // coroutine for trimming one bucket shard's index log between two markers; methods are defined out of line
+ RGWRados::BucketShard bs;
+ std::string start_marker;
+ std::string end_marker;
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn; // ref-counted handle to the completion notifier
+ public:
+ RGWRadosBILogTrimCR(RGWRados *store, const RGWBucketInfo& bucket_info,
+ int shard_id, const std::string& start_marker,
+ const std::string& end_marker);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+class RGWAsyncFetchRemoteObj : public RGWAsyncRadosRequest { // async request that fetches an object from a remote zone; _send_request() is defined out of line
+ RGWRados *store;
+ string source_zone;
+
+ RGWBucketInfo bucket_info; // copied from the constructor argument
+ std::optional<rgw_placement_rule> dest_placement_rule;
+
+ rgw_obj_key key;
+ std::optional<rgw_obj_key> dest_key; // destination key; when unset the source key is reused (see _send_request())
+ std::optional<uint64_t> versioned_epoch;
+
+ real_time src_mtime; // not set by the constructor's initializer list
+
+ bool copy_if_newer;
+ rgw_zone_set zones_trace; // own copy; filled from *_zones_trace when that pointer is non-null
+ PerfCounters* counters; // may be null; _send_request() checks before updating
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncFetchRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ const string& _source_zone,
+ RGWBucketInfo& _bucket_info,
+ std::optional<rgw_placement_rule> _dest_placement_rule,
+ const rgw_obj_key& _key,
+ const std::optional<rgw_obj_key>& _dest_key,
+ std::optional<uint64_t> _versioned_epoch,
+ bool _if_newer, rgw_zone_set *_zones_trace,
+ PerfCounters* counters)
+ : RGWAsyncRadosRequest(caller, cn), store(_store),
+ source_zone(_source_zone),
+ bucket_info(_bucket_info),
+ dest_placement_rule(_dest_placement_rule),
+ key(_key),
+ dest_key(_dest_key),
+ versioned_epoch(_versioned_epoch),
+ copy_if_newer(_if_newer), counters(counters)
+ {
+ if (_zones_trace) { // a null trace leaves zones_trace default-constructed
+ zones_trace = *_zones_trace;
+ }
+ }
+};
+
+/// Coroutine that fetches an object from a remote zone by queueing an
+/// RGWAsyncFetchRemoteObj. All arguments are stored at construction and
+/// forwarded unchanged when the request is created.
+class RGWFetchRemoteObjCR : public RGWSimpleCoroutine {
+  CephContext *cct;
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+  string source_zone;
+
+  RGWBucketInfo bucket_info;
+  std::optional<rgw_placement_rule> dest_placement_rule;
+
+  rgw_obj_key key;
+  std::optional<rgw_obj_key> dest_key;
+  std::optional<uint64_t> versioned_epoch;
+
+  real_time src_mtime;
+
+  bool copy_if_newer;
+
+  RGWAsyncFetchRemoteObj *req;
+  rgw_zone_set *zones_trace;
+  PerfCounters* counters;
+
+public:
+  RGWFetchRemoteObjCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                      const string& _source_zone,
+                      RGWBucketInfo& _bucket_info,
+                      std::optional<rgw_placement_rule> _dest_placement_rule,
+                      const rgw_obj_key& _key,
+                      const std::optional<rgw_obj_key>& _dest_key,
+                      std::optional<uint64_t> _versioned_epoch,
+                      bool _if_newer, rgw_zone_set *_zones_trace,
+                      PerfCounters* counters)
+    : RGWSimpleCoroutine(_store->ctx()),
+      cct(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      source_zone(_source_zone),
+      bucket_info(_bucket_info),
+      dest_placement_rule(_dest_placement_rule),
+      key(_key),
+      dest_key(_dest_key),
+      versioned_epoch(_versioned_epoch),
+      copy_if_newer(_if_newer),
+      req(nullptr),
+      zones_trace(_zones_trace),
+      counters(counters) {}
+
+  ~RGWFetchRemoteObjCR() override {
+    request_cleanup();
+  }
+
+  /// release the in-flight request, if any
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// create the fetch request and hand it to the async processor
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncFetchRemoteObj(this, notifier, store,
+                                     source_zone, bucket_info, dest_placement_rule,
+                                     key, dest_key, versioned_epoch, copy_if_newer,
+                                     zones_trace, counters);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// status stored by the finished fetch request
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+class RGWAsyncStatRemoteObj : public RGWAsyncRadosRequest { // async request that stats an object in a remote zone; _send_request() is defined out of line
+ RGWRados *store;
+ string source_zone;
+
+ RGWBucketInfo bucket_info; // copied from the constructor argument
+
+ rgw_obj_key key;
+
+ ceph::real_time *pmtime; // output pointers below are stored as given and forwarded to stat_remote_obj()
+ uint64_t *psize;
+ string *petag;
+ map<string, bufferlist> *pattrs;
+ map<string, string> *pheaders;
+
+protected:
+ int _send_request() override;
+public:
+ RGWAsyncStatRemoteObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ const string& _source_zone,
+ RGWBucketInfo& _bucket_info,
+ const rgw_obj_key& _key,
+ ceph::real_time *_pmtime,
+ uint64_t *_psize,
+ string *_petag,
+ map<string, bufferlist> *_pattrs,
+ map<string, string> *_pheaders) : RGWAsyncRadosRequest(caller, cn), store(_store),
+ source_zone(_source_zone),
+ bucket_info(_bucket_info),
+ key(_key),
+ pmtime(_pmtime),
+ psize(_psize),
+ petag(_petag),
+ pattrs(_pattrs),
+ pheaders(_pheaders) {}
+};
+
+/// Simple coroutine that builds an RGWAsyncStatRemoteObj from the stored
+/// arguments, queues it on the async rados processor and reports its status.
+class RGWStatRemoteObjCR : public RGWSimpleCoroutine {
+  CephContext *cct;
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+  string source_zone;
+
+  RGWBucketInfo bucket_info;
+
+  rgw_obj_key key;
+
+  // Caller-supplied pointers, forwarded unchanged to the async request.
+  ceph::real_time *pmtime;
+  uint64_t *psize;
+  string *petag;
+  map<string, bufferlist> *pattrs;
+  map<string, string> *pheaders;
+
+  RGWAsyncStatRemoteObj *req;  // null until send_request() runs
+
+public:
+  RGWStatRemoteObjCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                     const string& _source_zone,
+                     RGWBucketInfo& _bucket_info,
+                     const rgw_obj_key& _key,
+                     ceph::real_time *_pmtime,
+                     uint64_t *_psize,
+                     string *_petag,
+                     map<string, bufferlist> *_pattrs,
+                     map<string, string> *_pheaders)
+    : RGWSimpleCoroutine(_store->ctx()),
+      cct(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      source_zone(_source_zone),
+      bucket_info(_bucket_info),
+      key(_key),
+      pmtime(_pmtime),
+      psize(_psize),
+      petag(_petag),
+      pattrs(_pattrs),
+      pheaders(_pheaders),
+      req(nullptr)
+  {}
+
+  ~RGWStatRemoteObjCR() override {
+    request_cleanup();
+  }
+
+  /// finish() the attached request, if any, and forget it.
+  void request_cleanup() override {
+    if (req == nullptr) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// Allocate the async request and queue it. Always returns 0.
+  int send_request() override {
+    req = new RGWAsyncStatRemoteObj(
+        this, stack->create_completion_notifier(), store, source_zone,
+        bucket_info, key, pmtime, psize, petag, pattrs, pheaders);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// Status recorded on the async request.
+  int request_complete() override {
+    return req->get_ret_status();
+  }
+};
+
+/// Async request object holding the arguments for an object removal.
+/// The removal itself is implemented in _send_request(), defined elsewhere.
+class RGWAsyncRemoveObj : public RGWAsyncRadosRequest {
+  RGWRados *store;
+  string source_zone;
+
+  RGWBucketInfo bucket_info;
+
+  rgw_obj_key key;
+  string owner;
+  string owner_display_name;
+  bool versioned;
+  uint64_t versioned_epoch;
+  string marker_version_id;  // set to key.instance only when _delete_marker is true
+
+  bool del_if_older;
+  ceph::real_time timestamp;
+  rgw_zone_set zones_trace;  // copy of *_zones_trace when one was supplied
+
+protected:
+  int _send_request() override;
+
+public:
+  RGWAsyncRemoveObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+                    const string& _source_zone,
+                    RGWBucketInfo& _bucket_info,
+                    const rgw_obj_key& _key,
+                    const string& _owner,
+                    const string& _owner_display_name,
+                    bool _versioned,
+                    uint64_t _versioned_epoch,
+                    bool _delete_marker,
+                    bool _if_older,
+                    real_time& _timestamp,
+                    rgw_zone_set* _zones_trace)
+    : RGWAsyncRadosRequest(caller, cn),
+      store(_store),
+      source_zone(_source_zone),
+      bucket_info(_bucket_info),
+      key(_key),
+      owner(_owner),
+      owner_display_name(_owner_display_name),
+      versioned(_versioned),
+      versioned_epoch(_versioned_epoch),
+      del_if_older(_if_older),
+      timestamp(_timestamp)
+  {
+    if (_delete_marker) {
+      marker_version_id = key.instance;
+    }
+
+    // The trace set is copied by value, so the caller's object is not
+    // referenced after construction.
+    if (_zones_trace != nullptr) {
+      zones_trace = *_zones_trace;
+    }
+  }
+};
+
+/// Simple coroutine that builds an RGWAsyncRemoveObj from the stored
+/// arguments, queues it on the async rados processor and reports its status.
+class RGWRemoveObjCR : public RGWSimpleCoroutine {
+  CephContext *cct;
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+  string source_zone;
+
+  RGWBucketInfo bucket_info;
+
+  rgw_obj_key key;
+  bool versioned;
+  uint64_t versioned_epoch;
+  bool delete_marker;
+  string owner;
+  string owner_display_name;
+
+  bool del_if_older;  // true iff a timestamp pointer was passed to the ctor
+  real_time timestamp;
+
+  RGWAsyncRemoveObj *req;  // null until send_request() runs
+
+  rgw_zone_set *zones_trace;
+
+public:
+  RGWRemoveObjCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                 const string& _source_zone,
+                 RGWBucketInfo& _bucket_info,
+                 const rgw_obj_key& _key,
+                 bool _versioned,
+                 uint64_t _versioned_epoch,
+                 string *_owner,
+                 string *_owner_display_name,
+                 bool _delete_marker,
+                 real_time *_timestamp,
+                 rgw_zone_set *_zones_trace)
+    : RGWSimpleCoroutine(_store->ctx()),
+      cct(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      source_zone(_source_zone),
+      bucket_info(_bucket_info),
+      key(_key),
+      versioned(_versioned),
+      versioned_epoch(_versioned_epoch),
+      delete_marker(_delete_marker),
+      req(nullptr),
+      zones_trace(_zones_trace)
+  {
+    // Optional pointer arguments are copied by value when present; absent
+    // ones leave the corresponding member default-constructed.
+    del_if_older = (_timestamp != nullptr);
+    if (del_if_older) {
+      timestamp = *_timestamp;
+    }
+
+    if (_owner != nullptr) {
+      owner = *_owner;
+    }
+
+    if (_owner_display_name != nullptr) {
+      owner_display_name = *_owner_display_name;
+    }
+  }
+
+  ~RGWRemoveObjCR() override {
+    request_cleanup();
+  }
+
+  /// finish() the attached request, if any, and forget it.
+  void request_cleanup() override {
+    if (req == nullptr) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// Allocate the async request and queue it. Always returns 0.
+  int send_request() override {
+    req = new RGWAsyncRemoveObj(
+        this, stack->create_completion_notifier(), store, source_zone, bucket_info,
+        key, owner, owner_display_name, versioned, versioned_epoch,
+        delete_marker, del_if_older, timestamp, zones_trace);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// Status recorded on the async request.
+  int request_complete() override {
+    return req->get_ret_status();
+  }
+};
+
+/// Coroutine holding the state for a lease on a named lock of a raw object.
+/// The lease logic is in operate(), which is defined outside this header.
+class RGWContinuousLeaseCR : public RGWCoroutine {
+  RGWAsyncRadosProcessor *async_rados;
+  RGWRados *store;
+
+  const rgw_raw_obj obj;
+
+  const string lock_name;
+  const string cookie;  // generated once at construction
+
+  int interval;
+
+  Mutex lock;  // guards `locked`
+  std::atomic<bool> going_down{false};
+  bool locked{false};
+
+  RGWCoroutine *caller;
+
+  bool aborted{false};
+
+public:
+  RGWContinuousLeaseCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
+                       const rgw_raw_obj& _obj,
+                       const string& _lock_name, int _interval, RGWCoroutine *_caller)
+    : RGWCoroutine(_store->ctx()),
+      async_rados(_async_rados),
+      store(_store),
+      obj(_obj),
+      lock_name(_lock_name),
+      cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)),
+      interval(_interval),
+      lock("RGWContinuousLeaseCR"),
+      caller(_caller)
+  {}
+
+  int operate() override;
+
+  /// Read `locked` under the mutex.
+  bool is_locked() {
+    Mutex::Locker guard(lock);
+    return locked;
+  }
+
+  /// Write `locked` under the mutex.
+  void set_locked(bool status) {
+    Mutex::Locker guard(lock);
+    locked = status;
+  }
+
+  /// Raise the going_down flag, then wake the coroutine.
+  void go_down() {
+    going_down = true;
+    wakeup();
+  }
+
+  /// Set the aborted flag (no locking, no wakeup).
+  void abort() {
+    aborted = true;
+  }
+};
+
+// Simple coroutine declared with a list of cls_log_entry items, a target oid
+// and a completion notifier. The constructor, send_request() and
+// request_complete() are only declared here; their bodies live elsewhere.
+// NOTE(review): the name suggests it appends entries to a time log -- confirm
+// against the implementation.
+class RGWRadosTimelogAddCR : public RGWSimpleCoroutine {
+ RGWRados *store;
+ list<cls_log_entry> entries;
+
+ string oid;
+
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
+
+public:
+ RGWRadosTimelogAddCR(RGWRados *_store, const string& _oid,
+ const cls_log_entry& entry);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+// Simple coroutine declared with a target oid plus a time range
+// (start_time/end_time) and a marker range (from_marker/to_marker).
+// Those fields are protected so subclasses can read them; store and cn are
+// private. All member functions are only declared here.
+// NOTE(review): presumably trims time-log entries within the given range --
+// confirm against the implementation.
+class RGWRadosTimelogTrimCR : public RGWSimpleCoroutine {
+ RGWRados *store;
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
+ protected:
+ std::string oid;
+ real_time start_time;
+ real_time end_time;
+ std::string from_marker;
+ std::string to_marker;
+
+ public:
+ RGWRadosTimelogTrimCR(RGWRados *store, const std::string& oid,
+ const real_time& start_time, const real_time& end_time,
+ const std::string& from_marker,
+ const std::string& to_marker);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+// Wrapper around RGWRadosTimelogTrimCR that updates *last_trim_marker on
+// success. It overrides request_complete(); the constructor and that override
+// are only declared here.
+class RGWSyncLogTrimCR : public RGWRadosTimelogTrimCR {
+ CephContext *cct;
+ std::string *last_trim_marker;
+ public:
+ // a marker that compares greater than any timestamp-based index
+ static constexpr const char* max_marker = "99999999";
+
+ RGWSyncLogTrimCR(RGWRados *store, const std::string& oid,
+ const std::string& to_marker, std::string *last_trim_marker);
+ int request_complete() override;
+};
+
+/// Async request object holding the arguments for stat'ing one object.
+/// The stat itself is implemented in _send_request(), defined elsewhere.
+class RGWAsyncStatObj : public RGWAsyncRadosRequest {
+  RGWRados *store;
+  RGWBucketInfo bucket_info;
+  rgw_obj obj;
+  // Optional caller-supplied pointers; each defaults to nullptr.
+  uint64_t *psize;
+  real_time *pmtime;
+  uint64_t *pepoch;
+  RGWObjVersionTracker *objv_tracker;
+protected:
+  int _send_request() override;
+public:
+  /// @param _bucket_info  copied into the request; earlier code accepted this
+  ///                      argument but never stored it, so the member stayed
+  ///                      default-constructed.
+  RGWAsyncStatObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *store,
+                  const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr,
+                  real_time *pmtime = nullptr, uint64_t *pepoch = nullptr,
+                  RGWObjVersionTracker *objv_tracker = nullptr)
+    : RGWAsyncRadosRequest(caller, cn),
+      store(store),
+      bucket_info(_bucket_info),
+      obj(obj),
+      psize(psize),
+      pmtime(pmtime),
+      pepoch(pepoch),
+      objv_tracker(objv_tracker)
+  {}
+};
+
+// Simple coroutine wrapping an RGWAsyncStatObj request (req, initially null).
+// Only the destructor is defined inline; it runs request_cleanup(). The
+// constructor, request_cleanup(), send_request() and request_complete() are
+// declared here and implemented elsewhere.
+class RGWStatObjCR : public RGWSimpleCoroutine {
+ RGWRados *store;
+ RGWAsyncRadosProcessor *async_rados;
+ RGWBucketInfo bucket_info;
+ rgw_obj obj;
+ // Optional caller-supplied pointers; the constructor defaults each to nullptr.
+ uint64_t *psize;
+ real_time *pmtime;
+ uint64_t *pepoch;
+ RGWObjVersionTracker *objv_tracker;
+ RGWAsyncStatObj *req = nullptr;
+ public:
+ RGWStatObjCR(RGWAsyncRadosProcessor *async_rados, RGWRados *store,
+ const RGWBucketInfo& _bucket_info, const rgw_obj& obj, uint64_t *psize = nullptr,
+ real_time* pmtime = nullptr, uint64_t *pepoch = nullptr,
+ RGWObjVersionTracker *objv_tracker = nullptr);
+ ~RGWStatObjCR() override {
+ request_cleanup();
+ }
+ void request_cleanup() override;
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+/// coroutine wrapper for IoCtx::aio_notify()
+///
+/// Holds the target raw object, the request payload, a timeout in
+/// milliseconds and a pointer for the response buffer. All member functions
+/// are only declared here; their bodies live elsewhere.
+class RGWRadosNotifyCR : public RGWSimpleCoroutine {
+ RGWRados *const store;
+ const rgw_raw_obj obj;
+ bufferlist request;
+ const uint64_t timeout_ms;
+ bufferlist *response;
+ rgw_rados_ref ref;
+ boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
+
+public:
+ RGWRadosNotifyCR(RGWRados *store, const rgw_raw_obj& obj,
+ bufferlist& request, uint64_t timeout_ms,
+ bufferlist *response);
+
+ int send_request() override;
+ int request_complete() override;
+};
+
+#endif
diff --git a/src/rgw/rgw_cr_rest.cc b/src/rgw/rgw_cr_rest.cc
new file mode 100644
index 00000000..6a5e38a2
--- /dev/null
+++ b/src/rgw/rgw_cr_rest.cc
@@ -0,0 +1,349 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_cr_rest.h"
+
+#include "rgw_coroutine.h"
+
+// re-include our assert to clobber the system one; fix dout:
+#include "include/ceph_assert.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+// Binds the callback to its coroutine environment and HTTP request: records
+// the request's io id for READ|CONTROL events and registers this object as
+// the request's receive callback.
+RGWCRHTTPGetDataCB::RGWCRHTTPGetDataCB(RGWCoroutinesEnv *_env, RGWCoroutine *_cr, RGWHTTPStreamRWRequest *_req)
+  : lock("RGWCRHTTPGetDataCB"),
+    env(_env),
+    cr(_cr),
+    req(_req)
+{
+  io_id = req->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_READ |
+                         RGWHTTPClient::HTTPCLIENT_IO_CONTROL);
+  req->set_in_cb(this);
+}
+
+// Parenthesized so the macro stays a single operand wherever it is expanded
+// (matches PENDING_WRITES_WINDOW further down). The value is unchanged.
+#define GET_DATA_WINDOW_SIZE (2 * 1024 * 1024)
+
+/**
+ * Receive callback: buffer newly arrived bytes.
+ *
+ * Until extra_data_len bytes have been collected, the leading bytes of the
+ * stream are moved into extra_data; everything after that is appended to
+ * data. Once data holds at least GET_DATA_WINDOW_SIZE bytes the coroutine is
+ * notified via io_complete() (at most once until the buffer drops below half
+ * a window again). At two windows or more the transfer is asked to pause.
+ *
+ * @param bl     incoming bytes; its leading part may be spliced out
+ * @param pause  set to true when the receive side should pause
+ * @return always 0
+ */
+int RGWCRHTTPGetDataCB::handle_data(bufferlist& bl, bool *pause) {
+  // Re-arm the notification once the buffer has drained below half a window.
+  if (data.length() < GET_DATA_WINDOW_SIZE / 2) {
+    notified = false;
+  }
+
+  {
+    const uint64_t bl_len = bl.length();
+
+    Mutex::Locker l(lock);
+
+    if (!got_all_extra_data) {
+      // Take at most what is still missing from extra_data, capped at what
+      // this chunk actually contains.
+      uint64_t max = extra_data_len - extra_data.length();
+      if (max > bl_len) {
+        max = bl_len;
+      }
+      bl.splice(0, max, &extra_data);
+      got_all_extra_data = extra_data.length() == extra_data_len;
+    }
+
+    data.append(bl);
+  }
+
+  uint64_t data_len = data.length();
+  if (data_len >= GET_DATA_WINDOW_SIZE && !notified) {
+    notified = true;
+    env->manager->io_complete(cr, io_id);
+  }
+  if (data_len >= 2 * GET_DATA_WINDOW_SIZE) {
+    *pause = true;
+    paused = true;
+  }
+  return 0;
+}
+
+// Moves up to `max` buffered bytes from data into *dest. Does nothing when
+// the buffer is empty. If the callback had paused the transfer and the
+// remaining buffer is at most one window, receiving is resumed -- after the
+// lock has been released.
+void RGWCRHTTPGetDataCB::claim_data(bufferlist *dest, uint64_t max) {
+  bool resume_receive = false;
+
+  {
+    Mutex::Locker guard(lock);
+
+    const uint64_t buffered = data.length();
+    if (buffered == 0) {
+      return;
+    }
+
+    if (max > buffered) {
+      max = buffered;
+    }
+
+    data.splice(0, max, dest);
+    resume_receive = paused && (data.length() <= GET_DATA_WINDOW_SIZE);
+  }
+
+  if (!resume_receive) {
+    return;
+  }
+  req->unpause_receive();
+}
+
+// If a request is attached: cancel it, wait for it, then delete it.
+RGWStreamReadHTTPResourceCRF::~RGWStreamReadHTTPResourceCRF()
+{
+  if (req == nullptr) {
+    return;
+  }
+  req->cancel();
+  req->wait();
+  delete req;
+}
+
+// Registers the request as a new io on the coroutine stack, constructs the
+// receive callback in place, and submits the request to the HTTP manager.
+// Returns add_request()'s result when it is negative, otherwise 0.
+int RGWStreamReadHTTPResourceCRF::init()
+{
+  env->stack->init_new_io(req);
+
+  in_cb.emplace(env, caller, req);
+
+  const int r = http_manager->add_request(req);
+  return (r < 0) ? r : 0;
+}
+
+// Registers the request as a new io on the coroutine stack, installs the
+// write-drain callback, and submits the request to the HTTP manager.
+// Returns add_request()'s result when it is negative, otherwise 0.
+int RGWStreamWriteHTTPResourceCRF::send()
+{
+  env->stack->init_new_io(req);
+
+  req->set_write_drain_cb(&write_drain_notify_cb);
+
+  const int r = http_manager->add_request(req);
+  return (r < 0) ? r : 0;
+}
+
+// Returns the got_attrs flag, which read() sets once it proceeds past its
+// wait for data.
+bool RGWStreamReadHTTPResourceCRF::has_attrs()
+{
+ return got_attrs;
+}
+
+// Fills *attrs from the request's out headers (req->get_out_headers()).
+void RGWStreamReadHTTPResourceCRF::get_attrs(std::map<string, string> *attrs)
+{
+ req->get_out_headers(attrs);
+}
+
+// Basic generic implementation: copies every header into rest_obj.attrs
+// under the same key. extra_data is not used here. Always returns 0.
+int RGWStreamReadHTTPResourceCRF::decode_rest_obj(map<string, string>& headers, bufferlist& extra_data) {
+  // Iterate by const reference: the by-value loop copied each key/value pair
+  // once more before copying it into attrs.
+  for (const auto& header : headers) {
+    rest_obj.attrs[header.first] = header.second;
+  }
+
+  return 0;
+}
+
+// Reentrant read step, driven by the boost::asio stackless coroutine state in
+// read_state. While the request is not done or buffered data remains, each
+// pass either blocks the caller on the READ|CONTROL io id (when nothing is
+// buffered) or claims up to max_size bytes from the receive callback into
+// *out. *io_pending is set to true at the top of every pass and back to false
+// just before data is claimed.
+// Returns decode_rest_obj()'s negative result if decoding fails, otherwise 0.
+int RGWStreamReadHTTPResourceCRF::read(bufferlist *out, uint64_t max_size, bool *io_pending)
+{
+ reenter(&read_state) {
+ io_read_mask = req->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_READ | RGWHTTPClient::HTTPCLIENT_IO_CONTROL);
+ while (!req->is_done() ||
+ in_cb->has_data()) {
+ *io_pending = true;
+ if (!in_cb->has_data()) {
+ yield caller->io_block(0, io_read_mask);
+ }
+ got_attrs = true;
+ // One-time step, only when the subclass asks for extra data: wait until
+ // the callback has all of it, then decode it together with the headers.
+ if (need_extra_data() && !got_extra_data) {
+ if (!in_cb->has_all_extra_data()) {
+ continue;
+ }
+ extra_data.claim_append(in_cb->get_extra_data());
+ map<string, string> attrs;
+ req->get_out_headers(&attrs);
+ int ret = decode_rest_obj(attrs, extra_data);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: " << __func__ << " decode_rest_obj() returned ret=" << ret << dendl;
+ return ret;
+ }
+ got_extra_data = true;
+ }
+ *io_pending = false;
+ in_cb->claim_data(out, max_size);
+ if (out->length() == 0) {
+ /* this may happen if we just read the prepended extra_data and didn't have any data
+ * after. In that case, retry reading, so that caller doesn't assume it's EOF.
+ */
+ continue;
+ }
+ // Hand control back to the caller when more input may still arrive or
+ // the output buffer has reached max_size.
+ if (!req->is_done() || out->length() >= max_size) {
+ yield;
+ }
+ }
+ }
+ return 0;
+}
+
+// Forwards to req->is_done().
+bool RGWStreamReadHTTPResourceCRF::is_done()
+{
+ return req->is_done();
+}
+
+// If a request is attached: cancel it, wait for it, then delete it.
+RGWStreamWriteHTTPResourceCRF::~RGWStreamWriteHTTPResourceCRF()
+{
+  if (req == nullptr) {
+    return;
+  }
+  req->cancel();
+  req->wait();
+  delete req;
+}
+
+// Prepares the outgoing request from rest_obj: sets the send length to
+// rest_obj.content_len and appends every entry of rest_obj.attrs as a header.
+void RGWStreamWriteHTTPResourceCRF::send_ready(const rgw_rest_obj& rest_obj)
+{
+  req->set_send_length(rest_obj.content_len);
+  // By const reference: the by-value loop copied each key/value pair.
+  for (const auto& h : rest_obj.attrs) {
+    req->append_header(h.first, h.second);
+  }
+}
+
+#define PENDING_WRITES_WINDOW (1 * 1024 * 1024)
+
+// Drain notification: if write() has marked the writer as blocked and the
+// pending size has dropped below half the window, complete the caller's
+// WRITE|CONTROL io and clear the blocked flag. Runs under blocked_lock.
+void RGWStreamWriteHTTPResourceCRF::write_drain_notify(uint64_t pending_size)
+{
+  lock_guard guard(blocked_lock);
+  if (!is_blocked || pending_size >= PENDING_WRITES_WINDOW / 2) {
+    return;
+  }
+  env->manager->io_complete(caller, req->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_WRITE | RGWHTTPClient::HTTPCLIENT_IO_CONTROL));
+  is_blocked = false;
+}
+
+// Forwards the drain notification to the owning CRF's write_drain_notify().
+void RGWStreamWriteHTTPResourceCRF::WriteDrainNotify::notify(uint64_t pending_size)
+{
+ crf->write_drain_notify(pending_size);
+}
+
+// Reentrant write step, driven by the stackless coroutine state in
+// write_state. While the request is not done: if the request's pending send
+// size has reached PENDING_WRITES_WINDOW, mark the writer blocked, set
+// *io_pending to true and block the caller on the WRITE|CONTROL io id;
+// then hand `data` to req->add_send_data().
+// Once the request reports done, returns req->get_status(); calls that end
+// at a yield point return 0.
+int RGWStreamWriteHTTPResourceCRF::write(bufferlist& data, bool *io_pending)
+{
+ reenter(&write_state) {
+ while (!req->is_done()) {
+ *io_pending = false;
+ if (req->get_pending_send_size() >= PENDING_WRITES_WINDOW) {
+ *io_pending = true;
+ {
+ lock_guard l(blocked_lock);
+ is_blocked = true;
+
+ /* it's ok to unlock here, even if io_complete() arrives before io_block(), it'll wakeup
+ * correctly */
+ }
+ yield caller->io_block(0, req->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_WRITE | RGWHTTPClient::HTTPCLIENT_IO_CONTROL));
+ }
+ yield req->add_send_data(data);
+ }
+ return req->get_status();
+ }
+ return 0;
+}
+
+// Reentrant drain step, driven by the stackless coroutine state in
+// drain_state. Calls req->finish_write(), then blocks the caller on the
+// CONTROL io id until the request reports done. *need_retry is true while
+// the request is not done. Once done, the response headers are passed to
+// handle_headers() and req->get_req_retcode() is returned; calls that end at
+// a yield point return 0.
+int RGWStreamWriteHTTPResourceCRF::drain_writes(bool *need_retry)
+{
+ reenter(&drain_state) {
+ *need_retry = true;
+ yield req->finish_write();
+ *need_retry = !req->is_done();
+ while (!req->is_done()) {
+ yield caller->io_block(0, req->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_CONTROL));
+ *need_retry = !req->is_done();
+ }
+
+ map<string, string> headers;
+ req->get_out_headers(&headers);
+ handle_headers(headers);
+
+ return req->get_req_retcode();
+ }
+ return 0;
+}
+
+// Stores the context, the HTTP manager and shared ownership of the input
+// (read) and output (write) stream helpers.
+RGWStreamSpliceCR::RGWStreamSpliceCR(CephContext *_cct, RGWHTTPManager *_mgr,
+                                     shared_ptr<RGWStreamReadHTTPResourceCRF>& _in_crf,
+                                     shared_ptr<RGWStreamWriteHTTPResourceCRF>& _out_crf)
+  : RGWCoroutine(_cct),
+    cct(_cct),
+    http_manager(_mgr),
+    in_crf(_in_crf),
+    out_crf(_out_crf)
+{}
+// Nothing to release by hand; members clean up after themselves.
+RGWStreamSpliceCR::~RGWStreamSpliceCR() = default;
+
+/**
+ * Coroutine body: pump data from in_crf to out_crf.
+ *
+ * Steps, in order:
+ *   1. initialise the reader;
+ *   2. repeatedly read up to 4 MiB into bl (retrying while the reader asks
+ *      for it), and on the first pass where the reader has attributes,
+ *      initialise the writer, pass it the reader's rest_obj and send;
+ *   3. write each chunk (retrying while the writer asks for it) until an
+ *      empty read coincides with the reader being done;
+ *   4. drain the writer.
+ *
+ * Any negative result from a helper ends the coroutine via set_cr_error().
+ * A negative `retcode` observed after a yield is reported with that same
+ * retcode: at that point `ret` has already been checked to be non-negative,
+ * so passing `ret` would record the failure with a non-negative code.
+ */
+int RGWStreamSpliceCR::operate() {
+  reenter(this) {
+    {
+      int ret = in_crf->init();
+      if (ret < 0) {
+        return set_cr_error(ret);
+      }
+    }
+
+    do {
+
+      bl.clear();
+
+      do {
+        yield {
+          ret = in_crf->read(&bl, 4 * 1024 * 1024, &need_retry);
+          if (ret < 0) {
+            return set_cr_error(ret);
+          }
+        }
+
+        if (retcode < 0) {
+          ldout(cct, 20) << __func__ << ": in_crf->read() retcode=" << retcode << dendl;
+          return set_cr_error(retcode);
+        }
+      } while (need_retry);
+
+      ldout(cct, 20) << "read " << bl.length() << " bytes" << dendl;
+
+      // No attributes yet: nothing can have been read, so go around again.
+      if (!in_crf->has_attrs()) {
+        assert (bl.length() == 0);
+        continue;
+      }
+
+      // First pass with attributes available: set up and start the writer.
+      if (!sent_attrs) {
+        int ret = out_crf->init();
+        if (ret < 0) {
+          return set_cr_error(ret);
+        }
+        out_crf->send_ready(in_crf->get_rest_obj());
+        ret = out_crf->send();
+        if (ret < 0) {
+          return set_cr_error(ret);
+        }
+        sent_attrs = true;
+      }
+
+      if (bl.length() == 0 && in_crf->is_done()) {
+        break;
+      }
+
+      total_read += bl.length();
+
+      do {
+        yield {
+          ldout(cct, 20) << "writing " << bl.length() << " bytes" << dendl;
+          ret = out_crf->write(bl, &need_retry);
+          if (ret < 0) {
+            return set_cr_error(ret);
+          }
+        }
+
+        if (retcode < 0) {
+          ldout(cct, 20) << __func__ << ": out_crf->write() retcode=" << retcode << dendl;
+          return set_cr_error(retcode);
+        }
+      } while (need_retry);
+    } while (true);
+
+    do {
+      yield {
+        int ret = out_crf->drain_writes(&need_retry);
+        if (ret < 0) {
+          return set_cr_error(ret);
+        }
+      }
+    } while (need_retry);
+
+    return set_cr_done();
+  }
+  return 0;
+}
+
diff --git a/src/rgw/rgw_cr_rest.h b/src/rgw/rgw_cr_rest.h
new file mode 100644
index 00000000..a73828b3
--- /dev/null
+++ b/src/rgw/rgw_cr_rest.h
@@ -0,0 +1,593 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_CR_REST_H
+#define CEPH_RGW_CR_REST_H
+
+#include <boost/intrusive_ptr.hpp>
+#include <mutex>
+#include "include/ceph_assert.h" // boost header clobbers our assert.h
+
+#include "rgw_coroutine.h"
+#include "rgw_rest_conn.h"
+
+
+/// Plain description of an object as exchanged over REST: its key, content
+/// length, two string attribute maps and an access control policy.
+struct rgw_rest_obj {
+  rgw_obj_key key;
+  // Zero-initialized: init() does not set it, and leaving it indeterminate
+  // would make any read before an explicit assignment undefined.
+  uint64_t content_len{0};
+  std::map<string, string> attrs;
+  std::map<string, string> custom_attrs;
+  RGWAccessControlPolicy acls;
+
+  /// Sets only the key; every other field is left as it is.
+  void init(const rgw_obj_key& _key) {
+    key = _key;
+  }
+};
+
+/// Simple coroutine that issues an asynchronous read on a REST resource
+/// (RGWRESTReadResource) and collects the raw response body.
+///
+/// Reference counting: send_request() and request_complete() each call put()
+/// once on the operation in addition to the intrusive_ptr's own release, and
+/// request_cleanup() does the same for an operation that is still attached.
+class RGWReadRawRESTResourceCR : public RGWSimpleCoroutine {
+  // Destination for the raw body. Only the first constructor sets it, so it
+  // defaults to null instead of being left indeterminate.
+  bufferlist *result{nullptr};
+ protected:
+  RGWRESTConn *conn;
+  RGWHTTPManager *http_manager;
+  string path;
+  param_vec_t params;
+  param_vec_t extra_headers;
+public:
+  boost::intrusive_ptr<RGWRESTReadResource> http_op;
+
+  /// Read into *_result.
+  RGWReadRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager, const string& _path,
+                           rgw_http_param_pair *params, bufferlist *_result)
+    : RGWSimpleCoroutine(_cct), result(_result), conn(_conn), http_manager(_http_manager),
+      path(_path), params(make_param_list(params))
+  {}
+
+  /// No raw result destination (subclasses override wait_result()).
+  RGWReadRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager, const string& _path,
+                           rgw_http_param_pair *params)
+    : RGWSimpleCoroutine(_cct), conn(_conn), http_manager(_http_manager),
+      path(_path), params(make_param_list(params))
+  {}
+
+  /// As above, with extra request headers given as a parameter vector.
+  RGWReadRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager, const string& _path,
+                           rgw_http_param_pair *params, param_vec_t &hdrs)
+    : RGWSimpleCoroutine(_cct), conn(_conn), http_manager(_http_manager),
+      path(_path), params(make_param_list(params)),
+      extra_headers(hdrs)
+  {}
+
+  /// As above, with extra request headers given as a string map.
+  RGWReadRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager, const string& _path,
+                           rgw_http_param_pair *params,
+                           std::map <std::string, std::string> *hdrs)
+    : RGWSimpleCoroutine(_cct), conn(_conn), http_manager(_http_manager),
+      path(_path), params(make_param_list(params)),
+      extra_headers(make_param_list(hdrs))
+  {}
+
+
+  ~RGWReadRawRESTResourceCR() override {
+    request_cleanup();
+  }
+
+  /// Creates the read operation, registers it as a new io and starts the
+  /// asynchronous read. On failure the error is logged and returned; on
+  /// success the operation is kept in http_op and 0 is returned.
+  int send_request() override {
+    auto op = boost::intrusive_ptr<RGWRESTReadResource>(
+        new RGWRESTReadResource(conn, path, params, &extra_headers, http_manager));
+
+    init_new_io(op.get());
+
+    int ret = op->aio_read();
+    if (ret < 0) {
+      log_error() << "failed to send http operation: " << op->to_str()
+          << " ret=" << ret << std::endl;
+      op->put();
+      return ret;
+    }
+    std::swap(http_op, op); // store reference in http_op on success
+    return 0;
+  }
+
+
+
+  /// Waits for the operation and stores the raw body in *result. When no
+  /// destination was supplied the body is read into a scratch buffer and
+  /// dropped, as RGWSendRawRESTResourceCR does when it has no result.
+  virtual int wait_result() {
+    if (result == nullptr) {
+      bufferlist scratch;
+      return http_op->wait(&scratch);
+    }
+    return http_op->wait(result);
+  }
+
+  /// Collects the result and detaches the operation. Returns the negative
+  /// wait result on failure (after recording it on error_stream), else 0.
+  int request_complete() override {
+    int ret;
+
+    ret = wait_result();
+
+    auto op = std::move(http_op); // release ref on return
+    if (ret < 0) {
+      error_stream << "http operation failed: " << op->to_str()
+          << " status=" << op->get_http_status() << std::endl;
+      op->put();
+      return ret;
+    }
+    op->put();
+    return 0;
+  }
+
+  /// put() and detach an operation that is still attached.
+  void request_cleanup() override {
+    if (http_op) {
+      http_op->put();
+      http_op = NULL;
+    }
+  }
+
+};
+
+
+/// Typed variant of RGWReadRawRESTResourceCR: waits for the operation with a
+/// T* destination instead of a raw bufferlist.
+template <class T>
+class RGWReadRESTResourceCR : public RGWReadRawRESTResourceCR {
+  T *result;  // typed destination handed to http_op->wait()
+ public:
+  RGWReadRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                        RGWHTTPManager *_http_manager, const string& _path,
+                        rgw_http_param_pair *params, T *_result)
+    : RGWReadRawRESTResourceCR(_cct, _conn, _http_manager, _path, params),
+      result(_result)
+  {}
+
+  /// Same, with extra request headers supplied as a string map.
+  RGWReadRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                        RGWHTTPManager *_http_manager, const string& _path,
+                        rgw_http_param_pair *params,
+                        std::map <std::string, std::string> *hdrs,
+                        T *_result)
+    : RGWReadRawRESTResourceCR(_cct, _conn, _http_manager, _path, params, hdrs),
+      result(_result)
+  {}
+
+  int wait_result() override {
+    return http_op->wait(result);
+  }
+
+};
+
+/// Simple coroutine that sends a raw body to a REST resource with an
+/// arbitrary HTTP method and optionally collects a typed result (T) and a
+/// typed error result (E).
+template <class T, class E = int>
+class RGWSendRawRESTResourceCR: public RGWSimpleCoroutine {
+ protected:
+  RGWRESTConn *conn;
+  RGWHTTPManager *http_manager;
+  string method;
+  string path;
+  param_vec_t params;
+  param_vec_t headers;  // built from *attrs at construction
+  map<string, string> *attrs;
+  T *result;
+  E *err_result;
+  bufferlist input_bl;  // body passed to aio_send()
+  bool send_content_length=false;
+  boost::intrusive_ptr<RGWRESTSendResource> http_op;
+
+ public:
+  /// Variant taking the body and the send_content_length flag explicitly.
+  RGWSendRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager,
+                           const string& _method, const string& _path,
+                           rgw_http_param_pair *_params,
+                           map<string, string> *_attrs,
+                           bufferlist& _input, T *_result,
+                           bool _send_content_length,
+                           E *_err_result = nullptr)
+    : RGWSimpleCoroutine(_cct),
+      conn(_conn),
+      http_manager(_http_manager),
+      method(_method),
+      path(_path),
+      params(make_param_list(_params)),
+      headers(make_param_list(_attrs)),
+      attrs(_attrs),
+      result(_result),
+      err_result(_err_result),
+      input_bl(_input),
+      send_content_length(_send_content_length)
+  {}
+
+  /// Variant without a body; input_bl starts empty and may be filled by a
+  /// subclass before the request is sent.
+  RGWSendRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                           RGWHTTPManager *_http_manager,
+                           const string& _method, const string& _path,
+                           rgw_http_param_pair *_params, map<string, string> *_attrs,
+                           T *_result, E *_err_result = nullptr)
+    : RGWSimpleCoroutine(_cct),
+      conn(_conn),
+      http_manager(_http_manager),
+      method(_method),
+      path(_path),
+      params(make_param_list(_params)),
+      headers(make_param_list(_attrs)),
+      attrs(_attrs),
+      result(_result),
+      err_result(_err_result)
+  {}
+
+  ~RGWSendRawRESTResourceCR() override {
+    request_cleanup();
+  }
+
+  /// Creates the send operation, registers it as a new io and starts the
+  /// asynchronous send of input_bl. Returns the negative aio_send() result
+  /// on failure; otherwise keeps the operation in http_op and returns 0.
+  int send_request() override {
+    auto op = boost::intrusive_ptr<RGWRESTSendResource>(
+        new RGWRESTSendResource(conn, method, path, params, &headers, http_manager));
+
+    init_new_io(op.get());
+
+    const int ret = op->aio_send(input_bl);
+    if (ret < 0) {
+      lsubdout(cct, rgw, 0) << "ERROR: failed to send request" << dendl;
+      op->put();
+      return ret;
+    }
+    std::swap(http_op, op); // store reference in http_op on success
+    return 0;
+  }
+
+  /// Waits for the operation. With a result or error destination the typed
+  /// wait is used; otherwise the body goes to a scratch buffer. Returns the
+  /// negative wait result on failure (after logging), else 0.
+  int request_complete() override {
+    int ret;
+    if (result || err_result) {
+      ret = http_op->wait(result, err_result);
+    } else {
+      bufferlist scratch;
+      ret = http_op->wait(&scratch);
+    }
+    auto op = std::move(http_op); // release ref on return
+    if (ret >= 0) {
+      op->put();
+      return 0;
+    }
+    error_stream << "http operation failed: " << op->to_str()
+                 << " status=" << op->get_http_status() << std::endl;
+    lsubdout(cct, rgw, 5) << "failed to wait for op, ret=" << ret
+                          << ": " << op->to_str() << dendl;
+    op->put();
+    return ret;
+  }
+
+  /// put() and detach an operation that is still attached.
+  void request_cleanup() override {
+    if (!http_op) {
+      return;
+    }
+    http_op->put();
+    http_op.reset();
+  }
+};
+
+/// Send coroutine whose body is the JSON encoding of an input object of
+/// type S: the constructor encodes `_input` under the name "data" and
+/// appends the resulting text to the inherited input_bl.
+template <class S, class T, class E = int>
+class RGWSendRESTResourceCR : public RGWSendRawRESTResourceCR<T, E> {
+ public:
+  RGWSendRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                        RGWHTTPManager *_http_manager,
+                        const string& _method, const string& _path,
+                        rgw_http_param_pair *_params, map<string, string> *_attrs,
+                        S& _input, T *_result, E *_err_result = nullptr)
+    : RGWSendRawRESTResourceCR<T, E>(_cct, _conn, _http_manager, _method, _path,
+                                     _params, _attrs, _result, _err_result)
+  {
+    JSONFormatter formatter;
+    encode_json("data", _input, &formatter);
+
+    std::stringstream encoded;
+    formatter.flush(encoded);
+
+    this->input_bl.append(encoded.str());
+  }
+
+};
+
+// RGWSendRESTResourceCR with the method fixed to "POST" and no extra
+// attributes (nullptr is passed for the attrs map).
+template <class S, class T, class E = int>
+class RGWPostRESTResourceCR : public RGWSendRESTResourceCR<S, T, E> {
+public:
+ RGWPostRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+ RGWHTTPManager *_http_manager,
+ const string& _path,
+ rgw_http_param_pair *_params, S& _input,
+ T *_result, E *_err_result = nullptr)
+ : RGWSendRESTResourceCR<S, T, E>(_cct, _conn, _http_manager,
+ "POST", _path,
+ _params, nullptr, _input,
+ _result, _err_result) {}
+};
+
+// RGWSendRawRESTResourceCR with the method fixed to "PUT", no extra
+// attributes (nullptr) and send_content_length passed as true.
+template <class T, class E = int>
+class RGWPutRawRESTResourceCR: public RGWSendRawRESTResourceCR <T, E> {
+ public:
+ RGWPutRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+ RGWHTTPManager *_http_manager,
+ const string& _path,
+ rgw_http_param_pair *_params, bufferlist& _input,
+ T *_result, E *_err_result = nullptr)
+ : RGWSendRawRESTResourceCR<T, E>(_cct, _conn, _http_manager, "PUT", _path,
+ _params, nullptr, _input, _result, true, _err_result) {}
+
+};
+
+// RGWSendRawRESTResourceCR with the method fixed to "POST", caller-supplied
+// attributes and send_content_length passed as true.
+template <class T, class E = int>
+class RGWPostRawRESTResourceCR: public RGWSendRawRESTResourceCR <T, E> {
+ public:
+ RGWPostRawRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+ RGWHTTPManager *_http_manager,
+ const string& _path,
+ rgw_http_param_pair *_params,
+ map<string, string> * _attrs,
+ bufferlist& _input,
+ T *_result, E *_err_result = nullptr)
+ : RGWSendRawRESTResourceCR<T, E>(_cct, _conn, _http_manager, "POST", _path,
+ _params, _attrs, _input, _result, true, _err_result) {}
+
+};
+
+
+// RGWSendRESTResourceCR with the method fixed to "PUT". Two constructors:
+// one without extra attributes (passes nullptr) and one forwarding a
+// caller-supplied attribute map.
+template <class S, class T, class E = int>
+class RGWPutRESTResourceCR : public RGWSendRESTResourceCR<S, T, E> {
+public:
+ // PUT without extra attributes.
+ RGWPutRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+ RGWHTTPManager *_http_manager,
+ const string& _path,
+ rgw_http_param_pair *_params, S& _input,
+ T *_result, E *_err_result = nullptr)
+ : RGWSendRESTResourceCR<S, T, E>(_cct, _conn, _http_manager,
+ "PUT", _path,
+ _params, nullptr, _input,
+ _result, _err_result) {}
+
+ // PUT with a caller-supplied attribute map.
+ RGWPutRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+ RGWHTTPManager *_http_manager,
+ const string& _path,
+ rgw_http_param_pair *_params,
+ map <string, string> *_attrs,
+ S& _input, T *_result, E *_err_result = nullptr)
+ : RGWSendRESTResourceCR<S, T, E>(_cct, _conn, _http_manager,
+ "PUT", _path,
+ _params, _attrs, _input,
+ _result, _err_result) {}
+
+};
+
+/// Simple coroutine that issues a delete operation (RGWRESTDeleteResource)
+/// against a REST resource and waits for it to finish.
+class RGWDeleteRESTResourceCR : public RGWSimpleCoroutine {
+  RGWRESTConn *conn;
+  RGWHTTPManager *http_manager;
+  string path;
+  param_vec_t params;
+
+  boost::intrusive_ptr<RGWRESTDeleteResource> http_op;
+
+public:
+  RGWDeleteRESTResourceCR(CephContext *_cct, RGWRESTConn *_conn,
+                          RGWHTTPManager *_http_manager,
+                          const string& _path,
+                          rgw_http_param_pair *_params)
+    : RGWSimpleCoroutine(_cct),
+      conn(_conn),
+      http_manager(_http_manager),
+      path(_path),
+      params(make_param_list(_params))
+  {}
+
+  ~RGWDeleteRESTResourceCR() override {
+    request_cleanup();
+  }
+
+  /// Creates the delete operation, registers it as a new io and starts the
+  /// asynchronous send with an empty body. Returns the negative aio_send()
+  /// result on failure; otherwise keeps the operation and returns 0.
+  int send_request() override {
+    auto op = boost::intrusive_ptr<RGWRESTDeleteResource>(
+        new RGWRESTDeleteResource(conn, path, params, nullptr, http_manager));
+
+    init_new_io(op.get());
+
+    bufferlist empty_body;
+
+    const int ret = op->aio_send(empty_body);
+    if (ret < 0) {
+      lsubdout(cct, rgw, 0) << "ERROR: failed to send DELETE request" << dendl;
+      op->put();
+      return ret;
+    }
+    std::swap(http_op, op); // store reference in http_op on success
+    return 0;
+  }
+
+  /// Waits for the operation, discarding any response body. Returns the
+  /// negative wait result on failure (after logging), else 0.
+  int request_complete() override {
+    bufferlist discarded;
+    const int ret = http_op->wait(&discarded);
+    auto op = std::move(http_op); // release ref on return
+    if (ret >= 0) {
+      op->put();
+      return 0;
+    }
+    error_stream << "http operation failed: " << op->to_str()
+                 << " status=" << op->get_http_status() << std::endl;
+    lsubdout(cct, rgw, 5) << "failed to wait for op, ret=" << ret
+                          << ": " << op->to_str() << dendl;
+    op->put();
+    return ret;
+  }
+
+  /// put() and detach an operation that is still attached.
+  void request_cleanup() override {
+    if (!http_op) {
+      return;
+    }
+    http_op->put();
+    http_op.reset();
+  }
+};
+
+// Receive callback for an RGWHTTPStreamRWRequest. It keeps two buffers,
+// `data` and `extra_data`, plus three flags (got_all_extra_data, paused,
+// notified), all initially false. The constructor, handle_data() and
+// claim_data() are declared here and defined in the .cc file.
+class RGWCRHTTPGetDataCB : public RGWHTTPStreamRWRequest::ReceiveCB {
+ Mutex lock;
+ RGWCoroutinesEnv *env;
+ RGWCoroutine *cr;
+ RGWHTTPStreamRWRequest *req;
+ rgw_io_id io_id;
+ bufferlist data;
+ bufferlist extra_data;
+ bool got_all_extra_data{false};
+ bool paused{false};
+ bool notified{false};
+public:
+ RGWCRHTTPGetDataCB(RGWCoroutinesEnv *_env, RGWCoroutine *_cr, RGWHTTPStreamRWRequest *_req);
+
+ int handle_data(bufferlist& bl, bool *pause) override;
+
+ void claim_data(bufferlist *dest, uint64_t max);
+
+ // The three accessors below read their member without taking `lock`.
+
+ // Mutable reference to the extra-data buffer.
+ bufferlist& get_extra_data() {
+ return extra_data;
+ }
+
+ // True when the data buffer is non-empty.
+ bool has_data() {
+ return (data.length() > 0);
+ }
+
+ // Returns the got_all_extra_data flag.
+ bool has_all_extra_data() {
+ return got_all_extra_data;
+ }
+};
+
+
+// Abstract interface for a streaming read source. read() is marked
+// reentrant; read_state holds the boost::asio stackless-coroutine state for
+// implementations to use.
+class RGWStreamReadResourceCRF {
+protected:
+ boost::asio::coroutine read_state;
+
+public:
+ virtual int init() = 0;
+ virtual int read(bufferlist *data, uint64_t max, bool *need_retry) = 0; /* reentrant */
+ virtual int decode_rest_obj(map<string, string>& headers, bufferlist& extra_data) = 0;
+ virtual bool has_attrs() = 0;
+ virtual void get_attrs(std::map<string, string> *attrs) = 0;
+ virtual ~RGWStreamReadResourceCRF() = default;
+};
+
+// Abstract interface for a streaming write sink. write() and drain_writes()
+// are marked reentrant; write_state and drain_state hold their
+// boost::asio stackless-coroutine state for implementations to use.
+class RGWStreamWriteResourceCRF {
+protected:
+ boost::asio::coroutine write_state;
+ boost::asio::coroutine drain_state;
+
+public:
+ virtual int init() = 0;
+ virtual void send_ready(const rgw_rest_obj& rest_obj) = 0;
+ virtual int send() = 0;
+ virtual int write(bufferlist& data, bool *need_retry) = 0; /* reentrant */
+ virtual int drain_writes(bool *need_retry) = 0; /* reentrant */
+
+ virtual ~RGWStreamWriteResourceCRF() = default;
+};
+
+/// HTTP-backed implementation of RGWStreamReadResourceCRF. The request
+/// object is supplied through set_req(); the destructor (defined in the .cc
+/// file) cancels, waits for and deletes it.
+class RGWStreamReadHTTPResourceCRF : public RGWStreamReadResourceCRF {
+  CephContext *cct;
+  RGWCoroutinesEnv *env;
+  RGWCoroutine *caller;
+  RGWHTTPManager *http_manager;
+
+  RGWHTTPStreamRWRequest *req{nullptr};
+
+  std::optional<RGWCRHTTPGetDataCB> in_cb;  // constructed in init()
+
+  bufferlist extra_data;
+
+  bool got_attrs{false};
+  bool got_extra_data{false};
+
+  rgw_io_id io_read_mask;
+
+protected:
+  rgw_rest_obj rest_obj;
+
+  struct range_info {
+    bool is_set{false};
+    // Zero-initialized so the struct never holds indeterminate values, even
+    // if read before set_range() is called.
+    uint64_t ofs{0};
+    uint64_t size{0};
+  } range;
+
+  ceph::real_time mtime;
+  string etag;
+
+public:
+  /// Stores the environment pointers and initializes rest_obj with _src_key.
+  RGWStreamReadHTTPResourceCRF(CephContext *_cct,
+                               RGWCoroutinesEnv *_env,
+                               RGWCoroutine *_caller,
+                               RGWHTTPManager *_http_manager,
+                               const rgw_obj_key& _src_key) : cct(_cct),
+                                                              env(_env),
+                                                              caller(_caller),
+                                                              http_manager(_http_manager) {
+    rest_obj.init(_src_key);
+  }
+  ~RGWStreamReadHTTPResourceCRF();
+
+  int init() override;
+  int read(bufferlist *data, uint64_t max, bool *need_retry) override; /* reentrant */
+  int decode_rest_obj(map<string, string>& headers, bufferlist& extra_data) override;
+  bool has_attrs() override;
+  void get_attrs(std::map<string, string> *attrs) override;
+  bool is_done();
+  /// Subclasses return true to make read() collect and decode extra data.
+  virtual bool need_extra_data() { return false; }
+
+  void set_req(RGWHTTPStreamRWRequest *r) {
+    req = r;
+  }
+
+  rgw_rest_obj& get_rest_obj() {
+    return rest_obj;
+  }
+
+  /// Records a range and marks it as set.
+  void set_range(uint64_t ofs, uint64_t size) {
+    range.is_set = true;
+    range.ofs = ofs;
+    range.size = size;
+  }
+};
+
+/// HTTP-backed implementation of RGWStreamWriteResourceCRF. The request
+/// object is supplied through set_req(); the destructor (defined in the .cc
+/// file) cancels, waits for and deletes it.
+class RGWStreamWriteHTTPResourceCRF : public RGWStreamWriteResourceCRF {
+protected:
+  RGWCoroutinesEnv *env;
+  RGWCoroutine *caller;
+  RGWHTTPManager *http_manager;
+
+  using lock_guard = std::lock_guard<std::mutex>;
+
+  std::mutex blocked_lock;  // guards is_blocked
+  // Must start false: write_drain_notify() tests it, and it is only ever set
+  // to true by write(). It was previously left uninitialized.
+  bool is_blocked{false};
+
+  RGWHTTPStreamRWRequest *req{nullptr};
+
+  struct multipart_info {
+    bool is_multipart{false};
+    string upload_id;
+    int part_num{0};
+    uint64_t part_size{0};  // was uninitialized until set_multipart()
+  } multipart;
+
+  /// Adapter that forwards drain notifications to the owning CRF.
+  class WriteDrainNotify : public RGWWriteDrainCB {
+    RGWStreamWriteHTTPResourceCRF *crf;
+  public:
+    explicit WriteDrainNotify(RGWStreamWriteHTTPResourceCRF *_crf) : crf(_crf) {}
+    void notify(uint64_t pending_size) override;
+  } write_drain_notify_cb;
+
+public:
+  /// _cct is accepted but not stored by this class.
+  RGWStreamWriteHTTPResourceCRF(CephContext *_cct,
+                                RGWCoroutinesEnv *_env,
+                                RGWCoroutine *_caller,
+                                RGWHTTPManager *_http_manager) : env(_env),
+                                                                 caller(_caller),
+                                                                 http_manager(_http_manager),
+                                                                 write_drain_notify_cb(this) {}
+  virtual ~RGWStreamWriteHTTPResourceCRF();
+
+  /// Nothing to initialize at this level; always returns 0.
+  int init() override {
+    return 0;
+  }
+  void send_ready(const rgw_rest_obj& rest_obj) override;
+  int send() override;
+  int write(bufferlist& data, bool *need_retry) override; /* reentrant */
+  void write_drain_notify(uint64_t pending_size);
+  int drain_writes(bool *need_retry) override; /* reentrant */
+
+  /// Hook called by drain_writes() with the response headers; no-op here.
+  virtual void handle_headers(const std::map<string, string>& headers) {}
+
+  void set_req(RGWHTTPStreamRWRequest *r) {
+    req = r;
+  }
+
+  /// Records the multipart parameters and marks the upload as multipart.
+  void set_multipart(const string& upload_id, int part_num, uint64_t part_size) {
+    multipart.is_multipart = true;
+    multipart.upload_id = upload_id;
+    multipart.part_num = part_num;
+    multipart.part_size = part_size;
+  }
+};
+
+// Coroutine that connects a streaming HTTP reader (in_crf) to a streaming
+// HTTP writer (out_crf). The constructor, destructor and operate() are
+// defined in the .cc file.
+class RGWStreamSpliceCR : public RGWCoroutine {
+ CephContext *cct;
+ RGWHTTPManager *http_manager;
+ string url;
+ std::shared_ptr<RGWStreamReadHTTPResourceCRF> in_crf;
+ std::shared_ptr<RGWStreamWriteHTTPResourceCRF> out_crf;
+ // Working state used by operate() across resumptions.
+ bufferlist bl;
+ bool need_retry{false};
+ bool sent_attrs{false};
+ uint64_t total_read{0};
+ int ret{0};
+public:
+ RGWStreamSpliceCR(CephContext *_cct, RGWHTTPManager *_mgr,
+ std::shared_ptr<RGWStreamReadHTTPResourceCRF>& _in_crf,
+ std::shared_ptr<RGWStreamWriteHTTPResourceCRF>& _out_crf);
+ ~RGWStreamSpliceCR();
+
+ int operate() override;
+};
+
+#endif
diff --git a/src/rgw/rgw_cr_tools.cc b/src/rgw/rgw_cr_tools.cc
new file mode 100644
index 00000000..85654cb7
--- /dev/null
+++ b/src/rgw/rgw_cr_tools.cc
@@ -0,0 +1,275 @@
+#include "common/errno.h"
+
+#include "rgw_cr_tools.h"
+#include "rgw_bucket.h"
+#include "rgw_user.h"
+#include "rgw_op.h"
+#include "rgw_acl_s3.h"
+#include "rgw_zone.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+template<>
+int RGWUserCreateCR::Request::_send_request()
+{
+  /* Builds an RGWUserAdminOpState from the request parameters and hands it to
+   * RGWUserAdminOp_User::create(); the result of that call is returned. */
+  CephContext *cct = store->ctx();
+
+  RGWUserAdminOpState op_state;
+
+  /* identity and credentials */
+  op_state.set_user_id(params.user);
+  op_state.set_display_name(params.display_name);
+  op_state.set_user_email(params.email);
+  op_state.set_caps(params.caps);
+  op_state.set_access_key(params.access_key);
+  op_state.set_secret_key(params.secret_key);
+
+  /* a key type is only set when the caller named one; any value other than
+   * "swift" selects S3 */
+  if (!params.key_type.empty()) {
+    const int32_t key_type =
+      (params.key_type == "swift") ? KEY_TYPE_SWIFT : KEY_TYPE_S3;
+    op_state.set_key_type(key_type);
+  }
+
+  /* fall back to the configured bucket limit when none was requested */
+  const int32_t fallback_max_buckets =
+    cct->_conf.get_val<int64_t>("rgw_user_max_buckets");
+  op_state.set_max_buckets(params.max_buckets.value_or(fallback_max_buckets));
+  op_state.set_suspension(params.suspended);
+  op_state.set_system(params.system);
+  op_state.set_exclusive(params.exclusive);
+
+  if (params.generate_key) {
+    op_state.set_generate_key();
+  }
+
+  if (params.apply_quota) {
+    /* seed bucket and user quotas from the configured defaults; a negative
+     * default leaves that limit untouched, and a quota is only attached when
+     * at least one of its limits was set */
+    RGWQuotaInfo bucket_quota;
+    RGWQuotaInfo user_quota;
+
+    const auto bucket_max_objects = cct->_conf->rgw_bucket_default_quota_max_objects;
+    if (bucket_max_objects >= 0) {
+      bucket_quota.max_objects = bucket_max_objects;
+      bucket_quota.enabled = true;
+    }
+
+    const auto bucket_max_size = cct->_conf->rgw_bucket_default_quota_max_size;
+    if (bucket_max_size >= 0) {
+      bucket_quota.max_size = bucket_max_size;
+      bucket_quota.enabled = true;
+    }
+
+    const auto user_max_objects = cct->_conf->rgw_user_default_quota_max_objects;
+    if (user_max_objects >= 0) {
+      user_quota.max_objects = user_max_objects;
+      user_quota.enabled = true;
+    }
+
+    const auto user_max_size = cct->_conf->rgw_user_default_quota_max_size;
+    if (user_max_size >= 0) {
+      user_quota.max_size = user_max_size;
+      user_quota.enabled = true;
+    }
+
+    if (bucket_quota.enabled) {
+      op_state.set_bucket_quota(bucket_quota);
+    }
+
+    if (user_quota.enabled) {
+      op_state.set_user_quota(user_quota);
+    }
+  }
+
+  RGWNullFlusher flusher;
+  return RGWUserAdminOp_User::create(store, op_state, flusher);
+}
+
+template<>
+int RGWGetUserInfoCR::Request::_send_request()
+{
+  /* Looks up params.user by uid and stores the record in *result. */
+  const int r = rgw_get_user_info_by_uid(store, params.user, *result);
+  return r;
+}
+
+template<>
+int RGWGetBucketInfoCR::Request::_send_request()
+{
+  /* Reads bucket info, mtime and attrs for params.tenant/params.bucket_name
+   * into *result, using a fresh sysobj context. */
+  RGWSysObjectCtx sysobj_ctx(store->svc.sysobj->init_obj_ctx());
+  auto& out = *result;
+  return store->get_bucket_info(sysobj_ctx, params.tenant, params.bucket_name,
+                                out.bucket_info, &out.mtime, &out.attrs);
+}
+
+template<>
+int RGWBucketCreateLocalCR::Request::_send_request()
+{
+  /* Creates a bucket named params.bucket_name for params.user_info, with a
+   * default private ACL, and links it to the user.
+   *
+   * Returns 0 on success, -ERR_BUCKET_EXISTS when the bucket already existed
+   * for this user, -EEXIST when it exists under a different owner or on a
+   * different placement rule, -ERR_INVALID_LOCATION_CONSTRAINT for an unknown
+   * placement target, -EINVAL when no user info was supplied, and other
+   * negative error codes passed through from the store. */
+  CephContext *cct = store->ctx();
+  auto& zone_svc = store->svc.zone;
+  auto& sysobj_svc = store->svc.sysobj;
+
+  RGWUserInfo *user_info = params.user_info.get();
+  if (!user_info) {
+    ldout(cct, 0) << "ERROR: bucket creation requested without user info" << dendl;
+    return -EINVAL;
+  }
+  const auto& user = user_info->user_id;
+  const auto& bucket_name = params.bucket_name;
+  auto& placement_rule = params.placement_rule;
+
+  if (!placement_rule.empty() &&
+      !zone_svc->get_zone_params().valid_placement(placement_rule)) {
+    ldout(cct, 0) << "placement target (" << placement_rule << ")"
+                  << " doesn't exist in the placement targets of zonegroup"
+                  << " (" << zone_svc->get_zonegroup().api_name << ")" << dendl;
+    return -ERR_INVALID_LOCATION_CONSTRAINT;
+  }
+
+  /* we need to make sure we read bucket info, it's not read before for this
+   * specific request */
+  RGWSysObjectCtx sysobj_ctx(sysobj_svc->init_obj_ctx());
+  RGWBucketInfo bucket_info;
+  map<string, bufferlist> bucket_attrs;
+
+  int ret = store->get_bucket_info(sysobj_ctx, user.tenant, bucket_name,
+                                   bucket_info, nullptr, &bucket_attrs);
+  if (ret < 0 && ret != -ENOENT)
+    return ret;
+  const bool bucket_exists = (ret != -ENOENT);
+
+  RGWAccessControlPolicy old_policy(cct);
+  ACLOwner bucket_owner;
+  bucket_owner.set_id(user);
+  bucket_owner.set_name(user_info->display_name);
+  if (bucket_exists) {
+    /* an unreadable policy is tolerated; a readable one must name this user */
+    ret = rgw_op_get_bucket_policy_from_attr(cct, store, bucket_info,
+                                             bucket_attrs, &old_policy);
+    if (ret >= 0) {
+      if (old_policy.get_owner().get_id().compare(user) != 0) {
+        return -EEXIST;
+      }
+    }
+  }
+
+  /* no master-zone bucket, shard count or quota override is supplied */
+  rgw_bucket *pmaster_bucket = nullptr;
+  uint32_t *pmaster_num_shards = nullptr;
+  const RGWQuotaInfo *pquota_info = nullptr;
+  real_time creation_time;
+
+  string zonegroup_id = zone_svc->get_zonegroup().get_id();
+
+  if (bucket_exists) {
+    rgw_placement_rule selected_placement_rule;
+    ret = zone_svc->select_bucket_placement(*user_info, zonegroup_id,
+                                            placement_rule,
+                                            &selected_placement_rule, nullptr);
+    if (ret < 0) {
+      /* without a selected rule the comparison below would be meaningless */
+      ldout(cct, 0) << "ERROR: failed to select bucket placement: ret=" << ret << dendl;
+      return ret;
+    }
+    if (selected_placement_rule != bucket_info.placement_rule) {
+      ldout(cct, 0) << "bucket already exists on a different placement rule: "
+                    << " selected_rule= " << selected_placement_rule
+                    << " existing_rule= " << bucket_info.placement_rule << dendl;
+      return -EEXIST;
+    }
+  }
+
+  /* Encode special metadata first as we're using std::map::emplace under
+   * the hood. This method will add the new items only if the map doesn't
+   * contain such keys yet. */
+  RGWAccessControlPolicy_S3 policy(cct);
+  policy.create_canned(bucket_owner, bucket_owner, string()); /* default private policy */
+  bufferlist aclbl;
+  policy.encode(aclbl);
+  map<string, buffer::list> attrs;
+  attrs.emplace(RGW_ATTR_ACL, std::move(aclbl));
+
+  rgw_bucket bucket;
+  bucket.tenant = user.tenant;
+  bucket.name = bucket_name;
+
+  RGWBucketInfo info;
+  obj_version ep_objv;
+
+  ret = store->create_bucket(*user_info, bucket, zonegroup_id,
+                             placement_rule, bucket_info.swift_ver_location,
+                             pquota_info, attrs,
+                             info, nullptr, &ep_objv, creation_time,
+                             pmaster_bucket, pmaster_num_shards, true);
+
+  if (ret && ret != -EEXIST)
+    return ret;
+
+  const bool existed = (ret == -EEXIST);
+
+  if (existed) {
+    if (info.owner != user) {
+      ldout(cct, 20) << "NOTICE: bucket already exists under a different user (bucket=" << bucket << " user=" << user << " bucket_owner=" << info.owner << ")" << dendl;
+      return -EEXIST;
+    }
+    bucket = info.bucket;
+  }
+
+  ret = rgw_link_bucket(store, user, bucket,
+                        info.creation_time, false);
+  if (ret && !existed && ret != -EEXIST) {
+    /* if it exists (or previously existed), don't remove it! */
+    int r = rgw_unlink_bucket(store, user, bucket.tenant, bucket.name);
+    if (r < 0) {
+      ldout(cct, 0) << "WARNING: failed to unlink bucket: ret=" << r << dendl;
+    }
+  } else if (ret == -EEXIST || (ret == 0 && existed)) {
+    ret = -ERR_BUCKET_EXISTS;
+  }
+
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: bucket creation (bucket=" << bucket << ") return ret=" << ret << dendl;
+  }
+
+  return ret;
+}
+
+template<>
+int RGWObjectSimplePutCR::Request::_send_request()
+{
+  /* Resolves params.key inside params.bucket and writes params.data with
+   * params.attrs (plus the optional user data) to it.
+   *
+   * Returns 0 on success or the negative error code of the failing step.
+   * Errors must stay negative: a positive value such as -ret would not be
+   * recognised as a failure by callers testing for ret < 0. */
+  RGWDataAccess::ObjectRef obj;
+
+  CephContext *cct = store->ctx();
+
+  int ret = params.bucket->get_object(params.key, &obj);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: failed to get object: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  if (params.user_data) {
+    obj->set_user_data(*params.user_data);
+  }
+
+  ret = obj->put(params.data, params.attrs);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: put object returned error: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+template<>
+int RGWBucketLifecycleConfigCR::Request::_send_request()
+{
+  /* Stores params.config as the lifecycle configuration of the bucket
+   * described by params.bucket_info / params.bucket_attrs.
+   *
+   * Returns 0 on success, -EIO when the store has no lifecycle object, or
+   * the negative error code from RGWLC::set_bucket_config(). */
+  CephContext *cct = store->ctx();
+
+  RGWLC *lc = store->get_lc();
+  if (!lc) {
+    lderr(cct) << "ERROR: lifecycle object is not initialized!" << dendl;
+    return -EIO;
+  }
+
+  int ret = lc->set_bucket_config(params.bucket_info,
+                                  params.bucket_attrs,
+                                  &params.config);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: failed to set lifecycle on bucket: " << cpp_strerror(-ret) << dendl;
+    /* keep the error negative so callers testing ret < 0 see the failure */
+    return ret;
+  }
+
+  return 0;
+}
diff --git a/src/rgw/rgw_cr_tools.h b/src/rgw/rgw_cr_tools.h
new file mode 100644
index 00000000..24e9d7a8
--- /dev/null
+++ b/src/rgw/rgw_cr_tools.h
@@ -0,0 +1,75 @@
+#ifndef CEPH_RGW_CR_TOOLS_H
+#define CEPH_RGW_CR_TOOLS_H
+
+#include "rgw_cr_rados.h"
+#include "rgw_tools.h"
+#include "rgw_lc.h"
+
+
+struct rgw_user_create_params { // input of RGWUserCreateCR
+ rgw_user user;
+ std::string display_name;
+ std::string email;
+ std::string access_key;
+ std::string secret_key;
+ std::string key_type; /* "swift" or "s3"; empty leaves the key type unset, any other value is handled as s3 */
+ std::string caps;
+
+ bool generate_key{true};
+ bool suspended{false};
+ std::optional<int32_t> max_buckets; // unset: the rgw_user_max_buckets config value is used
+ bool system{false};
+ bool exclusive{false};
+ bool apply_quota{true}; // apply the configured default bucket/user quotas (only those that are >= 0)
+};
+
+using RGWUserCreateCR = RGWSimpleWriteOnlyAsyncCR<rgw_user_create_params>; // async user creation, no result payload
+
+struct rgw_get_user_info_params { // input of RGWGetUserInfoCR
+ rgw_user user; // uid to look up
+};
+
+using RGWGetUserInfoCR = RGWSimpleAsyncCR<rgw_get_user_info_params, RGWUserInfo>; // async lookup yielding an RGWUserInfo
+
+struct rgw_get_bucket_info_params { // input of RGWGetBucketInfoCR: identifies the bucket to read
+ string tenant;
+ string bucket_name;
+};
+
+struct rgw_get_bucket_info_result { // output of RGWGetBucketInfoCR, filled by get_bucket_info()
+ ceph::real_time mtime;
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+};
+
+using RGWGetBucketInfoCR = RGWSimpleAsyncCR<rgw_get_bucket_info_params, rgw_get_bucket_info_result>; // async bucket info read
+
+struct rgw_bucket_create_local_params { // input of RGWBucketCreateLocalCR
+ shared_ptr<RGWUserInfo> user_info; // owner of the new bucket; dereferenced by the request, so it must be set
+ std::string bucket_name;
+ rgw_placement_rule placement_rule; // may be empty; a non-empty rule is validated against the zone params
+};
+
+using RGWBucketCreateLocalCR = RGWSimpleWriteOnlyAsyncCR<rgw_bucket_create_local_params>; // async bucket creation, no result payload
+
+struct rgw_object_simple_put_params { // input of RGWObjectSimplePutCR
+ RGWDataAccess::BucketRef bucket; // bucket the object is resolved in
+ rgw_obj_key key;
+ bufferlist data; // object payload
+ map<string, bufferlist> attrs; // attributes passed to put() along with the data
+ std::optional<string> user_data; // when set, attached to the object before put()
+};
+
+using RGWObjectSimplePutCR = RGWSimpleWriteOnlyAsyncCR<rgw_object_simple_put_params>; // async object write, no result payload
+
+
+struct rgw_bucket_lifecycle_config_params { // input of RGWBucketLifecycleConfigCR
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> bucket_attrs;
+ RGWLifecycleConfiguration config; // configuration handed to RGWLC::set_bucket_config()
+};
+
+using RGWBucketLifecycleConfigCR = RGWSimpleWriteOnlyAsyncCR<rgw_bucket_lifecycle_config_params>; // async lifecycle update, no result payload
+
+
+#endif
diff --git a/src/rgw/rgw_crypt.cc b/src/rgw/rgw_crypt.cc
new file mode 100644
index 00000000..08f28552
--- /dev/null
+++ b/src/rgw/rgw_crypt.cc
@@ -0,0 +1,1317 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/**
+ * Crypto filters for Put/Post/Get operations.
+ */
+
+#include <rgw/rgw_op.h>
+#include <rgw/rgw_crypt.h>
+#include <auth/Crypto.h>
+#include <rgw/rgw_b64.h>
+#include <rgw/rgw_rest_s3.h>
+#include "include/ceph_assert.h"
+#include <boost/utility/string_view.hpp>
+#include <rgw/rgw_keystone.h>
+#include "include/str_map.h"
+#include "crypto/crypto_accel.h"
+#include "crypto/crypto_plugin.h"
+#ifdef USE_NSS
+# include <nspr.h>
+# include <nss.h>
+# include <pk11pub.h>
+#endif
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw;
+
+/**
+ * Encryption in CTR mode. offset is used as IV for each block.
+ *
+ * The counter block is the static base IV plus (stream offset / 16), added
+ * as a big-endian number (see prepare_iv()). The key is copied in by
+ * set_key() and wiped in the destructor; the constructor leaves it
+ * uninitialized, so set_key() has to succeed before encrypt()/decrypt().
+ */
+class AES_256_CTR : public BlockCrypt {
+public:
+ static const size_t AES_256_KEYSIZE = 256 / 8;
+ static const size_t AES_256_IVSIZE = 128 / 8;
+private:
+ static const uint8_t IV[AES_256_IVSIZE];
+ CephContext* cct;
+ uint8_t key[AES_256_KEYSIZE];
+public:
+ explicit AES_256_CTR(CephContext* cct): cct(cct) {
+ }
+ ~AES_256_CTR() {
+ ::ceph::crypto::zeroize_for_security(key, AES_256_KEYSIZE);
+ }
+ bool set_key(const uint8_t* _key, size_t key_size) { /* copies the key; returns false and leaves the key untouched unless key_size == AES_256_KEYSIZE */
+ if (key_size != AES_256_KEYSIZE) {
+ return false;
+ }
+ memcpy(key, _key, AES_256_KEYSIZE);
+ return true;
+ }
+ size_t get_block_size() { /* granularity reported to callers: one AES block (16 bytes) */
+ return AES_256_IVSIZE;
+ }
+
+#ifdef USE_NSS
+
+ bool encrypt(bufferlist& input, off_t in_ofs, size_t size, bufferlist& output, off_t stream_offset)
+ { /* Runs NSS AES-CTR over `size` bytes of `input` starting at in_ofs and
+    * appends the result to `output` (output is not cleared first). The
+    * counter is derived from stream_offset. Returns true on success; on
+    * any failure logs at level 5 and returns false. Every NSS handle that
+    * was obtained is released again in reverse order of acquisition. */
+ bool result = false;
+ PK11SlotInfo *slot;
+ SECItem keyItem;
+ PK11SymKey *symkey;
+ CK_AES_CTR_PARAMS ctr_params = {0};
+ SECItem ivItem;
+ SECItem *param;
+ SECStatus ret;
+ PK11Context *ectx;
+ int written;
+ unsigned int written2;
+
+ slot = PK11_GetBestSlot(CKM_AES_CTR, NULL);
+ if (slot) {
+ keyItem.type = siBuffer;
+ keyItem.data = key;
+ keyItem.len = AES_256_KEYSIZE;
+
+ symkey = PK11_ImportSymKey(slot, CKM_AES_CTR, PK11_OriginUnwrap, CKA_UNWRAP, &keyItem, NULL);
+ if (symkey) {
+ static_assert(sizeof(ctr_params.cb) >= AES_256_IVSIZE, "Must fit counter");
+ ctr_params.ulCounterBits = 128; /* the whole 16-byte block acts as the counter */
+ prepare_iv(reinterpret_cast<unsigned char*>(&ctr_params.cb), stream_offset);
+
+ ivItem.type = siBuffer;
+ ivItem.data = (unsigned char*)&ctr_params;
+ ivItem.len = sizeof(ctr_params);
+
+ param = PK11_ParamFromIV(CKM_AES_CTR, &ivItem);
+ if (param) {
+ ectx = PK11_CreateContextBySymKey(CKM_AES_CTR, CKA_ENCRYPT, symkey, param);
+ if (ectx) {
+ buffer::ptr buf((size + AES_256_KEYSIZE - 1) / AES_256_KEYSIZE * AES_256_KEYSIZE); /* size rounded up to a multiple of 32 */
+ ret = PK11_CipherOp(ectx,
+ (unsigned char*)buf.c_str(), &written, buf.length(),
+ (unsigned char*)input.c_str() + in_ofs, size);
+ if (ret == SECSuccess) {
+ ret = PK11_DigestFinal(ectx,
+ (unsigned char*)buf.c_str() + written, &written2,
+ buf.length() - written);
+ if (ret == SECSuccess) {
+ buf.set_length(written + written2); /* trim the rounded-up buffer to what NSS produced */
+ output.append(buf);
+ result = true;
+ }
+ }
+ PK11_DestroyContext(ectx, PR_TRUE);
+ }
+ SECITEM_FreeItem(param, PR_TRUE);
+ }
+ PK11_FreeSymKey(symkey);
+ }
+ PK11_FreeSlot(slot);
+ }
+ if (result == false) {
+ ldout(cct, 5) << "Failed to perform AES-CTR encryption: " << PR_GetError() << dendl;
+ }
+ return result;
+ }
+
+#else
+# error "No supported crypto implementation found."
+#endif
+ /* in CTR encrypt is the same as decrypt */
+ bool decrypt(bufferlist& input, off_t in_ofs, size_t size, bufferlist& output, off_t stream_offset) {
+ return encrypt(input, in_ofs, size, output, stream_offset);
+ }
+
+ void prepare_iv(unsigned char iv[AES_256_IVSIZE], off_t offset) { /* iv = base IV + (offset / 16), byte-wise big-endian addition */
+ off_t index = offset / AES_256_IVSIZE;
+ off_t i = AES_256_IVSIZE - 1;
+ unsigned int val;
+ unsigned int carry = 0;
+ while (i>=0) { /* from the last (least significant) byte towards the first */
+ val = (index & 0xff) + IV[i] + carry;
+ iv[i] = val; /* keeps the low 8 bits only */
+ carry = val >> 8;
+ index = index >> 8;
+ i--;
+ }
+ }
+};
+
+const uint8_t AES_256_CTR::IV[AES_256_CTR::AES_256_IVSIZE] = /* fixed base IV, the ASCII bytes "aes256iv_ctr1337"; prepare_iv() adds the block index to it */
+ { 'a', 'e', 's', '2', '5', '6', 'i', 'v', '_', 'c', 't', 'r', '1', '3', '3', '7' };
+
+
+CryptoAccelRef get_crypto_accel(CephContext *cct)
+{
+  /* Loads the "crypto" plugin named by the plugin_crypto_accelerator option
+   * and asks its factory for an accelerator. Returns nullptr when the plugin
+   * cannot be loaded; a factory error is only logged, and whatever the
+   * factory left in the reference is returned. */
+  const string accel_type = cct->_conf->plugin_crypto_accelerator;
+  PluginRegistry *registry = cct->get_plugin_registry();
+
+  auto *plugin = dynamic_cast<CryptoPlugin*>(registry->get_with_load("crypto", accel_type));
+  if (!plugin) {
+    lderr(cct) << __func__ << " cannot load crypto accelerator of type " << accel_type << dendl;
+    return nullptr;
+  }
+
+  CryptoAccelRef accel = nullptr;
+  stringstream err_desc;
+  const int rc = plugin->factory(&accel, &err_desc);
+  if (rc != 0) {
+    lderr(cct) << __func__ << " factory return error " << rc <<
+      " with description: " << err_desc.str() << dendl;
+  }
+  return accel;
+}
+
+
+/**
+ * Encryption in CBC mode. Chunked to 4K blocks. Offset is used as IV for each 4K block.
+ *
+ * Data that is not a multiple of 16 bytes keeps its length: the trailing
+ * n < 16 bytes are xor-ed with a pad instead of being CBC-encrypted.
+ *
+ * A. Encryption
+ * 1. Input is split to 4K chunks + remainder in one, smaller chunk
+ * 2. Each full chunk is encrypted separately with CBC chained mode, with initial IV derived from offset
+ * 3. Last chunk is 16*m + n.
+ * 4. 16*m bytes are encrypted with CBC chained mode, with initial IV derived from offset
+ * 5. Last n bytes are xor-ed with pattern obtained by CBC encryption of
+ * last encrypted 16 byte block, bytes [16m-16, 16m), with IV = {0}.
+ * 6. (Special case) If m == 0 then last n bytes are xor-ed with pattern
+ * obtained by CBC encryption of {0} with IV derived from offset
+ * (the code encrypts the derived IV with IV = {0}, which gives the same
+ * single block because CBC xors plaintext and IV before encrypting)
+ *
+ * B. Decryption
+ * 1. Input is split to 4K chunks + remainder in one, smaller chunk
+ * 2. Each full chunk is decrypted separately with CBC chained mode, with initial IV derived from offset
+ * 3. Last chunk is 16*m + n.
+ * 4. 16*m bytes are decrypted with CBC chained mode, with initial IV derived from offset
+ * 5. Last n bytes are xor-ed with pattern obtained by CBC ENCRYPTION of
+ * last (still encrypted) 16 byte block, bytes [16m-16, 16m), with IV = {0}
+ * 6. (Special case) If m == 0 then last n bytes are xor-ed with pattern
+ * obtained by CBC ENCRYPTION of {0} with IV derived from offset
+ */
+class AES_256_CBC : public BlockCrypt {
+public:
+ static const size_t AES_256_KEYSIZE = 256 / 8;
+ static const size_t AES_256_IVSIZE = 128 / 8;
+ static const size_t CHUNK_SIZE = 4096;
+private:
+ static const uint8_t IV[AES_256_IVSIZE];
+ CephContext* cct;
+ uint8_t key[AES_256_KEYSIZE];
+public:
+ explicit AES_256_CBC(CephContext* cct): cct(cct) {
+ }
+ ~AES_256_CBC() {
+ ::ceph::crypto::zeroize_for_security(key, AES_256_KEYSIZE);
+ }
+ bool set_key(const uint8_t* _key, size_t key_size) { /* copies the key; returns false and leaves the key untouched unless key_size == AES_256_KEYSIZE */
+ if (key_size != AES_256_KEYSIZE) {
+ return false;
+ }
+ memcpy(key, _key, AES_256_KEYSIZE);
+ return true;
+ }
+ size_t get_block_size() { /* granularity reported to callers: one 4K chunk */
+ return CHUNK_SIZE;
+ }
+
+#ifdef USE_NSS
+
+ bool cbc_transform(unsigned char* out,
+ const unsigned char* in,
+ size_t size,
+ const unsigned char (&iv)[AES_256_IVSIZE],
+ const unsigned char (&key)[AES_256_KEYSIZE],
+ bool encrypt)
+ { /* One NSS AES-CBC pass over `size` bytes with an explicit IV; `encrypt`
+    * selects the direction. Succeeds only when PK11_CipherOp reports
+    * SECSuccess and wrote exactly `size` bytes. A failure is logged at
+    * level 5; the message says "encryption" for both directions. */
+ bool result = false;
+ PK11SlotInfo *slot;
+ SECItem keyItem;
+ PK11SymKey *symkey;
+ CK_AES_CBC_ENCRYPT_DATA_PARAMS ctr_params = {0};
+ SECItem ivItem;
+ SECItem *param;
+ SECStatus ret;
+ PK11Context *ectx;
+ int written;
+
+ slot = PK11_GetBestSlot(CKM_AES_CBC, NULL);
+ if (slot) {
+ keyItem.type = siBuffer;
+ keyItem.data = const_cast<unsigned char*>(&key[0]);
+ keyItem.len = AES_256_KEYSIZE;
+ symkey = PK11_ImportSymKey(slot, CKM_AES_CBC, PK11_OriginUnwrap, CKA_UNWRAP, &keyItem, NULL);
+ if (symkey) {
+ memcpy(ctr_params.iv, iv, AES_256_IVSIZE);
+ ivItem.type = siBuffer;
+ ivItem.data = (unsigned char*)&ctr_params;
+ ivItem.len = sizeof(ctr_params); /* NOTE(review): length of the whole params struct, not only of its iv field -- confirm NSS expects this */
+
+ param = PK11_ParamFromIV(CKM_AES_CBC, &ivItem);
+ if (param) {
+ ectx = PK11_CreateContextBySymKey(CKM_AES_CBC, encrypt?CKA_ENCRYPT:CKA_DECRYPT, symkey, param);
+ if (ectx) {
+ ret = PK11_CipherOp(ectx,
+ out, &written, size,
+ in, size);
+ if ((ret == SECSuccess) && (written == (int)size)) {
+ result = true;
+ }
+ PK11_DestroyContext(ectx, PR_TRUE);
+ }
+ SECITEM_FreeItem(param, PR_TRUE);
+ }
+ PK11_FreeSymKey(symkey);
+ }
+ PK11_FreeSlot(slot);
+ }
+ if (result == false) {
+ ldout(cct, 5) << "Failed to perform AES-CBC encryption: " << PR_GetError() << dendl;
+ }
+ return result;
+ }
+
+#else
+# error "No supported crypto implementation found."
+#endif
+
+ bool cbc_transform(unsigned char* out,
+ const unsigned char* in,
+ size_t size,
+ off_t stream_offset,
+ const unsigned char (&key)[AES_256_KEYSIZE],
+ bool encrypt)
+ { /* Transforms `size` bytes in CHUNK_SIZE pieces, each with the IV derived
+    * from its absolute stream position. A crypto accelerator is used when
+    * one can be obtained, otherwise the NSS overload above. Stops at the
+    * first failing chunk and returns false. */
+ static std::atomic<bool> failed_to_get_crypto(false); /* once set, the accelerator lookup is never retried */
+ CryptoAccelRef crypto_accel;
+ if (! failed_to_get_crypto.load())
+ {
+ crypto_accel = get_crypto_accel(cct);
+ if (!crypto_accel)
+ failed_to_get_crypto = true;
+ }
+ bool result = true;
+ unsigned char iv[AES_256_IVSIZE];
+ for (size_t offset = 0; result && (offset < size); offset += CHUNK_SIZE) {
+ size_t process_size = offset + CHUNK_SIZE <= size ? CHUNK_SIZE : size - offset; /* the last piece may be shorter than a chunk */
+ prepare_iv(iv, stream_offset + offset);
+ if (crypto_accel != nullptr) {
+ if (encrypt) {
+ result = crypto_accel->cbc_encrypt(out + offset, in + offset,
+ process_size, iv, key);
+ } else {
+ result = crypto_accel->cbc_decrypt(out + offset, in + offset,
+ process_size, iv, key);
+ }
+ } else {
+ result = cbc_transform(
+ out + offset, in + offset, process_size,
+ iv, key, encrypt);
+ }
+ }
+ return result;
+ }
+
+
+ bool encrypt(bufferlist& input,
+ off_t in_ofs,
+ size_t size,
+ bufferlist& output,
+ off_t stream_offset)
+ { /* Encrypts `size` bytes of `input` starting at in_ofs. `output` is
+    * cleared first and receives exactly `size` bytes on success; on
+    * failure it stays empty and false is returned. */
+ bool result = false;
+ size_t aligned_size = size / AES_256_IVSIZE * AES_256_IVSIZE; /* size rounded down to whole 16-byte blocks */
+ size_t unaligned_rest_size = size - aligned_size;
+ output.clear();
+ buffer::ptr buf(aligned_size + AES_256_IVSIZE); /* one spare block holds the xor pad for the unaligned tail */
+ unsigned char* buf_raw = reinterpret_cast<unsigned char*>(buf.c_str());
+ const unsigned char* input_raw = reinterpret_cast<const unsigned char*>(input.c_str());
+
+ /* encrypt main bulk of data */
+ result = cbc_transform(buf_raw,
+ input_raw + in_ofs,
+ aligned_size,
+ stream_offset, key, true);
+ if (result && (unaligned_rest_size > 0)) {
+ /* remainder to encrypt */
+ if (aligned_size % CHUNK_SIZE > 0) {
+ /* use last chunk for unaligned part */
+ unsigned char iv[AES_256_IVSIZE] = {0};
+ result = cbc_transform(buf_raw + aligned_size,
+ buf_raw + aligned_size - AES_256_IVSIZE,
+ AES_256_IVSIZE,
+ iv, key, true);
+ } else {
+ /* 0 full blocks in current chunk, use IV as base for unaligned part */
+ unsigned char iv[AES_256_IVSIZE] = {0};
+ unsigned char data[AES_256_IVSIZE];
+ prepare_iv(data, stream_offset + aligned_size);
+ result = cbc_transform(buf_raw + aligned_size,
+ data,
+ AES_256_IVSIZE,
+ iv, key, true);
+ }
+ if (result) {
+ for(size_t i = aligned_size; i < size; i++) { /* xor the tail plaintext into the pad */
+ *(buf_raw + i) ^= *(input_raw + in_ofs + i);
+ }
+ }
+ }
+ if (result) {
+ ldout(cct, 25) << "Encrypted " << size << " bytes"<< dendl;
+ buf.set_length(size); /* drop the unused part of the spare block */
+ output.append(buf);
+ } else {
+ ldout(cct, 5) << "Failed to encrypt" << dendl;
+ }
+ return result;
+ }
+
+
+ bool decrypt(bufferlist& input,
+ off_t in_ofs,
+ size_t size,
+ bufferlist& output,
+ off_t stream_offset)
+ { /* Inverse of encrypt(): same contract for `output` and the return
+    * value. The pad for the unaligned tail is again produced by CBC
+    * ENCRYPTION (encrypt == true below), taken from the ciphertext. */
+ bool result = false;
+ size_t aligned_size = size / AES_256_IVSIZE * AES_256_IVSIZE;
+ size_t unaligned_rest_size = size - aligned_size;
+ output.clear();
+ buffer::ptr buf(aligned_size + AES_256_IVSIZE);
+ unsigned char* buf_raw = reinterpret_cast<unsigned char*>(buf.c_str());
+ unsigned char* input_raw = reinterpret_cast<unsigned char*>(input.c_str());
+
+ /* decrypt main bulk of data */
+ result = cbc_transform(buf_raw,
+ input_raw + in_ofs,
+ aligned_size,
+ stream_offset, key, false);
+ if (result && unaligned_rest_size > 0) {
+ /* remainder to decrypt */
+ if (aligned_size % CHUNK_SIZE > 0) {
+ /*use last chunk for unaligned part*/
+ unsigned char iv[AES_256_IVSIZE] = {0};
+ result = cbc_transform(buf_raw + aligned_size,
+ input_raw + in_ofs + aligned_size - AES_256_IVSIZE,
+ AES_256_IVSIZE,
+ iv, key, true);
+ } else {
+ /* 0 full blocks in current chunk, use IV as base for unaligned part */
+ unsigned char iv[AES_256_IVSIZE] = {0};
+ unsigned char data[AES_256_IVSIZE];
+ prepare_iv(data, stream_offset + aligned_size);
+ result = cbc_transform(buf_raw + aligned_size,
+ data,
+ AES_256_IVSIZE,
+ iv, key, true);
+ }
+ if (result) {
+ for(size_t i = aligned_size; i < size; i++) { /* xor the tail ciphertext into the pad */
+ *(buf_raw + i) ^= *(input_raw + in_ofs + i);
+ }
+ }
+ }
+ if (result) {
+ ldout(cct, 25) << "Decrypted " << size << " bytes"<< dendl;
+ buf.set_length(size);
+ output.append(buf);
+ } else {
+ ldout(cct, 5) << "Failed to decrypt" << dendl;
+ }
+ return result;
+ }
+
+
+ void prepare_iv(unsigned char (&iv)[AES_256_IVSIZE], off_t offset) { /* iv = base IV + (offset / 16), byte-wise big-endian addition */
+ off_t index = offset / AES_256_IVSIZE;
+ off_t i = AES_256_IVSIZE - 1;
+ unsigned int val;
+ unsigned int carry = 0;
+ while (i>=0) { /* from the last (least significant) byte towards the first */
+ val = (index & 0xff) + IV[i] + carry;
+ iv[i] = val; /* keeps the low 8 bits only */
+ carry = val >> 8;
+ index = index >> 8;
+ i--;
+ }
+ }
+};
+
+
+std::unique_ptr<BlockCrypt> AES_256_CBC_create(CephContext* cct, const uint8_t* key, size_t len)
+{
+  /* Creates an AES-256-CBC block cipher keyed with `key`.
+   *
+   * `len` is the number of bytes available at `key` and must equal
+   * AES_256_KEYSIZE. It is handed to set_key() so that a buffer of the wrong
+   * size is rejected instead of being read past its end. Returns nullptr when
+   * the key is rejected. */
+  auto cbc = std::make_unique<AES_256_CBC>(cct);
+  if (!cbc->set_key(key, len)) {
+    ldout(cct, 5) << "AES_256_CBC_create: key must be " << AES_256_KEYSIZE
+                  << " bytes long, got " << len << dendl;
+    return nullptr;
+  }
+  return cbc;
+}
+
+
+const uint8_t AES_256_CBC::IV[AES_256_CBC::AES_256_IVSIZE] = /* fixed base IV, the ASCII bytes "aes256iv_ctr1337" (it does say "ctr"); prepare_iv() adds the block index to it */
+ { 'a', 'e', 's', '2', '5', '6', 'i', 'v', '_', 'c', 't', 'r', '1', '3', '3', '7' };
+
+
+#ifdef USE_NSS
+
+bool AES_256_ECB_encrypt(CephContext* cct,
+ const uint8_t* key,
+ size_t key_size,
+ const uint8_t* data_in,
+ uint8_t* data_out,
+ size_t data_size) { /*
+ * Encrypts data_size bytes from data_in into data_out with AES-256 in ECB
+ * mode through NSS. Returns true on success. Returns false, after logging
+ * at level 5, when key_size is not AES_256_KEYSIZE or any NSS step fails.
+ * NOTE(review): data_size is not checked here against the 16-byte AES
+ * block size; the caller in this file passes AES_256_KEYSIZE -- confirm
+ * for other callers. */
+ bool result = false;
+ PK11SlotInfo *slot;
+ SECItem keyItem;
+ PK11SymKey *symkey;
+ SECItem *param;
+ SECStatus ret;
+ PK11Context *ectx;
+ int written;
+ unsigned int written2;
+ if (key_size == AES_256_KEYSIZE) {
+ slot = PK11_GetBestSlot(CKM_AES_ECB, NULL);
+ if (slot) {
+ keyItem.type = siBuffer;
+ keyItem.data = const_cast<uint8_t*>(key);
+ keyItem.len = AES_256_KEYSIZE;
+
+ param = PK11_ParamFromIV(CKM_AES_ECB, NULL); /* no IV is supplied for ECB */
+ if (param) {
+ symkey = PK11_ImportSymKey(slot, CKM_AES_ECB, PK11_OriginUnwrap, CKA_UNWRAP, &keyItem, NULL);
+ if (symkey) {
+ ectx = PK11_CreateContextBySymKey(CKM_AES_ECB, CKA_ENCRYPT, symkey, param);
+ if (ectx) {
+ ret = PK11_CipherOp(ectx,
+ data_out, &written, data_size,
+ data_in, data_size);
+ if (ret == SECSuccess) {
+ ret = PK11_DigestFinal(ectx,
+ data_out + written, &written2,
+ data_size - written);
+ if (ret == SECSuccess) {
+ result = true;
+ }
+ }
+ PK11_DestroyContext(ectx, PR_TRUE);
+ }
+ PK11_FreeSymKey(symkey);
+ }
+ SECITEM_FreeItem(param, PR_TRUE);
+ }
+ PK11_FreeSlot(slot);
+ }
+ if (result == false) {
+ ldout(cct, 5) << "Failed to perform AES-ECB encryption: " << PR_GetError() << dendl;
+ }
+ } else {
+ ldout(cct, 5) << "Key size must be 256 bits long" << dendl;
+ }
+ return result;
+}
+
+#else
+# error "No supported crypto implementation found."
+#endif
+
+
+RGWGetObj_BlockDecrypt::RGWGetObj_BlockDecrypt(CephContext* cct,
+ RGWGetObj_Filter* next,
+ std::unique_ptr<BlockCrypt> crypt):
+ RGWGetObj_Filter(next),
+ cct(cct),
+ crypt(std::move(crypt)), /* takes ownership of the cipher */
+ enc_begin_skip(0),
+ ofs(0),
+ end(0),
+ cache()
+{
+ block_size = this->crypt->get_block_size(); /* this->crypt: the parameter of the same name has been moved from */
+}
+
+RGWGetObj_BlockDecrypt::~RGWGetObj_BlockDecrypt() { /* nothing to release by hand; members clean up after themselves */
+}
+
+int RGWGetObj_BlockDecrypt::read_manifest(bufferlist& manifest_bl) {
+  /* Rebuilds parts_len from an encoded object manifest: one entry per part,
+   * holding the sum of that part's stripe sizes. An empty buffer simply
+   * leaves parts_len empty. Returns -EIO when the manifest cannot be
+   * decoded, 0 otherwise. */
+  parts_len.clear();
+  if (manifest_bl.length() == 0) {
+    return 0;
+  }
+
+  RGWObjManifest manifest;
+  auto bl_iter = manifest_bl.cbegin();
+  try {
+    decode(manifest, bl_iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: couldn't decode manifest" << dendl;
+    return -EIO;
+  }
+
+  for (RGWObjManifest::obj_iterator stripe = manifest.obj_begin();
+       stripe != manifest.obj_end();
+       ++stripe) {
+    /* stripe 0 marks the start of a new part */
+    if (stripe.get_cur_stripe() == 0) {
+      parts_len.push_back(0);
+    }
+    parts_len.back() += stripe.get_stripe_size();
+  }
+
+  if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    size_t idx = 0;
+    while (idx < parts_len.size()) {
+      ldout(cct, 20) << "Manifest part " << idx << ", size=" << parts_len[idx] << dendl;
+      idx++;
+    }
+  }
+  return 0;
+}
+
+int RGWGetObj_BlockDecrypt::fixup_range(off_t& bl_ofs, off_t& bl_end) { /*
+ * Widens the requested byte range [bl_ofs, bl_end] in place to cipher block
+ * boundaries and records in enc_begin_skip / ofs / end what the caller asked
+ * for. With a manifest (parts_len non-empty) alignment is computed relative
+ * to the start of the part an offset falls into. Always returns 0. */
+ off_t inp_ofs = bl_ofs;
+ off_t inp_end = bl_end;
+ if (parts_len.size() > 0) {
+ off_t in_ofs = bl_ofs;
+ off_t in_end = bl_end;
+
+ size_t i = 0;
+ while (i<parts_len.size() && (in_ofs >= (off_t)parts_len[i])) {
+ in_ofs -= parts_len[i];
+ i++;
+ }
+ //in_ofs is inside block i
+ size_t j = 0;
+ while (j<(parts_len.size() - 1) && (in_end >= (off_t)parts_len[j])) {
+ in_end -= parts_len[j];
+ j++;
+ }
+ //in_end is inside part j, OR j is the last part
+
+ size_t rounded_end = ( in_end & ~(block_size - 1) ) + (block_size - 1); /* last byte of the block holding in_end */
+ if (rounded_end > parts_len[j]) { /* NOTE(review): rounded_end == parts_len[j] is not clamped although the last valid index is parts_len[j] - 1 -- confirm this is intended */
+ rounded_end = parts_len[j] - 1;
+ }
+
+ enc_begin_skip = in_ofs & (block_size - 1); /* bytes between the block start and the requested start */
+ ofs = bl_ofs - enc_begin_skip;
+ end = bl_end;
+ bl_end += rounded_end - in_end;
+ bl_ofs = std::min(bl_ofs - enc_begin_skip, bl_end);
+ }
+ else
+ { /* no manifest: align against absolute object offsets */
+ enc_begin_skip = bl_ofs & (block_size - 1);
+ ofs = bl_ofs & ~(block_size - 1);
+ end = bl_end;
+ bl_ofs = bl_ofs & ~(block_size - 1);
+ bl_end = ( bl_end & ~(block_size - 1) ) + (block_size - 1);
+ }
+ ldout(cct, 20) << "fixup_range [" << inp_ofs << "," << inp_end
+ << "] => [" << bl_ofs << "," << bl_end << "]" << dendl;
+ return 0;
+}
+
+int RGWGetObj_BlockDecrypt::process(bufferlist& in, size_t part_ofs, size_t size)
+{
+  /* Decrypts the first `size` bytes of `in` (positioned at part_ofs within
+   * the current part), forwards the requested slice of the plaintext to the
+   * next filter and removes the consumed bytes from `in`. Returns
+   * -ERR_INTERNAL_ERROR when decryption fails, otherwise the next filter's
+   * result. */
+  bufferlist plain;
+  const bool decrypted = crypt->decrypt(in, 0, size, plain, part_ofs);
+  if (!decrypted) {
+    return -ERR_INTERNAL_ERROR;
+  }
+
+  /* skip the bytes before the requested start and never go past `end` */
+  off_t out_len = size - enc_begin_skip;
+  if (ofs + enc_begin_skip + out_len > end + 1) {
+    out_len = end + 1 - ofs - enc_begin_skip;
+  }
+
+  const int rc = next->handle_data(plain, enc_begin_skip, out_len);
+
+  /* only the very first block of a request has leading bytes to skip */
+  enc_begin_skip = 0;
+  ofs += size;
+  in.splice(0, size);
+  return rc;
+}
+
+int RGWGetObj_BlockDecrypt::handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
+  /* Appends bl[bl_ofs, bl_ofs + bl_len) to the cache and decrypts as much of
+   * it as possible: everything up to each part boundary that has been
+   * reached, then whole cipher blocks only. What is left stays cached for
+   * the next call or for flush(). Returns the first negative result from
+   * process(), otherwise the result of the last process() call (0 if none). */
+  ldout(cct, 25) << "Decrypt " << bl_len << " bytes" << dendl;
+  bl.copy(bl_ofs, bl_len, cache);
+
+  int rc = 0;
+  size_t ofs_in_part = ofs;
+  for (size_t part_len : parts_len) {
+    if (ofs_in_part >= part_len) {
+      /* this part lies entirely before the current position */
+      ofs_in_part -= part_len;
+      continue;
+    }
+    if (ofs_in_part + cache.length() < part_len) {
+      /* cached data ends inside this part */
+      break;
+    }
+    // flush data up to part boundaries, aligned or not
+    rc = process(cache, ofs_in_part, part_len - ofs_in_part);
+    if (rc < 0) {
+      return rc;
+    }
+    ofs_in_part = 0;
+  }
+
+  // write up to block boundaries, aligned only
+  const off_t aligned_len = cache.length() & ~(block_size - 1);
+  if (aligned_len > 0) {
+    rc = process(cache, ofs_in_part, aligned_len);
+  }
+  return rc;
+}
+
+/**
+ * Decrypts and forwards everything still held in the cache, whether or not
+ * it fills a whole cipher block. Part boundaries are honoured the same way
+ * as in handle_data(). Returns the first negative result from process(),
+ * otherwise the result of the last process() call (0 if there was none).
+ */
+int RGWGetObj_BlockDecrypt::flush() {
+  ldout(cct, 25) << "Decrypt flushing " << cache.length() << " bytes" << dendl;
+
+  int rc = 0;
+  size_t ofs_in_part = ofs;
+  for (size_t part_len : parts_len) {
+    if (ofs_in_part >= part_len) {
+      /* this part lies entirely before the current position */
+      ofs_in_part -= part_len;
+      continue;
+    }
+    if (ofs_in_part + cache.length() < part_len) {
+      /* cached data ends inside this part */
+      break;
+    }
+    // flush data up to part boundaries, aligned or not
+    rc = process(cache, ofs_in_part, part_len - ofs_in_part);
+    if (rc < 0) {
+      return rc;
+    }
+    ofs_in_part = 0;
+  }
+
+  // flush up to block boundaries, aligned or not
+  if (cache.length() > 0) {
+    rc = process(cache, ofs_in_part, cache.length());
+  }
+  return rc;
+}
+
+RGWPutObj_BlockEncrypt::RGWPutObj_BlockEncrypt(CephContext* cct,
+ rgw::putobj::DataProcessor *next,
+ std::unique_ptr<BlockCrypt> crypt)
+ : Pipe(next),
+ cct(cct),
+ crypt(std::move(crypt)), /* takes ownership of the cipher */
+ block_size(this->crypt->get_block_size()) /* reads the member, the parameter has been moved from; NOTE(review): relies on `crypt` being declared before `block_size` in the class -- confirm */
+{
+}
+
+int RGWPutObj_BlockEncrypt::process(bufferlist&& data, uint64_t logical_offset)
+{
+  /* Buffers incoming plaintext, encrypts every complete cipher block and
+   * passes the ciphertext down the pipe. An empty `data` is the flush
+   * signal: the whole cache is encrypted, aligned or not, and a zero-length
+   * write is forwarded afterwards. Returns -ERR_INTERNAL_ERROR when
+   * encryption fails, a negative result from the pipe, or 0. */
+  ldout(cct, 25) << "Encrypt " << data.length() << " bytes" << dendl;
+
+  // adjust logical offset to beginning of cached data
+  ceph_assert(logical_offset >= cache.length());
+  logical_offset -= cache.length();
+
+  const bool flush = (data.length() == 0);
+  cache.claim_append(data);
+
+  /* on flush take everything, otherwise whole blocks only */
+  const uint64_t proc_size =
+    flush ? cache.length() : (cache.length() & ~(block_size - 1));
+
+  if (proc_size > 0) {
+    bufferlist plain;
+    bufferlist encrypted;
+    cache.splice(0, proc_size, &plain);
+    const bool ok = crypt->encrypt(plain, 0, proc_size, encrypted, logical_offset);
+    if (!ok) {
+      return -ERR_INTERNAL_ERROR;
+    }
+    const int r = Pipe::process(std::move(encrypted), logical_offset);
+    logical_offset += proc_size;
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  if (!flush) {
+    return 0;
+  }
+  /*replicate 0-sized handle_data*/
+  return Pipe::process({}, logical_offset);
+}
+
+
+std::string create_random_key_selector(CephContext * const cct) {
+  /* Returns AES_256_KEYSIZE bytes taken from the context's random source. */
+  std::string selector(AES_256_KEYSIZE, '\0');
+  cct->random()->get_bytes(&selector[0], selector.size());
+  return selector;
+}
+
+static int get_barbican_url(CephContext * const cct,
+                            std::string& url)
+{
+  /* Copies rgw_barbican_url into `url`, making sure it ends with '/'.
+   * Returns -EINVAL when the option is empty, 0 otherwise. */
+  url = cct->_conf->rgw_barbican_url;
+  if (url.empty()) {
+    ldout(cct, 0) << "ERROR: conf rgw_barbican_url is not set" << dendl;
+    return -EINVAL;
+  }
+
+  const bool has_trailing_slash = (url.back() == '/');
+  if (!has_trailing_slash) {
+    url.push_back('/');
+  }
+
+  return 0;
+}
+
+static int request_key_from_barbican(CephContext *cct,
+                                     boost::string_view key_id,
+                                     boost::string_view key_selector,
+                                     const std::string& barbican_token,
+                                     std::string& actual_key) {
+  /* Fetches the secret `key_id` from barbican, authenticating with
+   * barbican_token, and stores it in actual_key. key_selector is not used
+   * here. Returns a negative error when the URL is not configured or the
+   * request fails, -EACCES for an unauthorized reply, a non-2xx status or a
+   * payload that is not AES_256_KEYSIZE bytes long; otherwise the result of
+   * the HTTP request. */
+  std::string secret_url;
+  int res = get_barbican_url(cct, secret_url);
+  if (res < 0) {
+    return res;
+  }
+  secret_url.append("v1/secrets/").append(key_id.data(), key_id.size());
+
+  bufferlist secret_bl;
+  RGWHTTPTransceiver secret_req(cct, "GET", secret_url, &secret_bl);
+  secret_req.append_header("Accept", "application/octet-stream");
+  secret_req.append_header("X-Auth-Token", barbican_token);
+
+  res = secret_req.process();
+  if (res < 0) {
+    return res;
+  }
+
+  const auto http_status = secret_req.get_http_status();
+  if (http_status == RGWHTTPTransceiver::HTTP_STATUS_UNAUTHORIZED) {
+    return -EACCES;
+  }
+
+  const bool success_status = (http_status >= 200 && http_status < 300);
+  if (!success_status || secret_bl.length() != AES_256_KEYSIZE) {
+    return -EACCES;
+  }
+
+  actual_key.assign(secret_bl.c_str(), secret_bl.length());
+  /* do not leave a second copy of the key in the receive buffer */
+  ::ceph::crypto::zeroize_for_security(secret_bl.c_str(), secret_bl.length());
+  return res;
+}
+
+static map<string,string> get_str_map(const string &str) {
+  /* Convenience wrapper around the three-argument get_str_map(): parses
+   * `str` with the delimiters ";, \t" and returns the map by value. */
+  map<string,string> parsed;
+  get_str_map(str, &parsed, ";, \t");
+  return parsed;
+}
+
+static int get_actual_key_from_kms(CephContext *cct,
+                                   boost::string_view key_id,
+                                   boost::string_view key_selector,
+                                   std::string& actual_key)
+{
+  /* Resolves the data key for `key_id` into actual_key.
+   *
+   * If key_id is listed in rgw_crypt_s3_kms_encryption_keys, the key is
+   * derived locally: the base64-decoded master key AES-256-ECB-encrypts the
+   * first AES_256_KEYSIZE bytes of key_selector. Otherwise the key is
+   * requested from barbican.
+   *
+   * Returns 0 on success; -EINVAL for an undecodable master key or a missing
+   * barbican token; -EIO for a master key or key selector of the wrong size
+   * or a failed encryption; otherwise the barbican request's error. */
+  int res = 0;
+  ldout(cct, 20) << "Getting KMS encryption key for key=" << key_id << dendl;
+  /* parsed once on first use, later changes of the option are not seen */
+  static map<string,string> str_map = get_str_map(
+      cct->_conf->rgw_crypt_s3_kms_encryption_keys);
+
+  map<string, string>::iterator it = str_map.find(std::string(key_id));
+  if (it == str_map.end()) {
+    /* not a locally configured key: ask barbican */
+    std::string token;
+    if (rgw::keystone::Service::get_keystone_barbican_token(cct, token) < 0) {
+      ldout(cct, 5) << "Failed to retrieve token for barbican" << dendl;
+      res = -EINVAL;
+      return res;
+    }
+
+    res = request_key_from_barbican(cct, key_id, key_selector, token, actual_key);
+    if (res != 0) {
+      ldout(cct, 5) << "Failed to retrieve secret from barbican:" << key_id << dendl;
+    }
+    return res;
+  }
+
+  std::string master_key;
+  try {
+    master_key = from_base64(it->second);
+  } catch (...) {
+    ldout(cct, 5) << "ERROR: get_actual_key_from_kms invalid encryption key id "
+                  << "which contains character that is not base64 encoded."
+                  << dendl;
+    return -EINVAL;
+  }
+
+  if (master_key.length() != AES_256_KEYSIZE) {
+    ldout(cct, 20) << "Wrong size for key=" << key_id << dendl;
+    res = -EIO;
+  } else if (key_selector.size() < AES_256_KEYSIZE) {
+    /* the encryption below reads AES_256_KEYSIZE bytes from the selector */
+    ldout(cct, 5) << "ERROR: key selector for key=" << key_id
+                  << " is shorter than " << AES_256_KEYSIZE << " bytes" << dendl;
+    res = -EIO;
+  } else {
+    uint8_t _actual_key[AES_256_KEYSIZE];
+    if (AES_256_ECB_encrypt(cct,
+                            reinterpret_cast<const uint8_t*>(master_key.c_str()), AES_256_KEYSIZE,
+                            reinterpret_cast<const uint8_t*>(key_selector.data()),
+                            _actual_key, AES_256_KEYSIZE)) {
+      actual_key = std::string((char*)&_actual_key[0], AES_256_KEYSIZE);
+    } else {
+      res = -EIO;
+    }
+    ::ceph::crypto::zeroize_for_security(_actual_key, sizeof(_actual_key));
+  }
+
+  /* the decoded master key is no longer needed; do not leave it behind */
+  if (!master_key.empty()) {
+    ::ceph::crypto::zeroize_for_security(&master_key[0], master_key.length());
+  }
+  return res;
+}
+
+/**
+ * Store \p value under \p key in \p attrs, replacing any previous entry.
+ * The bytes of \p value are copied into a new bufferlist.
+ */
+static inline void set_attr(map<string, bufferlist>& attrs,
+                            const char* key,
+                            boost::string_view value)
+{
+  bufferlist encoded;
+  encoded.append(value.data(), value.size());
+  attrs[key] = std::move(encoded);
+}
+
+/**
+ * Return the attribute \p name from \p attrs as a string, or an empty string
+ * when the attribute is not present.
+ */
+static inline std::string get_str_attribute(map<string, bufferlist>& attrs,
+                                            const char *name)
+{
+  const auto found = attrs.find(name);
+  if (found != attrs.end()) {
+    return found->second.to_str();
+  }
+  return {};
+}
+
+/*
+ * Encryption-related request attributes. The enumerators double as indices
+ * into crypt_options; X_AMZ_SERVER_SIDE_ENCRYPTION_LAST is the entry count.
+ */
+enum crypt_option_e {
+  X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM = 0,
+  X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
+  X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+  X_AMZ_SERVER_SIDE_ENCRYPTION,
+  X_AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID,
+  X_AMZ_SERVER_SIDE_ENCRYPTION_LAST
+};
+
+/*
+ * The two spellings of one attribute: the environment variable that carries
+ * the HTTP header, and the name of the matching POST form part.
+ */
+struct crypt_option_names {
+  const char* http_header_name;
+  const std::string post_part_name;
+};
+
+/* One row per crypt_option_e value, in enumerator order. */
+static const crypt_option_names crypt_options[] = {
+  {
+    "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM",
+    "x-amz-server-side-encryption-customer-algorithm"
+  },
+  {
+    "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY",
+    "x-amz-server-side-encryption-customer-key"
+  },
+  {
+    "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5",
+    "x-amz-server-side-encryption-customer-key-md5"
+  },
+  {
+    "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION",
+    "x-amz-server-side-encryption"
+  },
+  {
+    "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID",
+    "x-amz-server-side-encryption-aws-kms-key-id"
+  },
+};
+
+/**
+ * Look up one encryption attribute of the current request.
+ *
+ * When \p parts is non-null the value is taken from the POST form part with
+ * the matching name and trimmed of surrounding whitespace; otherwise it is
+ * read from the request environment under the HTTP header name.
+ *
+ * \return view of the value, or an empty view when the attribute is absent.
+ *         The view refers to storage owned by \p parts / \p env.
+ */
+static boost::string_view get_crypt_attribute(
+    const RGWEnv* env,
+    std::map<std::string,
+             RGWPostObj_ObjStore::post_form_part,
+             const ltstr_nocase>* parts,
+    crypt_option_e option)
+{
+  static_assert(
+      X_AMZ_SERVER_SIDE_ENCRYPTION_LAST == sizeof(crypt_options)/sizeof(*crypt_options),
+      "Missing items in crypt_options");
+
+  if (parts == nullptr) {
+    /* plain request: the attribute travels as an HTTP header */
+    const char* hdr = env->get(crypt_options[option].http_header_name, nullptr);
+    if (hdr == nullptr) {
+      return boost::string_view();
+    }
+    return boost::string_view(hdr);
+  }
+
+  /* POST upload: the attribute travels as a form part */
+  auto part = parts->find(crypt_options[option].post_part_name);
+  if (part == parts->end()) {
+    return boost::string_view();
+  }
+  bufferlist& data = part->second.data;
+  boost::string_view raw = boost::string_view(data.c_str(), data.length());
+  return rgw_trim_whitespace(raw);
+}
+
+
+/**
+ * Select and set up server-side encryption for an object being written.
+ *
+ * Request attributes are read from HTTP headers, or from POST form parts when
+ * \p parts is non-null. Modes are considered in this order:
+ *  1. SSE-C: x-amz-server-side-encryption-customer-algorithm is present.
+ *     The customer key and its MD5 are validated.
+ *  2. SSE-KMS: x-amz-server-side-encryption is "aws:kms". The key is
+ *     obtained through get_actual_key_from_kms() with a fresh key selector.
+ *  3. RGW-AUTO: rgw_crypt_default_encryption_key is configured. The key is
+ *     derived from it and a fresh key selector. This is also the path taken
+ *     when x-amz-server-side-encryption is "AES256".
+ *
+ * \param s                    request state (configuration, environment,
+ *                             error message)
+ * \param attrs                receives the RGW_ATTR_CRYPT_* attributes that
+ *                             describe the chosen mode
+ * \param parts                POST form parts, or nullptr for header lookup
+ * \param block_crypt          if non-null, receives the configured cipher
+ * \param crypt_http_responses cleared, then filled with response headers for
+ *                             SSE-C and SSE-KMS
+ * \return 0 when encryption was set up or no encryption applies; a negative
+ *         error code otherwise (s->err.message is set for most failures)
+ */
+int rgw_s3_prepare_encrypt(struct req_state* s,
+                           std::map<std::string, ceph::bufferlist>& attrs,
+                           std::map<std::string,
+                                    RGWPostObj_ObjStore::post_form_part,
+                                    const ltstr_nocase>* parts,
+                           std::unique_ptr<BlockCrypt>* block_crypt,
+                           std::map<std::string, std::string>& crypt_http_responses)
+{
+  int res = 0;
+  crypt_http_responses.clear();
+  {
+    boost::string_view req_sse_ca =
+        get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM);
+    if (! req_sse_ca.empty()) {
+      /* SSE-C: encryption with a customer provided key */
+      if (req_sse_ca != "AES256") {
+        ldout(s->cct, 5) << "ERROR: Invalid value for header "
+                         << "x-amz-server-side-encryption-customer-algorithm"
+                         << dendl;
+        s->err.message = "The requested encryption algorithm is not valid, must be AES256.";
+        return -ERR_INVALID_ENCRYPTION_ALGORITHM;
+      }
+      if (s->cct->_conf->rgw_crypt_require_ssl &&
+          !rgw_transport_is_secure(s->cct, *s->info.env)) {
+        ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
+        return -ERR_INVALID_REQUEST;
+      }
+
+      std::string key_bin;
+      try {
+        key_bin = from_base64(
+            get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY) );
+      } catch (...) {
+        ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_encrypt invalid encryption "
+                         << "key which contains character that is not base64 encoded."
+                         << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide an appropriate secret key.";
+        return -EINVAL;
+      }
+
+      if (key_bin.size() != AES_256_CBC::AES_256_KEYSIZE) {
+        ldout(s->cct, 5) << "ERROR: invalid encryption key size" << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide an appropriate secret key.";
+        return -EINVAL;
+      }
+
+      boost::string_view keymd5 =
+          get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5);
+
+      std::string keymd5_bin;
+      try {
+        keymd5_bin = from_base64(keymd5);
+      } catch (...) {
+        ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_encrypt invalid encryption key "
+                         << "md5 which contains character that is not base64 encoded."
+                         << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide an appropriate secret key md5.";
+        return -EINVAL;
+      }
+
+      if (keymd5_bin.size() != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+        ldout(s->cct, 5) << "ERROR: Invalid key md5 size" << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide an appropriate secret key md5.";
+        return -EINVAL;
+      }
+
+      /* the supplied MD5 must match the digest of the supplied key */
+      MD5 key_hash;
+      unsigned char key_hash_res[CEPH_CRYPTO_MD5_DIGESTSIZE];
+      key_hash.Update(reinterpret_cast<const unsigned char*>(key_bin.c_str()), key_bin.size());
+      key_hash.Final(key_hash_res);
+
+      if (memcmp(key_hash_res, keymd5_bin.c_str(), CEPH_CRYPTO_MD5_DIGESTSIZE) != 0) {
+        ldout(s->cct, 5) << "ERROR: Invalid key md5 hash" << dendl;
+        s->err.message = "The calculated MD5 hash of the key did not match the hash that was provided.";
+        return -EINVAL;
+      }
+
+      /* only the mode and the key digest are stored, never the key itself */
+      set_attr(attrs, RGW_ATTR_CRYPT_MODE, "SSE-C-AES256");
+      set_attr(attrs, RGW_ATTR_CRYPT_KEYMD5, keymd5_bin);
+
+      if (block_crypt) {
+        auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+        aes->set_key(reinterpret_cast<const uint8_t*>(key_bin.c_str()), AES_256_KEYSIZE);
+        *block_crypt = std::move(aes);
+      }
+
+      crypt_http_responses["x-amz-server-side-encryption-customer-algorithm"] = "AES256";
+      crypt_http_responses["x-amz-server-side-encryption-customer-key-MD5"] = keymd5.to_string();
+      return 0;
+    } else {
+      /* no algorithm header: the other SSE-C headers must be absent as well */
+      boost::string_view customer_key =
+          get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY);
+      if (!customer_key.empty()) {
+        ldout(s->cct, 5) << "ERROR: SSE-C encryption request is missing the header "
+                         << "x-amz-server-side-encryption-customer-algorithm"
+                         << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide a valid encryption algorithm.";
+        return -EINVAL;
+      }
+
+      boost::string_view customer_key_md5 =
+          get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5);
+      if (!customer_key_md5.empty()) {
+        ldout(s->cct, 5) << "ERROR: SSE-C encryption request is missing the header "
+                         << "x-amz-server-side-encryption-customer-algorithm"
+                         << dendl;
+        s->err.message = "Requests specifying Server Side Encryption with Customer "
+                         "provided keys must provide a valid encryption algorithm.";
+        return -EINVAL;
+      }
+    }
+
+    /* AMAZON server side encryption with KMS (key management service) */
+    boost::string_view req_sse =
+        get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION);
+    if (! req_sse.empty()) {
+
+      if (s->cct->_conf->rgw_crypt_require_ssl &&
+          !rgw_transport_is_secure(s->cct, *s->info.env)) {
+        ldout(s->cct, 5) << "ERROR: insecure request, rgw_crypt_require_ssl is set" << dendl;
+        return -ERR_INVALID_REQUEST;
+      }
+
+      if (req_sse == "aws:kms") {
+        boost::string_view key_id =
+            get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID);
+        if (key_id.empty()) {
+          ldout(s->cct, 5) << "ERROR: not provide a valid key id" << dendl;
+          s->err.message = "Server Side Encryption with KMS managed key requires "
+                           "HTTP header x-amz-server-side-encryption-aws-kms-key-id";
+          return -ERR_INVALID_ACCESS_KEY;
+        }
+        /* try to retrieve actual key */
+        std::string key_selector = create_random_key_selector(s->cct);
+        std::string actual_key;
+        res = get_actual_key_from_kms(s->cct, key_id, key_selector, actual_key);
+        if (res != 0) {
+          ldout(s->cct, 5) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl;
+          s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id.to_string();
+          return res;
+        }
+        if (actual_key.size() != AES_256_KEYSIZE) {
+          ldout(s->cct, 5) << "ERROR: key obtained from key_id:" <<
+              key_id << " is not 256 bit size" << dendl;
+          s->err.message = "KMS provided an invalid key for the given kms-keyid.";
+          return -ERR_INVALID_ACCESS_KEY;
+        }
+        set_attr(attrs, RGW_ATTR_CRYPT_MODE, "SSE-KMS");
+        set_attr(attrs, RGW_ATTR_CRYPT_KEYID, key_id);
+        set_attr(attrs, RGW_ATTR_CRYPT_KEYSEL, key_selector);
+
+        if (block_crypt) {
+          auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+          aes->set_key(reinterpret_cast<const uint8_t*>(actual_key.c_str()), AES_256_KEYSIZE);
+          *block_crypt = std::move(aes);
+        }
+        /* wipe the key with the dedicated helper so the store cannot be
+         * optimized away, as is done for the other key buffers */
+        ::ceph::crypto::zeroize_for_security(&actual_key[0], actual_key.size());
+
+        crypt_http_responses["x-amz-server-side-encryption"] = "aws:kms";
+        crypt_http_responses["x-amz-server-side-encryption-aws-kms-key-id"] = key_id.to_string();
+        return 0;
+      } else if (req_sse == "AES256") {
+        /* if a default encryption key was provided, we will use it for SSE-S3 */
+      } else {
+        ldout(s->cct, 5) << "ERROR: Invalid value for header x-amz-server-side-encryption"
+                         << dendl;
+        s->err.message = "Server Side Encryption with KMS managed key requires "
+                         "HTTP header x-amz-server-side-encryption : aws:kms or AES256";
+        return -EINVAL;
+      }
+    } else {
+      /* x-amz-server-side-encryption not present or empty */
+      boost::string_view key_id =
+          get_crypt_attribute(s->info.env, parts,
+                              X_AMZ_SERVER_SIDE_ENCRYPTION_AWS_KMS_KEY_ID);
+      if (!key_id.empty()) {
+        ldout(s->cct, 5) << "ERROR: SSE-KMS encryption request is missing the header "
+                         << "x-amz-server-side-encryption"
+                         << dendl;
+        s->err.message = "Server Side Encryption with KMS managed key requires "
+                         "HTTP header x-amz-server-side-encryption : aws:kms";
+        return -EINVAL;
+      }
+    }
+
+    /* no other encryption mode, check if default encryption is selected */
+    if (s->cct->_conf->rgw_crypt_default_encryption_key != "") {
+      std::string master_encryption_key;
+      try {
+        master_encryption_key = from_base64(s->cct->_conf->rgw_crypt_default_encryption_key);
+      } catch (...) {
+        ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_encrypt invalid default encryption key "
+                         << "which contains character that is not base64 encoded."
+                         << dendl;
+        /* the bad key is server configuration, not something the client
+         * sent, so report it as such (same text as the decrypt path) */
+        s->err.message = "The default encryption key is not valid base64.";
+        return -EINVAL;
+      }
+
+      if (master_encryption_key.size() != 256 / 8) {
+        ldout(s->cct, 0) << "ERROR: failed to decode 'rgw crypt default encryption key' to 256 bit string" << dendl;
+        /* not an error to return; missing encryption does not inhibit processing */
+        return 0;
+      }
+
+      set_attr(attrs, RGW_ATTR_CRYPT_MODE, "RGW-AUTO");
+      std::string key_selector = create_random_key_selector(s->cct);
+      set_attr(attrs, RGW_ATTR_CRYPT_KEYSEL, key_selector);
+
+      /* per-object key = AES-256-ECB(master key, key selector) */
+      uint8_t actual_key[AES_256_KEYSIZE];
+      if (AES_256_ECB_encrypt(s->cct,
+                              reinterpret_cast<const uint8_t*>(master_encryption_key.c_str()), AES_256_KEYSIZE,
+                              reinterpret_cast<const uint8_t*>(key_selector.c_str()),
+                              actual_key, AES_256_KEYSIZE) != true) {
+        ::ceph::crypto::zeroize_for_security(actual_key, sizeof(actual_key));
+        return -EIO;
+      }
+      if (block_crypt) {
+        auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+        aes->set_key(reinterpret_cast<const uint8_t*>(actual_key), AES_256_KEYSIZE);
+        *block_crypt = std::move(aes);
+      }
+      ::ceph::crypto::zeroize_for_security(actual_key, sizeof(actual_key));
+      return 0;
+    }
+  }
+  /*no encryption*/
+  return 0;
+}
+
+
+/**
+ * Set up decryption for an object being read, based on the encryption mode
+ * recorded in its attributes (RGW_ATTR_CRYPT_MODE).
+ *
+ * Handled modes: "SSE-C-AES256", "SSE-KMS" and "RGW-AUTO". Any other value,
+ * including a missing attribute, means the object is returned as stored.
+ *
+ * \param s                    request state (configuration, environment,
+ *                             error message)
+ * \param attrs                attributes of the stored object
+ * \param block_crypt          if non-null, receives the configured cipher
+ * \param crypt_http_responses receives response headers for SSE-C and SSE-KMS
+ * \return 0 when decryption was set up or is not needed; a negative error
+ *         code otherwise
+ */
+int rgw_s3_prepare_decrypt(struct req_state* s,
+ map<string, bufferlist>& attrs,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string, std::string>& crypt_http_responses)
+{
+ int res = 0;
+ std::string stored_mode = get_str_attribute(attrs, RGW_ATTR_CRYPT_MODE);
+ ldout(s->cct, 15) << "Encryption mode: " << stored_mode << dendl;
+
+ /* GET and HEAD requests carrying x-amz-server-side-encryption are rejected */
+ const char *req_sse = s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION", NULL);
+ if (nullptr != req_sse && (s->op == OP_GET || s->op == OP_HEAD)) {
+ return -ERR_INVALID_REQUEST;
+ }
+
+ /* SSE-C: the client has to present the same key that was used on upload */
+ if (stored_mode == "SSE-C-AES256") {
+ if (s->cct->_conf->rgw_crypt_require_ssl &&
+ !rgw_transport_is_secure(s->cct, *s->info.env)) {
+ ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
+ return -ERR_INVALID_REQUEST;
+ }
+ const char *req_cust_alg =
+ s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM", NULL);
+
+ if (nullptr == req_cust_alg) {
+ ldout(s->cct, 5) << "ERROR: Request for SSE-C encrypted object missing "
+ << "x-amz-server-side-encryption-customer-algorithm"
+ << dendl;
+ s->err.message = "Requests specifying Server Side Encryption with Customer "
+ "provided keys must provide a valid encryption algorithm.";
+ return -EINVAL;
+ } else if (strcmp(req_cust_alg, "AES256") != 0) {
+ ldout(s->cct, 5) << "ERROR: The requested encryption algorithm is not valid, must be AES256." << dendl;
+ s->err.message = "The requested encryption algorithm is not valid, must be AES256.";
+ return -ERR_INVALID_ENCRYPTION_ALGORITHM;
+ }
+
+ /* a missing key header decodes to an empty key and fails the size check below */
+ std::string key_bin;
+ try {
+ key_bin = from_base64(s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY", ""));
+ } catch (...) {
+ ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_decrypt invalid encryption key "
+ << "which contains character that is not base64 encoded."
+ << dendl;
+ s->err.message = "Requests specifying Server Side Encryption with Customer "
+ "provided keys must provide an appropriate secret key.";
+ return -EINVAL;
+ }
+
+ if (key_bin.size() != AES_256_CBC::AES_256_KEYSIZE) {
+ ldout(s->cct, 5) << "ERROR: Invalid encryption key size" << dendl;
+ s->err.message = "Requests specifying Server Side Encryption with Customer "
+ "provided keys must provide an appropriate secret key.";
+ return -EINVAL;
+ }
+
+ std::string keymd5 =
+ s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5", "");
+ std::string keymd5_bin;
+ try {
+ keymd5_bin = from_base64(keymd5);
+ } catch (...) {
+ ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_decrypt invalid encryption key md5 "
+ << "which contains character that is not base64 encoded."
+ << dendl;
+ s->err.message = "Requests specifying Server Side Encryption with Customer "
+ "provided keys must provide an appropriate secret key md5.";
+ return -EINVAL;
+ }
+
+
+ if (keymd5_bin.size() != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+ ldout(s->cct, 5) << "ERROR: Invalid key md5 size " << dendl;
+ s->err.message = "Requests specifying Server Side Encryption with Customer "
+ "provided keys must provide an appropriate secret key md5.";
+ return -EINVAL;
+ }
+
+ MD5 key_hash;
+ uint8_t key_hash_res[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ key_hash.Update(reinterpret_cast<const unsigned char*>(key_bin.c_str()), key_bin.size());
+ key_hash.Final(key_hash_res);
+
+ /* the supplied MD5 must match both the supplied key and the digest stored with the object */
+ if ((memcmp(key_hash_res, keymd5_bin.c_str(), CEPH_CRYPTO_MD5_DIGESTSIZE) != 0) ||
+ (get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYMD5) != keymd5_bin)) {
+ s->err.message = "The calculated MD5 hash of the key did not match the hash that was provided.";
+ return -EINVAL;
+ }
+ auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+ aes->set_key(reinterpret_cast<const uint8_t*>(key_bin.c_str()), AES_256_CBC::AES_256_KEYSIZE);
+ if (block_crypt) *block_crypt = std::move(aes);
+
+ crypt_http_responses["x-amz-server-side-encryption-customer-algorithm"] = "AES256";
+ crypt_http_responses["x-amz-server-side-encryption-customer-key-MD5"] = keymd5;
+ return 0;
+ }
+
+ /* SSE-KMS: key id and key selector were stored with the object */
+ if (stored_mode == "SSE-KMS") {
+ if (s->cct->_conf->rgw_crypt_require_ssl &&
+ !rgw_transport_is_secure(s->cct, *s->info.env)) {
+ ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
+ return -ERR_INVALID_REQUEST;
+ }
+ /* try to retrieve actual key */
+ /* NOTE(review): unlike the RGW-AUTO branch below, the stored key selector
+ * is not length-checked here before being passed on - confirm the callee
+ * validates it */
+ std::string key_id = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYID);
+ std::string key_selector = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL);
+ std::string actual_key;
+ res = get_actual_key_from_kms(s->cct, key_id, key_selector, actual_key);
+ if (res != 0) {
+ ldout(s->cct, 10) << "ERROR: failed to retrieve actual key from key_id: " << key_id << dendl;
+ s->err.message = "Failed to retrieve the actual key, kms-keyid: " + key_id;
+ return res;
+ }
+ if (actual_key.size() != AES_256_KEYSIZE) {
+ ldout(s->cct, 0) << "ERROR: key obtained from key_id:" <<
+ key_id << " is not 256 bit size" << dendl;
+ s->err.message = "KMS provided an invalid key for the given kms-keyid.";
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+ aes->set_key(reinterpret_cast<const uint8_t*>(actual_key.c_str()), AES_256_KEYSIZE);
+ /* overwrite the local copy of the key with zero bytes */
+ actual_key.replace(0, actual_key.length(), actual_key.length(), '\000');
+ if (block_crypt) *block_crypt = std::move(aes);
+
+ crypt_http_responses["x-amz-server-side-encryption"] = "aws:kms";
+ crypt_http_responses["x-amz-server-side-encryption-aws-kms-key-id"] = key_id;
+ return 0;
+ }
+
+ /* RGW-AUTO: key derived from the configured default key and the stored key selector */
+ if (stored_mode == "RGW-AUTO") {
+ std::string master_encryption_key;
+ try {
+ master_encryption_key = from_base64(std::string(s->cct->_conf->rgw_crypt_default_encryption_key));
+ } catch (...) {
+ ldout(s->cct, 5) << "ERROR: rgw_s3_prepare_decrypt invalid default encryption key "
+ << "which contains character that is not base64 encoded."
+ << dendl;
+ s->err.message = "The default encryption key is not valid base64.";
+ return -EINVAL;
+ }
+
+ if (master_encryption_key.size() != 256 / 8) {
+ ldout(s->cct, 0) << "ERROR: failed to decode 'rgw crypt default encryption key' to 256 bit string" << dendl;
+ return -EIO;
+ }
+ std::string attr_key_selector = get_str_attribute(attrs, RGW_ATTR_CRYPT_KEYSEL);
+ if (attr_key_selector.size() != AES_256_CBC::AES_256_KEYSIZE) {
+ ldout(s->cct, 0) << "ERROR: missing or invalid " RGW_ATTR_CRYPT_KEYSEL << dendl;
+ return -EIO;
+ }
+ /* per-object key = AES-256-ECB(master key, key selector) */
+ uint8_t actual_key[AES_256_KEYSIZE];
+ if (AES_256_ECB_encrypt(s->cct,
+ reinterpret_cast<const uint8_t*>(master_encryption_key.c_str()),
+ AES_256_KEYSIZE,
+ reinterpret_cast<const uint8_t*>(attr_key_selector.c_str()),
+ actual_key, AES_256_KEYSIZE) != true) {
+ ::ceph::crypto::zeroize_for_security(actual_key, sizeof(actual_key));
+ return -EIO;
+ }
+ auto aes = std::unique_ptr<AES_256_CBC>(new AES_256_CBC(s->cct));
+ aes->set_key(actual_key, AES_256_KEYSIZE);
+ ::ceph::crypto::zeroize_for_security(actual_key, sizeof(actual_key));
+ if (block_crypt) *block_crypt = std::move(aes);
+ return 0;
+ }
+ /*no decryption*/
+ return 0;
+}
diff --git a/src/rgw/rgw_crypt.h b/src/rgw/rgw_crypt.h
new file mode 100644
index 00000000..e928d054
--- /dev/null
+++ b/src/rgw/rgw_crypt.h
@@ -0,0 +1,152 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/**
+ * Crypto filters for Put/Post/Get operations.
+ */
+
+#ifndef CEPH_RGW_CRYPT_H
+#define CEPH_RGW_CRYPT_H
+
+#include <rgw/rgw_op.h>
+#include <rgw/rgw_rest.h>
+#include <rgw/rgw_rest_s3.h>
+#include "rgw_putobj.h"
+#include <boost/utility/string_view.hpp>
+
+/**
+ * \brief Interface for block encryption methods
+ *
+ * Encrypts and decrypts data that belongs to a larger stream which is
+ * processed block by block.
+ * Blocks are independent of each other, but a block can only be processed
+ * as a whole; a partial block cannot be handled correctly.
+ * Every request must begin at a block-aligned offset and should have a
+ * length that is a multiple of the block size. Only the final part of the
+ * stream may have an unaligned length.
+ */
+class BlockCrypt {
+public:
+  BlockCrypt() = default;
+  virtual ~BlockCrypt() = default;
+
+  /**
+   * Reports the size of an encryption block.
+   * This is usually a multiple of the key size.
+   * Chunks handed to \ref encrypt and \ref decrypt should be sized accordingly.
+   */
+  virtual size_t get_block_size() = 0;
+
+  /**
+   * Encrypts data.
+   * The plaintext is the range <in_ofs, in_ofs+size) of \p input;
+   * \p stream_offset tells where that chunk sits in the overall stream.
+   * \p input and \p output may not be the same buffer.
+   *
+   * \param input         source buffer of data
+   * \param in_ofs        offset of chunk inside input
+   * \param size          size of chunk, must be chunk-aligned unless last part is processed
+   * \param output        destination buffer to encrypt to
+   * \param stream_offset location of <in_ofs,in_ofs+size) chunk in data stream, must be chunk-aligned
+   * \return true iff successfully encrypted
+   */
+  virtual bool encrypt(bufferlist& input,
+                       off_t in_ofs,
+                       size_t size,
+                       bufferlist& output,
+                       off_t stream_offset) = 0;
+
+  /**
+   * Decrypts data.
+   * The ciphertext is the range <in_ofs, in_ofs+size) of \p input;
+   * \p stream_offset tells where that chunk sits in the overall stream.
+   * \p input and \p output may not be the same buffer.
+   *
+   * \param input         source buffer of data
+   * \param in_ofs        offset of chunk inside input
+   * \param size          size of chunk, must be chunk-aligned unless last part is processed
+   * \param output        destination buffer to decrypt to
+   * \param stream_offset location of <in_ofs,in_ofs+size) chunk in data stream, must be chunk-aligned
+   * \return true iff successfully decrypted
+   */
+  virtual bool decrypt(bufferlist& input,
+                       off_t in_ofs,
+                       size_t size,
+                       bufferlist& output,
+                       off_t stream_offset) = 0;
+};
+
+/** Length of an AES-256 key in bytes (256 / 8). */
+static const size_t AES_256_KEYSIZE = 256 / 8;
+/**
+ * AES-256 ECB encryption of \p data_in into \p data_out (implemented elsewhere).
+ *
+ * Callers in rgw_crypt.cc pass an AES_256_KEYSIZE byte key, use
+ * AES_256_KEYSIZE for \p data_size, and treat a return value of true as
+ * success.
+ * NOTE(review): presumably data_in and data_out must both hold data_size
+ * bytes - confirm against the implementation.
+ */
+bool AES_256_ECB_encrypt(CephContext* cct,
+ const uint8_t* key,
+ size_t key_size,
+ const uint8_t* data_in,
+ uint8_t* data_out,
+ size_t data_size);
+
+/**
+ * GetObj filter that owns a BlockCrypt and sits in front of another
+ * RGWGetObj_Filter.
+ * NOTE(review): presumably decrypts the data passing through handle_data()
+ * before forwarding it to the next filter - the implementation is not part
+ * of this header.
+ */
+class RGWGetObj_BlockDecrypt : public RGWGetObj_Filter {
+ CephContext* cct;
+
+ std::unique_ptr<BlockCrypt> crypt; /**< already configured stateless BlockCrypt
+ for operations when enough data is accumulated */
+ off_t enc_begin_skip; /**< amount of data to skip from beginning of received data */
+ off_t ofs; /**< stream offset of data we expect to show up next through \ref handle_data */
+ off_t end; /**< stream offset of last byte that is requested */
+ bufferlist cache; /**< stores extra data that could not (yet) be processed by BlockCrypt */
+ size_t block_size; /**< snapshot of \ref BlockCrypt.get_block_size() */
+
+ int process(bufferlist& cipher, size_t part_ofs, size_t size);
+
+protected:
+ std::vector<size_t> parts_len; /**< size of parts of multipart object, parsed from manifest */
+public:
+ /** Takes ownership of \p crypt; \p next is the downstream filter. */
+ RGWGetObj_BlockDecrypt(CephContext* cct,
+ RGWGetObj_Filter* next,
+ std::unique_ptr<BlockCrypt> crypt);
+ virtual ~RGWGetObj_BlockDecrypt();
+
+ /* RGWGetObj_Filter overrides */
+ virtual int fixup_range(off_t& bl_ofs,
+ off_t& bl_end) override;
+ virtual int handle_data(bufferlist& bl,
+ off_t bl_ofs,
+ off_t bl_len) override;
+ virtual int flush() override;
+
+ /* NOTE(review): presumably fills parts_len from the encoded manifest - confirm in the .cc */
+ int read_manifest(bufferlist& manifest_bl);
+}; /* RGWGetObj_BlockDecrypt */
+
+
+/**
+ * Put-object pipe stage that owns a BlockCrypt and sits in front of another
+ * rgw::putobj::DataProcessor.
+ * NOTE(review): presumably encrypts the data given to process() before
+ * passing it on - the implementation is not part of this header.
+ */
+class RGWPutObj_BlockEncrypt : public rgw::putobj::Pipe
+{
+ CephContext* cct;
+ std::unique_ptr<BlockCrypt> crypt; /**< already configured stateless BlockCrypt
+ for operations when enough data is accumulated */
+ bufferlist cache; /**< stores extra data that could not (yet) be processed by BlockCrypt */
+ const size_t block_size; /**< snapshot of \ref BlockCrypt.get_block_size() */
+public:
+ /** Takes ownership of \p crypt; \p next is the downstream processor. */
+ RGWPutObj_BlockEncrypt(CephContext* cct,
+ rgw::putobj::DataProcessor *next,
+ std::unique_ptr<BlockCrypt> crypt);
+
+ int process(bufferlist&& data, uint64_t logical_offset) override;
+}; /* RGWPutObj_BlockEncrypt */
+
+
+/**
+ * Chooses the server-side encryption mode for an object being written
+ * (SSE-C, SSE-KMS or the configured default key), records it in \p attrs and,
+ * when \p block_crypt is non-null, hands back a configured cipher.
+ * \p parts selects POST form parts as the attribute source; pass nullptr to
+ * read HTTP headers instead. Returns 0 on success or when no encryption
+ * applies, a negative error code otherwise.
+ */
+int rgw_s3_prepare_encrypt(struct req_state* s,
+ std::map<std::string, ceph::bufferlist>& attrs,
+ std::map<std::string,
+ RGWPostObj_ObjStore::post_form_part,
+ const ltstr_nocase>* parts,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string,
+ std::string>& crypt_http_responses);
+
+/**
+ * Sets up decryption for an object being read according to the encryption
+ * mode stored in \p attrs and, when \p block_crypt is non-null, hands back a
+ * configured cipher. Returns 0 on success or when the object is not
+ * encrypted, a negative error code otherwise.
+ */
+int rgw_s3_prepare_decrypt(struct req_state* s,
+ std::map<std::string, ceph::bufferlist>& attrs,
+ std::unique_ptr<BlockCrypt>* block_crypt,
+ std::map<std::string,
+ std::string>& crypt_http_responses);
+
+#endif
diff --git a/src/rgw/rgw_crypt_sanitize.cc b/src/rgw/rgw_crypt_sanitize.cc
new file mode 100644
index 00000000..776f1376
--- /dev/null
+++ b/src/rgw/rgw_crypt_sanitize.cc
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * rgw_crypt_sanitize.cc
+ *
+ * Created on: Mar 3, 2017
+ * Author: adam
+ */
+
+#include "rgw_common.h"
+#include "rgw_crypt_sanitize.h"
+#include "boost/algorithm/string/predicate.hpp"
+
+namespace rgw {
+namespace crypt_sanitize {
+/* Names that mark a value as carrying the SSE-C customer key, in the
+ * spellings the stream operators in this file compare against. */
+const char* HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY = "HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY";
+const char* x_amz_server_side_encryption_customer_key = "x-amz-server-side-encryption-customer-key";
+const char* dollar_x_amz_server_side_encryption_customer_key = "$x-amz-server-side-encryption-customer-key";
+/* Text written to the log in place of a suppressed value. */
+const char* suppression_message = "=suppressed due to key presence=";
+
+/*
+ * Prints the value of an environment variable. With rgw_crypt_suppress_logs
+ * enabled, the value is replaced by the suppression message when the variable
+ * is the customer key header itself, or is QUERY_STRING and mentions the
+ * customer key parameter (both compared case-insensitively).
+ */
+std::ostream& operator<<(std::ostream& out, const env& e) {
+  if (g_ceph_context->_conf->rgw_crypt_suppress_logs) {
+    const bool is_key_header = boost::algorithm::iequals(
+        e.name,
+        HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY);
+    if (is_key_header ||
+        (boost::algorithm::iequals(e.name, "QUERY_STRING") &&
+         boost::algorithm::ifind_first(
+             e.value,
+             x_amz_server_side_encryption_customer_key))) {
+      return out << suppression_message;
+    }
+  }
+  return out << e.value;
+}
+
+/*
+ * Prints the value of an x-amz meta entry, or the suppression message when
+ * rgw_crypt_suppress_logs is enabled and the entry is the customer key.
+ */
+std::ostream& operator<<(std::ostream& out, const x_meta_map& x) {
+  const bool hide =
+      g_ceph_context->_conf->rgw_crypt_suppress_logs &&
+      boost::algorithm::iequals(x.name, x_amz_server_side_encryption_customer_key);
+  if (hide) {
+    return out << suppression_message;
+  }
+  return out << x.value;
+}
+
+/*
+ * Prints the value of an S3 policy variable, or the suppression message when
+ * rgw_crypt_suppress_logs is enabled and the variable is the customer key
+ * (named with a leading '$').
+ */
+std::ostream& operator<<(std::ostream& out, const s3_policy& x) {
+  const bool hide =
+      g_ceph_context->_conf->rgw_crypt_suppress_logs &&
+      boost::algorithm::iequals(x.name, dollar_x_amz_server_side_encryption_customer_key);
+  if (hide) {
+    return out << suppression_message;
+  }
+  return out << x.value;
+}
+
+/*
+ * Prints an auth string, or the suppression message when
+ * rgw_crypt_suppress_logs is enabled and the request carries the customer key
+ * header.
+ */
+std::ostream& operator<<(std::ostream& out, const auth& x) {
+  const bool hide =
+      g_ceph_context->_conf->rgw_crypt_suppress_logs &&
+      x.s->info.env->get(HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY, nullptr) != nullptr;
+  if (hide) {
+    return out << suppression_message;
+  }
+  return out << x.value;
+}
+
+/*
+ * Prints a log buffer, or the suppression message when
+ * rgw_crypt_suppress_logs is enabled and the buffer mentions the customer key
+ * header name (case-insensitive search).
+ */
+std::ostream& operator<<(std::ostream& out, const log_content& x) {
+  const bool hide =
+      g_ceph_context->_conf->rgw_crypt_suppress_logs &&
+      boost::algorithm::ifind_first(x.buf, x_amz_server_side_encryption_customer_key);
+  if (hide) {
+    return out << suppression_message;
+  }
+  return out << x.buf;
+}
+
+}
+}
diff --git a/src/rgw/rgw_crypt_sanitize.h b/src/rgw/rgw_crypt_sanitize.h
new file mode 100644
index 00000000..548c1240
--- /dev/null
+++ b/src/rgw/rgw_crypt_sanitize.h
@@ -0,0 +1,71 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_RGW_CRYPT_SANITIZE_H_
+#define RGW_RGW_CRYPT_SANITIZE_H_
+
+#include <boost/utility/string_view.hpp>
+
+#include "rgw_common.h"
+
+namespace rgw {
+namespace crypt_sanitize {
+
+/*
+ * Short-lived wrapper around an environment variable (name and value); when
+ * streamed, the value is withheld if the variable may contain the secret key.
+ */
+struct env {
+  boost::string_ref name;
+  boost::string_ref value;
+
+  env(boost::string_ref n, boost::string_ref v)
+    : name(n), value(v) {}
+};
+
+/*
+ * Short-lived wrapper around an aws meta attribute (name and value); when
+ * streamed, the value is withheld if the attribute is the secret key.
+ */
+struct x_meta_map {
+  boost::string_ref name;
+  boost::string_ref value;
+
+  x_meta_map(boost::string_ref n, boost::string_ref v)
+    : name(n), value(v) {}
+};
+
+/*
+ * Short-lived wrapper around a variable used in s3_policy calculation (name
+ * and value); when streamed, the value is withheld if it is the secret key.
+ */
+struct s3_policy {
+  boost::string_ref name;
+  boost::string_ref value;
+
+  s3_policy(boost::string_ref n, boost::string_ref v)
+    : name(n), value(v) {}
+};
+
+/*
+ * Short-lived wrapper around an auth string together with the request it
+ * belongs to; when streamed, the string is withheld if the request carries
+ * the secret key.
+ */
+struct auth {
+  const req_state* const s;
+  boost::string_ref value;
+
+  auth(const req_state* const req, boost::string_ref v)
+    : s(req), value(v) {}
+};
+
+/*
+ * Short-lived wrapper around a log line produced by civetweb; when streamed,
+ * the line is withheld if it may contain the secret key.
+ */
+struct log_content {
+  const boost::string_view buf;
+
+  explicit log_content(const boost::string_view content)
+    : buf(content) {}
+};
+
+/*
+ * Stream operators for the wrappers above; defined in rgw_crypt_sanitize.cc.
+ */
+std::ostream& operator<<(std::ostream& out, const env& e);
+std::ostream& operator<<(std::ostream& out, const x_meta_map& x);
+std::ostream& operator<<(std::ostream& out, const s3_policy& x);
+std::ostream& operator<<(std::ostream& out, const auth& x);
+std::ostream& operator<<(std::ostream& out, const log_content& x);
+}
+}
+#endif /* RGW_RGW_CRYPT_SANITIZE_H_ */
diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc
new file mode 100644
index 00000000..3f70ff84
--- /dev/null
+++ b/src/rgw/rgw_data_sync.cc
@@ -0,0 +1,3709 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/utility/string_ref.hpp>
+
+#include "common/ceph_json.h"
+#include "common/RWLock.h"
+#include "common/RefCountedObj.h"
+#include "common/WorkQueue.h"
+#include "common/Throttle.h"
+#include "common/errno.h"
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_sync.h"
+#include "rgw_data_sync.h"
+#include "rgw_rest_conn.h"
+#include "rgw_cr_rados.h"
+#include "rgw_cr_rest.h"
+#include "rgw_http_client.h"
+#include "rgw_bucket.h"
+#include "rgw_metadata.h"
+#include "rgw_sync_counters.h"
+#include "rgw_sync_module.h"
+#include "rgw_sync_log_trim.h"
+
+#include "cls/lock/cls_lock_client.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sync_modules.h"
+
+#include "include/random.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "data sync: ")
+
+// Object-name prefixes used by data sync. In this part of the file only
+// datalog_sync_full_sync_index_prefix is consumed directly (by
+// full_data_sync_index_shard_oid() and RGWListBucketIndexesCR); the others
+// are presumably used by the status managers' oid helpers -- TODO confirm.
+static string datalog_sync_status_oid_prefix = "datalog.sync-status";
+static string datalog_sync_status_shard_prefix = "datalog.sync-status.shard";
+static string datalog_sync_full_sync_index_prefix = "data.full-sync.index";
+static string bucket_status_oid_prefix = "bucket.sync-status";
+// NOTE(review): same value as bucket_status_oid_prefix -- confirm this is
+// intentional before changing it, since the value ends up in object names.
+static string object_status_oid_prefix = "bucket.sync-status";
+
+
+// Decode datalog info from JSON. Note the field naming: the JSON key is
+// "num_objects" while the value is stored in num_shards.
+void rgw_datalog_info::decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("num_objects", num_shards, obj);
+}
+
+// Decode one datalog entry: the "key" field is stored as-is, the "timestamp"
+// field is decoded as a utime_t and converted to real_time.
+void rgw_datalog_entry::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("key", key, obj);
+
+  utime_t decoded_stamp;
+  JSONDecoder::decode_json("timestamp", decoded_stamp, obj);
+  timestamp = decoded_stamp.to_real_time();
+}
+
+// Decode one page of a datalog shard listing: the position marker, the
+// truncation flag and the list of entries.
+void rgw_datalog_shard_data::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("marker", marker, obj);
+  JSONDecoder::decode_json("truncated", truncated, obj);
+  JSONDecoder::decode_json("entries", entries, obj);
+}
+
+/*
+ * Shard-collect coroutine that reads the rgw_data_sync_marker of every data
+ * sync shard in [0, num_shards) into the caller-owned `markers` map.
+ * MAX_CONCURRENT_SHARDS is handed to the RGWShardCollectCR base.
+ */
+class RGWReadDataSyncStatusMarkersCR : public RGWShardCollectCR {
+  static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+  RGWDataSyncEnv *env;
+  const int num_shards;
+  int shard_id = 0;  // next shard to spawn a read for
+
+  // filled in by the spawned reads; must outlive this coroutine
+  map<uint32_t, rgw_data_sync_marker>& markers;
+
+public:
+  RGWReadDataSyncStatusMarkersCR(RGWDataSyncEnv *env, int num_shards,
+                                 map<uint32_t, rgw_data_sync_marker>& markers)
+    : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS),
+      env(env),
+      num_shards(num_shards),
+      markers(markers)
+  {}
+
+  bool spawn_next() override;
+};
+
+// Spawn the marker read for the next shard. Returns false once every shard
+// in [0, num_shards) has been spawned, true otherwise.
+bool RGWReadDataSyncStatusMarkersCR::spawn_next()
+{
+  const bool all_spawned = (shard_id >= num_shards);
+  if (all_spawned) {
+    return false;
+  }
+
+  using ReadMarkerCR = RGWSimpleRadosReadCR<rgw_data_sync_marker>;
+
+  // the shard status object lives in the zone's log pool
+  const rgw_raw_obj shard_obj(env->store->svc.zone->get_zone_params().log_pool,
+                              RGWDataSyncStatusManager::shard_obj_name(env->source_zone, shard_id));
+
+  spawn(new ReadMarkerCR(env->async_rados, env->store->svc.sysobj,
+                         shard_obj, &markers[shard_id]),
+        false);
+
+  ++shard_id;
+  return true;
+}
+
+/*
+ * Shard-collect coroutine that lists up to max_entries omap keys from each
+ * shard's ".retry" object (see spawn_next()), storing one result per shard
+ * in the caller-owned `omapkeys` vector.
+ */
+class RGWReadDataSyncRecoveringShardsCR : public RGWShardCollectCR {
+ // handed to the RGWShardCollectCR base constructor
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+ RGWDataSyncEnv *env;
+
+ uint64_t max_entries;
+ int num_shards;
+ // next shard to spawn a listing for
+ int shard_id{0};
+
+ // never assigned in this class, so every listing starts from an empty marker
+ string marker;
+ // indexed by shard id in spawn_next(); the caller must size it to at least
+ // num_shards and keep it alive while this coroutine runs
+ std::vector<RGWRadosGetOmapKeysCR::ResultPtr>& omapkeys;
+
+ public:
+ RGWReadDataSyncRecoveringShardsCR(RGWDataSyncEnv *env, uint64_t _max_entries, int _num_shards,
+ std::vector<RGWRadosGetOmapKeysCR::ResultPtr>& omapkeys)
+ : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS), env(env),
+ max_entries(_max_entries), num_shards(_num_shards), omapkeys(omapkeys)
+ {}
+ bool spawn_next() override;
+};
+
+// Spawn the omap-key listing of the next shard's ".retry" object. Returns
+// false once every shard in [0, num_shards) has been spawned.
+bool RGWReadDataSyncRecoveringShardsCR::spawn_next()
+{
+  if (shard_id >= num_shards) {
+    return false;
+  }
+
+  const string retry_oid =
+    RGWDataSyncStatusManager::shard_obj_name(env->source_zone, shard_id) + ".retry";
+
+  // allocate this shard's result slot before handing it to the listing CR
+  auto& slot = omapkeys[shard_id];
+  slot = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
+
+  const rgw_raw_obj retry_obj(env->store->svc.zone->get_zone_params().log_pool, retry_oid);
+  spawn(new RGWRadosGetOmapKeysCR(env->store, retry_obj, marker, max_entries, slot),
+        false);
+
+  shard_id++;
+  return true;
+}
+
+/*
+ * Coroutine that fills a caller-owned rgw_data_sync_status: the sync info
+ * first, then the per-shard markers (see operate()).
+ */
+class RGWReadDataSyncStatusCoroutine : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+ // output; not owned
+ rgw_data_sync_status *sync_status;
+
+public:
+ RGWReadDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
+ rgw_data_sync_status *_status)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), sync_status(_status)
+ {}
+ int operate() override;
+};
+
+// Reads the data sync status in two steps: the rgw_data_sync_info object,
+// then one marker per shard (sync_info.num_shards of them). A failure in
+// either step is logged at level 4 and ends the coroutine with that error.
+int RGWReadDataSyncStatusCoroutine::operate()
+{
+ reenter(this) {
+ // read sync info
+ using ReadInfoCR = RGWSimpleRadosReadCR<rgw_data_sync_info>;
+ yield {
+ bool empty_on_enoent = false; // fail on ENOENT
+ call(new ReadInfoCR(sync_env->async_rados, sync_env->store->svc.sysobj,
+ rgw_raw_obj(sync_env->store->svc.zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sync_env->source_zone)),
+ &sync_status->sync_info, empty_on_enoent));
+ }
+ if (retcode < 0) {
+ ldout(sync_env->cct, 4) << "failed to read sync status info with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // read shard markers
+ // (uses the num_shards value that was just read into sync_info)
+ using ReadMarkersCR = RGWReadDataSyncStatusMarkersCR;
+ yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards,
+ sync_status->sync_markers));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 4) << "failed to read sync status markers with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/*
+ * Coroutine that issues GET /admin/log/?type=data&id=<shard_id>&info over
+ * sync_env->conn and decodes the response into *shard_info.
+ */
+class RGWReadRemoteDataLogShardInfoCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+
+ // in-flight REST read; reference dropped in the destructor
+ RGWRESTReadResource *http_op;
+
+ int shard_id;
+ // output; not owned
+ RGWDataChangesLogInfo *shard_info;
+
+public:
+ RGWReadRemoteDataLogShardInfoCR(RGWDataSyncEnv *_sync_env,
+ int _shard_id, RGWDataChangesLogInfo *_shard_info) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ http_op(NULL),
+ shard_id(_shard_id),
+ shard_info(_shard_info) {
+ }
+
+ ~RGWReadRemoteDataLogShardInfoCR() override {
+ if (http_op) {
+ http_op->put();
+ }
+ }
+
+ int operate() override {
+ reenter(this) {
+ // step 1: build and send the request, then block until IO completes
+ yield {
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", shard_id);
+ rgw_http_param_pair pairs[] = { { "type" , "data" },
+ { "id", buf },
+ { "info" , NULL },
+ { NULL, NULL } };
+
+ string p = "/admin/log/";
+
+ http_op = new RGWRESTReadResource(sync_env->conn, p, pairs, NULL, sync_env->http_manager);
+
+ init_new_io(http_op);
+
+ int ret = http_op->aio_read();
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to read from " << p << dendl;
+ log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+ return set_cr_error(ret);
+ }
+
+ return io_block(0);
+ }
+ // step 2: collect the response and decode it into shard_info
+ yield {
+ int ret = http_op->wait(shard_info);
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ return set_cr_done();
+ }
+ }
+ return 0;
+ }
+};
+
+// Decoded body of a remote data log shard listing: the next marker, whether
+// more entries remain, and the entries themselves.
+struct read_remote_data_log_response {
+  string marker;
+  bool truncated{false};
+  list<rgw_data_change_log_entry> entries;
+
+  read_remote_data_log_response() {}
+
+  void decode_json(JSONObj *obj)
+  {
+    JSONDecoder::decode_json("marker", marker, obj);
+    JSONDecoder::decode_json("truncated", truncated, obj);
+    JSONDecoder::decode_json("entries", entries, obj);
+  }
+};
+
+/*
+ * Coroutine that reads one page of a remote data log shard
+ * (GET /admin/log/?type=data&id=<shard_id>&marker=<marker>&extra-info=true)
+ * and hands the entries, next marker and truncation flag back through the
+ * caller-owned output pointers.
+ */
+class RGWReadRemoteDataLogShardCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+
+ // in-flight REST read; reference dropped in the destructor
+ RGWRESTReadResource *http_op = nullptr;
+
+ int shard_id;
+ // held by reference: the caller's string must stay alive until the request
+ // has been built in operate()
+ const std::string& marker;
+ // outputs; not owned
+ string *pnext_marker;
+ list<rgw_data_change_log_entry> *entries;
+ bool *truncated;
+
+ read_remote_data_log_response response;
+ // engaged only while the request is in flight and counters are configured
+ std::optional<PerfGuard> timer;
+
+public:
+ RGWReadRemoteDataLogShardCR(RGWDataSyncEnv *_sync_env, int _shard_id,
+ const std::string& marker, string *pnext_marker,
+ list<rgw_data_change_log_entry> *_entries,
+ bool *_truncated)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ shard_id(_shard_id), marker(marker), pnext_marker(pnext_marker),
+ entries(_entries), truncated(_truncated) {
+ }
+ ~RGWReadRemoteDataLogShardCR() override {
+ if (http_op) {
+ http_op->put();
+ }
+ }
+
+ int operate() override {
+ reenter(this) {
+ // step 1: build and send the request, then block until IO completes
+ yield {
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", shard_id);
+ rgw_http_param_pair pairs[] = { { "type" , "data" },
+ { "id", buf },
+ { "marker", marker.c_str() },
+ { "extra-info", "true" },
+ { NULL, NULL } };
+
+ string p = "/admin/log/";
+
+ http_op = new RGWRESTReadResource(sync_env->conn, p, pairs, NULL, sync_env->http_manager);
+
+ init_new_io(http_op);
+
+ if (sync_env->counters) {
+ timer.emplace(sync_env->counters, sync_counters::l_poll);
+ }
+ int ret = http_op->aio_read();
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to read from " << p << dendl;
+ log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+ if (sync_env->counters) {
+ sync_env->counters->inc(sync_counters::l_poll_err);
+ }
+ return set_cr_error(ret);
+ }
+
+ return io_block(0);
+ }
+ // step 2: collect the response and publish it to the caller
+ yield {
+ timer.reset();
+ int ret = http_op->wait(&response);
+ if (ret < 0) {
+ // ENOENT is still returned as an error but is not counted as a poll error
+ if (sync_env->counters && ret != -ENOENT) {
+ sync_env->counters->inc(sync_counters::l_poll_err);
+ }
+ return set_cr_error(ret);
+ }
+ entries->clear();
+ entries->swap(response.entries);
+ *pnext_marker = response.marker;
+ *truncated = response.truncated;
+ return set_cr_done();
+ }
+ }
+ return 0;
+ }
+};
+
+/*
+ * Shard-collect coroutine that spawns one RGWReadRemoteDataLogShardInfoCR
+ * per shard in [0, num_shards), storing each result in the caller-owned
+ * *datalog_info map keyed by shard id.
+ */
+class RGWReadRemoteDataLogInfoCR : public RGWShardCollectCR {
+ RGWDataSyncEnv *sync_env;
+
+ int num_shards;
+ // output; not owned
+ map<int, RGWDataChangesLogInfo> *datalog_info;
+
+ // next shard to spawn a read for
+ int shard_id;
+// handed to the RGWShardCollectCR base constructor; being a macro, it stays
+// visible for the rest of the translation unit
+#define READ_DATALOG_MAX_CONCURRENT 10
+
+public:
+ RGWReadRemoteDataLogInfoCR(RGWDataSyncEnv *_sync_env,
+ int _num_shards,
+ map<int, RGWDataChangesLogInfo> *_datalog_info) : RGWShardCollectCR(_sync_env->cct, READ_DATALOG_MAX_CONCURRENT),
+ sync_env(_sync_env), num_shards(_num_shards),
+ datalog_info(_datalog_info), shard_id(0) {}
+ bool spawn_next() override;
+};
+
+// Spawn the info read for the next shard; returns false once all shards in
+// [0, num_shards) have been spawned.
+bool RGWReadRemoteDataLogInfoCR::spawn_next()
+{
+  const bool shards_remaining = (shard_id < num_shards);
+  if (!shards_remaining) {
+    return false;
+  }
+
+  // operator[] creates the map slot that the spawned coroutine fills in
+  RGWDataChangesLogInfo *slot = &(*datalog_info)[shard_id];
+  spawn(new RGWReadRemoteDataLogShardInfoCR(sync_env, shard_id, slot), false);
+
+  ++shard_id;
+  return true;
+}
+
+/*
+ * Simple coroutine that lists up to max_entries entries of one remote data
+ * log shard via GET /admin/log/ (type=data), starting after `marker` when
+ * it is non-empty, and decodes the response into the caller-owned *result.
+ */
+class RGWListRemoteDataLogShardCR : public RGWSimpleCoroutine {
+  RGWDataSyncEnv *sync_env;
+  RGWRESTReadResource *http_op;
+
+  int shard_id;
+  string marker;
+  uint32_t max_entries;
+  rgw_datalog_shard_data *result;  // output; not owned
+
+public:
+  RGWListRemoteDataLogShardCR(RGWDataSyncEnv *env, int _shard_id,
+                              const string& _marker, uint32_t _max_entries,
+                              rgw_datalog_shard_data *_result)
+    : RGWSimpleCoroutine(env->store->ctx()),
+      sync_env(env),
+      http_op(NULL),
+      shard_id(_shard_id),
+      marker(_marker),
+      max_entries(_max_entries),
+      result(_result)
+  {}
+
+  // Build and send the listing request. Returns 0 when the request was
+  // sent, or the negative aio_read() error after dropping the http_op
+  // reference.
+  int send_request() override {
+    char shard_id_str[32];
+    snprintf(shard_id_str, sizeof(shard_id_str), "%d", shard_id);
+
+    char max_entries_str[32];
+    snprintf(max_entries_str, sizeof(max_entries_str), "%d", (int)max_entries);
+
+    // an empty marker is sent under an empty parameter name
+    const char *marker_param = "marker";
+    if (marker.empty()) {
+      marker_param = "";
+    }
+
+    rgw_http_param_pair pairs[] = { { "type", "data" },
+                                    { "id", shard_id_str },
+                                    { "max-entries", max_entries_str },
+                                    { marker_param, marker.c_str() },
+                                    { NULL, NULL } };
+
+    string p = "/admin/log/";
+
+    http_op = new RGWRESTReadResource(sync_env->conn, p, pairs, NULL, sync_env->http_manager);
+    init_new_io(http_op);
+
+    const int ret = http_op->aio_read();
+    if (ret >= 0) {
+      return 0;
+    }
+
+    ldout(sync_env->store->ctx(), 0) << "ERROR: failed to read from " << p << dendl;
+    log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+    http_op->put();
+    return ret;
+  }
+
+  // Collect the response into *result and release the request. -ENOENT is
+  // not reported as a failure; any other negative result is logged and
+  // returned.
+  int request_complete() override {
+    const int ret = http_op->wait(result);
+    http_op->put();
+
+    const bool ok = (ret >= 0) || (ret == -ENOENT);
+    if (ok) {
+      return 0;
+    }
+
+    ldout(sync_env->store->ctx(), 0) << "ERROR: failed to list remote datalog shard, ret=" << ret << dendl;
+    return ret;
+  }
+};
+
+/*
+ * Shard-collect coroutine that lists several remote data log shards, one
+ * RGWListRemoteDataLogShardCR per (shard id, marker) pair in `shards`.
+ * Results land in the caller-owned *result map keyed by shard id.
+ */
+class RGWListRemoteDataLogCR : public RGWShardCollectCR {
+ RGWDataSyncEnv *sync_env;
+
+ // shard id -> marker to list from; taken over from the caller's map
+ map<int, string> shards;
+ int max_entries_per_shard;
+ // output; not owned
+ map<int, rgw_datalog_shard_data> *result;
+
+ // next shard to spawn a listing for
+ map<int, string>::iterator iter;
+// handed to the RGWShardCollectCR base constructor
+#define READ_DATALOG_MAX_CONCURRENT 10
+
+public:
+ // Note: _shards is swapped into this object, so the caller's map holds the
+ // previous (empty) contents of `shards` after construction.
+ RGWListRemoteDataLogCR(RGWDataSyncEnv *_sync_env,
+ map<int, string>& _shards,
+ int _max_entries_per_shard,
+ map<int, rgw_datalog_shard_data> *_result) : RGWShardCollectCR(_sync_env->cct, READ_DATALOG_MAX_CONCURRENT),
+ sync_env(_sync_env), max_entries_per_shard(_max_entries_per_shard),
+ result(_result) {
+ shards.swap(_shards);
+ iter = shards.begin();
+ }
+ bool spawn_next() override;
+};
+
+// Spawn the listing for the next (shard id, marker) pair; returns false once
+// the whole `shards` map has been walked.
+bool RGWListRemoteDataLogCR::spawn_next()
+{
+  const bool exhausted = (iter == shards.end());
+  if (exhausted) {
+    return false;
+  }
+
+  const int shard = iter->first;
+  const string& shard_marker = iter->second;
+
+  // operator[] creates the result slot that the spawned coroutine fills in
+  rgw_datalog_shard_data *slot = &(*result)[shard];
+  spawn(new RGWListRemoteDataLogShardCR(sync_env, shard, shard_marker, max_entries_per_shard, slot), false);
+
+  ++iter;
+  return true;
+}
+
+/*
+ * Coroutine that (re)initializes the data sync status for a source zone.
+ * Under a lock on the sync status object it:
+ *  1. writes the initial sync info,
+ *  2. reads the current log position of every remote data log shard,
+ *  3. writes one sync marker per shard from those positions,
+ *  4. moves the state to StateBuildingFullSyncMaps and rewrites the info,
+ *  5. releases the lock.
+ */
+class RGWInitDataSyncStatusCoroutine : public RGWCoroutine {
+ // passed to the lock coroutine as its duration (presumably seconds -- TODO confirm)
+ static constexpr uint32_t lock_duration = 30;
+ RGWDataSyncEnv *sync_env;
+ RGWRados *store;
+ // reference into the zone params' log_pool; those must outlive this coroutine
+ const rgw_pool& pool;
+ const uint32_t num_shards;
+
+ string sync_status_oid;
+
+ string lock_name;
+ // random cookie identifying this lock holder
+ string cookie;
+ // in/out; not owned
+ rgw_data_sync_status *status;
+ // per-shard info read from the remote in step 2
+ map<int, RGWDataChangesLogInfo> shards_info;
+
+ RGWSyncTraceNodeRef tn;
+public:
+ RGWInitDataSyncStatusCoroutine(RGWDataSyncEnv *_sync_env, uint32_t num_shards,
+ uint64_t instance_id,
+ RGWSyncTraceNodeRef& _tn_parent,
+ rgw_data_sync_status *status)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), store(sync_env->store),
+ pool(store->svc.zone->get_zone_params().log_pool),
+ num_shards(num_shards), status(status),
+ tn(sync_env->sync_tracer->add_node(_tn_parent, "init_data_sync_status")) {
+ lock_name = "sync_lock";
+
+ status->sync_info.instance_id = instance_id;
+
+#define COOKIE_LEN 16
+ char buf[COOKIE_LEN + 1];
+
+ gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1);
+ cookie = buf;
+
+ sync_status_oid = RGWDataSyncStatusManager::sync_status_oid(sync_env->source_zone);
+
+ }
+
+ int operate() override {
+ int ret;
+ reenter(this) {
+ using LockCR = RGWSimpleRadosLockCR;
+ yield call(new LockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie, lock_duration));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid));
+ return set_cr_error(retcode);
+ }
+ using WriteInfoCR = RGWSimpleRadosWriteCR<rgw_data_sync_info>;
+ yield call(new WriteInfoCR(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj{pool, sync_status_oid},
+ status->sync_info));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode));
+ return set_cr_error(retcode);
+ }
+
+ /* take lock again, we just recreated the object */
+ yield call(new LockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie, lock_duration));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to take a lock on " << sync_status_oid));
+ return set_cr_error(retcode);
+ }
+
+ tn->log(10, "took lease");
+
+ /* fetch current position in logs */
+ yield {
+ // conn is only checked for existence here; the spawned reads go through
+ // sync_env, not through this pointer
+ RGWRESTConn *conn = store->svc.zone->get_zone_conn_by_id(sync_env->source_zone);
+ if (!conn) {
+ tn->log(0, SSTR("ERROR: connection to zone " << sync_env->source_zone << " does not exist!"));
+ return set_cr_error(-EIO);
+ }
+ for (uint32_t i = 0; i < num_shards; i++) {
+ spawn(new RGWReadRemoteDataLogShardInfoCR(sync_env, i, &shards_info[i]), true);
+ }
+ }
+ // wait for the spawned reads; the first failure puts the coroutine into
+ // the error state
+ while (collect(&ret, NULL)) {
+ if (ret < 0) {
+ tn->log(0, SSTR("ERROR: failed to read remote data log shards"));
+ return set_state(RGWCoroutine_Error);
+ }
+ yield;
+ }
+ // seed each shard's marker from the remote position and write it out
+ yield {
+ for (uint32_t i = 0; i < num_shards; i++) {
+ RGWDataChangesLogInfo& info = shards_info[i];
+ auto& marker = status->sync_markers[i];
+ marker.next_step_marker = info.marker;
+ marker.timestamp = info.last_update;
+ const auto& oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, i);
+ using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_data_sync_marker>;
+ spawn(new WriteMarkerCR(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj{pool, oid}, marker), true);
+ }
+ }
+ while (collect(&ret, NULL)) {
+ if (ret < 0) {
+ tn->log(0, SSTR("ERROR: failed to write data sync status markers"));
+ return set_state(RGWCoroutine_Error);
+ }
+ yield;
+ }
+
+ status->sync_info.state = rgw_data_sync_info::StateBuildingFullSyncMaps;
+ yield call(new WriteInfoCR(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj{pool, sync_status_oid},
+ status->sync_info));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to write sync status info with " << retcode));
+ return set_cr_error(retcode);
+ }
+ // the unlock result is not checked; the coroutine completes regardless
+ yield call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, store,
+ rgw_raw_obj{pool, sync_status_oid},
+ lock_name, cookie));
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// Fetch the remote data log info (GET /admin/log?type=data) into *log_info.
+// Returns 0 on success or the negative error from the REST call.
+int RGWRemoteDataLog::read_log_info(rgw_datalog_info *log_info)
+{
+  rgw_http_param_pair pairs[] = { { "type", "data" },
+                                  { NULL, NULL } };
+
+  const int r = sync_env.conn->get_json_resource("/admin/log", pairs, *log_info);
+  if (r >= 0) {
+    ldpp_dout(dpp, 20) << "remote datalog, num_shards=" << log_info->num_shards << dendl;
+    return 0;
+  }
+
+  ldpp_dout(dpp, 0) << "ERROR: failed to fetch datalog info" << dendl;
+  return r;
+}
+
+// Read the info of every source data log shard into *shards_info. The shard
+// count comes from read_log_info(); its error is returned unchanged.
+int RGWRemoteDataLog::read_source_log_shards_info(map<int, RGWDataChangesLogInfo> *shards_info)
+{
+  rgw_datalog_info remote_info;
+  if (const int r = read_log_info(&remote_info); r < 0) {
+    return r;
+  }
+
+  auto *read_cr = new RGWReadRemoteDataLogInfoCR(&sync_env, remote_info.num_shards, shards_info);
+  return run(read_cr);
+}
+
+// For each (shard id, marker) pair in shard_markers, list a single entry
+// (max entries per shard is 1) of the remote data log shard into *result.
+// shard_markers is taken by value because RGWListRemoteDataLogCR swaps the
+// map's contents into itself.
+int RGWRemoteDataLog::read_source_log_shards_next(map<int, string> shard_markers, map<int, rgw_datalog_shard_data> *result)
+{
+ return run(new RGWListRemoteDataLogCR(&sync_env, shard_markers, 1, result));
+}
+
+// Initialize the sync environment for the given source zone and, on the
+// first call only, start the HTTP manager and create the "data" trace node.
+// Returns 0 on success or the negative error from http_manager.start().
+int RGWRemoteDataLog::init(const string& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger,
+ RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& _sync_module,
+ PerfCounters* counters)
+{
+ // note: sync_env is re-initialized on every call, even when already initialized
+ sync_env.init(dpp, store->ctx(), store, _conn, async_rados, &http_manager, _error_logger,
+ _sync_tracer, _source_zone, _sync_module, counters);
+
+ if (initialized) {
+ return 0;
+ }
+
+ int ret = http_manager.start();
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+ return ret;
+ }
+
+ tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "data");
+
+ initialized = true;
+
+ return 0;
+}
+
+// Shut down by delegating to stop().
+void RGWRemoteDataLog::finish()
+{
+ stop();
+}
+
+// Read the data sync status into *sync_status using a private coroutine
+// manager and HTTP manager. Returns the coroutine's result, or the negative
+// error from starting the HTTP manager.
+int RGWRemoteDataLog::read_sync_status(rgw_data_sync_status *sync_status)
+{
+  // cannot run concurrently with run_sync(), so run in a separate manager
+  RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+  RGWHTTPManager local_http(store->ctx(), crs.get_completion_mgr());
+
+  if (const int start_ret = local_http.start(); start_ret < 0) {
+    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << start_ret << dendl;
+    return start_ret;
+  }
+
+  // work on a copy of the environment that points at the private HTTP manager
+  RGWDataSyncEnv env_copy = sync_env;
+  env_copy.http_manager = &local_http;
+
+  const int run_ret = crs.run(new RGWReadDataSyncStatusCoroutine(&env_copy, sync_status));
+  local_http.stop();
+  return run_ret;
+}
+
+/*
+ * Find the data sync shards that have entries pending in their ".retry"
+ * error repo.
+ *
+ * @param num_shards         number of shards to inspect; must be >= 0
+ * @param recovering_shards  receives the ids of shards whose retry object
+ *                           lists at least one omap key (only added to, and
+ *                           only when the call succeeds)
+ * @return 0 on success, -EINVAL for a negative num_shards, otherwise the
+ *         negative error from starting the HTTP manager or from the listing
+ *         coroutine
+ */
+int RGWRemoteDataLog::read_recovering_shards(const int num_shards, set<int>& recovering_shards)
+{
+  // a negative count would be converted to a huge unsigned size by
+  // vector::resize() below, so reject it up front
+  if (num_shards < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: invalid num_shards=" << num_shards << dendl;
+    return -EINVAL;
+  }
+
+  // cannot run concurrently with run_sync(), so run in a separate manager
+  RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+  RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr());
+  int ret = http_manager.start();
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+    return ret;
+  }
+  RGWDataSyncEnv sync_env_local = sync_env;
+  sync_env_local.http_manager = &http_manager;
+
+  // one result slot per shard; a single key is enough to tell whether the
+  // shard has anything to retry
+  std::vector<RGWRadosGetOmapKeysCR::ResultPtr> omapkeys;
+  omapkeys.resize(num_shards);
+  uint64_t max_entries{1};
+  ret = crs.run(new RGWReadDataSyncRecoveringShardsCR(&sync_env_local, max_entries, num_shards, omapkeys));
+  http_manager.stop();
+
+  if (ret == 0) {
+    for (int i = 0; i < num_shards; i++) {
+      // slots are allocated as shards are spawned; skip any left unset
+      if (omapkeys[i] && omapkeys[i]->entries.size() != 0) {
+        recovering_shards.insert(i);
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Initialize the data sync status for num_shards shards with a freshly
+// generated random instance id, using a private coroutine manager and HTTP
+// manager. Returns the coroutine's result, or the negative error from
+// starting the HTTP manager.
+int RGWRemoteDataLog::init_sync_status(int num_shards)
+{
+  rgw_data_sync_status new_status;
+  new_status.sync_info.num_shards = num_shards;
+
+  RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+  RGWHTTPManager local_http(store->ctx(), crs.get_completion_mgr());
+
+  if (const int start_ret = local_http.start(); start_ret < 0) {
+    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << start_ret << dendl;
+    return start_ret;
+  }
+
+  // work on a copy of the environment that points at the private HTTP manager
+  RGWDataSyncEnv env_copy = sync_env;
+  env_copy.http_manager = &local_http;
+
+  const auto instance_id = ceph::util::generate_random_number<uint64_t>();
+  const int run_ret = crs.run(
+    new RGWInitDataSyncStatusCoroutine(&env_copy, num_shards, instance_id, tn, &new_status));
+
+  local_http.stop();
+  return run_ret;
+}
+
+/*
+ * Build the object name of one full-sync index shard:
+ *   <datalog_sync_full_sync_index_prefix>.<source_zone>.<shard_id>
+ *
+ * Assembled with std::string rather than formatting into a runtime-sized
+ * stack array, which is a variable-length array and not standard C++.
+ */
+static string full_data_sync_index_shard_oid(const string& source_zone, int shard_id)
+{
+  string oid = datalog_sync_full_sync_index_prefix;
+  oid += '.';
+  oid += source_zone;
+  oid += '.';
+  oid += std::to_string(shard_id);
+  return oid;
+}
+
+// Decoded body of a remote metadata key listing: the position marker,
+// whether more keys remain, the keys of this page and the reported count.
+struct read_metadata_list {
+  string marker;
+  bool truncated{false};
+  list<string> keys;
+  int count{0};
+
+  read_metadata_list() {}
+
+  void decode_json(JSONObj *obj)
+  {
+    JSONDecoder::decode_json("marker", marker, obj);
+    JSONDecoder::decode_json("truncated", truncated, obj);
+    JSONDecoder::decode_json("keys", keys, obj);
+    JSONDecoder::decode_json("count", count, obj);
+  }
+};
+
+// Decoded bucket instance metadata entry as fetched from the remote
+// /admin/metadata/bucket.instance endpoint: key, version, mtime and the
+// bucket instance data itself.
+struct bucket_instance_meta_info {
+ string key;
+ obj_version ver;
+ utime_t mtime;
+ RGWBucketInstanceMetadataObject data;
+
+ bucket_instance_meta_info() {}
+
+ // Decode the "key", "ver", "mtime" and "data" fields from obj.
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("key", key, obj);
+ JSONDecoder::decode_json("ver", ver, obj);
+ JSONDecoder::decode_json("mtime", mtime, obj);
+ JSONDecoder::decode_json("data", data, obj);
+ }
+};
+
+/*
+ * Coroutine that builds the full-sync index for data sync.
+ *
+ * It pages through the remote "bucket.instance" metadata section, fetches
+ * each bucket instance's metadata, and appends one entry per bucket shard
+ * (or one entry for the whole bucket when it has no shards) to a sharded
+ * omap index. When the index was written successfully, each shard's sync
+ * marker gets its total_entries updated and is written back.
+ *
+ * Errors while listing keys or while fetching a bucket instance's metadata
+ * end the coroutine with that error. A failed metadata fetch used to be
+ * ignored, which indexed the key using the stale (or default) contents of
+ * meta_info left over from the previous iteration.
+ */
+class RGWListBucketIndexesCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+
+  RGWRados *store;
+
+  rgw_data_sync_status *sync_status;  // in/out; not owned
+  // first the number of datalog shards (sizes the omap index), later reused
+  // for the shard count of the bucket being processed
+  int num_shards;
+
+  int req_ret;  // last error seen while storing shard markers
+  int ret;
+
+  list<string>::iterator iter;
+
+  RGWShardedOmapCRManager *entries_index;  // owned; deleted in the destructor
+
+  string oid_prefix;
+
+  string path;
+  bucket_instance_meta_info meta_info;
+  string key;
+  string s;
+  int i;
+
+  bool failed;
+  bool truncated;
+  read_metadata_list result;
+
+public:
+  RGWListBucketIndexesCR(RGWDataSyncEnv *_sync_env,
+                         rgw_data_sync_status *_sync_status) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+                                                               store(sync_env->store), sync_status(_sync_status),
+                                                               req_ret(0), ret(0), entries_index(NULL), i(0), failed(false), truncated(false) {
+    oid_prefix = datalog_sync_full_sync_index_prefix + "." + sync_env->source_zone;
+    path = "/admin/metadata/bucket.instance";
+    num_shards = sync_status->sync_info.num_shards;
+  }
+  ~RGWListBucketIndexesCR() override {
+    delete entries_index;
+  }
+
+  int operate() override {
+    reenter(this) {
+      entries_index = new RGWShardedOmapCRManager(sync_env->async_rados, store, this, num_shards,
+                                                  store->svc.zone->get_zone_params().log_pool,
+                                                  oid_prefix);
+      yield; // yield so OmapAppendCRs can start
+
+      do {
+        // fetch the next page of bucket instance keys
+        yield {
+          string entrypoint = string("/admin/metadata/bucket.instance");
+
+          rgw_http_param_pair pairs[] = {{"max-entries", "1000"},
+                                         {"marker", result.marker.c_str()},
+                                         {NULL, NULL}};
+
+          call(new RGWReadRESTResourceCR<read_metadata_list>(store->ctx(), sync_env->conn, sync_env->http_manager,
+                                                             entrypoint, pairs, &result));
+        }
+        if (retcode < 0) {
+          ldout(sync_env->cct, 0) << "ERROR: failed to fetch metadata for section bucket.instance" << dendl;
+          return set_cr_error(retcode);
+        }
+
+        for (iter = result.keys.begin(); iter != result.keys.end(); ++iter) {
+          ldout(sync_env->cct, 20) << "list metadata: section=bucket.instance key=" << *iter << dendl;
+          key = *iter;
+
+          yield {
+            rgw_http_param_pair pairs[] = {{"key", key.c_str()},
+                                           {NULL, NULL}};
+
+            call(new RGWReadRESTResourceCR<bucket_instance_meta_info>(store->ctx(), sync_env->conn, sync_env->http_manager, path, pairs, &meta_info));
+          }
+          if (retcode < 0) {
+            // meta_info was not refreshed for this key; bail out rather than
+            // index it with whatever the previous iteration left behind
+            ldout(sync_env->cct, 0) << "ERROR: failed to fetch metadata for key: " << key << dendl;
+            return set_cr_error(retcode);
+          }
+
+          num_shards = meta_info.data.get_bucket_info().num_shards;
+          if (num_shards > 0) {
+            // sharded bucket: one "<key>:<shard>" entry per bucket shard
+            for (i = 0; i < num_shards; i++) {
+              char buf[16];
+              snprintf(buf, sizeof(buf), ":%d", i);
+              s = key + buf;
+              yield entries_index->append(s, store->data_log->get_log_shard_id(meta_info.data.get_bucket_info().bucket, i));
+            }
+          } else {
+            // unsharded bucket: a single entry under the plain key
+            yield entries_index->append(key, store->data_log->get_log_shard_id(meta_info.data.get_bucket_info().bucket, -1));
+          }
+        }
+        truncated = result.truncated;
+      } while (truncated);
+
+      yield {
+        if (!entries_index->finish()) {
+          failed = true;
+        }
+      }
+      if (!failed) {
+        // record the per-shard entry totals in the shard markers
+        for (map<uint32_t, rgw_data_sync_marker>::iterator iter = sync_status->sync_markers.begin(); iter != sync_status->sync_markers.end(); ++iter) {
+          int shard_id = (int)iter->first;
+          rgw_data_sync_marker& marker = iter->second;
+          marker.total_entries = entries_index->get_total_entries(shard_id);
+          spawn(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->async_rados, store->svc.sysobj,
+                                                                rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, shard_id)),
+                                                                marker),
+                true);
+        }
+      } else {
+        // NOTE(review): this path records the failure in the error log but
+        // leaves req_ret at 0, so the coroutine still completes as done --
+        // confirm whether that is intended.
+        yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data.init", "",
+                                                        EIO, string("failed to build bucket instances map")));
+      }
+      while (collect(&ret, NULL)) {
+        if (ret < 0) {
+          yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data.init", "",
+                                                          -ret, string("failed to store sync status: ") + cpp_strerror(-ret)));
+          req_ret = ret;
+        }
+        yield;
+      }
+
+      drain_all();
+      if (req_ret < 0) {
+        yield return set_cr_error(req_ret);
+      }
+      yield return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+// window size passed to the RGWSyncShardMarkerTrack base by
+// RGWDataSyncShardMarkerTrack
+#define DATA_SYNC_UPDATE_MARKER_WINDOW 1
+
+/*
+ * Marker tracker for one data sync shard. Besides persisting the shard's
+ * sync marker (store_marker()), it keeps a two-way key <-> marker index so
+ * that only one entry per key is in flight at a time.
+ */
+class RGWDataSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<string, string> {
+ RGWDataSyncEnv *sync_env;
+
+ string marker_oid;
+ rgw_data_sync_marker sync_marker;
+
+ map<string, string> key_to_marker;
+ map<string, string> marker_to_key;
+
+ // Drop the finished marker from both indexes and clear the retry flag of
+ // its key. Unknown markers are ignored.
+ void handle_finish(const string& marker) override {
+ map<string, string>::iterator iter = marker_to_key.find(marker);
+ if (iter == marker_to_key.end()) {
+ return;
+ }
+ key_to_marker.erase(iter->second);
+ reset_need_retry(iter->second);
+ marker_to_key.erase(iter);
+ }
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWDataSyncShardMarkerTrack(RGWDataSyncEnv *_sync_env,
+ const string& _marker_oid,
+ const rgw_data_sync_marker& _marker,
+ RGWSyncTraceNodeRef& _tn) : RGWSyncShardMarkerTrack(DATA_SYNC_UPDATE_MARKER_WINDOW),
+ sync_env(_sync_env),
+ marker_oid(_marker_oid),
+ sync_marker(_marker),
+ tn(_tn) {}
+
+ // Update the cached sync marker (marker, pos, timestamp) and return a
+ // coroutine that writes it to marker_oid in the zone's log pool.
+ RGWCoroutine *store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+ sync_marker.marker = new_marker;
+ sync_marker.pos = index_pos;
+ sync_marker.timestamp = timestamp;
+
+ tn->log(20, SSTR("updating marker marker_oid=" << marker_oid << " marker=" << new_marker));
+ RGWRados *store = sync_env->store;
+
+ return new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, marker_oid),
+ sync_marker);
+ }
+
+ /*
+ * create index from key -> marker, and from marker -> key
+ * this is useful so that we can ensure that we only have one
+ * entry for any key that is used. This is needed when doing
+ * incremental sync of data, and we don't want to run multiple
+ * concurrent sync operations for the same bucket shard
+ *
+ * returns false (and flags the key for retry) when the key is
+ * already indexed, true when the new mapping was recorded
+ */
+ bool index_key_to_marker(const string& key, const string& marker) {
+ if (key_to_marker.find(key) != key_to_marker.end()) {
+ set_need_retry(key);
+ return false;
+ }
+ key_to_marker[key] = marker;
+ marker_to_key[marker] = key;
+ return true;
+ }
+
+ // Caller takes the newly allocated order-control coroutine.
+ RGWOrderCallCR *allocate_order_control_cr() override {
+ return new RGWLastCallerWinsCR(sync_env->cct);
+ }
+};
+
+// ostream wrappers to print buckets without copying strings
+
+// Formats a bucket as "[tenant/]name[:bucket_id]"; holds only a reference,
+// so the bucket must outlive the wrapper.
+struct bucket_str {
+  const rgw_bucket& b;
+  explicit bucket_str(const rgw_bucket& b) : b(b) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_str& rhs)
+{
+  const rgw_bucket& bucket = rhs.b;
+
+  const bool has_tenant = !bucket.tenant.empty();
+  if (has_tenant) {
+    out << bucket.tenant << '/';
+  }
+
+  out << bucket.name;
+
+  const bool has_instance = !bucket.bucket_id.empty();
+  if (has_instance) {
+    out << ':' << bucket.bucket_id;
+  }
+  return out;
+}
+
+// Formats a bucket as "[tenant/]name", leaving out the bucket instance id.
+// Holds only a reference, so the bucket must outlive the wrapper.
+struct bucket_str_noinstance {
+  const rgw_bucket& b;
+  explicit bucket_str_noinstance(const rgw_bucket& b) : b(b) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_str_noinstance& rhs)
+{
+  const rgw_bucket& bucket = rhs.b;
+
+  const bool has_tenant = !bucket.tenant.empty();
+  if (has_tenant) {
+    out << bucket.tenant << '/';
+  }
+
+  out << bucket.name;
+  return out;
+}
+
+// Formats a bucket shard as the bucket_str form of its bucket, followed by
+// ":shard_id" when the shard id is non-negative. Holds only a reference.
+struct bucket_shard_str {
+  const rgw_bucket_shard& bs;
+  explicit bucket_shard_str(const rgw_bucket_shard& bs) : bs(bs) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_shard_str& rhs)
+{
+  const rgw_bucket_shard& shard = rhs.bs;
+
+  out << bucket_str{shard.bucket};
+
+  const bool has_shard_id = (shard.shard_id >= 0);
+  if (has_shard_id) {
+    out << ':' << shard.shard_id;
+  }
+  return out;
+}
+
+/*
+ * Coroutine that runs sync for a single bucket shard. Only the declaration
+ * lives here; operate() is defined out of line.
+ */
+class RGWRunBucketSyncCoroutine : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+ rgw_bucket_shard bs;
+ RGWBucketInfo bucket_info;
+ rgw_bucket_shard_sync_info sync_status;
+ // default-constructed; not set up by the constructor below
+ RGWMetaSyncEnv meta_sync_env;
+
+ // sync status object name for (source zone, bucket shard)
+ const std::string status_oid;
+
+ boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+ boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWRunBucketSyncCoroutine(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs, const RGWSyncTraceNodeRef& _tn_parent)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs),
+ status_oid(RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs)),
+ tn(sync_env->sync_tracer->add_node(_tn_parent, "bucket",
+ SSTR(bucket_shard_str{bs}))) {
+ }
+ // abort the lease coroutine if one is still held at destruction
+ ~RGWRunBucketSyncCoroutine() override {
+ if (lease_cr) {
+ lease_cr->abort();
+ }
+ }
+
+ int operate() override;
+};
+
+/*
+ * Coroutine that syncs the bucket shard named by one data log entry.
+ *
+ * It parses raw_key into a bucket shard, runs RGWRunBucketSyncCoroutine for
+ * it (repeating while the marker tracker flags the key for retry), records
+ * failures in the sync error log and the error repo, removes the key from
+ * the error repo on success when asked to, and finally tells the marker
+ * tracker that entry_marker is finished.
+ */
+class RGWDataSyncSingleEntryCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+
+ string raw_key;
+ string entry_marker;
+
+ rgw_bucket_shard bs;
+
+ int sync_status;
+
+ bufferlist md_bl;
+
+ // may be null; every use below is guarded
+ RGWDataSyncShardMarkerTrack *marker_tracker;
+
+ // may be null; every use below is guarded
+ boost::intrusive_ptr<RGWOmapAppend> error_repo;
+ bool remove_from_repo;
+
+ set<string> keys;
+
+ RGWSyncTraceNodeRef tn;
+public:
+ RGWDataSyncSingleEntryCR(RGWDataSyncEnv *_sync_env,
+ const string& _raw_key, const string& _entry_marker, RGWDataSyncShardMarkerTrack *_marker_tracker,
+ RGWOmapAppend *_error_repo, bool _remove_from_repo, const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ raw_key(_raw_key), entry_marker(_entry_marker),
+ sync_status(0),
+ marker_tracker(_marker_tracker),
+ error_repo(_error_repo), remove_from_repo(_remove_from_repo) {
+ set_description() << "data sync single entry (source_zone=" << sync_env->source_zone << ") key=" <<_raw_key << " entry=" << entry_marker;
+ tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", raw_key);
+ }
+
+ int operate() override {
+ reenter(this) {
+ // run the bucket sync, repeating while the tracker flags the key for retry
+ do {
+ yield {
+ int ret = rgw_bucket_parse_bucket_key(sync_env->cct, raw_key,
+ &bs.bucket, &bs.shard_id);
+ if (ret < 0) {
+ // any parse failure is reported as -EIO
+ return set_cr_error(-EIO);
+ }
+ if (marker_tracker) {
+ marker_tracker->reset_need_retry(raw_key);
+ }
+ tn->log(0, SSTR("triggering sync of bucket/shard " << bucket_shard_str{bs}));
+ call(new RGWRunBucketSyncCoroutine(sync_env, bs, tn));
+ }
+ } while (marker_tracker && marker_tracker->need_retry(raw_key));
+
+ sync_status = retcode;
+
+ if (sync_status == -ENOENT) {
+ // this was added when 'tenant/' was added to datalog entries, because
+ // preexisting tenant buckets could never sync and would stay in the
+ // error_repo forever
+ tn->log(0, SSTR("WARNING: skipping data log entry for missing bucket " << raw_key));
+ sync_status = 0;
+ }
+
+ if (sync_status < 0) {
+ // write actual sync failures for 'radosgw-admin sync error list'
+ if (sync_status != -EBUSY && sync_status != -EAGAIN) {
+ yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data", raw_key,
+ -sync_status, string("failed to sync bucket instance: ") + cpp_strerror(-sync_status)));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to log sync failure: retcode=" << retcode));
+ }
+ }
+ // NOTE(review): append() reports failure through its return value, so
+ // the retcode logged below is whatever the previous call left behind
+ if (error_repo && !error_repo->append(raw_key)) {
+ tn->log(0, SSTR("ERROR: failed to log sync failure in error repo: retcode=" << retcode));
+ }
+ } else if (error_repo && remove_from_repo) {
+ // success: drop the key from the error repo
+ keys = {raw_key};
+ yield call(new RGWRadosRemoveOmapKeysCR(sync_env->store, error_repo->get_obj(), keys));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to remove omap key from error repo ("
+ << error_repo->get_obj() << " retcode=" << retcode));
+ }
+ }
+ /* FIXME: what to do in case of error */
+ if (marker_tracker && !entry_marker.empty()) {
+ /* update marker */
+ yield call(marker_tracker->finish(entry_marker));
+ }
+ // when the sync itself succeeded, surface the result of the last call made above
+ if (sync_status == 0) {
+ sync_status = retcode;
+ }
+ if (sync_status < 0) {
+ return set_cr_error(sync_status);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+#define BUCKET_SHARD_SYNC_SPAWN_WINDOW 20
+#define DATA_SYNC_MAX_ERR_ENTRIES 10
+
+ /*
+  * Coroutine that syncs one datalog shard from sync_env->source_zone.
+  *
+  * operate() dispatches on sync_marker.state:
+  *  - FullSync: full_sync() takes the sync lease, lists the shard's full-sync
+  *    index object in batches, spawns an RGWDataSyncSingleEntryCR per key and
+  *    finally rewrites the shard marker with state IncrementalSync.
+  *  - IncrementalSync: incremental_sync() loops forever, processing
+  *    out-of-band notifications (append_modified_shards()), retrying keys
+  *    stored in the "<status_oid>.retry" error repo, and then entries read
+  *    from the remote datalog shard.
+  *
+  * full_sync() and incremental_sync() are stackless coroutines driven by
+  * their own boost::asio::coroutine state (full_cr / incremental_cr), so
+  * anything that must survive a yield is kept in a member.
+  */
+ class RGWDataSyncShardCR : public RGWCoroutine {
+   RGWDataSyncEnv *sync_env;
+
+   rgw_pool pool;
+
+   uint32_t shard_id;
+   rgw_data_sync_marker sync_marker;
+
+   // result of the most recent omap key listing (full sync index / error repo)
+   RGWRadosGetOmapKeysCR::ResultPtr omapkeys;
+   std::set<std::string> entries;
+   std::set<std::string>::iterator iter;
+
+   // full-sync index object for this shard; set in full_sync()
+   string oid;
+
+   // owned by this object; replaced through set_marker_tracker()
+   RGWDataSyncShardMarkerTrack *marker_tracker;
+
+   std::string next_marker;
+   list<rgw_data_change_log_entry> log_entries;
+   list<rgw_data_change_log_entry>::iterator log_iter;
+   bool truncated;
+
+   // inc_lock guards modified_shards
+   Mutex inc_lock;
+   Cond inc_cond;
+
+   boost::asio::coroutine incremental_cr;
+   boost::asio::coroutine full_cr;
+
+   // keys delivered by append_modified_shards(), swapped into
+   // current_modified at the top of each incremental pass
+   set<string> modified_shards;
+   set<string> current_modified;
+
+   set<string>::iterator modified_iter;
+
+   int total_entries;
+
+   // child stacks allowed in flight before waiting for one to finish
+   int spawn_window;
+
+   // pointer handed in by the creator; not dereferenced within this class
+   bool *reset_backoff;
+
+   boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+   boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
+   string status_oid;
+
+   // error repo: "<status_oid>.retry", created in incremental_sync()
+   string error_oid;
+   RGWOmapAppend *error_repo;
+   std::set<std::string> error_entries;
+   string error_marker;
+   int max_error_entries;
+
+   // earliest time at which the error repo is read again
+   ceph::coarse_real_time error_retry_time;
+
+ #define RETRY_BACKOFF_SECS_MIN 60
+ #define RETRY_BACKOFF_SECS_DEFAULT 60
+ #define RETRY_BACKOFF_SECS_MAX 600
+   uint32_t retry_backoff_secs;
+
+   RGWSyncTraceNodeRef tn;
+ public:
+   RGWDataSyncShardCR(RGWDataSyncEnv *_sync_env,
+                      rgw_pool& _pool,
+                      uint32_t _shard_id, const rgw_data_sync_marker& _marker,
+                      RGWSyncTraceNodeRef& _tn,
+                      bool *_reset_backoff) : RGWCoroutine(_sync_env->cct),
+                                              sync_env(_sync_env),
+                                              pool(_pool),
+                                              shard_id(_shard_id),
+                                              sync_marker(_marker),
+                                              marker_tracker(NULL), truncated(false), inc_lock("RGWDataSyncShardCR::inc_lock"),
+                                              // keep the caller's pointer instead of silently dropping it
+                                              total_entries(0), spawn_window(BUCKET_SHARD_SYNC_SPAWN_WINDOW), reset_backoff(_reset_backoff),
+                                              lease_cr(nullptr), lease_stack(nullptr), error_repo(nullptr), max_error_entries(DATA_SYNC_MAX_ERR_ENTRIES),
+                                              retry_backoff_secs(RETRY_BACKOFF_SECS_DEFAULT), tn(_tn) {
+     set_description() << "data sync shard source_zone=" << sync_env->source_zone << " shard_id=" << shard_id;
+     status_oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, shard_id);
+     error_oid = status_oid + ".retry";
+   }
+
+   ~RGWDataSyncShardCR() override {
+     delete marker_tracker;
+     if (lease_cr) {
+       lease_cr->abort();
+     }
+     if (error_repo) {
+       error_repo->put();
+     }
+   }
+
+   // Queues keys for the next incremental pass; takes inc_lock.
+   void append_modified_shards(set<string>& keys) {
+     Mutex::Locker l(inc_lock);
+     modified_shards.insert(keys.begin(), keys.end());
+   }
+
+   // Takes ownership of mt, deleting any previous tracker.
+   void set_marker_tracker(RGWDataSyncShardMarkerTrack *mt) {
+     delete marker_tracker;
+     marker_tracker = mt;
+   }
+
+   // Runs full_sync() or incremental_sync() according to sync_marker.state.
+   // A negative result fails the coroutine (logged unless it is -EBUSY);
+   // an unknown state fails with -EIO.
+   int operate() override {
+     int r;
+     while (true) {
+       switch (sync_marker.state) {
+       case rgw_data_sync_marker::FullSync:
+         r = full_sync();
+         if (r < 0) {
+           if (r != -EBUSY) {
+             tn->log(10, SSTR("full sync failed (r=" << r << ")"));
+           }
+           return set_cr_error(r);
+         }
+         return 0;
+       case rgw_data_sync_marker::IncrementalSync:
+         r = incremental_sync();
+         if (r < 0) {
+           if (r != -EBUSY) {
+             tn->log(10, SSTR("incremental sync failed (r=" << r << ")"));
+           }
+           return set_cr_error(r);
+         }
+         return 0;
+       default:
+         return set_cr_error(-EIO);
+       }
+     }
+     return 0;
+   }
+
+   // Aborts any existing lease coroutine and spawns a new
+   // RGWContinuousLeaseCR for "sync_lock" on the shard status object.
+   void init_lease_cr() {
+     set_status("acquiring sync lock");
+     uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+     string lock_name = "sync_lock";
+     if (lease_cr) {
+       lease_cr->abort();
+     }
+     RGWRados *store = sync_env->store;
+     lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
+                                             rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, status_oid),
+                                             lock_name, lock_duration, this));
+     lease_stack.reset(spawn(lease_cr.get(), false));
+   }
+
+   // Full sync pass (see class comment). Fails with the lease status if the
+   // lease cannot be taken, -ECANCELED if the lease is lost, or the retcode
+   // of a failed omap listing / marker write.
+   int full_sync() {
+ #define OMAP_GET_MAX_ENTRIES 100
+     int max_entries = OMAP_GET_MAX_ENTRIES;
+     reenter(&full_cr) {
+       tn->log(10, "start full sync");
+       yield init_lease_cr();
+       while (!lease_cr->is_locked()) {
+         if (lease_cr->is_done()) {
+           tn->log(5, "failed to take lease");
+           set_status("lease lock failed, early abort");
+           drain_all();
+           return set_cr_error(lease_cr->get_ret_status());
+         }
+         set_sleeping(true);
+         yield;
+       }
+       tn->log(10, "took lease");
+       oid = full_data_sync_index_shard_oid(sync_env->source_zone, shard_id);
+       set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker, tn));
+       total_entries = sync_marker.pos;
+       do {
+         if (!lease_cr->is_locked()) {
+           stop_spawned_services();
+           drain_all();
+           return set_cr_error(-ECANCELED);
+         }
+         omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
+         yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, oid),
+                                              sync_marker.marker, max_entries, omapkeys));
+         if (retcode < 0) {
+           tn->log(0, SSTR("ERROR: RGWRadosGetOmapKeysCR() returned ret=" << retcode));
+           lease_cr->go_down();
+           drain_all();
+           return set_cr_error(retcode);
+         }
+         entries = std::move(omapkeys->entries);
+         if (entries.size() > 0) {
+           tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+         }
+         tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync"));
+         iter = entries.begin();
+         for (; iter != entries.end(); ++iter) {
+           tn->log(20, SSTR("full sync: " << *iter));
+           total_entries++;
+           if (!marker_tracker->start(*iter, total_entries, real_time())) {
+             tn->log(0, SSTR("ERROR: cannot start syncing " << *iter << ". Duplicate entry?"));
+           } else {
+             // fetch remote and write locally
+             yield spawn(new RGWDataSyncSingleEntryCR(sync_env, *iter, *iter, marker_tracker, error_repo, false, tn), false);
+           }
+           sync_marker.marker = *iter;
+
+           // throttle: wait for children while too many are in flight
+           while ((int)num_spawned() > spawn_window) {
+             set_status() << "num_spawned() > spawn_window";
+             yield wait_for_child();
+             int ret;
+             while (collect(&ret, lease_stack.get())) {
+               if (ret < 0) {
+                 tn->log(10, "a sync operation returned error");
+               }
+             }
+           }
+         }
+       } while (omapkeys->more);
+       omapkeys.reset();
+
+       drain_all_but_stack(lease_stack.get());
+
+       tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+
+       yield {
+         /* update marker to reflect we're done with full sync */
+         sync_marker.state = rgw_data_sync_marker::IncrementalSync;
+         sync_marker.marker = sync_marker.next_step_marker;
+         sync_marker.next_step_marker.clear();
+         RGWRados *store = sync_env->store;
+         call(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>(sync_env->async_rados, store->svc.sysobj,
+                                                              rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, status_oid),
+                                                              sync_marker));
+       }
+       if (retcode < 0) {
+         tn->log(0, SSTR("ERROR: failed to set sync marker: retcode=" << retcode));
+         lease_cr->go_down();
+         drain_all();
+         return set_cr_error(retcode);
+       }
+       // keep lease and transition to incremental_sync()
+     }
+     return 0;
+   }
+
+   // Incremental sync loop (see class comment). Only returns on error:
+   // the lease status if the lease cannot be taken, -ECANCELED if the lease
+   // is lost, or the retcode of a failed remote datalog read (other than
+   // -ENOENT).
+   int incremental_sync() {
+     reenter(&incremental_cr) {
+       tn->log(10, "start incremental sync");
+       if (lease_cr) {
+         tn->log(10, "lease already held from full sync");
+       } else {
+         yield init_lease_cr();
+         while (!lease_cr->is_locked()) {
+           if (lease_cr->is_done()) {
+             tn->log(5, "failed to take lease");
+             set_status("lease lock failed, early abort");
+             drain_all();
+             return set_cr_error(lease_cr->get_ret_status());
+           }
+           set_sleeping(true);
+           yield;
+         }
+         set_status("lease acquired");
+         tn->log(10, "took lease");
+       }
+       error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store,
+                                      rgw_raw_obj(pool, error_oid),
+                                      1 /* no buffer */);
+       // extra ref released in stop_spawned_services() or the destructor
+       error_repo->get();
+       spawn(error_repo, false);
+       set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker, tn));
+       do {
+         if (!lease_cr->is_locked()) {
+           stop_spawned_services();
+           drain_all();
+           return set_cr_error(-ECANCELED);
+         }
+         current_modified.clear();
+         inc_lock.Lock();
+         current_modified.swap(modified_shards);
+         inc_lock.Unlock();
+
+         if (current_modified.size() > 0) {
+           tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+         }
+         /* process out of band updates */
+         for (modified_iter = current_modified.begin(); modified_iter != current_modified.end(); ++modified_iter) {
+           yield {
+             tn->log(20, SSTR("received async update notification: " << *modified_iter));
+             spawn(new RGWDataSyncSingleEntryCR(sync_env, *modified_iter, string(), marker_tracker, error_repo, false, tn), false);
+           }
+         }
+
+         if (error_retry_time <= ceph::coarse_real_clock::now()) {
+           /* process bucket shards that previously failed */
+           omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
+           // NOTE(review): retcode of this listing is not checked here
+           yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, error_oid),
+                                                error_marker, max_error_entries, omapkeys));
+           error_entries = std::move(omapkeys->entries);
+           tn->log(20, SSTR("read error repo, got " << error_entries.size() << " entries"));
+           iter = error_entries.begin();
+           for (; iter != error_entries.end(); ++iter) {
+             error_marker = *iter;
+             tn->log(20, SSTR("handle error entry: " << error_marker));
+             spawn(new RGWDataSyncSingleEntryCR(sync_env, error_marker, error_marker, nullptr /* no marker tracker */, error_repo, true, tn), false);
+           }
+           if (!omapkeys->more) {
+             if (error_marker.empty() && error_entries.empty()) {
+               /* the retry repo is empty, we back off a bit before calling it again */
+               retry_backoff_secs *= 2;
+               if (retry_backoff_secs > RETRY_BACKOFF_SECS_MAX) {
+                 retry_backoff_secs = RETRY_BACKOFF_SECS_MAX;
+               }
+             } else {
+               retry_backoff_secs = RETRY_BACKOFF_SECS_DEFAULT;
+             }
+             error_retry_time = ceph::coarse_real_clock::now() + make_timespan(retry_backoff_secs);
+             error_marker.clear();
+           }
+         }
+         omapkeys.reset();
+
+         tn->log(20, SSTR("shard_id=" << shard_id << " sync_marker=" << sync_marker.marker));
+         yield call(new RGWReadRemoteDataLogShardCR(sync_env, shard_id, sync_marker.marker,
+                                                    &next_marker, &log_entries, &truncated));
+         if (retcode < 0 && retcode != -ENOENT) {
+           tn->log(0, SSTR("ERROR: failed to read remote data log info: ret=" << retcode));
+           stop_spawned_services();
+           drain_all();
+           return set_cr_error(retcode);
+         }
+
+         if (log_entries.size() > 0) {
+           tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+         }
+
+         for (log_iter = log_entries.begin(); log_iter != log_entries.end(); ++log_iter) {
+           tn->log(20, SSTR("shard_id=" << shard_id << " log_entry: " << log_iter->log_id << ":" << log_iter->log_timestamp << ":" << log_iter->entry.key));
+           if (!marker_tracker->index_key_to_marker(log_iter->entry.key, log_iter->log_id)) {
+             tn->log(20, SSTR("skipping sync of entry: " << log_iter->log_id << ":" << log_iter->entry.key << " sync already in progress for bucket shard"));
+             marker_tracker->try_update_high_marker(log_iter->log_id, 0, log_iter->log_timestamp);
+             continue;
+           }
+           if (!marker_tracker->start(log_iter->log_id, 0, log_iter->log_timestamp)) {
+             tn->log(0, SSTR("ERROR: cannot start syncing " << log_iter->log_id << ". Duplicate entry?"));
+           } else {
+             spawn(new RGWDataSyncSingleEntryCR(sync_env, log_iter->entry.key, log_iter->log_id, marker_tracker, error_repo, false, tn), false);
+           }
+           // throttle: wait for children while too many are in flight
+           while ((int)num_spawned() > spawn_window) {
+             set_status() << "num_spawned() > spawn_window";
+             yield wait_for_child();
+             int ret;
+             while (collect(&ret, lease_stack.get())) {
+               if (ret < 0) {
+                 tn->log(10, "a sync operation returned error");
+                 /* we have reported this error */
+               }
+               /* not waiting for child here */
+             }
+           }
+         }
+
+         tn->log(20, SSTR("shard_id=" << shard_id << " sync_marker=" << sync_marker.marker
+                          << " next_marker=" << next_marker << " truncated=" << truncated));
+         if (!next_marker.empty()) {
+           sync_marker.marker = next_marker;
+         } else if (!log_entries.empty()) {
+           sync_marker.marker = log_entries.back().log_id;
+         }
+         if (!truncated) {
+           // we reached the end, wait a while before checking for more
+           tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+           yield wait(get_idle_interval());
+         }
+       } while (true);
+     }
+     return 0;
+   }
+
+   // Idle time between incremental passes: INCREMENTAL_INTERVAL seconds,
+   // shortened to the time remaining until error_retry_time when that is
+   // set, in the future, and sooner.
+   utime_t get_idle_interval() const {
+ #define INCREMENTAL_INTERVAL 20
+     ceph::timespan interval = std::chrono::seconds(INCREMENTAL_INTERVAL);
+     if (!ceph::coarse_real_clock::is_zero(error_retry_time)) {
+       auto now = ceph::coarse_real_clock::now();
+       if (error_retry_time > now) {
+         auto d = error_retry_time - now;
+         if (interval > d) {
+           interval = d;
+         }
+       }
+     }
+     // convert timespan -> time_point -> utime_t
+     return utime_t(ceph::coarse_real_clock::zero() + interval);
+   }
+
+   // Takes the lease down and, if present, finishes and releases error_repo.
+   void stop_spawned_services() {
+     lease_cr->go_down();
+     if (error_repo) {
+       error_repo->finish();
+       error_repo->put();
+       error_repo = NULL;
+     }
+   }
+ };
+
+ // Backoff-controlled driver for one datalog shard. alloc_cr() creates a
+ // fresh RGWDataSyncShardCR from the current sync_marker; alloc_finisher_cr()
+ // builds a coroutine that reads the shard's marker object from the log pool
+ // back into sync_marker.
+ class RGWDataSyncShardControlCR : public RGWBackoffControlCR {
+   RGWDataSyncEnv *sync_env;
+   rgw_pool pool;
+   uint32_t shard_id;
+   rgw_data_sync_marker sync_marker;
+   RGWSyncTraceNodeRef tn;
+
+ public:
+   RGWDataSyncShardControlCR(RGWDataSyncEnv *_sync_env,
+                             const rgw_pool& _pool,
+                             uint32_t _shard_id,
+                             rgw_data_sync_marker& _marker,
+                             RGWSyncTraceNodeRef& _tn_parent)
+     : RGWBackoffControlCR(_sync_env->cct, false),
+       sync_env(_sync_env),
+       pool(_pool),
+       shard_id(_shard_id),
+       sync_marker(_marker)
+   {
+     // each shard gets its own "shard" trace node labelled with its id
+     tn = sync_env->sync_tracer->add_node(_tn_parent, "shard", std::to_string(shard_id));
+   }
+
+   RGWCoroutine *alloc_cr() override {
+     return new RGWDataSyncShardCR(sync_env, pool, shard_id, sync_marker, tn, backoff_ptr());
+   }
+
+   RGWCoroutine *alloc_finisher_cr() override {
+     RGWRados *store = sync_env->store;
+     const string marker_oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, shard_id);
+     return new RGWSimpleRadosReadCR<rgw_data_sync_marker>(
+         sync_env->async_rados, store->svc.sysobj,
+         rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, marker_oid),
+         &sync_marker);
+   }
+
+   // Hands keys to the currently allocated shard coroutine, if there is one;
+   // cr_lock() is held for the duration.
+   void append_modified_shards(set<string>& keys) {
+     Mutex::Locker l(cr_lock());
+
+     if (auto *shard_cr = static_cast<RGWDataSyncShardCR *>(get_cr())) {
+       shard_cr->append_modified_shards(keys);
+     }
+   }
+ };
+
+ class RGWDataSyncCR : public RGWCoroutine { // Top-level data sync coroutine: reads or initializes the sync status, builds full sync maps when required, then spawns one RGWDataSyncShardControlCR per shard marker.
+ RGWDataSyncEnv *sync_env;
+ uint32_t num_shards;
+
+ rgw_data_sync_status sync_status;
+
+ RGWDataSyncShardMarkerTrack *marker_tracker; // set to NULL by the constructor and not referenced again in this class
+
+ Mutex shard_crs_lock; // guards shard_crs (taken in operate() and wakeup())
+ map<int, RGWDataSyncShardControlCR *> shard_crs; // shard id -> control coroutine; one ref is held per entry and dropped in the destructor
+
+ bool *reset_backoff; // set to true after the init and full-sync-map steps succeed
+
+ RGWSyncTraceNodeRef tn;
+
+ RGWDataSyncModule *data_sync_module{nullptr}; // obtained from sync_env->sync_module in operate()
+ public:
+ RGWDataSyncCR(RGWDataSyncEnv *_sync_env, uint32_t _num_shards, RGWSyncTraceNodeRef& _tn, bool *_reset_backoff) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ num_shards(_num_shards),
+ marker_tracker(NULL),
+ shard_crs_lock("RGWDataSyncCR::shard_crs_lock"),
+ reset_backoff(_reset_backoff), tn(_tn) {
+
+ }
+
+ ~RGWDataSyncCR() override { // releases the refs taken on the shard control coroutines in operate()
+ for (auto iter : shard_crs) {
+ iter.second->put();
+ }
+ }
+
+ int operate() override { // Steps: read status -> (StateInit) init status -> (StateBuildingFullSyncMaps) build maps and persist StateSync -> start_sync -> (StateSync) spawn shard coroutines.
+ reenter(this) {
+
+ /* read sync status */
+ yield call(new RGWReadDataSyncStatusCoroutine(sync_env, &sync_status));
+
+ data_sync_module = sync_env->sync_module->get_data_handler();
+
+ if (retcode < 0 && retcode != -ENOENT) { // -ENOENT is tolerated here; any other read error aborts
+ tn->log(0, SSTR("ERROR: failed to fetch sync status, retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+
+ /* state: init status */
+ if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateInit) {
+ tn->log(20, SSTR("init"));
+ sync_status.sync_info.num_shards = num_shards;
+ uint64_t instance_id;
+ instance_id = ceph::util::generate_random_number<uint64_t>();
+ yield call(new RGWInitDataSyncStatusCoroutine(sync_env, num_shards, instance_id, tn, &sync_status));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to init sync, retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+ // sets state = StateBuildingFullSyncMaps
+
+ *reset_backoff = true;
+ }
+
+ data_sync_module->init(sync_env, sync_status.sync_info.instance_id);
+
+ if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateBuildingFullSyncMaps) {
+ tn->log(10, SSTR("building full sync maps"));
+ /* call sync module init here */
+ sync_status.sync_info.num_shards = num_shards;
+ yield call(data_sync_module->init_sync(sync_env));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: sync module init_sync() failed, retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+ /* state: building full sync maps */
+ yield call(new RGWListBucketIndexesCR(sync_env, &sync_status));
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to build full sync maps, retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+ sync_status.sync_info.state = rgw_data_sync_info::StateSync;
+
+ /* update new state */
+ yield call(set_sync_info_cr());
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: failed to write sync status, retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+
+ *reset_backoff = true;
+ }
+
+ yield call(data_sync_module->start_sync(sync_env)); // NOTE(review): retcode is not checked after start_sync(), unlike init_sync() above - confirm this is intended
+
+ yield {
+ if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateSync) {
+ tn->log(10, SSTR("spawning " << num_shards << " shards sync"));
+ for (map<uint32_t, rgw_data_sync_marker>::iterator iter = sync_status.sync_markers.begin();
+ iter != sync_status.sync_markers.end(); ++iter) {
+ RGWDataSyncShardControlCR *cr = new RGWDataSyncShardControlCR(sync_env, sync_env->store->svc.zone->get_zone_params().log_pool,
+ iter->first, iter->second, tn);
+ cr->get(); // ref for shard_crs; released in the destructor
+ shard_crs_lock.Lock();
+ shard_crs[iter->first] = cr;
+ shard_crs_lock.Unlock();
+ spawn(cr, true);
+ }
+ }
+ }
+
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+ RGWCoroutine *set_sync_info_cr() { // builds a coroutine that writes sync_status.sync_info to the sync status object in the zone's log pool
+ RGWRados *store = sync_env->store;
+ return new RGWSimpleRadosWriteCR<rgw_data_sync_info>(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, RGWDataSyncStatusManager::sync_status_oid(sync_env->source_zone)),
+ sync_status.sync_info);
+ }
+
+ void wakeup(int shard_id, set<string>& keys) { // passes keys to the control coroutine registered for shard_id and wakes it; unknown shard ids are ignored
+ Mutex::Locker l(shard_crs_lock);
+ map<int, RGWDataSyncShardControlCR *>::iterator iter = shard_crs.find(shard_id);
+ if (iter == shard_crs.end()) {
+ return;
+ }
+ iter->second->append_modified_shards(keys);
+ iter->second->wakeup();
+ }
+ };
+
+ class RGWDefaultDataSyncModule : public RGWDataSyncModule { // Data handler returned by RGWDefaultSyncModuleInstance; the three overrides are defined out of line below.
+ public:
+ RGWDefaultDataSyncModule() {}
+
+ RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override; // returns an RGWFetchRemoteObjCR
+ RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; // returns an RGWRemoveObjCR
+ RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; // returns an RGWRemoveObjCR carrying the owner id and display name
+ };
+
+ class RGWDefaultSyncModuleInstance : public RGWSyncModuleInstance { // Sync module instance that owns an RGWDefaultDataSyncModule and reports support for user writes.
+ RGWDefaultDataSyncModule data_handler;
+ public:
+ RGWDefaultSyncModuleInstance() {}
+ RGWDataSyncModule *get_data_handler() override { // returns a pointer to the embedded handler; it lives as long as this instance
+ return &data_handler;
+ }
+ bool supports_user_writes() override {
+ return true;
+ }
+ };
+
+ int RGWDefaultSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance)
+ { // Stores a new RGWDefaultSyncModuleInstance in *instance and returns 0; cct and config are not used.
+ instance->reset(new RGWDefaultSyncModuleInstance());
+ return 0;
+ }
+
+ RGWCoroutine *RGWDefaultDataSyncModule::sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace)
+ { // Returns a new RGWFetchRemoteObjCR for key in bucket_info from sync_env->source_zone; versioned_epoch, zones_trace and sync_env->counters are passed through.
+ return new RGWFetchRemoteObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone, bucket_info,
+ std::nullopt,
+ key, std::nullopt, versioned_epoch,
+ true, zones_trace, sync_env->counters);
+ }
+
+ RGWCoroutine *RGWDefaultDataSyncModule::remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key,
+ real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
+ { // Returns a new RGWRemoveObjCR for key with no owner information (NULL, NULL) and the flag after it set to false; &mtime and zones_trace are passed through.
+ return new RGWRemoveObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone,
+ bucket_info, key, versioned, versioned_epoch,
+ NULL, NULL, false, &mtime, zones_trace);
+ }
+
+ RGWCoroutine *RGWDefaultDataSyncModule::create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
+ { // Same RGWRemoveObjCR as remove_object(), but with the owner id and display name supplied and the flag after them set to true (presumably "delete marker" - confirm against RGWRemoveObjCR).
+ return new RGWRemoveObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone,
+ bucket_info, key, versioned, versioned_epoch,
+ &owner.id, &owner.display_name, true, &mtime, zones_trace);
+ }
+
+ class RGWArchiveDataSyncModule : public RGWDefaultDataSyncModule { // Archive-zone data handler: overrides all three object operations of the default module (definitions below).
+ public:
+ RGWArchiveDataSyncModule() {}
+
+ RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override; // forces bucket versioning on, then fetches the object
+ RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; // logs only; returns NULL
+ RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override; // logs, then returns an RGWRemoveObjCR
+ };
+
+ class RGWArchiveSyncModuleInstance : public RGWDefaultSyncModuleInstance { // Archive sync module instance: supplies the archive data handler and archive-specific bucket metadata handlers.
+ RGWArchiveDataSyncModule data_handler; // separate from (and in addition to) the base class's own data_handler member
+ public:
+ RGWArchiveSyncModuleInstance() {}
+ RGWDataSyncModule *get_data_handler() override { // returns the archive handler declared in this class
+ return &data_handler;
+ }
+ RGWMetadataHandler *alloc_bucket_meta_handler() override {
+ return RGWArchiveBucketMetaHandlerAllocator::alloc();
+ }
+ RGWMetadataHandler *alloc_bucket_instance_meta_handler() override {
+ return RGWArchiveBucketInstanceMetaHandlerAllocator::alloc();
+ }
+ };
+
+ int RGWArchiveSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance)
+ { // Stores a new RGWArchiveSyncModuleInstance in *instance and returns 0; cct and config are not used.
+ instance->reset(new RGWArchiveSyncModuleInstance());
+ return 0;
+ }
+
+ RGWCoroutine *RGWArchiveDataSyncModule::sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace)
+ { // Ensures the bucket is versioned (persisting the flag change), picks a destination key when no version epoch is given, and returns an RGWFetchRemoteObjCR; returns NULL if the bucket info cannot be written.
+ ldout(sync_env->cct, 5) << "SYNC_ARCHIVE: sync_object: b=" << bucket_info.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
+ if (!bucket_info.versioned() ||
+ (bucket_info.flags & BUCKET_VERSIONS_SUSPENDED)) {
+ ldout(sync_env->cct, 0) << "SYNC_ARCHIVE: sync_object: enabling object versioning for archive bucket" << dendl;
+ bucket_info.flags = (bucket_info.flags & ~BUCKET_VERSIONS_SUSPENDED) | BUCKET_VERSIONED; // clear "suspended", set "versioned"; note this mutates the caller's bucket_info
+ int op_ret = sync_env->store->put_bucket_instance_info(bucket_info, false, real_time(), NULL); // direct (non-coroutine) call on the store
+ if (op_ret < 0) {
+ ldout(sync_env->cct, 0) << "SYNC_ARCHIVE: sync_object: error versioning archive bucket" << dendl;
+ return NULL;
+ }
+ }
+
+ std::optional<rgw_obj_key> dest_key; // stays empty when a non-zero versioned_epoch was supplied
+
+ if (versioned_epoch.value_or(0) == 0) { /* force version if not set */
+ versioned_epoch = 0;
+ dest_key = key;
+ if (key.instance.empty()) {
+ sync_env->store->gen_rand_obj_instance_name(&(*dest_key));
+ }
+ }
+
+ return new RGWFetchRemoteObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone,
+ bucket_info, std::nullopt,
+ key, dest_key, versioned_epoch,
+ true, zones_trace, nullptr); // last argument is nullptr here, where the default module passes sync_env->counters
+ }
+
+ RGWCoroutine *RGWArchiveDataSyncModule::remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key,
+ real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
+ { // Logs the request and returns NULL: no removal coroutine is created for the archive module.
+ ldout(sync_env->cct, 0) << "SYNC_ARCHIVE: remove_object: b=" << bucket_info.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch << dendl;
+ return NULL;
+ }
+
+ RGWCoroutine *RGWArchiveDataSyncModule::create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace)
+ { // Logs the request, then returns an RGWRemoveObjCR built with the same arguments as RGWDefaultDataSyncModule::create_delete_marker().
+ ldout(sync_env->cct, 0) << "SYNC_ARCHIVE: create_delete_marker: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime
+ << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+ return new RGWRemoveObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone,
+ bucket_info, key, versioned, versioned_epoch,
+ &owner.id, &owner.display_name, true, &mtime, zones_trace);
+ }
+
+ // Backoff-controlled driver for the whole data sync: alloc_cr() creates a
+ // new RGWDataSyncCR each time, and wakeup() relays shard notifications to
+ // whichever RGWDataSyncCR is currently allocated.
+ class RGWDataSyncControlCR : public RGWBackoffControlCR
+ {
+   RGWDataSyncEnv *sync_env;
+   uint32_t num_shards;
+
+   RGWSyncTraceNodeRef tn;
+
+   static constexpr bool exit_on_error = false; // retry on all errors
+ public:
+   RGWDataSyncControlCR(RGWDataSyncEnv *_sync_env,
+                        uint32_t _num_shards,
+                        RGWSyncTraceNodeRef& _tn_parent)
+     : RGWBackoffControlCR(_sync_env->cct, exit_on_error),
+       sync_env(_sync_env),
+       num_shards(_num_shards)
+   {
+     tn = sync_env->sync_tracer->add_node(_tn_parent, "sync");
+   }
+
+   RGWCoroutine *alloc_cr() override {
+     return new RGWDataSyncCR(sync_env, num_shards, tn, backoff_ptr());
+   }
+
+   // Notifies the active RGWDataSyncCR that keys changed in shard_id.
+   // Does nothing when no coroutine is currently allocated.
+   void wakeup(int shard_id, set<string>& keys) {
+     RGWDataSyncCR *active = nullptr;
+     {
+       // hold cr_lock() only long enough to pin the coroutine with a ref
+       Mutex::Locker guard(cr_lock());
+       active = static_cast<RGWDataSyncCR *>(get_cr());
+       if (!active) {
+         return;
+       }
+       active->get();
+     }
+
+     tn->log(20, SSTR("notify shard=" << shard_id << " keys=" << keys));
+     active->wakeup(shard_id, keys);
+
+     active->put();
+   }
+ };
+
+ // Relays a shard notification to data_sync_cr under a read lock; a no-op
+ // while data_sync_cr is null.
+ void RGWRemoteDataLog::wakeup(int shard_id, set<string>& keys) {
+   RWLock::RLocker rl(lock);
+   if (data_sync_cr) {
+     data_sync_cr->wakeup(shard_id, keys);
+   }
+ }
+
+ int RGWRemoteDataLog::run_sync(int num_shards)
+ { // Creates an RGWDataSyncControlCR, runs it to completion via run(), then clears data_sync_cr; returns 0 or the negative result of run().
+ lock.get_write(); // write lock: data_sync_cr is read under the read lock in wakeup()
+ data_sync_cr = new RGWDataSyncControlCR(&sync_env, num_shards, tn);
+ data_sync_cr->get(); // run() will drop a ref, so take another
+ lock.unlock();
+
+ int r = run(data_sync_cr);
+
+ lock.get_write();
+ data_sync_cr->put(); // release the extra ref taken above
+ data_sync_cr = NULL;
+ lock.unlock();
+
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to run sync" << dendl;
+ return r;
+ }
+ return 0;
+ }
+
+ int RGWDataSyncStatusManager::init()
+ { // Resolves the source zone, its connection and the sync module, initializes source_log, reads the remote datalog info and fills shard_objs. Returns 0, -EIO (unknown zone), -ENOTSUP (tier type cannot export data), -EINVAL (no connection) or the error from source_log.
+ RGWZone *zone_def; // filled in by find_zone_by_id() on success
+
+ if (!store->svc.zone->find_zone_by_id(source_zone, &zone_def)) {
+ ldpp_dout(this, 0) << "ERROR: failed to find zone config info for zone=" << source_zone << dendl;
+ return -EIO;
+ }
+
+ if (!store->svc.sync_modules->get_manager()->supports_data_export(zone_def->tier_type)) {
+ return -ENOTSUP;
+ }
+
+ const RGWZoneParams& zone_params = store->svc.zone->get_zone_params();
+
+ if (sync_module == nullptr) { // fall back to the store's sync module when none was set
+ sync_module = store->get_sync_module();
+ }
+
+ conn = store->svc.zone->get_zone_conn_by_id(source_zone);
+ if (!conn) {
+ ldpp_dout(this, 0) << "connection object to zone " << source_zone << " does not exist" << dendl;
+ return -EINVAL;
+ }
+
+ error_logger = new RGWSyncErrorLogger(store, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS); // freed by finalize(); NOTE(review): any previous error_logger is overwritten without being deleted - confirm init() is never called twice
+
+ int r = source_log.init(source_zone, conn, error_logger, store->get_sync_tracer(),
+ sync_module, counters);
+ if (r < 0) {
+ ldpp_dout(this, 0) << "ERROR: failed to init remote log, r=" << r << dendl;
+ finalize();
+ return r;
+ }
+
+ rgw_datalog_info datalog_info;
+ r = source_log.read_log_info(&datalog_info);
+ if (r < 0) {
+ ldpp_dout(this, 5) << "ERROR: master.read_log_info() returned r=" << r << dendl;
+ finalize();
+ return r;
+ }
+
+ num_shards = datalog_info.num_shards;
+
+ for (int i = 0; i < num_shards; i++) { // one status object per datalog shard, in the zone's log pool
+ shard_objs[i] = rgw_raw_obj(zone_params.log_pool, shard_obj_name(source_zone, i));
+ }
+
+ return 0;
+ }
+
+ void RGWDataSyncStatusManager::finalize()
+ { // Deletes error_logger and resets the pointer, so repeated calls are harmless.
+ delete error_logger;
+ error_logger = nullptr;
+ }
+
+ unsigned RGWDataSyncStatusManager::get_subsys() const
+ { // Returns this file's dout_subsys value as the logging subsystem.
+ return dout_subsys;
+ }
+
+ // Writes the log prefix "data sync zone:<id> " to out, where <id> is at most
+ // the first 8 characters of source_zone, and returns out.
+ std::ostream& RGWDataSyncStatusManager::gen_prefix(std::ostream& out) const
+ {
+   const std::string_view short_zone = std::string_view{source_zone}.substr(0, 8);
+   out << "data sync zone:" << short_zone << ' ';
+   return out;
+ }
+
+ // Returns the name of the overall data sync status object for source_zone:
+ // "<datalog_sync_status_oid_prefix>.<source_zone>".
+ string RGWDataSyncStatusManager::sync_status_oid(const string& source_zone)
+ {
+   // Build the name with std::string instead of snprintf() into a
+   // variable-length array: VLAs are a compiler extension in C++ and size a
+   // stack buffer from the caller's input.
+   string oid = datalog_sync_status_oid_prefix;
+   oid += '.';
+   oid += source_zone;
+   return oid;
+ }
+
+ // Returns the name of the per-shard data sync status object:
+ // "<datalog_sync_status_shard_prefix>.<source_zone>.<shard_id>", with
+ // shard_id printed in decimal.
+ string RGWDataSyncStatusManager::shard_obj_name(const string& source_zone, int shard_id)
+ {
+   // Build the name with std::string instead of snprintf() into a
+   // variable-length array: VLAs are a compiler extension in C++ and size a
+   // stack buffer from the caller's input.
+   string oid = datalog_sync_status_shard_prefix;
+   oid += '.';
+   oid += source_zone;
+   oid += '.';
+   oid += std::to_string(shard_id);
+   return oid;
+ }
+
+ int RGWRemoteBucketLog::init(const string& _source_zone, RGWRESTConn *_conn,
+ const rgw_bucket& bucket, int shard_id,
+ RGWSyncErrorLogger *_error_logger,
+ RGWSyncTraceManager *_sync_tracer,
+ RGWSyncModuleInstanceRef& _sync_module)
+ { // Records the connection, source zone and bucket shard, then initializes sync_env from them; always returns 0.
+ conn = _conn;
+ source_zone = _source_zone;
+ bs.bucket = bucket;
+ bs.shard_id = shard_id;
+
+ sync_env.init(dpp, store->ctx(), store, conn, async_rados, http_manager,
+ _error_logger, _sync_tracer, source_zone, _sync_module, nullptr); // final argument is nullptr; RGWDataSyncStatusManager::init() passes `counters` to source_log.init() - presumably the same slot, TODO confirm
+
+ return 0;
+ }
+
+ class RGWReadRemoteBucketIndexLogInfoCR : public RGWCoroutine { // Reads bucket index log marker info for one bucket shard from the remote zone via GET /admin/log/ and stores it in *info.
+ RGWDataSyncEnv *sync_env;
+ const string instance_key; // bs.get_key(), sent as the "bucket-instance" parameter
+
+ rgw_bucket_index_marker_info *info; // output; not owned
+
+ public:
+ RGWReadRemoteBucketIndexLogInfoCR(RGWDataSyncEnv *_sync_env,
+ const rgw_bucket_shard& bs,
+ rgw_bucket_index_marker_info *_info)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ instance_key(bs.get_key()), info(_info) {}
+
+ int operate() override { // completes with the REST call's retcode as error when negative, otherwise done
+ reenter(this) {
+ yield {
+ rgw_http_param_pair pairs[] = { { "type" , "bucket-index" },
+ { "bucket-instance", instance_key.c_str() },
+ { "info" , NULL },
+ { NULL, NULL } }; // the { NULL, NULL } pair terminates the list
+
+ string p = "/admin/log/";
+ call(new RGWReadRESTResourceCR<rgw_bucket_index_marker_info>(sync_env->cct, sync_env->conn, sync_env->http_manager, p, pairs, info));
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ class RGWInitBucketShardSyncStatusCoroutine : public RGWCoroutine { // Initializes the sync status object of one bucket shard: fetches the remote index log position, then either writes a fresh full-sync status or, if sync is stopped on the source, removes the status object.
+ RGWDataSyncEnv *sync_env;
+
+ rgw_bucket_shard bs;
+ const string sync_status_oid; // status object name for (source_zone, bs)
+
+ rgw_bucket_shard_sync_info& status; // caller-owned; updated in place before being written
+
+ rgw_bucket_index_marker_info info; // filled by RGWReadRemoteBucketIndexLogInfoCR
+ public:
+ RGWInitBucketShardSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
+ const rgw_bucket_shard& bs,
+ rgw_bucket_shard_sync_info& _status)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs),
+ sync_status_oid(RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs)),
+ status(_status)
+ {}
+
+ int operate() override { // completes with -ENOENT whenever info.syncstopped is set, otherwise with the write's retcode (error if negative)
+ reenter(this) {
+ /* fetch current position in logs */
+ yield call(new RGWReadRemoteBucketIndexLogInfoCR(sync_env, bs, &info));
+ if (retcode < 0 && retcode != -ENOENT) { // -ENOENT is tolerated; info is then used as left by the failed read
+ ldout(cct, 0) << "ERROR: failed to fetch bucket index status" << dendl;
+ return set_cr_error(retcode);
+ }
+ yield {
+ auto store = sync_env->store;
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().log_pool, sync_status_oid);
+
+ if (info.syncstopped) {
+ call(new RGWRadosRemoveCR(store, obj));
+ } else {
+ status.state = rgw_bucket_shard_sync_info::StateFullSync;
+ status.inc_marker.position = info.max_marker; // incremental sync will later start from the remote log's current max marker
+ map<string, bufferlist> attrs;
+ status.encode_all_attrs(attrs);
+ call(new RGWSimpleRadosWriteAttrsCR(sync_env->async_rados, store->svc.sysobj, obj, attrs));
+ }
+ }
+ if (info.syncstopped) { // the result of the removal is discarded; a stopped sync always reports -ENOENT
+ retcode = -ENOENT;
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ RGWCoroutine *RGWRemoteBucketLog::init_sync_status_cr()
+ { // Returns a new RGWInitBucketShardSyncStatusCoroutine for this log's bucket shard; it holds a reference to the init_status member.
+ return new RGWInitBucketShardSyncStatusCoroutine(&sync_env, bs, init_status);
+ }
+
+// Common prefix of the attribute names used for bucket shard sync state.
+#define BUCKET_SYNC_ATTR_PREFIX RGW_ATTR_PREFIX "bucket-sync."
+
+/**
+ * Decode the attribute called `attr_name` out of `attrs` into `*val`.
+ *
+ * @return true when the attribute exists and decodes cleanly.
+ *         false when it is missing (in which case `*val` is reset to a
+ *         default-constructed T) or when decoding throws buffer::error
+ *         (in which case an error is logged at level 0).
+ */
+template <class T>
+static bool decode_attr(CephContext *cct, map<string, bufferlist>& attrs, const string& attr_name, T *val)
+{
+  const auto found = attrs.find(attr_name);
+  if (found == attrs.end()) {
+    *val = T();
+    return false;
+  }
+
+  auto pos = found->second.cbegin();
+  bool decoded = true;
+  try {
+    decode(*val, pos);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode attribute: " << attr_name << dendl;
+    decoded = false;
+  }
+  return decoded;
+}
+
+/**
+ * Populate state, full_marker and inc_marker from `attrs`.
+ *
+ * Each field is first looked up under its BUCKET_SYNC_ATTR_PREFIX-ed name;
+ * if that lookup/decode does not succeed, the un-prefixed name is tried.
+ */
+void rgw_bucket_shard_sync_info::decode_from_attrs(CephContext *cct, map<string, bufferlist>& attrs)
+{
+  auto load = [&](const string& name, auto *field) {
+    if (decode_attr(cct, attrs, string(BUCKET_SYNC_ATTR_PREFIX) + name, field)) {
+      return;
+    }
+    decode_attr(cct, attrs, name, field);
+  };
+
+  load("state", &state);
+  load("full_marker", &full_marker);
+  load("inc_marker", &inc_marker);
+}
+
+/// Encode the sync state and both markers into `attrs`, each under its own key.
+void rgw_bucket_shard_sync_info::encode_all_attrs(map<string, bufferlist>& attrs)
+{
+  this->encode_state_attr(attrs);
+  this->full_marker.encode_attr(attrs);
+  this->inc_marker.encode_attr(attrs);
+}
+
+/// Encode `state` into the BUCKET_SYNC_ATTR_PREFIX "state" entry of `attrs`.
+void rgw_bucket_shard_sync_info::encode_state_attr(map<string, bufferlist>& attrs)
+{
+  using ceph::encode;
+  bufferlist& state_bl = attrs[BUCKET_SYNC_ATTR_PREFIX "state"];
+  encode(state, state_bl);
+}
+
+/// Encode this marker into the BUCKET_SYNC_ATTR_PREFIX "full_marker" entry of `attrs`.
+void rgw_bucket_shard_full_sync_marker::encode_attr(map<string, bufferlist>& attrs)
+{
+  using ceph::encode;
+  bufferlist& marker_bl = attrs[BUCKET_SYNC_ATTR_PREFIX "full_marker"];
+  encode(*this, marker_bl);
+}
+
+/// Encode this marker into the BUCKET_SYNC_ATTR_PREFIX "inc_marker" entry of `attrs`.
+void rgw_bucket_shard_inc_sync_marker::encode_attr(map<string, bufferlist>& attrs)
+{
+  using ceph::encode;
+  bufferlist& marker_bl = attrs[BUCKET_SYNC_ATTR_PREFIX "inc_marker"];
+  encode(*this, marker_bl);
+}
+
+/**
+ * Coroutine that reads the attributes of one bucket shard's sync status
+ * object and decodes them into the caller-supplied `status`.
+ */
+class RGWReadBucketSyncStatusCoroutine : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  string oid;                          // name of the status object
+  rgw_bucket_shard_sync_info *status;  // output destination supplied by the caller
+
+  map<string, bufferlist> attrs;       // raw attributes read from the status object
+public:
+  RGWReadBucketSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
+                                   const rgw_bucket_shard& bs,
+                                   rgw_bucket_shard_sync_info *_status)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      oid(RGWBucketSyncStatusManager::status_oid(_sync_env->source_zone, bs)),
+      status(_status)
+  {}
+
+  int operate() override;
+};
+
+/**
+ * Read the status object's attributes and decode them into `*status`.
+ *
+ * A missing object (-ENOENT) is not an error: `*status` is reset to a
+ * default-constructed value and the coroutine completes successfully.
+ * Any other negative result is logged and reported via set_cr_error().
+ */
+int RGWReadBucketSyncStatusCoroutine::operate()
+{
+  reenter(this) {
+    yield call(new RGWSimpleRadosReadAttrsCR(
+        sync_env->async_rados, sync_env->store->svc.sysobj,
+        rgw_raw_obj(sync_env->store->svc.zone->get_zone_params().log_pool, oid),
+        &attrs, true));
+
+    if (retcode >= 0) {
+      status->decode_from_attrs(sync_env->cct, attrs);
+      return set_cr_done();
+    }
+    if (retcode != -ENOENT) {
+      ldout(sync_env->cct, 0) << "ERROR: failed to call fetch bucket shard info oid=" << oid << " ret=" << retcode << dendl;
+      return set_cr_error(retcode);
+    }
+    // status object does not exist: hand back a default-constructed status
+    *status = rgw_bucket_shard_sync_info();
+    return set_cr_done();
+  }
+  return 0;
+}
+
+// Number of omap keys requested per read by the coroutine below.
+#define OMAP_READ_MAX_ENTRIES 10
+/**
+ * Coroutine that lists the omap keys of a data-sync shard's ".retry"
+ * object and adds them to the caller-supplied `recovering_buckets` set.
+ */
+class RGWReadRecoveringBucketShardsCoroutine : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  RGWRados *store;
+
+  const int shard_id;
+  int max_entries;                 // stop listing once this many keys were seen
+
+  set<string>& recovering_buckets; // output, supplied by the caller
+  string marker;                   // listing position handed to each omap read
+  string error_oid;                // shard object name + ".retry"
+
+  RGWRadosGetOmapKeysCR::ResultPtr omapkeys;
+  set<string> error_entries;
+  int max_omap_entries;
+  int count;
+
+public:
+  RGWReadRecoveringBucketShardsCoroutine(RGWDataSyncEnv *_sync_env, const int _shard_id,
+                                         set<string>& _recovering_buckets, const int _max_entries)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      store(_sync_env->store),
+      shard_id(_shard_id),
+      max_entries(_max_entries),
+      recovering_buckets(_recovering_buckets),
+      error_oid(RGWDataSyncStatusManager::shard_obj_name(_sync_env->source_zone, _shard_id) + ".retry"),
+      max_omap_entries(OMAP_READ_MAX_ENTRIES)
+  {}
+
+  int operate() override;
+};
+
+int RGWReadRecoveringBucketShardsCoroutine::operate() // Pages through the omap keys of error_oid and inserts them into recovering_buckets.
+{
+ reenter(this){
+ //read recovering bucket shards
+ count = 0;
+ do {
+ omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>(); // fresh result holder for every page
+ yield call(new RGWRadosGetOmapKeysCR(store, rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, error_oid),
+ marker, max_omap_entries, omapkeys));
+
+ if (retcode == -ENOENT) { // object missing: stop and complete successfully with whatever was gathered
+ break;
+ }
+
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "failed to read recovering bucket shards with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ error_entries = std::move(omapkeys->entries);
+ if (error_entries.empty()) { // nothing returned: done
+ break;
+ }
+
+ count += error_entries.size();
+ marker = *error_entries.rbegin(); // next read is positioned at the largest key of this page (taken before the entries are moved out)
+ recovering_buckets.insert(std::make_move_iterator(error_entries.begin()),
+ std::make_move_iterator(error_entries.end()));
+ } while (omapkeys->more && count < max_entries); // continue while the read reports more keys and the cap is not reached
+
+ return set_cr_done();
+ }
+
+ return 0;
+}
+
+/**
+ * Coroutine that reads a data-sync shard's stored sync marker and then
+ * lists the remote data log from that marker, adding the key of every
+ * returned entry to the caller-supplied `pending_buckets` set.
+ */
+class RGWReadPendingBucketShardsCoroutine : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  RGWRados *store;
+
+  const int shard_id;
+  int max_entries;                   // stop listing once this many entries were seen
+
+  set<string>& pending_buckets;      // output, supplied by the caller
+  string marker;                     // position handed to the remote data log read
+  string status_oid;                 // name of the shard's status object
+
+  rgw_data_sync_marker* sync_marker; // output: the stored marker is read into this
+  int count;
+
+  std::string next_marker;
+  list<rgw_data_change_log_entry> log_entries;
+  bool truncated;
+
+public:
+  RGWReadPendingBucketShardsCoroutine(RGWDataSyncEnv *_sync_env, const int _shard_id,
+                                      set<string>& _pending_buckets,
+                                      rgw_data_sync_marker* _sync_marker, const int _max_entries)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      store(_sync_env->store),
+      shard_id(_shard_id),
+      max_entries(_max_entries),
+      pending_buckets(_pending_buckets),
+      status_oid(RGWDataSyncStatusManager::shard_obj_name(_sync_env->source_zone, _shard_id)),
+      sync_marker(_sync_marker)
+  {}
+
+  int operate() override;
+};
+
+/**
+ * Read the shard's stored sync marker into `*sync_marker`, then page
+ * through the remote data log starting at that marker and add the key of
+ * every returned entry to `pending_buckets`.
+ *
+ * Listing stops when the remote reports -ENOENT, returns no entries, is
+ * no longer truncated, or at least `max_entries` entries have been seen.
+ * Failure to read the stored marker, or any other negative result from
+ * the remote read, is logged and reported via set_cr_error().
+ */
+int RGWReadPendingBucketShardsCoroutine::operate()
+{
+  reenter(this){
+    //read sync status marker
+    using CR = RGWSimpleRadosReadCR<rgw_data_sync_marker>;
+    yield call(new CR(sync_env->async_rados, store->svc.sysobj,
+                      rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, status_oid),
+                      sync_marker));
+    if (retcode < 0) {
+      ldout(sync_env->cct,0) << "failed to read sync status marker with "
+        << cpp_strerror(retcode) << dendl;
+      return set_cr_error(retcode);
+    }
+
+    //read pending bucket shards
+    marker = sync_marker->marker;
+    count = 0;
+    do{
+      yield call(new RGWReadRemoteDataLogShardCR(sync_env, shard_id, marker,
+                                                 &next_marker, &log_entries, &truncated));
+
+      if (retcode == -ENOENT) {
+        break;
+      }
+
+      if (retcode < 0) {
+        ldout(sync_env->cct,0) << "failed to read remote data log info with "
+          << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+
+      if (log_entries.empty()) {
+        break;
+      }
+
+      count += log_entries.size();
+      for (const auto& entry : log_entries) {
+        pending_buckets.insert(entry.entry.key);
+      }
+
+      // Advance to the position returned by the remote; without this every
+      // iteration would request the same page again and entries past the
+      // first page would never be collected.
+      if (!next_marker.empty()) {
+        marker = next_marker;
+      }
+    }while(truncated && count < max_entries);
+
+    return set_cr_done();
+  }
+
+  return 0;
+}
+
+/**
+ * Gather, for one data-log shard, the recovering bucket shards and the
+ * pending bucket shards (plus the stored sync marker) by running the two
+ * reader coroutines on a private coroutine manager.
+ *
+ * @return the result of http_manager.start() if that fails, otherwise
+ *         the result of running the coroutine stacks.
+ */
+int RGWRemoteDataLog::read_shard_status(int shard_id, set<string>& pending_buckets, set<string>& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries)
+{
+  // cannot run concurrently with run_sync(), so run in a separate manager
+  RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+  RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr());
+
+  const int start_ret = http_manager.start();
+  if (start_ret < 0) {
+    ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << start_ret << dendl;
+    return start_ret;
+  }
+
+  // work on a copy of the sync environment that uses the local http manager
+  RGWDataSyncEnv sync_env_local = sync_env;
+  sync_env_local.http_manager = &http_manager;
+
+  list<RGWCoroutinesStack *> stacks;
+
+  auto *recovering_stack = new RGWCoroutinesStack(store->ctx(), &crs);
+  recovering_stack->call(new RGWReadRecoveringBucketShardsCoroutine(&sync_env_local, shard_id, recovering_buckets, max_entries));
+  stacks.push_back(recovering_stack);
+
+  auto *pending_stack = new RGWCoroutinesStack(store->ctx(), &crs);
+  pending_stack->call(new RGWReadPendingBucketShardsCoroutine(&sync_env_local, shard_id, pending_buckets, sync_marker, max_entries));
+  stacks.push_back(pending_stack);
+
+  const int run_ret = crs.run(stacks);
+  http_manager.stop();
+  return run_ret;
+}
+
+/// Build the coroutine that reads this bucket shard's sync status into `*sync_status`.
+RGWCoroutine *RGWRemoteBucketLog::read_sync_status_cr(rgw_bucket_shard_sync_info *sync_status)
+{
+  RGWCoroutine *read_cr =
+      new RGWReadBucketSyncStatusCoroutine(&sync_env, bs, sync_status);
+  return read_cr;
+}
+
+RGWBucketSyncStatusManager::~RGWBucketSyncStatusManager() {
+ for (map<int, RGWRemoteBucketLog *>::iterator iter = source_logs.begin(); iter != source_logs.end(); ++iter) {
+ delete iter->second;
+ }
+ delete error_logger;
+}
+
+
+/// Fill `id` and `display_name` from the "ID" and "DisplayName" fields of `obj`.
+void rgw_bucket_entry_owner::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("ID", this->id, obj);
+  JSONDecoder::decode_json("DisplayName", this->display_name, obj);
+}
+
+/**
+ * One entry of a remote bucket listing, as decoded from JSON.
+ */
+struct bucket_list_entry {
+  bool delete_marker;
+  rgw_obj_key key;
+  bool is_latest;
+  real_time mtime;
+  string etag;
+  uint64_t size;
+  string storage_class;
+  rgw_bucket_entry_owner owner;
+  uint64_t versioned_epoch;
+  string rgw_tag;
+
+  bucket_list_entry()
+    : delete_marker(false),
+      is_latest(false),
+      size(0),
+      versioned_epoch(0)
+  {}
+
+  /**
+   * Decode all fields from `obj`.
+   *
+   * `mtime` is only assigned when the "RgwxMtime" string parses as
+   * ISO 8601. A "VersionId" of "null" combined with a zero
+   * "VersionedEpoch" is normalized to an empty instance.
+   */
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("IsDeleteMarker", delete_marker, obj);
+    JSONDecoder::decode_json("Key", key.name, obj);
+    JSONDecoder::decode_json("VersionId", key.instance, obj);
+    JSONDecoder::decode_json("IsLatest", is_latest, obj);
+
+    string mtime_str;
+    JSONDecoder::decode_json("RgwxMtime", mtime_str, obj);
+
+    struct tm parsed_tm;
+    uint32_t nanos;
+    const bool mtime_ok = parse_iso8601(mtime_str.c_str(), &parsed_tm, &nanos);
+    if (mtime_ok) {
+      ceph_timespec ts;
+      ts.tv_sec = static_cast<uint64_t>(internal_timegm(&parsed_tm));
+      ts.tv_nsec = nanos;
+      mtime = real_clock::from_ceph_timespec(ts);
+    }
+
+    JSONDecoder::decode_json("ETag", etag, obj);
+    JSONDecoder::decode_json("Size", size, obj);
+    JSONDecoder::decode_json("StorageClass", storage_class, obj);
+    JSONDecoder::decode_json("Owner", owner, obj);
+    JSONDecoder::decode_json("VersionedEpoch", versioned_epoch, obj);
+    JSONDecoder::decode_json("RgwxTag", rgw_tag, obj);
+
+    if (versioned_epoch == 0 && key.instance == "null") {
+      key.instance.clear();
+    }
+  }
+
+  /**
+   * Map this entry to the modify op used when syncing it: delete markers
+   * become LINK_OLH_DM, entries with a non-empty, non-"null" instance
+   * become LINK_OLH, everything else is a plain ADD.
+   */
+  RGWModifyOp get_modify_op() const {
+    if (delete_marker) {
+      return CLS_RGW_OP_LINK_OLH_DM;
+    }
+    const bool has_real_instance = !(key.instance.empty() || key.instance == "null");
+    return has_real_instance ? CLS_RGW_OP_LINK_OLH : CLS_RGW_OP_ADD;
+  }
+};
+
+/**
+ * A decoded page of a remote bucket listing: the echoed request
+ * parameters, the truncation flag and the listed entries.
+ */
+struct bucket_list_result {
+  string name;
+  string prefix;
+  string key_marker;
+  string version_id_marker;
+  int max_keys;
+  bool is_truncated;
+  list<bucket_list_entry> entries;
+
+  bucket_list_result()
+    : max_keys(0),
+      is_truncated(false)
+  {}
+
+  /// Decode every field from the correspondingly named JSON member of `obj`.
+  void decode_json(JSONObj *obj) {
+    JSONDecoder::decode_json("Name", this->name, obj);
+    JSONDecoder::decode_json("Prefix", this->prefix, obj);
+    JSONDecoder::decode_json("KeyMarker", this->key_marker, obj);
+    JSONDecoder::decode_json("VersionIdMarker", this->version_id_marker, obj);
+    JSONDecoder::decode_json("MaxKeys", this->max_keys, obj);
+    JSONDecoder::decode_json("IsTruncated", this->is_truncated, obj);
+    JSONDecoder::decode_json("Entries", this->entries, obj);
+  }
+};
+
+class RGWListBucketShardCR: public RGWCoroutine { // Fetches one page of a remote bucket's versioned listing over REST and decodes it into *result.
+ RGWDataSyncEnv *sync_env;
+ const rgw_bucket_shard& bs; // held by reference: the caller's rgw_bucket_shard must outlive this coroutine
+ const string instance_key; // bs.get_key(), sent as "rgwx-bucket-instance"
+ rgw_obj_key marker_position; // copy of the caller's marker; name/instance are sent as key-marker/version-id-marker
+
+ bucket_list_result *result; // output destination supplied by the caller
+
+public:
+ RGWListBucketShardCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
+ rgw_obj_key& _marker_position, bucket_list_result *_result)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs),
+ instance_key(bs.get_key()), marker_position(_marker_position),
+ result(_result) {}
+
+ int operate() override { // Issues the REST read; any negative retcode is reported via set_cr_error().
+ reenter(this) {
+ yield {
+ rgw_http_param_pair pairs[] = { { "rgwx-bucket-instance", instance_key.c_str() },
+ { "versions" , NULL },
+ { "format" , "json" },
+ { "objs-container" , "true" },
+ { "key-marker" , marker_position.name.c_str() },
+ { "version-id-marker" , marker_position.instance.c_str() },
+ { NULL, NULL } }; // NULL/NULL pair terminates the parameter list
+ // don't include tenant in the url, it's already part of instance_key
+ string p = string("/") + bs.bucket.name;
+ call(new RGWReadRESTResourceCR<bucket_list_result>(sync_env->cct, sync_env->conn, sync_env->http_manager, p, pairs, result));
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+class RGWListBucketIndexLogCR: public RGWCoroutine { // Fetches bucket index log entries for one bucket shard from the remote "/admin/log" resource into *result.
+ RGWDataSyncEnv *sync_env;
+ const string instance_key; // bs.get_key(), sent as "bucket-instance"
+ string marker; // copy of the caller's marker, sent as "marker"
+
+ list<rgw_bi_log_entry> *result; // output destination supplied by the caller
+ std::optional<PerfGuard> timer; // only engaged when sync_env->counters is set
+
+public:
+ RGWListBucketIndexLogCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
+ string& _marker, list<rgw_bi_log_entry> *_result)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ instance_key(bs.get_key()), marker(_marker), result(_result) {}
+
+ int operate() override { // Issues the REST read; on failure bumps l_poll_err (if counters exist) and reports the error.
+ reenter(this) {
+ if (sync_env->counters) {
+ timer.emplace(sync_env->counters, sync_counters::l_poll); // created before the request is issued
+ }
+ yield {
+ rgw_http_param_pair pairs[] = { { "bucket-instance", instance_key.c_str() },
+ { "format" , "json" },
+ { "marker" , marker.c_str() },
+ { "type", "bucket-index" },
+ { NULL, NULL } }; // NULL/NULL pair terminates the parameter list
+
+ call(new RGWReadRESTResourceCR<list<rgw_bi_log_entry> >(sync_env->cct, sync_env->conn, sync_env->http_manager, "/admin/log", pairs, result));
+ }
+ timer.reset(); // destroys the PerfGuard once the request has completed
+ if (retcode < 0) {
+ if (sync_env->counters) {
+ sync_env->counters->inc(sync_counters::l_poll_err);
+ }
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// Window size handed to the RGWSyncShardMarkerTrack base by both bucket
+// shard marker trackers.
+#define BUCKET_SYNC_UPDATE_MARKER_WINDOW 10
+
+/**
+ * Marker tracker for bucket shard full sync. Persisting a marker records
+ * the object key reached and the running entry count in the
+ * "full_marker" attribute of the status object `marker_oid`.
+ */
+class RGWBucketFullSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<rgw_obj_key, rgw_obj_key> {
+  RGWDataSyncEnv *sync_env;
+
+  string marker_oid;
+  rgw_bucket_shard_full_sync_marker sync_marker;
+
+  RGWSyncTraceNodeRef tn;
+
+public:
+  RGWBucketFullSyncShardMarkerTrack(RGWDataSyncEnv *_sync_env,
+                                    const string& _marker_oid,
+                                    const rgw_bucket_shard_full_sync_marker& _marker)
+    : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW),
+      sync_env(_sync_env),
+      marker_oid(_marker_oid),
+      sync_marker(_marker)
+  {}
+
+  /// Set the trace node used for logging; must be called before store_marker().
+  void set_tn(RGWSyncTraceNodeRef& _tn) {
+    tn = _tn;
+  }
+
+  /**
+   * Update the in-memory marker (position = new_marker, count = index_pos)
+   * and return a coroutine that writes it as an attribute of marker_oid.
+   * `timestamp` is not used here.
+   */
+  RGWCoroutine *store_marker(const rgw_obj_key& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+    sync_marker.position = new_marker;
+    sync_marker.count = index_pos;
+
+    map<string, bufferlist> attrs;
+    sync_marker.encode_attr(attrs);
+
+    tn->log(20, SSTR("updating marker marker_oid=" << marker_oid << " marker=" << new_marker));
+
+    RGWRados *const store = sync_env->store;
+    const rgw_raw_obj marker_obj(store->svc.zone->get_zone_params().log_pool, marker_oid);
+    return new RGWSimpleRadosWriteAttrsCR(sync_env->async_rados, store->svc.sysobj,
+                                          marker_obj, attrs);
+  }
+
+  RGWOrderCallCR *allocate_order_control_cr() override {
+    return new RGWLastCallerWinsCR(sync_env->cct);
+  }
+};
+
+/**
+ * Marker tracker for bucket shard incremental sync. Besides persisting
+ * the bilog position in the "inc_marker" attribute of `marker_oid`, it
+ * keeps indexes of in-flight operations so that conflicting operations
+ * on the same key (or olh operations on the same object name) are not
+ * run concurrently.
+ */
+class RGWBucketIncSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<string, rgw_obj_key> {
+  RGWDataSyncEnv *sync_env;
+
+  string marker_oid;
+  rgw_bucket_shard_inc_sync_marker sync_marker;
+
+  map<rgw_obj_key, string> key_to_marker;
+
+  struct operation {
+    rgw_obj_key key;
+    bool is_olh;
+  };
+  map<string, operation> marker_to_op;
+  std::set<std::string> pending_olh; // object names with pending olh operations
+
+  RGWSyncTraceNodeRef tn;
+
+  /// Drop all bookkeeping for the operation registered under `marker`, if any.
+  void handle_finish(const string& marker) override {
+    const auto found = marker_to_op.find(marker);
+    if (found == marker_to_op.end()) {
+      return;
+    }
+    const operation& finished = found->second;
+    key_to_marker.erase(finished.key);
+    reset_need_retry(finished.key);
+    if (finished.is_olh) {
+      pending_olh.erase(finished.key.name);
+    }
+    // erase last: `finished` refers into this map node
+    marker_to_op.erase(found);
+  }
+
+public:
+  RGWBucketIncSyncShardMarkerTrack(RGWDataSyncEnv *_sync_env,
+                                   const string& _marker_oid,
+                                   const rgw_bucket_shard_inc_sync_marker& _marker)
+    : RGWSyncShardMarkerTrack(BUCKET_SYNC_UPDATE_MARKER_WINDOW),
+      sync_env(_sync_env),
+      marker_oid(_marker_oid),
+      sync_marker(_marker)
+  {}
+
+  /// Set the trace node used for logging; must be called before store_marker()/can_do_op().
+  void set_tn(RGWSyncTraceNodeRef& _tn) {
+    tn = _tn;
+  }
+
+  /**
+   * Update the in-memory marker position and return a coroutine that
+   * writes it as an attribute of marker_oid. `index_pos` and `timestamp`
+   * are not used here.
+   */
+  RGWCoroutine *store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+    sync_marker.position = new_marker;
+
+    map<string, bufferlist> attrs;
+    sync_marker.encode_attr(attrs);
+
+    tn->log(20, SSTR("updating marker marker_oid=" << marker_oid << " marker=" << new_marker));
+
+    RGWRados *const store = sync_env->store;
+    const rgw_raw_obj marker_obj(store->svc.zone->get_zone_params().log_pool, marker_oid);
+    return new RGWSimpleRadosWriteAttrsCR(sync_env->async_rados,
+                                          store->svc.sysobj,
+                                          marker_obj,
+                                          attrs);
+  }
+
+  /*
+   * Create an index from key -> marker, and from marker -> <key, is_olh>.
+   * This is useful so that we can ensure that we only have one
+   * entry for any key that is used. This is needed when doing
+   * incremental sync of data, and we don't want to run multiple
+   * concurrent sync operations for the same bucket shard.
+   * Also, we should make sure that we don't run concurrent operations on
+   * the same key with different ops.
+   *
+   * Returns false (and flags the key for retry) when the key already has
+   * an entry; returns true when the new entry was registered.
+   */
+  bool index_key_to_marker(const rgw_obj_key& key, const string& marker, bool is_olh) {
+    const bool inserted = key_to_marker.emplace(key, marker).second;
+    if (!inserted) { // exists
+      set_need_retry(key);
+      return false;
+    }
+    marker_to_op[marker] = operation{key, is_olh};
+    if (is_olh) {
+      // prevent other olh ops from starting on this object name
+      pending_olh.insert(key.name);
+    }
+    return true;
+  }
+
+  /// True when no in-flight operation conflicts with starting an op on `key`.
+  bool can_do_op(const rgw_obj_key& key, bool is_olh) {
+    // serialize olh ops on the same object name
+    if (is_olh && pending_olh.count(key.name)) {
+      tn->log(20, SSTR("sync of " << key << " waiting for pending olh op"));
+      return false;
+    }
+    return key_to_marker.count(key) == 0;
+  }
+
+  RGWOrderCallCR *allocate_order_control_cr() override {
+    return new RGWLastCallerWinsCR(sync_env->cct);
+  }
+};
+
+/**
+ * Coroutine that applies a single bucket entry to the local zone through
+ * the data sync module: syncs the object, removes it, or creates a delete
+ * marker, depending on `op`. On success (or when the entry is skipped)
+ * the entry's marker is reported as finished to the marker tracker; on
+ * failure the error is written to the sync error log and returned.
+ *
+ * T is the marker type of the entry, K the key type of the tracker.
+ */
+template <class T, class K>
+class RGWBucketSyncSingleEntryCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+
+  RGWBucketInfo *bucket_info;
+  const rgw_bucket_shard& bs;
+
+  rgw_obj_key key;
+  bool versioned;
+  std::optional<uint64_t> versioned_epoch;
+  rgw_bucket_entry_owner owner;
+  real_time timestamp;
+  RGWModifyOp op;
+  RGWPendingState op_state;
+
+  T entry_marker;
+  RGWSyncShardMarkerTrack<T, K> *marker_tracker;
+
+  int sync_status;
+
+  stringstream error_ss;
+
+  bool error_injection;
+
+  RGWDataSyncModule *data_sync_module;
+
+  rgw_zone_set zones_trace;
+
+  RGWSyncTraceNodeRef tn;
+public:
+  RGWBucketSyncSingleEntryCR(RGWDataSyncEnv *_sync_env,
+                             RGWBucketInfo *_bucket_info,
+                             const rgw_bucket_shard& bs,
+                             const rgw_obj_key& _key, bool _versioned,
+                             std::optional<uint64_t> _versioned_epoch,
+                             real_time& _timestamp,
+                             const rgw_bucket_entry_owner& _owner,
+                             RGWModifyOp _op, RGWPendingState _op_state,
+                             const T& _entry_marker, RGWSyncShardMarkerTrack<T, K> *_marker_tracker, rgw_zone_set& _zones_trace,
+                             RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct),
+                                                      sync_env(_sync_env),
+                                                      bucket_info(_bucket_info), bs(bs),
+                                                      key(_key), versioned(_versioned), versioned_epoch(_versioned_epoch),
+                                                      owner(_owner),
+                                                      timestamp(_timestamp), op(_op),
+                                                      op_state(_op_state),
+                                                      entry_marker(_entry_marker),
+                                                      marker_tracker(_marker_tracker),
+                                                      sync_status(0){
+    stringstream ss;
+    ss << bucket_shard_str{bs} << "/" << key << "[" << versioned_epoch.value_or(0) << "]";
+    set_description() << "bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state;
+    set_status("init");
+
+    tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", SSTR(key));
+
+    tn->log(20, SSTR("bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state));
+    error_injection = (sync_env->cct->_conf->rgw_sync_data_inject_err_probability > 0);
+
+    data_sync_module = sync_env->sync_module->get_data_handler();
+
+    // extend the caller's trace with the local zone id
+    zones_trace = _zones_trace;
+    zones_trace.insert(sync_env->store->svc.zone->get_zone().id);
+  }
+
+  /**
+   * Run the sync of this entry.
+   *
+   * Entries whose op_state is not CLS_RGW_STATE_COMPLETE, or whose object
+   * name is empty, are skipped (the marker is still finished). The
+   * operation is repeated while the marker tracker flags the key for
+   * retry. -ENOENT from the operation is not treated as a failure, and
+   * -ERR_PRECONDITION_FAILED from a removal is treated as success.
+   */
+  int operate() override {
+    reenter(this) {
+      /* skip entries that are not complete */
+      if (op_state != CLS_RGW_STATE_COMPLETE) {
+        goto done;
+      }
+      tn->set_flag(RGW_SNS_FLAG_ACTIVE);
+      do {
+        yield {
+          marker_tracker->reset_need_retry(key);
+          if (key.name.empty()) {
+            /* shouldn't happen */
+            set_status("skipping empty entry");
+            tn->log(0, "entry with empty obj name, skipping");
+            goto done;
+          }
+          if (error_injection &&
+              rand() % 10000 < cct->_conf->rgw_sync_data_inject_err_probability * 10000.0) {
+            tn->log(0, SSTR(": injecting data sync error on key=" << key.name));
+            retcode = -EIO;
+          } else if (op == CLS_RGW_OP_ADD ||
+                     op == CLS_RGW_OP_LINK_OLH) {
+            set_status("syncing obj");
+            tn->log(5, SSTR("bucket sync: sync obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]"));
+            call(data_sync_module->sync_object(sync_env, *bucket_info, key, versioned_epoch, &zones_trace));
+          } else if (op == CLS_RGW_OP_DEL || op == CLS_RGW_OP_UNLINK_INSTANCE) {
+            set_status("removing obj");
+            if (op == CLS_RGW_OP_UNLINK_INSTANCE) {
+              versioned = true;
+            }
+            tn->log(10, SSTR("removing obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]"));
+            call(data_sync_module->remove_object(sync_env, *bucket_info, key, timestamp, versioned, versioned_epoch.value_or(0), &zones_trace));
+          } else if (op == CLS_RGW_OP_LINK_OLH_DM) {
+            set_status("creating delete marker");
+            tn->log(10, SSTR("creating delete marker: obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch.value_or(0) << "]"));
+            call(data_sync_module->create_delete_marker(sync_env, *bucket_info, key, timestamp, owner, versioned, versioned_epoch.value_or(0), &zones_trace));
+          }
+          tn->set_resource_name(SSTR(bucket_str_noinstance(bucket_info->bucket) << "/" << key));
+        }
+        // call() inside the yield block only schedules the child coroutine;
+        // its result is available in retcode once we are resumed here, so
+        // this is the earliest point where the removal's outcome can be
+        // inspected.
+        if ((op == CLS_RGW_OP_DEL || op == CLS_RGW_OP_UNLINK_INSTANCE) &&
+            retcode == -ERR_PRECONDITION_FAILED) {
+          // our copy of the object is more recent, continue as if it succeeded
+          retcode = 0;
+        }
+      } while (marker_tracker->need_retry(key));
+      {
+        tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+        if (retcode >= 0) {
+          tn->log(10, "success");
+        } else {
+          tn->log(10, SSTR("failed, retcode=" << retcode << " (" << cpp_strerror(-retcode) << ")"));
+        }
+      }
+
+      if (retcode < 0 && retcode != -ENOENT) {
+        set_status() << "failed to sync obj; retcode=" << retcode;
+        tn->log(0, SSTR("ERROR: failed to sync object: "
+            << bucket_shard_str{bs} << "/" << key.name));
+        error_ss << bucket_shard_str{bs} << "/" << key.name;
+        sync_status = retcode;
+      }
+      if (!error_ss.str().empty()) {
+        // error_ss is only filled on the failure path above, where
+        // sync_status == retcode
+        yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data", error_ss.str(), -retcode, string("failed to sync object: ") + cpp_strerror(-sync_status)));
+      }
+done:
+      if (sync_status == 0) {
+        /* update marker */
+        set_status() << "calling marker_tracker->finish(" << entry_marker << ")";
+        yield call(marker_tracker->finish(entry_marker));
+        sync_status = retcode;
+      }
+      if (sync_status < 0) {
+        return set_cr_error(sync_status);
+      }
+      return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+// Upper bound on the number of child sync coroutines left in flight before
+// the spawning loop waits for one of them to finish.
+#define BUCKET_SYNC_SPAWN_WINDOW 20
+
+/**
+ * Coroutine that performs the full sync of one bucket shard: it lists the
+ * remote bucket and spawns a single-entry sync coroutine per listed
+ * object (see operate()).
+ */
+class RGWBucketShardFullSyncCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  const rgw_bucket_shard& bs;     // held by reference
+  RGWBucketInfo *bucket_info;
+  boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+  bucket_list_result list_result;
+  list<bucket_list_entry>::iterator entries_iter;
+  rgw_bucket_shard_sync_info& sync_info;
+  RGWBucketFullSyncShardMarkerTrack marker_tracker;
+  rgw_obj_key list_marker;
+  bucket_list_entry *entry{nullptr};
+
+  int total_entries{0};
+
+  int sync_status{0};
+
+  const string& status_oid;       // held by reference
+
+  rgw_zone_set zones_trace;
+
+  RGWSyncTraceNodeRef tn;
+public:
+  RGWBucketShardFullSyncCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
+                           RGWBucketInfo *_bucket_info,
+                           const std::string& status_oid,
+                           RGWContinuousLeaseCR *lease_cr,
+                           rgw_bucket_shard_sync_info& sync_info,
+                           RGWSyncTraceNodeRef tn_parent)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      bs(bs),
+      bucket_info(_bucket_info),
+      lease_cr(lease_cr),
+      sync_info(sync_info),
+      marker_tracker(sync_env, status_oid, sync_info.full_marker),
+      status_oid(status_oid),
+      tn(sync_env->sync_tracer->add_node(tn_parent, "full_sync",
+                                         SSTR(bucket_shard_str{bs})))
+  {
+    zones_trace.insert(sync_env->source_zone);
+    marker_tracker.set_tn(tn);
+  }
+
+  int operate() override;
+};
+
+int RGWBucketShardFullSyncCR::operate() // Lists the remote bucket page by page, spawns one sync coroutine per entry, then switches the shard to incremental state.
+{
+ int ret; // plain local: only used as scratch for collect() between yields
+ reenter(this) {
+ list_marker = sync_info.full_marker.position; // resume listing from the stored full-sync position
+
+ total_entries = sync_info.full_marker.count; // and continue the stored entry count
+ do {
+ if (!lease_cr->is_locked()) { // lost the lease: wait for children, then abort
+ drain_all();
+ return set_cr_error(-ECANCELED);
+ }
+ set_status("listing remote bucket");
+ tn->log(20, "listing bucket for full sync");
+ yield call(new RGWListBucketShardCR(sync_env, bs, list_marker,
+ &list_result));
+ if (retcode < 0 && retcode != -ENOENT) { // -ENOENT from the listing is not treated as a failure here
+ set_status("failed bucket listing, going down");
+ drain_all();
+ return set_cr_error(retcode);
+ }
+ if (list_result.entries.size() > 0) {
+ tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+ }
+ entries_iter = list_result.entries.begin();
+ for (; entries_iter != list_result.entries.end(); ++entries_iter) {
+ if (!lease_cr->is_locked()) {
+ drain_all();
+ return set_cr_error(-ECANCELED);
+ }
+ tn->log(20, SSTR("[full sync] syncing object: "
+ << bucket_shard_str{bs} << "/" << entries_iter->key));
+ entry = &(*entries_iter);
+ total_entries++;
+ list_marker = entries_iter->key; // the next listing request continues from this key
+ if (!marker_tracker.start(entry->key, total_entries, real_time())) {
+ tn->log(0, SSTR("ERROR: cannot start syncing " << entry->key << ". Duplicate entry?"));
+ } else {
+ using SyncCR = RGWBucketSyncSingleEntryCR<rgw_obj_key, rgw_obj_key>;
+ yield spawn(new SyncCR(sync_env, bucket_info, bs, entry->key,
+ false, /* versioned, only matters for object removal */
+ entry->versioned_epoch, entry->mtime,
+ entry->owner, entry->get_modify_op(), CLS_RGW_STATE_COMPLETE,
+ entry->key, &marker_tracker, zones_trace, tn),
+ false);
+ }
+ while (num_spawned() > BUCKET_SYNC_SPAWN_WINDOW) { // throttle: keep at most BUCKET_SYNC_SPAWN_WINDOW children outstanding
+ yield wait_for_child();
+ bool again = true;
+ while (again) { // reap every child that has finished
+ again = collect(&ret, nullptr);
+ if (ret < 0) {
+ tn->log(10, "a sync operation returned error");
+ sync_status = ret; // remembered; ends the outer listing loop after this page
+ /* we have reported this error */
+ }
+ }
+ }
+ }
+ } while (list_result.is_truncated && sync_status == 0); // fetch further pages only while truncated and no child has failed
+ set_status("done iterating over all objects");
+ /* wait for all operations to complete */
+ while (num_spawned()) {
+ yield wait_for_child();
+ bool again = true;
+ while (again) {
+ again = collect(&ret, nullptr);
+ if (ret < 0) {
+ tn->log(10, "a sync operation returned error");
+ sync_status = ret;
+ /* we have reported this error */
+ }
+ }
+ }
+ tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+ if (!lease_cr->is_locked()) {
+ return set_cr_error(-ECANCELED);
+ }
+ /* update sync state to incremental */
+ if (sync_status == 0) {
+ yield {
+ sync_info.state = rgw_bucket_shard_sync_info::StateIncrementalSync;
+ map<string, bufferlist> attrs;
+ sync_info.encode_state_attr(attrs); // only the state attribute is rewritten
+ RGWRados *store = sync_env->store;
+ call(new RGWSimpleRadosWriteAttrsCR(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, status_oid),
+ attrs));
+ }
+ } else {
+ tn->log(10, SSTR("backing out with sync_status=" << sync_status));
+ }
+ if (retcode < 0 && sync_status == 0) { /* actually tried to set incremental state and failed */
+ tn->log(0, SSTR("ERROR: failed to set sync state on bucket "
+ << bucket_shard_str{bs} << " retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+ if (sync_status < 0) {
+ return set_cr_error(sync_status);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/// True for the ops treated as carrying an olh epoch: LINK_OLH and UNLINK_INSTANCE.
+static bool has_olh_epoch(RGWModifyOp op) {
+  switch (op) {
+    case CLS_RGW_OP_LINK_OLH:
+    case CLS_RGW_OP_UNLINK_INSTANCE:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Coroutine that performs the incremental sync of one bucket shard by
+ * reading the remote bucket index log from the stored position
+ * (see operate()).
+ */
+class RGWBucketShardIncrementalSyncCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  const rgw_bucket_shard& bs;       // held by reference
+  RGWBucketInfo *bucket_info;
+  boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+  list<rgw_bi_log_entry> list_result;
+  list<rgw_bi_log_entry>::iterator entries_iter, entries_end;
+  // (object, instance) -> (timestamp, op) of the entry selected for that key
+  map<pair<string, string>, pair<real_time, RGWModifyOp> > squash_map;
+  rgw_bucket_shard_sync_info& sync_info;
+  rgw_obj_key key;
+  rgw_bi_log_entry *entry{nullptr};
+  RGWBucketIncSyncShardMarkerTrack marker_tracker;
+  bool updated_status{false};
+  const string& status_oid;         // held by reference
+  const string& zone_id;            // refers to the local zone's id
+
+  string cur_id;
+
+  int sync_status{0};
+  bool syncstopped{false};
+
+  RGWSyncTraceNodeRef tn;
+public:
+  RGWBucketShardIncrementalSyncCR(RGWDataSyncEnv *_sync_env,
+                                  const rgw_bucket_shard& bs,
+                                  RGWBucketInfo *_bucket_info,
+                                  const std::string& status_oid,
+                                  RGWContinuousLeaseCR *lease_cr,
+                                  rgw_bucket_shard_sync_info& sync_info,
+                                  RGWSyncTraceNodeRef& _tn_parent)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      bs(bs),
+      bucket_info(_bucket_info),
+      lease_cr(lease_cr),
+      sync_info(sync_info),
+      marker_tracker(sync_env, status_oid, sync_info.inc_marker),
+      status_oid(status_oid),
+      zone_id(_sync_env->store->svc.zone->get_zone().id),
+      tn(sync_env->sync_tracer->add_node(_tn_parent, "inc_sync",
+                                         SSTR(bucket_shard_str{bs})))
+  {
+    set_description() << "bucket shard incremental sync bucket="
+        << bucket_shard_str{bs};
+    set_status("init");
+    marker_tracker.set_tn(tn);
+  }
+
+  int operate() override;
+};
+
+int RGWBucketShardIncrementalSyncCR::operate()
+{
+ int ret;
+ reenter(this) {
+ do {
+ if (!lease_cr->is_locked()) {
+ drain_all();
+ tn->log(0, "ERROR: lease is not taken, abort");
+ return set_cr_error(-ECANCELED);
+ }
+ tn->log(20, SSTR("listing bilog for incremental sync" << sync_info.inc_marker.position));
+ set_status() << "listing bilog; position=" << sync_info.inc_marker.position;
+ yield call(new RGWListBucketIndexLogCR(sync_env, bs, sync_info.inc_marker.position,
+ &list_result));
+ if (retcode < 0 && retcode != -ENOENT) {
+ /* wait for all operations to complete */
+ drain_all();
+ return set_cr_error(retcode);
+ }
+ squash_map.clear();
+ entries_iter = list_result.begin();
+ entries_end = list_result.end();
+ for (; entries_iter != entries_end; ++entries_iter) {
+ auto e = *entries_iter;
+ if (e.op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP) {
+ ldout(sync_env->cct, 20) << "syncstop on " << e.timestamp << dendl;
+ syncstopped = true;
+ entries_end = entries_iter; // dont sync past here
+ break;
+ }
+ if (e.op == RGWModifyOp::CLS_RGW_OP_RESYNC) {
+ continue;
+ }
+ if (e.op == CLS_RGW_OP_CANCEL) {
+ continue;
+ }
+ if (e.state != CLS_RGW_STATE_COMPLETE) {
+ continue;
+ }
+ if (e.zones_trace.find(zone_id) != e.zones_trace.end()) {
+ continue;
+ }
+ auto& squash_entry = squash_map[make_pair(e.object, e.instance)];
+ // don't squash over olh entries - we need to apply their olh_epoch
+ if (has_olh_epoch(squash_entry.second) && !has_olh_epoch(e.op)) {
+ continue;
+ }
+ if (squash_entry.first <= e.timestamp) {
+ squash_entry = make_pair<>(e.timestamp, e.op);
+ }
+ }
+
+ entries_iter = list_result.begin();
+ for (; entries_iter != entries_end; ++entries_iter) {
+ if (!lease_cr->is_locked()) {
+ drain_all();
+ return set_cr_error(-ECANCELED);
+ }
+ entry = &(*entries_iter);
+ {
+ ssize_t p = entry->id.find('#'); /* entries might have explicit shard info in them, e.g., 6#00000000004.94.3 */
+ if (p < 0) {
+ cur_id = entry->id;
+ } else {
+ cur_id = entry->id.substr(p + 1);
+ }
+ }
+ sync_info.inc_marker.position = cur_id;
+
+ if (entry->op == RGWModifyOp::CLS_RGW_OP_SYNCSTOP || entry->op == RGWModifyOp::CLS_RGW_OP_RESYNC) {
+ ldout(sync_env->cct, 20) << "detected syncstop or resync on " << entries_iter->timestamp << ", skipping entry" << dendl;
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+
+ if (!key.set(rgw_obj_index_key{entry->object, entry->instance})) {
+ set_status() << "parse_raw_oid() on " << entry->object << " returned false, skipping entry";
+ tn->log(20, SSTR("parse_raw_oid() on " << entry->object << " returned false, skipping entry"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+
+ tn->log(20, SSTR("parsed entry: id=" << cur_id << " iter->object=" << entry->object << " iter->instance=" << entry->instance << " name=" << key.name << " instance=" << key.instance << " ns=" << key.ns));
+
+ if (!key.ns.empty()) {
+ set_status() << "skipping entry in namespace: " << entry->object;
+ tn->log(20, SSTR("skipping entry in namespace: " << entry->object));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+
+ set_status() << "got entry.id=" << cur_id << " key=" << key << " op=" << (int)entry->op;
+ if (entry->op == CLS_RGW_OP_CANCEL) {
+ set_status() << "canceled operation, skipping";
+ tn->log(20, SSTR("skipping object: "
+ << bucket_shard_str{bs} << "/" << key << ": canceled operation"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+ if (entry->state != CLS_RGW_STATE_COMPLETE) {
+ set_status() << "non-complete operation, skipping";
+ tn->log(20, SSTR("skipping object: "
+ << bucket_shard_str{bs} << "/" << key << ": non-complete operation"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+ if (entry->zones_trace.find(zone_id) != entry->zones_trace.end()) {
+ set_status() << "redundant operation, skipping";
+ tn->log(20, SSTR("skipping object: "
+ <<bucket_shard_str{bs} <<"/"<<key<<": redundant operation"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+ if (make_pair<>(entry->timestamp, entry->op) != squash_map[make_pair(entry->object, entry->instance)]) {
+ set_status() << "squashed operation, skipping";
+ tn->log(20, SSTR("skipping object: "
+ << bucket_shard_str{bs} << "/" << key << ": squashed operation"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+ tn->set_flag(RGW_SNS_FLAG_ACTIVE);
+ tn->log(20, SSTR("syncing object: "
+ << bucket_shard_str{bs} << "/" << key));
+ updated_status = false;
+ while (!marker_tracker.can_do_op(key, has_olh_epoch(entry->op))) {
+ if (!updated_status) {
+ set_status() << "can't do op, conflicting inflight operation";
+ updated_status = true;
+ }
+ tn->log(5, SSTR("can't do op on key=" << key << " need to wait for conflicting operation to complete"));
+ yield wait_for_child();
+ bool again = true;
+ while (again) {
+ again = collect(&ret, nullptr);
+ if (ret < 0) {
+ tn->log(0, SSTR("ERROR: a child operation returned error (ret=" << ret << ")"));
+ sync_status = ret;
+ /* we have reported this error */
+ }
+ }
+ if (sync_status != 0)
+ break;
+ }
+ if (sync_status != 0) {
+ /* get error, stop */
+ break;
+ }
+ if (!marker_tracker.index_key_to_marker(key, cur_id, has_olh_epoch(entry->op))) {
+ set_status() << "can't do op, sync already in progress for object";
+ tn->log(20, SSTR("skipping sync of entry: " << cur_id << ":" << key << " sync already in progress for object"));
+ marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
+ continue;
+ }
+ // yield {
+ set_status() << "start object sync";
+ if (!marker_tracker.start(cur_id, 0, entry->timestamp)) {
+ tn->log(0, SSTR("ERROR: cannot start syncing " << cur_id << ". Duplicate entry?"));
+ } else {
+ std::optional<uint64_t> versioned_epoch;
+ rgw_bucket_entry_owner owner(entry->owner, entry->owner_display_name);
+ if (entry->ver.pool < 0) {
+ versioned_epoch = entry->ver.epoch;
+ }
+ tn->log(20, SSTR("entry->timestamp=" << entry->timestamp));
+ using SyncCR = RGWBucketSyncSingleEntryCR<string, rgw_obj_key>;
+ spawn(new SyncCR(sync_env, bucket_info, bs, key,
+ entry->is_versioned(), versioned_epoch,
+ entry->timestamp, owner, entry->op, entry->state,
+ cur_id, &marker_tracker, entry->zones_trace, tn),
+ false);
+ }
+ // }
+ while (num_spawned() > BUCKET_SYNC_SPAWN_WINDOW) {
+ set_status() << "num_spawned() > spawn_window";
+ yield wait_for_child();
+ bool again = true;
+ while (again) {
+ again = collect(&ret, nullptr);
+ if (ret < 0) {
+ tn->log(10, "a sync operation returned error");
+ sync_status = ret;
+ /* we have reported this error */
+ }
+ /* not waiting for child here */
+ }
+ }
+ }
+ } while (!list_result.empty() && sync_status == 0 && !syncstopped);
+
+ while (num_spawned()) {
+ yield wait_for_child();
+ bool again = true;
+ while (again) {
+ again = collect(&ret, nullptr);
+ if (ret < 0) {
+ tn->log(10, "a sync operation returned error");
+ sync_status = ret;
+ /* we have reported this error */
+ }
+ /* not waiting for child here */
+ }
+ }
+ tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+
+ if (syncstopped) {
+ // transition back to StateInit in RGWRunBucketSyncCoroutine. if sync is
+ // still disabled, we'll delete the sync status object. otherwise we'll
+ // restart full sync to catch any changes that happened while sync was
+ // disabled
+ sync_info.state = rgw_bucket_shard_sync_info::StateInit;
+ return set_cr_done();
+ }
+
+ yield call(marker_tracker.flush());
+ if (retcode < 0) {
+ tn->log(0, SSTR("ERROR: marker_tracker.flush() returned retcode=" << retcode));
+ return set_cr_error(retcode);
+ }
+ if (sync_status < 0) {
+ tn->log(10, SSTR("backing out with sync_status=" << sync_status));
+ return set_cr_error(sync_status);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+ // Drives sync of one bucket shard: takes the "sync_lock" lease on the status
+ // object, reads the shard's sync status and the local bucket instance info
+ // (fetching the bucket instance metadata first if it is missing locally), then
+ // runs the init / full-sync / incremental-sync states until incremental sync
+ // returns normally.
+ //
+ // Every exit path releases or aborts the lease and drains spawned children
+ // before reporting through set_cr_error()/set_cr_done().
+ int RGWRunBucketSyncCoroutine::operate()
+ {
+   reenter(this) {
+     yield {
+       set_status("acquiring sync lock");
+       auto store = sync_env->store;
+       lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
+                                               rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, status_oid),
+                                               "sync_lock",
+                                               cct->_conf->rgw_sync_lease_period,
+                                               this));
+       lease_stack.reset(spawn(lease_cr.get(), false));
+     }
+     // sleep until the lease coroutine either holds the lock or has given up
+     while (!lease_cr->is_locked()) {
+       if (lease_cr->is_done()) {
+         tn->log(5, "failed to take lease");
+         set_status("lease lock failed, early abort");
+         drain_all();
+         return set_cr_error(lease_cr->get_ret_status());
+       }
+       set_sleeping(true);
+       yield;
+     }
+
+     tn->log(10, "took lease");
+     yield call(new RGWReadBucketSyncStatusCoroutine(sync_env, bs, &sync_status));
+     // -ENOENT is tolerated here: sync_status then keeps whatever state it had
+     if (retcode < 0 && retcode != -ENOENT) {
+       tn->log(0, "ERROR: failed to read sync status for bucket");
+       lease_cr->go_down();
+       drain_all();
+       return set_cr_error(retcode);
+     }
+
+     tn->log(20, SSTR("sync status for bucket: " << sync_status.state));
+
+     yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bs.bucket, &bucket_info));
+     if (retcode == -ENOENT) {
+       /* bucket instance info has not been synced in yet, fetch it now */
+       yield {
+         // name the bucket in the log line; the message previously printed
+         // "bucket:: fetching metadata" with no identifier at all
+         tn->log(10, SSTR("no local info for bucket " << bucket_str{bs.bucket} << ": fetching metadata"));
+         string raw_key = string("bucket.instance:") + bs.bucket.get_key();
+
+         meta_sync_env.init(sync_env->dpp, cct, sync_env->store, sync_env->store->svc.zone->get_master_conn(), sync_env->async_rados,
+                            sync_env->http_manager, sync_env->error_logger, sync_env->sync_tracer);
+
+         call(new RGWMetaSyncSingleEntryCR(&meta_sync_env, raw_key,
+                                           string() /* no marker */,
+                                           MDLOG_STATUS_COMPLETE,
+                                           NULL /* no marker tracker */,
+                                           tn));
+       }
+       if (retcode < 0) {
+         tn->log(0, SSTR("ERROR: failed to fetch bucket instance info for " << bucket_str{bs.bucket}));
+         lease_cr->go_down();
+         drain_all();
+         return set_cr_error(retcode);
+       }
+
+       // retry the local read now that the metadata entry has been synced
+       yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bs.bucket, &bucket_info));
+     }
+     if (retcode < 0) {
+       tn->log(0, SSTR("ERROR: failed to retrieve bucket info for bucket=" << bucket_str{bs.bucket}));
+       lease_cr->go_down();
+       drain_all();
+       return set_cr_error(retcode);
+     }
+
+     do {
+       if (sync_status.state == rgw_bucket_shard_sync_info::StateInit) {
+         yield call(new RGWInitBucketShardSyncStatusCoroutine(sync_env, bs, sync_status));
+         if (retcode == -ENOENT) {
+           tn->log(0, "bucket sync disabled");
+           lease_cr->abort(); // deleted lease object, abort/wakeup instead of unlock
+           lease_cr->wakeup();
+           lease_cr.reset();
+           drain_all();
+           return set_cr_done();
+         }
+         if (retcode < 0) {
+           tn->log(0, SSTR("ERROR: init sync on bucket failed, retcode=" << retcode));
+           lease_cr->go_down();
+           drain_all();
+           return set_cr_error(retcode);
+         }
+       }
+
+       if (sync_status.state == rgw_bucket_shard_sync_info::StateFullSync) {
+         yield call(new RGWBucketShardFullSyncCR(sync_env, bs, &bucket_info,
+                                                 status_oid, lease_cr.get(),
+                                                 sync_status, tn));
+         if (retcode < 0) {
+           tn->log(5, SSTR("full sync on bucket failed, retcode=" << retcode));
+           lease_cr->go_down();
+           drain_all();
+           return set_cr_error(retcode);
+         }
+       }
+
+       if (sync_status.state == rgw_bucket_shard_sync_info::StateIncrementalSync) {
+         yield call(new RGWBucketShardIncrementalSyncCR(sync_env, bs, &bucket_info,
+                                                        status_oid, lease_cr.get(),
+                                                        sync_status, tn));
+         if (retcode < 0) {
+           tn->log(5, SSTR("incremental sync on bucket failed, retcode=" << retcode));
+           lease_cr->go_down();
+           drain_all();
+           return set_cr_error(retcode);
+         }
+       }
+       // loop back to previous states unless incremental sync returns normally
+     } while (sync_status.state != rgw_bucket_shard_sync_info::StateIncrementalSync);
+
+     lease_cr->go_down();
+     drain_all();
+     return set_cr_done();
+   }
+
+   return 0;
+ }
+
+ // Allocates the coroutine that runs sync for this log's bucket shard, parented
+ // under the sync tracer's root node. The caller receives the raw pointer.
+ RGWCoroutine *RGWRemoteBucketLog::run_sync_cr()
+ {
+   auto *tracer = sync_env.sync_tracer;
+   return new RGWRunBucketSyncCoroutine(&sync_env, bs, tracer->root_node);
+ }
+
+ // Prepares the manager for use:
+ //  - resolves the REST connection to source_zone (-EINVAL if there is none),
+ //  - starts the HTTP manager,
+ //  - fetches the bucket instance metadata from the source zone to learn the
+ //    shard count,
+ //  - creates the error logger and the default sync module instance,
+ //  - creates one RGWRemoteBucketLog per shard (a single log with shard id -1
+ //    when num_shards is 0) and stores it in source_logs.
+ //
+ // Returns 0 on success or the first negative error code encountered.
+ int RGWBucketSyncStatusManager::init()
+ {
+   conn = store->svc.zone->get_zone_conn_by_id(source_zone);
+   if (!conn) {
+     ldpp_dout(this, 0) << "connection object to zone " << source_zone << " does not exist" << dendl;
+     return -EINVAL;
+   }
+
+   int ret = http_manager.start();
+   if (ret < 0) {
+     ldpp_dout(this, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+     return ret;
+   }
+
+
+   const string key = bucket.get_key();
+
+   rgw_http_param_pair pairs[] = { { "key", key.c_str() },
+                                   { nullptr, nullptr } };
+
+   string path = string("/admin/metadata/bucket.instance");
+
+   bucket_instance_meta_info result;
+   ret = cr_mgr.run(new RGWReadRESTResourceCR<bucket_instance_meta_info>(store->ctx(), conn, &http_manager, path, pairs, &result));
+   if (ret < 0) {
+     ldpp_dout(this, 0) << "ERROR: failed to fetch bucket metadata info from zone=" << source_zone << " path=" << path << " key=" << key << " ret=" << ret << dendl;
+     return ret;
+   }
+
+   RGWBucketInfo& bi = result.data.get_bucket_info();
+   num_shards = bi.num_shards;
+
+   error_logger = new RGWSyncErrorLogger(store, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS);
+
+   sync_module.reset(new RGWDefaultSyncModuleInstance());
+
+   // an unsharded bucket (num_shards == 0) still needs exactly one log
+   int effective_num_shards = (num_shards ? num_shards : 1);
+
+   auto async_rados = store->get_async_rados();
+
+   for (int i = 0; i < effective_num_shards; i++) {
+     RGWRemoteBucketLog *l = new RGWRemoteBucketLog(this, store, this, async_rados, &http_manager);
+     ret = l->init(source_zone, conn, bucket, (num_shards ? i : -1), error_logger, store->get_sync_tracer(), sync_module);
+     if (ret < 0) {
+       ldpp_dout(this, 0) << "ERROR: failed to initialize RGWRemoteBucketLog object" << dendl;
+       // 'l' has not been stored in source_logs yet, so nothing else holds
+       // it; free it here instead of leaking it on the error path
+       delete l;
+       return ret;
+     }
+     source_logs[i] = l;
+   }
+
+   return 0;
+ }
+
+ // Builds one coroutine stack per entry in source_logs, each running that
+ // log's init_sync_status_cr(), and runs them all through cr_mgr.
+ // Returns whatever cr_mgr.run() returns.
+ int RGWBucketSyncStatusManager::init_sync_status()
+ {
+   list<RGWCoroutinesStack *> pending;
+
+   for (auto& shard_log : source_logs) {
+     auto *cr_stack = new RGWCoroutinesStack(store->ctx(), &cr_mgr);
+     cr_stack->call(shard_log.second->init_sync_status_cr());
+     pending.push_back(cr_stack);
+   }
+
+   return cr_mgr.run(pending);
+ }
+
+ // Reads the sync status of every shard in source_logs concurrently; each
+ // result lands in sync_status[<source_logs key>].
+ // Returns 0 on success, or the negative code from cr_mgr.run() after logging.
+ int RGWBucketSyncStatusManager::read_sync_status()
+ {
+   list<RGWCoroutinesStack *> pending;
+
+   for (auto& shard_log : source_logs) {
+     auto *cr_stack = new RGWCoroutinesStack(store->ctx(), &cr_mgr);
+     cr_stack->call(shard_log.second->read_sync_status_cr(&sync_status[shard_log.first]));
+     pending.push_back(cr_stack);
+   }
+
+   const int ret = cr_mgr.run(pending);
+   if (ret >= 0) {
+     return 0;
+   }
+
+   ldpp_dout(this, 0) << "ERROR: failed to read sync status for "
+                      << bucket_str{bucket} << dendl;
+   return ret;
+ }
+
+ // Runs bucket sync for every shard in source_logs: one coroutine stack per
+ // shard, each executing that log's run_sync_cr(), all driven by cr_mgr.
+ // Returns 0 on success, or the negative code from cr_mgr.run() after logging.
+ int RGWBucketSyncStatusManager::run()
+ {
+   list<RGWCoroutinesStack *> stacks;
+
+   for (auto& shard_log : source_logs) {
+     RGWCoroutinesStack *stack = new RGWCoroutinesStack(store->ctx(), &cr_mgr);
+     stack->call(shard_log.second->run_sync_cr());
+
+     stacks.push_back(stack);
+   }
+
+   int ret = cr_mgr.run(stacks);
+   if (ret < 0) {
+     // this path runs sync; the previous text ("failed to read sync status")
+     // was copied from read_sync_status() and misreported the failure
+     ldpp_dout(this, 0) << "ERROR: failed to run sync for "
+                        << bucket_str{bucket} << " ret=" << ret << dendl;
+     return ret;
+   }
+
+   return 0;
+ }
+
+ // DoutPrefixProvider hook: reports this file's dout_subsys as the subsystem.
+ unsigned RGWBucketSyncStatusManager::get_subsys() const
+ {
+   const unsigned subsys = dout_subsys;
+   return subsys;
+ }
+
+ // DoutPrefixProvider hook: writes "bucket sync zone:<first 8 chars of
+ // source_zone> bucket:<bucket name> " to 'out' and returns the stream.
+ std::ostream& RGWBucketSyncStatusManager::gen_prefix(std::ostream& out) const
+ {
+   const std::string_view short_zone = std::string_view{source_zone}.substr(0, 8);
+   out << "bucket sync zone:" << short_zone;
+   out << " bucket:" << bucket.name << ' ';
+   return out;
+ }
+
+ // Name of the sync-status object for a bucket shard:
+ //   <bucket_status_oid_prefix>.<source_zone>:<bs.get_key()>
+ string RGWBucketSyncStatusManager::status_oid(const string& source_zone,
+                                               const rgw_bucket_shard& bs)
+ {
+   string oid = bucket_status_oid_prefix;
+   oid += ".";
+   oid += source_zone;
+   oid += ":";
+   oid += bs.get_key();
+   return oid;
+ }
+
+ // Name of the per-object sync-status object:
+ //   <object_status_oid_prefix>.<source_zone>:<bucket key>:<object name>:<instance>
+ string RGWBucketSyncStatusManager::obj_status_oid(const string& source_zone,
+                                                   const rgw_obj& obj)
+ {
+   string oid = object_status_oid_prefix;
+   oid += ".";
+   oid += source_zone;
+   oid += ":";
+   oid += obj.bucket.get_key();
+   oid += ":";
+   oid += obj.key.name;
+   oid += ":";
+   oid += obj.key.instance;
+   return oid;
+ }
+
+ // Shard-collect coroutine that reads the sync status of consecutive bucket
+ // shards into the caller's vector, one RGWReadBucketSyncStatusCoroutine per
+ // vector element. The base class is constructed with max_concurrent_shards.
+ class RGWCollectBucketSyncStatusCR : public RGWShardCollectCR {
+   static constexpr int max_concurrent_shards = 16;
+   RGWRados *const store;
+   RGWDataSyncEnv *const env;
+   const int num_shards;
+   rgw_bucket_shard bs;  // shard the next spawned reader will target
+
+   using Vector = std::vector<rgw_bucket_shard_sync_info>;
+   Vector::iterator cur;   // output slot for the next reader
+   Vector::iterator last;  // one past the final output slot
+
+  public:
+   // 'status' must already be sized; this class only walks it.
+   RGWCollectBucketSyncStatusCR(RGWRados *store, RGWDataSyncEnv *env,
+                                int num_shards, const rgw_bucket& bucket,
+                                Vector *status)
+     : RGWShardCollectCR(store->ctx(), max_concurrent_shards),
+       store(store),
+       env(env),
+       num_shards(num_shards),
+       bs(bucket, num_shards > 0 ? 0 : -1), // start at shard 0 or -1
+       cur(status->begin()),
+       last(status->end())
+   {}
+
+   // Spawns a reader for the current slot and advances both the slot and the
+   // shard id. Returns false once every slot has had a reader spawned.
+   bool spawn_next() override {
+     if (cur != last) {
+       auto *reader = new RGWReadBucketSyncStatusCoroutine(env, bs, &*cur);
+       spawn(reader, false);
+       ++cur;
+       ++bs.shard_id;
+       return true;
+     }
+     return false;
+   }
+ };
+
+ // Reads the sync status of every shard of 'bucket_info' relative to
+ // 'source_zone'. '*status' is cleared and resized to max(1, num_shards)
+ // before the read. Returns the result of running the collect coroutine.
+ int rgw_bucket_sync_status(const DoutPrefixProvider *dpp, RGWRados *store, const std::string& source_zone,
+                            const RGWBucketInfo& bucket_info,
+                            std::vector<rgw_bucket_shard_sync_info> *status)
+ {
+   const auto num_shards = bucket_info.num_shards;
+   const size_t slot_count = std::max<size_t>(1, num_shards);
+   status->clear();
+   status->resize(slot_count);
+
+   // a sync env with only the pieces the status readers are given here
+   RGWDataSyncEnv env;
+   RGWSyncModuleInstanceRef module; // null sync module
+   env.init(dpp, store->ctx(), store, nullptr, store->get_async_rados(),
+            nullptr, nullptr, nullptr, source_zone, module, nullptr);
+
+   RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+   auto *collect_cr = new RGWCollectBucketSyncStatusCR(store, &env, num_shards,
+                                                       bucket_info.bucket, status);
+   return crs.run(collect_cr);
+ }
+
+
+// TODO: move into rgw_data_sync_trim.cc
+#undef dout_prefix
+#define dout_prefix (*_dout << "data trim: ")
+
+ namespace {
+
+ /// Marker up to which it is safe to trim for one shard: next_step_marker
+ /// while the shard is in FullSync, otherwise marker.
+ const std::string& get_stable_marker(const rgw_data_sync_marker& m)
+ {
+   if (m.state == rgw_data_sync_marker::FullSync) {
+     return m.next_step_marker;
+   }
+   return m.marker;
+ }
+
+ /// For every peer in [first, last), walk its sync_markers in map order and
+ /// lower the corresponding element of the output range to the peer's stable
+ /// marker when that marker is smaller.
+ ///
+ /// Output slots are matched by iteration position, not by shard id, and the
+ /// output range is not bounds-checked: the caller must provide at least as
+ /// many elements starting at 'dest' as the largest sync_markers map holds.
+ template <typename IterIn, typename IterOut>
+ void take_min_markers(IterIn first, IterIn last, IterOut dest)
+ {
+   for (auto peer = first; peer != last; ++peer) {
+     auto slot = dest;
+     for (auto& entry : peer->sync_markers) {
+       const auto& candidate = get_stable_marker(entry.second);
+       if (*slot > candidate) {
+         *slot = candidate;
+       }
+       ++slot;
+     }
+   }
+ }
+
+ } // anonymous namespace
+
+ // Coroutine performing one data-log trim pass; the work is in operate().
+ // State is sized up front: one status slot per entry of the zone's
+ // data-notify-to map, and one min marker per shard seeded with
+ // TrimCR::max_marker.
+ class DataLogTrimCR : public RGWCoroutine {
+   using TrimCR = RGWSyncLogTrimCR;
+   RGWRados *store;
+   RGWHTTPManager *http;
+   const int num_shards;
+   const std::string& zone_id; ///< my zone id
+   std::vector<rgw_data_sync_status> peer_status; ///< sync status for each peer
+   std::vector<std::string> min_shard_markers; ///< min marker per shard
+   std::vector<std::string>& last_trim; ///< last trimmed marker per shard
+   int ret = 0;
+
+  public:
+   // 'last_trim' is held by reference and is handed to the spawned trim
+   // coroutines, so it must outlive this coroutine.
+   DataLogTrimCR(RGWRados *store, RGWHTTPManager *http,
+                 int num_shards, std::vector<std::string>& last_trim)
+     : RGWCoroutine(store->ctx()),
+       store(store),
+       http(http),
+       num_shards(num_shards),
+       zone_id(store->svc.zone->get_zone().id),
+       peer_status(store->svc.zone->get_zone_data_notify_to_map().size()),
+       min_shard_markers(num_shards, TrimCR::max_marker),
+       last_trim(last_trim)
+   {}
+
+   int operate() override;
+ };
+
+ int DataLogTrimCR::operate() /* One trim pass. Queries the data sync status of
+  * every peer in the zone's data-notify-to map; if any query fails the pass
+  * ends with that error. Otherwise computes the minimum stable marker per
+  * shard across all peers and spawns a trim coroutine for each shard whose
+  * minimum is greater than last_trim[shard]. */
+ {
+ reenter(this) {
+ ldout(cct, 10) << "fetching sync status for zone " << zone_id << dendl;
+ set_status("fetching sync status");
+ yield {
+ // query data sync status from each sync peer
+ rgw_http_param_pair params[] = {
+ { "type", "data" },
+ { "status", nullptr },
+ { "source-zone", zone_id.c_str() },
+ { nullptr, nullptr }
+ };
+
+ auto p = peer_status.begin(); // peer_status was sized from the same map iterated below
+ for (auto& c : store->svc.zone->get_zone_data_notify_to_map()) {
+ ldout(cct, 20) << "query sync status from " << c.first << dendl;
+ using StatusCR = RGWReadRESTResourceCR<rgw_data_sync_status>;
+ spawn(new StatusCR(cct, c.second, http, "/admin/log/", params, &*p),
+ false); // the reply is decoded into this peer's slot (*p)
+ ++p;
+ }
+ }
+
+ // must get a successful reply from all peers to consider trimming
+ ret = 0;
+ while (ret == 0 && num_spawned() > 0) { // stops collecting at the first non-zero result
+ yield wait_for_child();
+ collect_next(&ret);
+ }
+ drain_all(); // wait out any children still running after an early exit from the loop
+
+ if (ret < 0) {
+ ldout(cct, 4) << "failed to fetch sync status from all peers" << dendl;
+ return set_cr_error(ret);
+ }
+
+ ldout(cct, 10) << "trimming log shards" << dendl;
+ set_status("trimming log shards");
+ yield {
+ // determine the minimum marker for each shard
+ take_min_markers(peer_status.begin(), peer_status.end(),
+ min_shard_markers.begin());
+
+ for (int i = 0; i < num_shards; i++) {
+ const auto& m = min_shard_markers[i];
+ if (m <= last_trim[i]) { // nothing beyond what was already trimmed for this shard
+ continue;
+ }
+ ldout(cct, 10) << "trimming log shard " << i
+ << " at marker=" << m
+ << " last_trim=" << last_trim[i] << dendl;
+ spawn(new TrimCR(store, store->data_log->get_oid(i),
+ m, &last_trim[i]), // the trim coroutine is given last_trim[i] to update
+ true);
+ }
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+ // Factory for a single data-log trim pass. 'markers' is the per-shard
+ // last-trim vector; the coroutine keeps a reference to it.
+ RGWCoroutine* create_admin_data_log_trim_cr(RGWRados *store,
+                                             RGWHTTPManager *http,
+                                             int num_shards,
+                                             std::vector<std::string>& markers)
+ {
+   RGWCoroutine *trim_cr = new DataLogTrimCR(store, http, num_shards, markers);
+   return trim_cr;
+ }
+
+ // Coroutine that repeatedly runs data-log trim passes on a fixed interval;
+ // the loop is in operate(). It owns the per-shard last_trim markers, which
+ // start out as num_shards empty strings.
+ class DataLogTrimPollCR : public RGWCoroutine {
+   RGWRados *store;
+   RGWHTTPManager *http;
+   const int num_shards;
+   const utime_t interval; ///< polling interval
+   const std::string lock_oid; ///< use first data log shard for lock
+   const std::string lock_cookie;
+   std::vector<std::string> last_trim; ///< last trimmed marker per shard
+
+  public:
+   DataLogTrimPollCR(RGWRados *store, RGWHTTPManager *http,
+                     int num_shards, utime_t interval)
+     : RGWCoroutine(store->ctx()),
+       store(store),
+       http(http),
+       num_shards(num_shards),
+       interval(interval),
+       lock_oid(store->data_log->get_oid(0)),
+       lock_cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct)),
+       last_trim(num_shards)
+   {}
+
+   int operate() override;
+ };
+
+ int DataLogTrimPollCR::operate() /* Endless poll loop: wait one interval, try
+  * to take the "data_trim" lock on lock_oid for the length of the interval,
+  * and when the lock is obtained run one DataLogTrimCR pass. A failed lock
+  * attempt just restarts the loop. */
+ {
+ reenter(this) {
+ for (;;) {
+ set_status("sleeping");
+ wait(interval); // NOTE(review): called without `yield` — confirm wait() alone defers the following steps as intended
+
+ // request a 'data_trim' lock that covers the entire wait interval to
+ // prevent other gateways from attempting to trim for the duration
+ set_status("acquiring trim lock");
+ yield call(new RGWSimpleRadosLockCR(store->get_async_rados(), store,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, lock_oid),
+ "data_trim", lock_cookie,
+ interval.sec())); // lock duration is the polling interval, in seconds
+ if (retcode < 0) {
+ // if the lock is already held, go back to sleep and try again later
+ ldout(cct, 4) << "failed to lock " << lock_oid << ", trying again in "
+ << interval.sec() << "s" << dendl;
+ continue;
+ }
+
+ set_status("trimming");
+ yield call(new DataLogTrimCR(store, http, num_shards, last_trim)); // retcode of the trim pass is not inspected
+
+ // note that the lock is not released. this is intentional, as it avoids
+ // duplicating this work in other gateways
+ }
+ }
+ return 0;
+ }
+
+ // Factory for the periodic data-log trim coroutine polling every 'interval'.
+ RGWCoroutine* create_data_log_trim_cr(RGWRados *store,
+                                       RGWHTTPManager *http,
+                                       int num_shards, utime_t interval)
+ {
+   RGWCoroutine *poll_cr = new DataLogTrimPollCR(store, http, num_shards, interval);
+   return poll_cr;
+ }
diff --git a/src/rgw/rgw_data_sync.h b/src/rgw/rgw_data_sync.h
new file mode 100644
index 00000000..55a71d72
--- /dev/null
+++ b/src/rgw/rgw_data_sync.h
@@ -0,0 +1,625 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_DATA_SYNC_H
+#define CEPH_RGW_DATA_SYNC_H
+
+#include "include/encoding.h"
+
+#include "common/RWLock.h"
+#include "common/ceph_json.h"
+
+
+#include "rgw_coroutine.h"
+#include "rgw_http_client.h"
+#include "rgw_bucket.h"
+
+#include "rgw_sync_module.h"
+#include "rgw_sync_trace.h"
+
+ // Data-log summary holding a shard count; starts at zero until decoded.
+ struct rgw_datalog_info {
+   uint32_t num_shards;
+
+   rgw_datalog_info() : num_shards{0} {}
+
+   // Declared here only; the definition is not part of this header.
+   void decode_json(JSONObj *obj);
+ };
+
+ // Overall data sync state for a source zone: the state machine position,
+ // the shard count and an instance id.
+ struct rgw_data_sync_info {
+   enum SyncState {
+     StateInit = 0,
+     StateBuildingFullSyncMaps = 1,
+     StateSync = 2,
+   };
+
+   uint16_t state;
+   uint32_t num_shards;
+
+   uint64_t instance_id{0};
+
+   rgw_data_sync_info() : state(StateInit), num_shards(0) {}
+
+   void encode(bufferlist& bl) const {
+     ENCODE_START(2, 1, bl);
+     encode(state, bl);
+     encode(num_shards, bl);
+     encode(instance_id, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(2, bl);
+     decode(state, bl);
+     decode(num_shards, bl);
+     // instance_id is only read when the encoded struct version is >= 2
+     if (struct_v >= 2) {
+       decode(instance_id, bl);
+     }
+     DECODE_FINISH(bl);
+   }
+
+   // Emits "status" as a string ("init", "building-full-sync-maps", "sync",
+   // or "unknown" for any other value), then num_shards and instance_id.
+   void dump(Formatter *f) const {
+     string s;
+     if (state == StateInit) {
+       s = "init";
+     } else if (state == StateBuildingFullSyncMaps) {
+       s = "building-full-sync-maps";
+     } else if (state == StateSync) {
+       s = "sync";
+     } else {
+       s = "unknown";
+     }
+     encode_json("status", s, f);
+     encode_json("num_shards", num_shards, f);
+     encode_json("instance_id", instance_id, f);
+   }
+
+   // Inverse of dump(); any status string other than the two recognized
+   // non-init names maps to StateInit.
+   void decode_json(JSONObj *obj) {
+     std::string s;
+     JSONDecoder::decode_json("status", s, obj);
+     state = StateInit;
+     if (s == "building-full-sync-maps") {
+       state = StateBuildingFullSyncMaps;
+     } else if (s == "sync") {
+       state = StateSync;
+     }
+     JSONDecoder::decode_json("num_shards", num_shards, obj);
+     JSONDecoder::decode_json("instance_id", instance_id, obj);
+   }
+   static void generate_test_instances(std::list<rgw_data_sync_info*>& o);
+ };
+WRITE_CLASS_ENCODER(rgw_data_sync_info)
+
+ // Per-shard data sync position: whether the shard is in full or incremental
+ // sync, its markers, entry counters and a timestamp.
+ struct rgw_data_sync_marker {
+   enum SyncState {
+     FullSync = 0,
+     IncrementalSync = 1,
+   };
+   uint16_t state;
+   string marker;
+   string next_step_marker;
+   uint64_t total_entries;
+   uint64_t pos;
+   real_time timestamp;
+
+   rgw_data_sync_marker() : state(FullSync), total_entries(0), pos(0) {}
+
+   void encode(bufferlist& bl) const {
+     ENCODE_START(1, 1, bl);
+     encode(state, bl);
+     encode(marker, bl);
+     encode(next_step_marker, bl);
+     encode(total_entries, bl);
+     encode(pos, bl);
+     encode(timestamp, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(1, bl);
+     decode(state, bl);
+     decode(marker, bl);
+     decode(next_step_marker, bl);
+     decode(total_entries, bl);
+     decode(pos, bl);
+     decode(timestamp, bl);
+     DECODE_FINISH(bl);
+   }
+
+   // Emits "status" as "full-sync", "incremental-sync" or "unknown", followed
+   // by the remaining fields; the timestamp is written as a utime_t.
+   void dump(Formatter *f) const {
+     const char *s = "unknown";
+     if (state == FullSync) {
+       s = "full-sync";
+     } else if (state == IncrementalSync) {
+       s = "incremental-sync";
+     }
+     encode_json("status", s, f);
+     encode_json("marker", marker, f);
+     encode_json("next_step_marker", next_step_marker, f);
+     encode_json("total_entries", total_entries, f);
+     encode_json("pos", pos, f);
+     encode_json("timestamp", utime_t(timestamp), f);
+   }
+
+   // Inverse of dump(). An unrecognized status string leaves 'state' as is.
+   void decode_json(JSONObj *obj) {
+     std::string s;
+     JSONDecoder::decode_json("status", s, obj);
+     if (s == "incremental-sync") {
+       state = IncrementalSync;
+     } else if (s == "full-sync") {
+       state = FullSync;
+     }
+     JSONDecoder::decode_json("marker", marker, obj);
+     JSONDecoder::decode_json("next_step_marker", next_step_marker, obj);
+     JSONDecoder::decode_json("total_entries", total_entries, obj);
+     JSONDecoder::decode_json("pos", pos, obj);
+     utime_t t;
+     JSONDecoder::decode_json("timestamp", t, obj);
+     timestamp = t.to_real_time();
+   }
+   static void generate_test_instances(std::list<rgw_data_sync_marker*>& o);
+ };
+WRITE_CLASS_ENCODER(rgw_data_sync_marker)
+
+ // Complete data sync status: the global sync_info plus one marker per shard
+ // id. The binary encoding covers sync_info only; the JSON form carries both.
+ struct rgw_data_sync_status {
+   rgw_data_sync_info sync_info;
+   map<uint32_t, rgw_data_sync_marker> sync_markers;
+
+   rgw_data_sync_status() {}
+
+   void encode(bufferlist& bl) const {
+     ENCODE_START(1, 1, bl);
+     /* sync markers are encoded separately */
+     encode(sync_info, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(1, bl);
+     /* sync markers are decoded separately */
+     decode(sync_info, bl);
+     DECODE_FINISH(bl);
+   }
+
+   // JSON form: { "info": <sync_info>, "markers": <sync_markers> }
+   void dump(Formatter *f) const {
+     encode_json("info", sync_info, f);
+     encode_json("markers", sync_markers, f);
+   }
+
+   void decode_json(JSONObj *obj) {
+     JSONDecoder::decode_json("info", sync_info, obj);
+     JSONDecoder::decode_json("markers", sync_markers, obj);
+   }
+
+   static void generate_test_instances(std::list<rgw_data_sync_status*>& o);
+ };
+WRITE_CLASS_ENCODER(rgw_data_sync_status)
+
+ struct rgw_datalog_entry { // one data-log entry: a key string and a timestamp
+ string key;
+ ceph::real_time timestamp;
+
+ void decode_json(JSONObj *obj); // declared only; the definition is not in this header
+ };
+
+ // One page of data-log entries for a shard: the listing marker, a flag
+ // telling whether the listing was truncated, and the entries themselves.
+ struct rgw_datalog_shard_data {
+   string marker;
+   // initialized so that a default-constructed or partially decoded object
+   // never exposes an indeterminate flag
+   bool truncated{false};
+   vector<rgw_datalog_entry> entries;
+
+   // Declared here only; the definition is not part of this header.
+   void decode_json(JSONObj *obj);
+ };
+
+class RGWAsyncRadosProcessor;
+class RGWDataSyncControlCR;
+
+ // Owner of a bucket entry: an id plus a display name.
+ struct rgw_bucket_entry_owner {
+   string id;
+   string display_name;
+
+   rgw_bucket_entry_owner() {}
+   rgw_bucket_entry_owner(const string& _id, const string& _display_name)
+     : id(_id),
+       display_name(_display_name)
+   {}
+
+   // Declared here only; the definition is not part of this header.
+   void decode_json(JSONObj *obj);
+ };
+
+class RGWSyncErrorLogger;
+class RGWRESTConn;
+
+ // Bundle of handles shared by the data sync coroutines. Every pointer starts
+ // out null; init() fills all fields in one call. The struct stores the raw
+ // pointers as given and does not free them in any code shown here.
+ struct RGWDataSyncEnv {
+   const DoutPrefixProvider *dpp{nullptr};
+   CephContext *cct{nullptr};
+   RGWRados *store{nullptr};
+   RGWRESTConn *conn{nullptr};
+   RGWAsyncRadosProcessor *async_rados{nullptr};
+   RGWHTTPManager *http_manager{nullptr};
+   RGWSyncErrorLogger *error_logger{nullptr};
+   RGWSyncTraceManager *sync_tracer{nullptr};
+   string source_zone;
+   RGWSyncModuleInstanceRef sync_module{nullptr};
+   PerfCounters* counters{nullptr};
+
+   RGWDataSyncEnv() {}
+
+   // Copies each argument into the member of the same name (without the
+   // leading underscore).
+   void init(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWRados *_store, RGWRESTConn *_conn,
+             RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager,
+             RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer,
+             const string& _source_zone, RGWSyncModuleInstanceRef& _sync_module,
+             PerfCounters* _counters) {
+     // logging / context
+     dpp = _dpp;
+     cct = _cct;
+
+     // storage and transport
+     store = _store;
+     conn = _conn;
+     async_rados = _async_rados;
+     http_manager = _http_manager;
+
+     // diagnostics
+     error_logger = _error_logger;
+     sync_tracer = _sync_tracer;
+
+     // sync source and behaviour
+     source_zone = _source_zone;
+     sync_module = _sync_module;
+     counters = _counters;
+   }
+
+   string shard_obj_name(int shard_id);
+   string status_oid();
+ };
+
+ class RGWRemoteDataLog : public RGWCoroutinesManager { /* Coroutines manager for
+  * data-log sync against one source zone. Owns an HTTP manager and a
+  * RGWDataSyncEnv, and declares the operations for reading log info and sync
+  * status and for initializing and running sync. Method bodies are not in
+  * this header. */
+ const DoutPrefixProvider *dpp;
+ RGWRados *store;
+ RGWAsyncRadosProcessor *async_rados;
+ RGWHTTPManager http_manager;
+
+ RGWDataSyncEnv sync_env;
+
+ RWLock lock;
+ RGWDataSyncControlCR *data_sync_cr;
+
+ RGWSyncTraceNodeRef tn;
+
+ bool initialized;
+
+public:
+ RGWRemoteDataLog(const DoutPrefixProvider *dpp, RGWRados *_store,
+ RGWAsyncRadosProcessor *async_rados)
+ : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()),
+ dpp(dpp), store(_store), async_rados(async_rados),
+ http_manager(store->ctx(), completion_mgr), // reads the 'store' member, which is declared (and so initialized) before http_manager
+ lock("RGWRemoteDataLog::lock"), data_sync_cr(NULL),
+ initialized(false) {}
+ int init(const string& _source_zone, RGWRESTConn *_conn, RGWSyncErrorLogger *_error_logger,
+ RGWSyncTraceManager *_sync_tracer, RGWSyncModuleInstanceRef& module,
+ PerfCounters* _counters);
+ void finish();
+
+ int read_log_info(rgw_datalog_info *log_info);
+ int read_source_log_shards_info(map<int, RGWDataChangesLogInfo> *shards_info);
+ int read_source_log_shards_next(map<int, string> shard_markers, map<int, rgw_datalog_shard_data> *result); // shard_markers is taken by value (copied per call)
+ int read_sync_status(rgw_data_sync_status *sync_status);
+ int read_recovering_shards(const int num_shards, set<int>& recovering_shards);
+ int read_shard_status(int shard_id, set<string>& lagging_buckets,set<string>& recovering_buckets, rgw_data_sync_marker* sync_marker, const int max_entries);
+ int init_sync_status(int num_shards);
+ int run_sync(int num_shards);
+
+ void wakeup(int shard_id, set<string>& keys);
+ };
+
+ // Front end for data sync against one source zone. Most operations forward
+ // to the embedded RGWRemoteDataLog; the class also acts as the
+ // DoutPrefixProvider handed to that log. finalize() runs from the destructor.
+ class RGWDataSyncStatusManager : public DoutPrefixProvider {
+   RGWRados *store;
+   rgw_rados_ref ref;
+
+   string source_zone;
+   RGWRESTConn *conn;
+   RGWSyncErrorLogger *error_logger;
+   RGWSyncModuleInstanceRef sync_module;
+   PerfCounters* counters;
+
+   RGWRemoteDataLog source_log;
+
+   string source_status_oid;
+   string source_shard_status_oid_prefix;
+
+   map<int, rgw_raw_obj> shard_objs;
+
+   int num_shards;
+
+ public:
+   // Without an explicit sync module the manager starts with a null one.
+   RGWDataSyncStatusManager(RGWRados *_store, RGWAsyncRadosProcessor *async_rados,
+                            const string& _source_zone, PerfCounters* counters)
+     : RGWDataSyncStatusManager(_store, async_rados, _source_zone, counters,
+                                RGWSyncModuleInstanceRef{}) {}
+
+   RGWDataSyncStatusManager(RGWRados *_store, RGWAsyncRadosProcessor *async_rados,
+                            const string& _source_zone, PerfCounters* counters,
+                            const RGWSyncModuleInstanceRef& _sync_module)
+     : store(_store),
+       source_zone(_source_zone),
+       conn(NULL),
+       error_logger(NULL),
+       sync_module(_sync_module),
+       counters(counters),
+       source_log(this, store, async_rados),
+       num_shards(0) {}
+
+   ~RGWDataSyncStatusManager() {
+     finalize();
+   }
+
+   int init();
+   void finalize();
+
+   static string shard_obj_name(const string& source_zone, int shard_id);
+   static string sync_status_oid(const string& source_zone);
+
+   // --- status queries, forwarded to source_log ---
+   int read_sync_status(rgw_data_sync_status *sync_status) {
+     return source_log.read_sync_status(sync_status);
+   }
+
+   int read_recovering_shards(const int num_shards, set<int>& recovering_shards) {
+     return source_log.read_recovering_shards(num_shards, recovering_shards);
+   }
+
+   int read_shard_status(int shard_id, set<string>& lagging_buckets, set<string>& recovering_buckets, rgw_data_sync_marker *sync_marker, const int max_entries) {
+     return source_log.read_shard_status(shard_id, lagging_buckets, recovering_buckets,
+                                         sync_marker, max_entries);
+   }
+
+   int init_sync_status() {
+     return source_log.init_sync_status(num_shards);
+   }
+
+   // --- source log queries, forwarded to source_log ---
+   int read_log_info(rgw_datalog_info *log_info) {
+     return source_log.read_log_info(log_info);
+   }
+
+   int read_source_log_shards_info(map<int, RGWDataChangesLogInfo> *shards_info) {
+     return source_log.read_source_log_shards_info(shards_info);
+   }
+
+   int read_source_log_shards_next(map<int, string> shard_markers, map<int, rgw_datalog_shard_data> *result) {
+     return source_log.read_source_log_shards_next(shard_markers, result);
+   }
+
+   // --- lifecycle ---
+   int run() {
+     return source_log.run_sync(num_shards);
+   }
+
+   void wakeup(int shard_id, set<string>& keys) {
+     source_log.wakeup(shard_id, keys);
+   }
+
+   void stop() {
+     source_log.finish();
+   }
+
+   // implements DoutPrefixProvider
+   CephContext *get_cct() const override { return store->ctx(); }
+   unsigned get_subsys() const override;
+   std::ostream& gen_prefix(std::ostream& out) const override;
+ };
+
+class RGWBucketSyncStatusManager;
+class RGWBucketSyncCR;
+
+ // Full-sync progress for a bucket shard: an object-key position and a count.
+ struct rgw_bucket_shard_full_sync_marker {
+   rgw_obj_key position;
+   uint64_t count;
+
+   rgw_bucket_shard_full_sync_marker() : count{0} {}
+
+   void encode_attr(map<string, bufferlist>& attrs);
+
+   // Wire format: position, then count.
+   void encode(bufferlist& bl) const {
+     ENCODE_START(1, 1, bl);
+     encode(position, bl);
+     encode(count, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(1, bl);
+     decode(position, bl);
+     decode(count, bl);
+     DECODE_FINISH(bl);
+   }
+
+   void dump(Formatter *f) const;
+   void decode_json(JSONObj *obj);
+ };
+WRITE_CLASS_ENCODER(rgw_bucket_shard_full_sync_marker)
+
+ // Incremental-sync progress for a bucket shard: a position string.
+ // Markers order by plain string comparison of that position.
+ struct rgw_bucket_shard_inc_sync_marker {
+   string position;
+
+   rgw_bucket_shard_inc_sync_marker() {}
+
+   void encode_attr(map<string, bufferlist>& attrs);
+
+   void encode(bufferlist& bl) const {
+     ENCODE_START(1, 1, bl);
+     encode(position, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(1, bl);
+     decode(position, bl);
+     DECODE_FINISH(bl);
+   }
+
+   void dump(Formatter *f) const;
+   void decode_json(JSONObj *obj);
+
+   bool operator<(const rgw_bucket_shard_inc_sync_marker& m) const {
+     return position.compare(m.position) < 0;
+   }
+ };
+WRITE_CLASS_ENCODER(rgw_bucket_shard_inc_sync_marker)
+
+// Sync state of one bucket shard: the current state plus the full-sync and
+// incremental-sync markers. Serialized at struct version 1.
+struct rgw_bucket_shard_sync_info {
+ // values stored in `state`; the numeric values are part of the encoded form
+ enum SyncState {
+ StateInit = 0,
+ StateFullSync = 1,
+ StateIncrementalSync = 2,
+ };
+
+ // holds a SyncState value, kept as a fixed-width integer for encoding
+ uint16_t state;
+ rgw_bucket_shard_full_sync_marker full_marker;
+ rgw_bucket_shard_inc_sync_marker inc_marker;
+
+ void decode_from_attrs(CephContext *cct, map<string, bufferlist>& attrs);
+ void encode_all_attrs(map<string, bufferlist>& attrs);
+ void encode_state_attr(map<string, bufferlist>& attrs);
+
+ // writes state, full_marker, inc_marker in that order
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(state, bl);
+ encode(full_marker, bl);
+ encode(inc_marker, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // field order must mirror encode()
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(state, bl);
+ decode(full_marker, bl);
+ decode(inc_marker, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+
+ // a new instance starts in StateInit
+ rgw_bucket_shard_sync_info() : state((int)StateInit) {}
+
+};
+WRITE_CLASS_ENCODER(rgw_bucket_shard_sync_info)
+
+// Bucket index marker info as decoded from JSON; every field maps to the
+// JSON key of the same name.
+struct rgw_bucket_index_marker_info {
+ string bucket_ver;
+ string master_ver;
+ string max_marker;
+ // defaults to false
+ bool syncstopped{false};
+
+ // reads "bucket_ver", "master_ver", "max_marker" and "syncstopped" from obj
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("bucket_ver", bucket_ver, obj);
+ JSONDecoder::decode_json("master_ver", master_ver, obj);
+ JSONDecoder::decode_json("max_marker", max_marker, obj);
+ JSONDecoder::decode_json("syncstopped", syncstopped, obj);
+ }
+};
+
+
+// Coroutine manager bound to a single bucket shard (`bs`) of a source zone.
+// The constructor only stores the collaborators it is given; the source zone,
+// connection and shard are supplied later through init().
+class RGWRemoteBucketLog : public RGWCoroutinesManager {
+ const DoutPrefixProvider *dpp;
+ RGWRados *store;
+ // null until set; not touched by the constructor
+ RGWRESTConn *conn{nullptr};
+ string source_zone;
+ rgw_bucket_shard bs;
+
+ // raw pointers stored as given by the constructor
+ RGWBucketSyncStatusManager *status_manager;
+ RGWAsyncRadosProcessor *async_rados;
+ RGWHTTPManager *http_manager;
+
+ RGWDataSyncEnv sync_env;
+ rgw_bucket_shard_sync_info init_status;
+
+ // null until set; not touched by the constructor
+ RGWBucketSyncCR *sync_cr{nullptr};
+
+public:
+ // The base coroutine manager is built from the store's CephContext and
+ // coroutine registry.
+ RGWRemoteBucketLog(const DoutPrefixProvider *_dpp, RGWRados *_store,
+ RGWBucketSyncStatusManager *_sm,
+ RGWAsyncRadosProcessor *_async_rados,
+ RGWHTTPManager *_http_manager)
+ : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()),
+ dpp(_dpp), store(_store), status_manager(_sm),
+ async_rados(_async_rados), http_manager(_http_manager)
+ {}
+
+ int init(const string& _source_zone, RGWRESTConn *_conn,
+ const rgw_bucket& bucket, int shard_id,
+ RGWSyncErrorLogger *_error_logger,
+ RGWSyncTraceManager *_sync_tracer,
+ RGWSyncModuleInstanceRef& _sync_module);
+ void finish();
+
+ // factory methods returning coroutines
+ // NOTE(review): ownership of the returned pointer is not visible here -- confirm against callers
+ RGWCoroutine *read_sync_status_cr(rgw_bucket_shard_sync_info *sync_status);
+ RGWCoroutine *init_sync_status_cr();
+ RGWCoroutine *run_sync_cr();
+
+ void wakeup();
+};
+
+/// Tracks sync status for one bucket against one source zone. Owns its own
+/// coroutine manager and HTTP manager, and keeps a per-shard map of remote
+/// bucket logs and of sync status.
+class RGWBucketSyncStatusManager : public DoutPrefixProvider {
+  RGWRados *store;
+
+  RGWCoroutinesManager cr_mgr;
+
+  // built on cr_mgr's completion manager, so it must be declared after cr_mgr
+  RGWHTTPManager http_manager;
+
+  string source_zone;
+  RGWRESTConn *conn{nullptr};
+  RGWSyncErrorLogger *error_logger{nullptr};
+  RGWSyncModuleInstanceRef sync_module;
+
+  rgw_bucket bucket;
+
+  // keyed by int; NOTE(review): presumably the shard id -- confirm in init()
+  map<int, RGWRemoteBucketLog *> source_logs;
+
+  string source_status_oid;
+  string source_shard_status_oid_prefix;
+
+  map<int, rgw_bucket_shard_sync_info> sync_status;
+  rgw_raw_obj status_obj;
+
+  int num_shards{0};
+
+public:
+  /// Stores the collaborators only; conn, error_logger and num_shards keep
+  /// their null/zero defaults until set elsewhere.
+  RGWBucketSyncStatusManager(RGWRados *_store, const string& _source_zone,
+                             const rgw_bucket& bucket)
+    : store(_store),
+      cr_mgr(_store->ctx(), _store->get_cr_registry()),
+      // use the parameter rather than the member so this initializer does
+      // not depend on the declaration order of `store`
+      http_manager(_store->ctx(), cr_mgr.get_completion_mgr()),
+      source_zone(_source_zone),
+      bucket(bucket) {}
+  ~RGWBucketSyncStatusManager();
+
+  int init();
+
+  /// Mutable access to the per-shard sync status map.
+  map<int, rgw_bucket_shard_sync_info>& get_sync_status() { return sync_status; }
+  int init_sync_status();
+
+  static string status_oid(const string& source_zone, const rgw_bucket_shard& bs);
+  static string obj_status_oid(const string& source_zone, const rgw_obj& obj); /* can be used by sync modules */
+
+  // implements DoutPrefixProvider
+  CephContext *get_cct() const override { return store->ctx(); }
+  unsigned get_subsys() const override;
+  std::ostream& gen_prefix(std::ostream& out) const override;
+
+  int read_sync_status();
+  int run();
+};
+
+/// read the sync status of all bucket shards from the given source zone
+/// NOTE(review): presumably fills *status with one entry per shard and
+/// returns a negative error code on failure -- confirm against the definition
+int rgw_bucket_sync_status(const DoutPrefixProvider *dpp, RGWRados *store, const std::string& source_zone,
+ const RGWBucketInfo& bucket_info,
+ std::vector<rgw_bucket_shard_sync_info> *status);
+
+/// Sync module that reports support for both writes and data export.
+class RGWDefaultSyncModule : public RGWSyncModule {
+public:
+  RGWDefaultSyncModule() = default;
+
+  bool supports_writes() override {
+    return true;
+  }
+  bool supports_data_export() override {
+    return true;
+  }
+
+  int create_instance(CephContext *cct, const JSONFormattable& config,
+                      RGWSyncModuleInstanceRef *instance) override;
+};
+
+/// Variant of the default sync module that reports support for writes but
+/// not for data export.
+class RGWArchiveSyncModule : public RGWDefaultSyncModule {
+public:
+  RGWArchiveSyncModule() = default;
+
+  bool supports_writes() override {
+    return true;
+  }
+  bool supports_data_export() override {
+    return false;
+  }
+
+  int create_instance(CephContext *cct, const JSONFormattable& config,
+                      RGWSyncModuleInstanceRef *instance) override;
+};
+
+// DataLogTrimCR factory function
+// NOTE(review): `interval` is presumably the period between trim passes -- confirm against the definition
+extern RGWCoroutine* create_data_log_trim_cr(RGWRados *store,
+ RGWHTTPManager *http,
+ int num_shards, utime_t interval);
+
+// factory function for datalog trim via radosgw-admin
+// `markers` is taken by non-const reference
+// NOTE(review): presumably one marker per shard, possibly updated by the coroutine -- confirm
+RGWCoroutine* create_admin_data_log_trim_cr(RGWRados *store,
+ RGWHTTPManager *http,
+ int num_shards,
+ std::vector<std::string>& markers);
+
+#endif
diff --git a/src/rgw/rgw_dencoder.cc b/src/rgw/rgw_dencoder.cc
new file mode 100644
index 00000000..91078c15
--- /dev/null
+++ b/src/rgw/rgw_dencoder.cc
@@ -0,0 +1,564 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_log.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_cache.h"
+#include "rgw_meta_sync_status.h"
+#include "rgw_data_sync.h"
+
+#include "common/Formatter.h"
+
+static string shadow_ns = RGW_OBJ_NS_SHADOW;
+
+// Populate *b from C strings: tenant (t), name (n), explicit data pool (dp),
+// explicit index pool (ip), marker (m) and bucket id (id).
+static void init_bucket(rgw_bucket *b, const char *t, const char *n, const char *dp, const char *ip, const char *m, const char *id)
+{
+  rgw_bucket& bucket = *b;
+
+  bucket.tenant = t;
+  bucket.name = n;
+  bucket.marker = m;
+  bucket.bucket_id = id;
+
+  auto& placement = bucket.explicit_placement;
+  placement.data_pool = rgw_pool(dp);
+  placement.index_pool = rgw_pool(ip);
+}
+
+// Appends two heap-allocated instances to o: a default-constructed part,
+// then a part pointing at "object" in a sample bucket.
+void RGWObjManifestPart::generate_test_instances(std::list<RGWObjManifestPart*>& o)
+{
+  o.push_back(new RGWObjManifestPart);
+
+  rgw_bucket bucket;
+  init_bucket(&bucket, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12");
+
+  auto *part = new RGWObjManifestPart;
+  part->loc = rgw_obj(bucket, "object");
+  part->loc_ofs = 512 * 1024;
+  part->size = 128 * 1024;
+  o.push_back(part);
+}
+
+// Position the iterator at logical offset o within the manifest, updating
+// the rule/part/stripe bookkeeping and the resulting object location.
+void RGWObjManifest::obj_iterator::seek(uint64_t o)
+{
+ ofs = o;
+ // explicit manifests: locate the part whose start offset is the greatest one <= ofs
+ if (manifest->explicit_objs) {
+ explicit_iter = manifest->objs.upper_bound(ofs);
+ if (explicit_iter != manifest->objs.begin()) {
+ --explicit_iter;
+ }
+ // seeking at or past the end clamps ofs to obj_size and leaves location untouched
+ if (ofs >= manifest->obj_size) {
+ ofs = manifest->obj_size;
+ return;
+ }
+ // NOTE(review): both calls dereference explicit_iter, so a non-empty objs map is assumed here
+ update_explicit_pos();
+ update_location();
+ return;
+ }
+ // offset falls inside the head: the stripe is the head itself
+ if (o < manifest->get_head_size()) {
+ rule_iter = manifest->rules.begin();
+ stripe_ofs = 0;
+ stripe_size = manifest->get_head_size();
+ if (rule_iter != manifest->rules.end()) {
+ cur_part_id = rule_iter->second.start_part_num;
+ cur_override_prefix = rule_iter->second.override_prefix;
+ }
+ update_location();
+ return;
+ }
+
+ // pick the rule with the greatest start offset <= ofs; next_rule_iter keeps the one after it
+ rule_iter = manifest->rules.upper_bound(ofs);
+ next_rule_iter = rule_iter;
+ if (rule_iter != manifest->rules.begin()) {
+ --rule_iter;
+ }
+
+ // still end() only when the rules map is empty
+ if (rule_iter == manifest->rules.end()) {
+ update_location();
+ return;
+ }
+
+ RGWObjManifestRule& rule = rule_iter->second;
+
+ // part_size == 0 means the rule is not split into parts
+ if (rule.part_size > 0) {
+ cur_part_id = rule.start_part_num + (ofs - rule.start_ofs) / rule.part_size;
+ } else {
+ cur_part_id = rule.start_part_num;
+ }
+ part_ofs = rule.start_ofs + (cur_part_id - rule.start_part_num) * rule.part_size;
+
+ if (rule.stripe_max_size > 0) {
+ cur_stripe = (ofs - part_ofs) / rule.stripe_max_size;
+
+ stripe_ofs = part_ofs + cur_stripe * rule.stripe_max_size;
+ // with part id 0 and a non-empty head, the stripe index is bumped by one
+ if (!cur_part_id && manifest->get_head_size() > 0) {
+ cur_stripe++;
+ }
+ } else {
+ cur_stripe = 0;
+ stripe_ofs = part_ofs;
+ }
+
+ // stripe size is capped by the object end (no parts) or by the part end (with parts)
+ if (!rule.part_size) {
+ stripe_size = rule.stripe_max_size;
+ stripe_size = std::min(manifest->get_obj_size() - stripe_ofs, stripe_size);
+ } else {
+ uint64_t next = std::min(stripe_ofs + rule.stripe_max_size, part_ofs + rule.part_size);
+ stripe_size = next - stripe_ofs;
+ }
+
+ cur_override_prefix = rule.override_prefix;
+
+ update_location();
+}
+
+// Recompute `location` for the current position: the explicit part's
+// location, the head object, or an implicit tail location.
+void RGWObjManifest::obj_iterator::update_location()
+{
+  if (manifest->explicit_objs) {
+    location = explicit_iter->second.loc;
+  } else if (ofs < manifest->get_head_size()) {
+    // still inside the head: use the head object with the head placement rule
+    location = manifest->get_obj();
+    location.set_placement_rule(manifest->get_head_placement_rule());
+  } else {
+    manifest->get_implicit_location(cur_part_id, cur_stripe, ofs,
+                                    &cur_override_prefix, &location);
+  }
+}
+
+// Snap ofs/stripe_ofs to the start of the current explicit part and set
+// stripe_size to the distance to the next part, or to the object end for
+// the last part.
+void RGWObjManifest::obj_iterator::update_explicit_pos()
+{
+  ofs = explicit_iter->first;
+  stripe_ofs = ofs;
+
+  map<uint64_t, RGWObjManifestPart>::iterator following = explicit_iter;
+  ++following;
+
+  const bool is_last_part = (following == manifest->objs.end());
+  if (is_last_part) {
+    stripe_size = manifest->obj_size - ofs;
+  } else {
+    stripe_size = following->first - ofs;
+  }
+}
+
+// Appends two heap-allocated manifests to o: one holding ten 512 KiB parts
+// with obj_size set to 5 MiB, then a default-constructed one.
+void RGWObjManifest::generate_test_instances(std::list<RGWObjManifest*>& o)
+{
+  // every part is identical, so build it once
+  rgw_bucket bucket;
+  init_bucket(&bucket, "tenant", "bucket", ".pool", ".index_pool", "marker_", "12");
+
+  RGWObjManifestPart part;
+  part.loc = rgw_obj(bucket, "object");
+  part.loc_ofs = 0;
+  part.size = 512 * 1024;
+
+  RGWObjManifest *manifest = new RGWObjManifest;
+  const uint64_t part_len = 512 * 1024;
+  for (uint64_t idx = 0; idx < 10; ++idx) {
+    manifest->objs[idx * part_len] = part;
+  }
+  manifest->obj_size = 5 * 1024 * 1024;
+
+  o.push_back(manifest);
+  o.push_back(new RGWObjManifest);
+}
+
+// Compute the object that backs (cur_part_id, cur_stripe, ofs).
+//  - part 0 with ofs inside the head: the head object, head placement rule
+//  - part 0 otherwise:                "<prefix><stripe>" in the shadow namespace
+//  - part N, stripe 0:                "<prefix>.<part>" in the multipart namespace
+//  - part N, stripe M:                "<prefix>.<part>_<stripe>" in the shadow namespace
+// <prefix> is *override_prefix when that is non-null and non-empty, else the
+// manifest's own prefix.
+void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location)
+{
+  if (!cur_part_id && ofs < max_head_size) {
+    location->set_placement_rule(head_placement_rule);
+    *location = obj;
+    return;
+  }
+
+  rgw_obj loc;
+  string& oid = loc.key.name;
+  string& ns = loc.key.ns;
+
+  const bool use_override = override_prefix && !override_prefix->empty();
+  oid = use_override ? *override_prefix : prefix;
+
+  // ids are narrowed to int before formatting, as in the stored object names
+  char suffix[32];
+  if (!cur_part_id) {
+    snprintf(suffix, sizeof(suffix), "%d", (int)cur_stripe);
+    ns = shadow_ns;
+  } else if (cur_stripe == 0) {
+    snprintf(suffix, sizeof(suffix), ".%d", (int)cur_part_id);
+    ns = RGW_OBJ_NS_MULTIPART;
+  } else {
+    snprintf(suffix, sizeof(suffix), ".%d_%d", (int)cur_part_id, (int)cur_stripe);
+    ns = shadow_ns;
+  }
+  oid += suffix;
+
+  const bool has_tail_bucket = !tail_placement.bucket.name.empty();
+  loc.bucket = has_tail_bucket ? tail_placement.bucket : obj.bucket;
+
+  // Always overwrite instance with tail_instance
+  // to get the right shadow object location
+  loc.key.set_instance(tail_instance);
+
+  location->set_placement_rule(tail_placement.placement_rule);
+  *location = loc;
+}
+
+
+
+// Appends a fully populated log entry followed by a default-constructed one.
+void rgw_log_entry::generate_test_instances(list<rgw_log_entry*>& o)
+{
+  auto *entry = new rgw_log_entry;
+
+  // ownership and identity
+  entry->object_owner = "object_owner";
+  entry->bucket_owner = "bucket_owner";
+  entry->bucket = "bucket";
+  entry->remote_addr = "1.2.3.4";
+  entry->user = "user";
+  entry->obj = rgw_obj_key("obj");
+
+  // request and response
+  entry->uri = "http://uri/bucket/obj";
+  entry->http_status = "200";
+  entry->error_code = "error_code";
+  entry->bytes_sent = 1024;
+  entry->bytes_received = 512;
+  entry->obj_size = 2048;
+  entry->user_agent = "user_agent";
+  entry->referrer = "referrer";
+  entry->bucket_id = "10";
+
+  o.push_back(entry);
+  o.push_back(new rgw_log_entry);
+}
+
+// Appends a permission set to RGW_PERM_WRITE_ACP, then a default one.
+void ACLPermission::generate_test_instances(list<ACLPermission*>& o)
+{
+  auto *perm = new ACLPermission;
+  perm->set_permissions(RGW_PERM_WRITE_ACP);
+
+  o.push_back(perm);
+  o.push_back(new ACLPermission);
+}
+
+// Appends a grantee type set to ACL_TYPE_CANON_USER, then a default one.
+void ACLGranteeType::generate_test_instances(list<ACLGranteeType*>& o)
+{
+  auto *type = new ACLGranteeType;
+  type->set(ACL_TYPE_CANON_USER);
+
+  o.push_back(type);
+  o.push_back(new ACLGranteeType);
+}
+
+/* the following is copied here from rgw_acl_s3.cc, to avoid having to link
+ against everything that file needs */
+
+#define RGW_URI_ALL_USERS "http://acs.amazonaws.com/groups/global/AllUsers"
+#define RGW_URI_AUTH_USERS "http://acs.amazonaws.com/groups/global/AuthenticatedUsers"
+
+static string rgw_uri_all_users = RGW_URI_ALL_USERS;
+static string rgw_uri_auth_users = RGW_URI_AUTH_USERS;
+
+// Kept for backward compatibility: simply forwards to the S3 implementation.
+ACLGroupTypeEnum ACLGrant::uri_to_group(string& uri)
+{
+  const ACLGroupTypeEnum group = ACLGrant_S3::uri_to_group(uri);
+  return group;
+}
+
+// Map a group URI to its enum value: the AllUsers and AuthenticatedUsers URIs
+// are recognized (exact match); anything else yields ACL_GROUP_NONE.
+ACLGroupTypeEnum ACLGrant_S3::uri_to_group(string& uri)
+{
+  if (uri == rgw_uri_all_users) {
+    return ACL_GROUP_ALL_USERS;
+  }
+  if (uri == rgw_uri_auth_users) {
+    return ACL_GROUP_AUTHENTICATED_USERS;
+  }
+  return ACL_GROUP_NONE;
+}
+
+// Appends three grants to o, in this order:
+//   1. a canonical-user grant (user "rgw", READ) with an email set,
+//   2. a group grant (authenticated users, WRITE),
+//   3. a default-constructed grant.
+void ACLGrant::generate_test_instances(list<ACLGrant*>& o)
+{
+  rgw_user id("rgw");
+  string name, email;
+  name = "Mr. RGW";
+  email = "r@gw";
+
+  ACLGrant *g1 = new ACLGrant;
+  g1->set_canon(id, name, RGW_PERM_READ);
+  g1->email = email;
+  o.push_back(g1);
+
+  // configure g2 itself; calling set_group() on g1 here would overwrite the
+  // canonical-user grant above and leave g2 default-constructed
+  ACLGrant *g2 = new ACLGrant;
+  g2->set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_WRITE);
+  o.push_back(g2);
+
+  o.push_back(new ACLGrant);
+}
+
+// Appends an ACL holding every ACLGrant test instance, then an empty ACL.
+// The temporary grants are passed to add_grant() and deleted right after.
+void RGWAccessControlList::generate_test_instances(list<RGWAccessControlList*>& o)
+{
+  list<ACLGrant *> grants;
+  ACLGrant::generate_test_instances(grants);
+
+  RGWAccessControlList *populated = new RGWAccessControlList(NULL);
+  for (ACLGrant *grant : grants) {
+    populated->add_grant(grant);
+    delete grant;
+  }
+
+  o.push_back(populated);
+  o.push_back(new RGWAccessControlList(NULL));
+}
+
+// Appends an owner with id "rgw" and display name "Mr. RGW", then a default one.
+void ACLOwner::generate_test_instances(list<ACLOwner*>& o)
+{
+  auto *sample = new ACLOwner;
+  sample->id = "rgw";
+  sample->display_name = "Mr. RGW";
+
+  o.push_back(sample);
+  o.push_back(new ACLOwner);
+}
+
+// Appends one policy per RGWAccessControlList test instance (each owned by
+// user "rgw" / "radosgw"), followed by a default-constructed policy.
+//
+// The ACL list has to be generated before iterating over it: looping over
+// the still-empty list would never enter the body and no populated policy
+// would be produced.
+void RGWAccessControlPolicy::generate_test_instances(list<RGWAccessControlPolicy*>& o)
+{
+  list<RGWAccessControlList *> acl_list;
+  RGWAccessControlList::generate_test_instances(acl_list);
+
+  string name = "radosgw";
+  rgw_user id("rgw");
+
+  for (RGWAccessControlList *l : acl_list) {
+    RGWAccessControlPolicy *p = new RGWAccessControlPolicy(NULL);
+    p->acl = *l;
+    p->owner.set_name(name);
+    p->owner.set_id(id);
+    o.push_back(p);
+
+    // the policy holds a copy; release every generated list so none leaks
+    delete l;
+  }
+
+  o.push_back(new RGWAccessControlPolicy(NULL));
+}
+
+
+// Appends a meta info with size 1 MiB, then a default one.
+void ObjectMetaInfo::generate_test_instances(list<ObjectMetaInfo*>& o)
+{
+  auto *meta = new ObjectMetaInfo;
+  meta->size = 1024 * 1024;
+
+  o.push_back(meta);
+  o.push_back(new ObjectMetaInfo);
+}
+
+// Appends a cache info carrying data, two xattrs, two rm_xattrs and a meta
+// size, then a default-constructed one.
+void ObjectCacheInfo::generate_test_instances(list<ObjectCacheInfo*>& o)
+{
+  using ceph::encode;
+
+  // two encoded payloads reused across the fields below
+  string first = "this is a string";
+  string second = "this is a another string";
+  bufferlist first_bl, second_bl;
+  encode(first, first_bl);
+  encode(second, second_bl);
+
+  ObjectCacheInfo *info = new ObjectCacheInfo;
+  info->status = 0;
+  info->flags = CACHE_FLAG_MODIFY_XATTRS;
+  info->data = first_bl;
+  info->xattrs["x1"] = first_bl;
+  info->xattrs["x2"] = second_bl;
+  info->rm_xattrs["r2"] = second_bl;
+  info->rm_xattrs["r3"] = first_bl;
+  info->meta.size = 512 * 1024;
+
+  o.push_back(info);
+  o.push_back(new ObjectCacheInfo);
+}
+
+// Appends a single default-constructed instance.
+void RGWCacheNotifyInfo::generate_test_instances(list<RGWCacheNotifyInfo*>& o)
+{
+  auto *info = new RGWCacheNotifyInfo;
+  o.push_back(info);
+}
+
+// Appends a key with id, key and subuser set, then a default one.
+void RGWAccessKey::generate_test_instances(list<RGWAccessKey*>& o)
+{
+  auto *access_key = new RGWAccessKey;
+  access_key->id = "id";
+  access_key->key = "key";
+  access_key->subuser = "subuser";
+
+  o.push_back(access_key);
+  o.push_back(new RGWAccessKey);
+}
+
+// Appends a subuser named "name" with perm_mask 0xf, then a default one.
+void RGWSubUser::generate_test_instances(list<RGWSubUser*>& o)
+{
+  auto *subuser = new RGWSubUser;
+  subuser->name = "name";
+  subuser->perm_mask = 0xf;
+
+  o.push_back(subuser);
+  o.push_back(new RGWSubUser);
+}
+
+// Appends a user with one access key, one swift key and one subuser, then a
+// default-constructed user.
+void RGWUserInfo::generate_test_instances(list<RGWUserInfo*>& o)
+{
+  RGWAccessKey access_key;
+  access_key.id = "id1";
+  access_key.key = "key1";
+
+  RGWAccessKey swift_key;
+  swift_key.id = "id2";
+  swift_key.subuser = "subuser";
+
+  RGWSubUser subuser;
+  subuser.name = "id2";
+  subuser.perm_mask = 0x1;
+
+  RGWUserInfo *info = new RGWUserInfo;
+  info->user_id = "user_id";
+  info->display_name = "display_name";
+  info->user_email = "user@email";
+  info->access_keys[access_key.id] = access_key;
+  info->swift_keys[swift_key.id] = swift_key;
+  info->subusers[subuser.name] = subuser;
+
+  o.push_back(info);
+  o.push_back(new RGWUserInfo);
+}
+
+// Appends a bucket filled in through init_bucket(), then a default one.
+void rgw_bucket::generate_test_instances(list<rgw_bucket*>& o)
+{
+  auto *bucket = new rgw_bucket;
+  init_bucket(bucket, "tenant", "name", "pool", ".index_pool", "marker", "123");
+
+  o.push_back(bucket);
+  o.push_back(new rgw_bucket);
+}
+
+// Appends a suspended bucket info owned by "owner", then a default one.
+void RGWBucketInfo::generate_test_instances(list<RGWBucketInfo*>& o)
+{
+  auto *info = new RGWBucketInfo;
+  init_bucket(&info->bucket, "tenant", "bucket", "pool", ".index_pool", "marker", "10");
+  info->owner = "owner";
+  info->flags = BUCKET_SUSPENDED;
+
+  o.push_back(info);
+  o.push_back(new RGWBucketInfo);
+}
+
+// Appends two default-constructed zone groups.
+void RGWZoneGroup::generate_test_instances(list<RGWZoneGroup*>& o)
+{
+  auto *zonegroup = new RGWZoneGroup;
+  o.push_back(zonegroup);
+
+  o.push_back(new RGWZoneGroup);
+}
+
+// Appends two default-constructed zones.
+void RGWZone::generate_test_instances(list<RGWZone*> &o)
+{
+  auto *zone = new RGWZone;
+  o.push_back(zone);
+
+  o.push_back(new RGWZone);
+}
+
+// Appends two default-constructed realms.
+void RGWRealm::generate_test_instances(list<RGWRealm*> &o)
+{
+  auto *realm = new RGWRealm;
+  o.push_back(realm);
+
+  o.push_back(new RGWRealm);
+}
+
+// Appends two default-constructed periods.
+void RGWPeriod::generate_test_instances(list<RGWPeriod*> &o)
+{
+  auto *period = new RGWPeriod;
+  o.push_back(period);
+
+  o.push_back(new RGWPeriod);
+}
+
+// Appends two default-constructed zone params.
+void RGWZoneParams::generate_test_instances(list<RGWZoneParams*> &o)
+{
+  for (int n = 0; n < 2; ++n) {
+    o.push_back(new RGWZoneParams);
+  }
+}
+
+// Appends an OLH info with removed explicitly false, then a default one.
+void RGWOLHInfo::generate_test_instances(list<RGWOLHInfo*> &o)
+{
+  auto *info = new RGWOLHInfo;
+  info->removed = false;
+
+  o.push_back(info);
+  o.push_back(new RGWOLHInfo);
+}
+
+// Appends a bucket entry with sample size and count values, then a default one.
+void RGWBucketEnt::generate_test_instances(list<RGWBucketEnt*>& o)
+{
+  auto *ent = new RGWBucketEnt;
+  init_bucket(&ent->bucket, "tenant", "bucket", "pool", ".index_pool", "marker", "10");
+  ent->size = 1024;
+  ent->size_rounded = 4096;
+  ent->count = 1;
+
+  o.push_back(ent);
+  o.push_back(new RGWBucketEnt);
+}
+
+// Appends part #1 (10 MiB, etag "etag"), then a default-constructed part.
+void RGWUploadPartInfo::generate_test_instances(list<RGWUploadPartInfo*>& o)
+{
+  auto *part = new RGWUploadPartInfo;
+  part->num = 1;
+  part->size = 10 * 1024 * 1024;
+  part->etag = "etag";
+
+  o.push_back(part);
+  o.push_back(new RGWUploadPartInfo);
+}
+
+// Appends an object named "object" in a sample bucket, then a default one.
+void rgw_obj::generate_test_instances(list<rgw_obj*>& o)
+{
+  rgw_bucket bucket;
+  init_bucket(&bucket, "tenant", "bucket", "pool", ".index_pool", "marker", "10");
+
+  o.push_back(new rgw_obj(bucket, "object"));
+  o.push_back(new rgw_obj);
+}
+
+// Appends an info in StateBuildingFullSyncMaps with a period and realm
+// epoch set, then a default one.
+void rgw_meta_sync_info::generate_test_instances(list<rgw_meta_sync_info*>& o)
+{
+  rgw_meta_sync_info *sample = new rgw_meta_sync_info;
+  sample->state = rgw_meta_sync_info::StateBuildingFullSyncMaps;
+  sample->period = "periodid";
+  sample->realm_epoch = 5;
+
+  o.push_back(sample);
+  o.push_back(new rgw_meta_sync_info);
+}
+
+// Appends a marker in IncrementalSync state with a marker string and realm
+// epoch set, then a default one.
+void rgw_meta_sync_marker::generate_test_instances(list<rgw_meta_sync_marker*>& o)
+{
+  rgw_meta_sync_marker *sample = new rgw_meta_sync_marker;
+  sample->state = rgw_meta_sync_marker::IncrementalSync;
+  sample->marker = "01234";
+  sample->realm_epoch = 5;
+
+  o.push_back(sample);
+  o.push_back(new rgw_meta_sync_marker);
+}
+
+// Appends a single default-constructed status.
+void rgw_meta_sync_status::generate_test_instances(list<rgw_meta_sync_status*>& o)
+{
+  rgw_meta_sync_status *status = new rgw_meta_sync_status;
+  o.push_back(status);
+}
+
+// Appends an info in StateBuildingFullSyncMaps with 8 shards, then a default one.
+void rgw_data_sync_info::generate_test_instances(list<rgw_data_sync_info*>& o)
+{
+  rgw_data_sync_info *sample = new rgw_data_sync_info;
+  sample->state = rgw_data_sync_info::StateBuildingFullSyncMaps;
+  sample->num_shards = 8;
+
+  o.push_back(sample);
+  o.push_back(new rgw_data_sync_info);
+}
+
+// Appends a marker in IncrementalSync state with a marker string and pos
+// set, then a default one.
+void rgw_data_sync_marker::generate_test_instances(list<rgw_data_sync_marker*>& o)
+{
+  rgw_data_sync_marker *sample = new rgw_data_sync_marker;
+  sample->state = rgw_data_sync_marker::IncrementalSync;
+  sample->marker = "01234";
+  sample->pos = 5;
+
+  o.push_back(sample);
+  o.push_back(new rgw_data_sync_marker);
+}
+
+// Appends a single default-constructed status.
+void rgw_data_sync_status::generate_test_instances(list<rgw_data_sync_status*>& o)
+{
+  rgw_data_sync_status *status = new rgw_data_sync_status;
+  o.push_back(status);
+}
diff --git a/src/rgw/rgw_dmclock.h b/src/rgw/rgw_dmclock.h
new file mode 100644
index 00000000..79c0aeb7
--- /dev/null
+++ b/src/rgw/rgw_dmclock.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ * Copyright (C) 2019 SUSE LLC
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_DMCLOCK_H
+#define RGW_DMCLOCK_H
+#include "dmclock/src/dmclock_server.h"
+
+namespace rgw::dmclock {
+// TODO: implement read vs write
+// Request classes used as dmclock client identifiers.
+enum class client_id {
+ admin, //< /admin apis
+ auth, //< swift auth, sts
+ data, //< PutObj, GetObj
+ metadata, //< bucket operations, object metadata
+ count //< not a client: one past the last real id, i.e. the number of clients
+};
+
+// TODO move these to dmclock/types or so in submodule
+using crimson::dmclock::Cost;
+using crimson::dmclock::ClientInfo;
+
+// Scheduler implementations selectable through the "rgw_scheduler_type" option.
+enum class scheduler_t {
+ none,
+ throttler,
+ dmclock
+};
+
+/// Translate the "rgw_scheduler_type" config value into a scheduler_t.
+/// "dmclock" and "throttler" map to their enumerators; any other value
+/// yields scheduler_t::none.
+inline scheduler_t get_scheduler_t(CephContext* const cct)
+{
+  const auto configured = cct->_conf.get_val<std::string>("rgw_scheduler_type");
+
+  if (configured == "dmclock") {
+    return scheduler_t::dmclock;
+  }
+  if (configured == "throttler") {
+    return scheduler_t::throttler;
+  }
+  return scheduler_t::none;
+}
+
+} // namespace rgw::dmclock
+
+#endif /* RGW_DMCLOCK_H */
diff --git a/src/rgw/rgw_dmclock_async_scheduler.cc b/src/rgw/rgw_dmclock_async_scheduler.cc
new file mode 100644
index 00000000..18ba5a5e
--- /dev/null
+++ b/src/rgw/rgw_dmclock_async_scheduler.cc
@@ -0,0 +1,175 @@
+
+#include "common/async/completion.h"
+#include "rgw_dmclock_async_scheduler.h"
+#include "rgw_dmclock_scheduler.h"
+
+namespace rgw::dmclock {
+
+// Cancels everything still queued, then removes this object from the config
+// observers when an observer was supplied.
+AsyncScheduler::~AsyncScheduler()
+{
+  cancel();
+
+  if (!observer) {
+    return;
+  }
+  cct->_conf.remove_observer(this);
+}
+
+// With a wrapped observer, report that observer's keys; otherwise track only
+// rgw_max_concurrent_requests.
+const char** AsyncScheduler::get_tracked_conf_keys() const
+{
+  if (!observer) {
+    static const char* keys[] = { "rgw_max_concurrent_requests", nullptr };
+    return keys;
+  }
+  return observer->get_tracked_conf_keys();
+}
+
+// Forward the change to the wrapped observer (if any), refresh the request
+// limit when rgw_max_concurrent_requests changed, then refresh the queue's
+// client infos and trigger an immediate processing pass.
+void AsyncScheduler::handle_conf_change(const ConfigProxy& conf,
+                                        const std::set<std::string>& changed)
+{
+  if (observer) {
+    observer->handle_conf_change(conf, changed);
+  }
+
+  const bool limit_changed =
+    changed.find("rgw_max_concurrent_requests") != changed.end();
+  if (limit_changed) {
+    auto new_max = conf.get_val<int64_t>("rgw_max_concurrent_requests");
+    // a non-positive setting means "no limit"
+    if (new_max > 0) {
+      max_requests = new_max;
+    } else {
+      max_requests = std::numeric_limits<int64_t>::max();
+    }
+  }
+
+  queue.update_client_infos();
+  schedule(crimson::dmclock::TimeZero);
+}
+
+// Submit the request through async_request() on the caller's yield context
+// and convert the resulting error_code into an errno-style return:
+// 0 on success, -EAGAIN for resource_unavailable_try_again, otherwise the
+// negated error value.
+int AsyncScheduler::schedule_request_impl(const client_id& client,
+                                          const ReqParams& params,
+                                          const Time& time, const Cost& cost,
+                                          optional_yield yield_ctx)
+{
+  ceph_assert(yield_ctx);
+
+  auto &yield = yield_ctx.get_yield_context();
+  boost::system::error_code ec;
+  async_request(client, params, time, cost, yield[ec]);
+
+  if (!ec) {
+    return 0;
+  }
+  if (ec == boost::system::errc::resource_unavailable_try_again) {
+    return -EAGAIN;
+  }
+  return -ec.value();
+}
+
+// Give back one throttle unit and trigger an immediate processing pass.
+void AsyncScheduler::request_complete()
+{
+  outstanding_requests--;
+  schedule(crimson::dmclock::TimeZero);
+}
+
+// Cancel every queued request: each one is completed with operation_aborted,
+// the timer is cancelled, and per-client cancellation totals are reported
+// through on_cancel().
+void AsyncScheduler::cancel()
+{
+ ClientSums sums;
+
+ // the filter returns true for every request, so all of them are removed
+ queue.remove_by_req_filter([&] (RequestRef&& request) {
+ // read client/cost before release() gives up ownership of the request
+ inc(sums, request->client, request->cost);
+ auto c = static_cast<Completion*>(request.release());
+ Completion::dispatch(std::unique_ptr<Completion>{c},
+ boost::asio::error::operation_aborted,
+ PhaseType::priority);
+ return true;
+ });
+ timer.cancel();
+
+ // report the totals only for clients that have counters
+ for (size_t i = 0; i < client_count; i++) {
+ if (auto c = counters(static_cast<client_id>(i))) {
+ on_cancel(c, sums[i]);
+ }
+ }
+}
+
+// Cancel the queued requests of one client: each is completed with
+// operation_aborted, the totals are reported through on_cancel(), and a
+// processing pass is scheduled right away.
+void AsyncScheduler::cancel(const client_id& client)
+{
+ ClientSum sum;
+
+ queue.remove_by_client(client, false, [&] (RequestRef&& request) {
+ sum.count++;
+ // read the cost before release() gives up ownership of the request
+ sum.cost += request->cost;
+ auto c = static_cast<Completion*>(request.release());
+ Completion::dispatch(std::unique_ptr<Completion>{c},
+ boost::asio::error::operation_aborted,
+ PhaseType::priority);
+ });
+ if (auto c = counters(client)) {
+ on_cancel(c, sum);
+ }
+ schedule(crimson::dmclock::TimeZero);
+}
+
+// Arm the timer for `time`; when it fires without having been cancelled,
+// run process() with the current time.
+void AsyncScheduler::schedule(const Time& time)
+{
+  auto on_timer = [this] (boost::system::error_code ec) {
+    // process requests unless the wait was canceled. note that a canceled
+    // wait may execute after this AsyncScheduler destructs
+    if (ec == boost::asio::error::operation_aborted) {
+      return;
+    }
+    process(get_time());
+  };
+
+  timer.expires_at(Clock::from_double(time));
+  timer.async_wait(on_timer);
+}
+
+// Pull ready requests from the queue and post their completions until the
+// queue has nothing ready or the concurrent-request limit is reached, then
+// update the perf counters.
+void AsyncScheduler::process(const Time& now)
+{
+ // must run in the executor. we should only invoke completion handlers if the
+ // executor is running
+ assert(get_executor().running_in_this_thread());
+
+ // per-client totals for the reservation (rsums) and priority (psums) phases
+ ClientSums rsums, psums;
+
+ while (outstanding_requests < max_requests) {
+ auto pull = queue.pull_request(now);
+
+ if (pull.is_none()) {
+ // no pending requests, cancel the timer
+ timer.cancel();
+ break;
+ }
+ if (pull.is_future()) {
+ // update the timer based on the future time
+ schedule(pull.getTime());
+ break;
+ }
+ // a request is ready: it now holds one throttle unit
+ ++outstanding_requests;
+
+ // complete the request
+ auto& r = pull.get_retn();
+ // copy out what the counters need before release() gives up the request
+ auto client = r.client;
+ auto phase = r.phase;
+ auto started = r.request->started;
+ auto cost = r.request->cost;
+ auto c = static_cast<Completion*>(r.request.release());
+ Completion::post(std::unique_ptr<Completion>{c},
+ boost::system::error_code{}, phase);
+
+ // record queueing latency under the phase the request was served in
+ if (auto c = counters(client)) {
+ auto lat = Clock::from_double(now) - Clock::from_double(started);
+ if (phase == PhaseType::reservation) {
+ inc(rsums, client, cost);
+ c->tinc(queue_counters::l_res_latency, lat);
+ } else {
+ inc(psums, client, cost);
+ c->tinc(queue_counters::l_prio_latency, lat);
+ }
+ }
+ }
+
+ // limit reached: bump the throttle counter, looked up under client_id::count
+ // NOTE(review): presumably that id maps to the throttle counters -- confirm in the counters provider
+ if (outstanding_requests >= max_requests) {
+ if(auto c = counters(client_id::count)){
+ c->inc(throttle_counters::l_throttle);
+ }
+ }
+
+ for (size_t i = 0; i < client_count; i++) {
+ if (auto c = counters(static_cast<client_id>(i))) {
+ on_process(c, rsums[i], psums[i]);
+ }
+ }
+}
+
+} // namespace rgw::dmclock
diff --git a/src/rgw/rgw_dmclock_async_scheduler.h b/src/rgw/rgw_dmclock_async_scheduler.h
new file mode 100644
index 00000000..1d454acd
--- /dev/null
+++ b/src/rgw/rgw_dmclock_async_scheduler.h
@@ -0,0 +1,217 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_DMCLOCK_ASYNC_SCHEDULER_H
+#define RGW_DMCLOCK_ASYNC_SCHEDULER_H
+
+#include "common/async/completion.h"
+
+#include <boost/asio.hpp>
+#include "rgw_dmclock_scheduler.h"
+#include "rgw_dmclock_scheduler_ctx.h"
+
+namespace rgw::dmclock {
+ namespace async = ceph::async;
+
+/*
+ * A dmclock request scheduling service for use with boost::asio.
+ *
+ * An asynchronous dmclock priority queue, where scheduled requests complete
+ * on a boost::asio executor.
+ */
+class AsyncScheduler : public md_config_obs_t, public Scheduler {
+ public:
+ // `observer` may be null; this object is registered as a config observer
+ // only when it is non-null
+ template <typename ...Args> // args forwarded to PullPriorityQueue ctor
+ AsyncScheduler(CephContext *cct, boost::asio::io_context& context,
+ GetClientCounters&& counters, md_config_obs_t *observer,
+ Args&& ...args);
+ ~AsyncScheduler();
+
+ using executor_type = boost::asio::io_context::executor_type;
+
+ /// return the default executor for async_request() callbacks
+ executor_type get_executor() noexcept {
+ return timer.get_executor();
+ }
+
+ /// submit an async request for dmclock scheduling. the given completion
+ /// handler will be invoked with (error_code, PhaseType) when the request
+ /// is ready or canceled. on success, this grants a throttle unit that must
+ /// be returned with a call to request_complete()
+ template <typename CompletionToken>
+ auto async_request(const client_id& client, const ReqParams& params,
+ const Time& time, Cost cost, CompletionToken&& token);
+
+ /// returns a throttle unit granted by async_request()
+ void request_complete() override;
+
+ /// cancel all queued requests, invoking their completion handlers with an
+ /// operation_aborted error and default-constructed result
+ void cancel();
+
+ /// cancel all queued requests for a given client, invoking their completion
+ /// handler with an operation_aborted error and default-constructed result
+ void cancel(const client_id& client);
+
+ // md_config_obs_t interface
+ const char** get_tracked_conf_keys() const override;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string>& changed) override;
+
+ private:
+ // Scheduler interface: runs async_request() on the caller's yield context
+ int schedule_request_impl(const client_id& client, const ReqParams& params,
+ const Time& time, const Cost& cost,
+ optional_yield yield_ctx) override;
+
+ // passed as the third template argument of PullPriorityQueue
+ static constexpr bool IsDelayed = false;
+ using Queue = crimson::dmclock::PullPriorityQueue<client_id, Request, IsDelayed>;
+ using RequestRef = typename Queue::RequestRef;
+ Queue queue; //< dmclock priority queue
+
+ // completion handlers are called with (error_code, PhaseType); Completion
+ // is declared with Request as its base so it can be stored as a RequestRef
+ using Signature = void(boost::system::error_code, PhaseType);
+ using Completion = async::Completion<Signature, async::AsBase<Request>>;
+
+ using Clock = ceph::coarse_real_clock;
+ // from Boost 1.70 on the timer type also names the executor type
+#if BOOST_VERSION < 107000
+ using Timer = boost::asio::basic_waitable_timer<Clock>;
+#else
+ using Timer = boost::asio::basic_waitable_timer<Clock,
+ boost::asio::wait_traits<Clock>, executor_type>;
+#endif
+ Timer timer; //< timer for the next scheduled request
+
+ CephContext *const cct;
+ md_config_obs_t *const observer; //< observer to update ClientInfoFunc
+ GetClientCounters counters; //< provides per-client perf counters
+
+ /// max request throttle
+ std::atomic<int64_t> max_requests;
+ // number of throttle units currently handed out
+ std::atomic<int64_t> outstanding_requests = 0;
+
+ /// set a timer to process the next request
+ void schedule(const Time& time);
+
+ /// process ready requests, then schedule the next pending request
+ void process(const Time& now);
+};
+
+
+// Builds the queue from the forwarded args and the timer on `context`, and
+// reads the initial request limit from rgw_max_concurrent_requests.
+template <typename ...Args>
+AsyncScheduler::AsyncScheduler(CephContext *cct, boost::asio::io_context& context,
+ GetClientCounters&& counters,
+ md_config_obs_t *observer, Args&& ...args)
+ : queue(std::forward<Args>(args)...),
+ timer(context), cct(cct), observer(observer),
+ counters(std::move(counters)),
+ max_requests(cct->_conf.get_val<int64_t>("rgw_max_concurrent_requests"))
+{
+ // a non-positive setting means "no limit"
+ if (max_requests <= 0) {
+ max_requests = std::numeric_limits<int64_t>::max();
+ }
+ // register for config changes only when an observer was supplied; the
+ // destructor mirrors this condition when removing the registration
+ if (observer) {
+ cct->_conf.add_observer(this);
+ }
+}
+
+// Wrap the completion handler in a Completion carrying a Request, add it to
+// the dmclock queue, and either trigger a processing pass (on success) or
+// post the error to the handler (on failure).
+template <typename CompletionToken>
+auto AsyncScheduler::async_request(const client_id& client,
+ const ReqParams& params,
+ const Time& time, Cost cost,
+ CompletionToken&& token)
+{
+ boost::asio::async_completion<CompletionToken, Signature> init(token);
+
+ auto ex1 = get_executor();
+ auto& handler = init.completion_handler;
+
+ // allocate the Request and add it to the queue
+ auto completion = Completion::create(ex1, std::move(handler),
+ Request{client, time, cost});
+ // cast to unique_ptr<Request>
+ auto req = RequestRef{std::move(completion)};
+ int r = queue.add_request(std::move(req), client, params, time, cost);
+ if (r == 0) {
+ // schedule an immediate call to process() on the executor
+ schedule(crimson::dmclock::TimeZero);
+ if (auto c = counters(client)) {
+ c->inc(queue_counters::l_qlen);
+ c->inc(queue_counters::l_cost, cost);
+ }
+ } else {
+ // post the error code
+ boost::system::error_code ec(r, boost::system::system_category());
+ // cast back to Completion
+ // NOTE(review): this reads `req` after std::move(req) above, so it relies on
+ // add_request() leaving the request in `req` when it fails -- confirm
+ // this local `completion` shadows the one declared above
+ auto completion = static_cast<Completion*>(req.release());
+ async::post(std::unique_ptr<Completion>{completion},
+ ec, PhaseType::priority);
+ if (auto c = counters(client)) {
+ c->inc(queue_counters::l_limit);
+ c->inc(queue_counters::l_limit_cost, cost);
+ }
+ }
+
+ return init.result.get();
+}
+
+/// Scheduler that does no queueing: it only counts outstanding requests and
+/// rejects new ones with -EAGAIN once rgw_max_concurrent_requests is reached.
+/// It tracks that option as a config observer for its whole lifetime.
+class SimpleThrottler : public md_config_obs_t, public dmclock::Scheduler {
+public:
+  SimpleThrottler(CephContext *cct) :
+      cct(cct),
+      max_requests(cct->_conf.get_val<int64_t>("rgw_max_concurrent_requests")),
+      counters(cct, "simple-throttler")
+  {
+    // a non-positive setting means "no limit"
+    if (max_requests <= 0) {
+      max_requests = std::numeric_limits<int64_t>::max();
+    }
+    cct->_conf.add_observer(this);
+  }
+
+  /// Unregister from the config so it never keeps a pointer to a destroyed
+  /// observer (mirrors what AsyncScheduler does in its destructor).
+  ~SimpleThrottler() {
+    cct->_conf.remove_observer(this);
+  }
+
+  const char** get_tracked_conf_keys() const override {
+    static const char* keys[] = { "rgw_max_concurrent_requests", nullptr };
+    return keys;
+  }
+
+  /// Refresh the limit when rgw_max_concurrent_requests changes.
+  void handle_conf_change(const ConfigProxy& conf,
+                          const std::set<std::string>& changed) override
+  {
+    if (changed.count("rgw_max_concurrent_requests")) {
+      auto new_max = conf.get_val<int64_t>("rgw_max_concurrent_requests");
+      max_requests = new_max > 0 ? new_max : std::numeric_limits<int64_t>::max();
+    }
+  }
+
+  /// Give back one unit taken by schedule_request_impl().
+  void request_complete() override {
+    --outstanding_requests;
+  }
+
+private:
+  /// Takes one unit unconditionally, then returns -EAGAIN (and bumps the
+  /// throttle counter) if the limit had already been reached, else 0.
+  /// NOTE(review): the unit is taken even on rejection, so callers are
+  /// expected to call request_complete() in both cases -- confirm.
+  int schedule_request_impl(const client_id&, const ReqParams&,
+                            const Time&, const Cost&,
+                            optional_yield) override {
+    if (outstanding_requests++ >= max_requests) {
+      if (auto c = counters();
+          c != nullptr) {
+        c->inc(throttle_counters::l_throttle);
+      }
+      return -EAGAIN;
+    }
+
+    return 0 ;
+  }
+
+  CephContext *const cct;  //< kept so the destructor can unregister the observer
+  std::atomic<int64_t> max_requests;
+  std::atomic<int64_t> outstanding_requests = 0;
+  ThrottleCounters counters;
+};
+
+} // namespace rgw::dmclock
+#endif /* RGW_DMCLOCK_ASYNC_SCHEDULER_H */
diff --git a/src/rgw/rgw_dmclock_scheduler.h b/src/rgw/rgw_dmclock_scheduler.h
new file mode 100644
index 00000000..aeeb695e
--- /dev/null
+++ b/src/rgw/rgw_dmclock_scheduler.h
@@ -0,0 +1,89 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ * (C) 2019 SUSE LLC
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_DMCLOCK_SCHEDULER_H
+#define RGW_DMCLOCK_SCHEDULER_H
+
+#include "common/ceph_time.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "common/async/yield_context.h"
+#include "rgw_dmclock.h"
+
+namespace rgw::dmclock {
+
+using crimson::dmclock::ReqParams;
+using crimson::dmclock::PhaseType;
+using crimson::dmclock::AtLimit;
+using crimson::dmclock::Time;
+using crimson::dmclock::get_time;
+
+/// function to provide client counters
+using GetClientCounters = std::function<PerfCounters*(client_id)>;
+
+struct Request { // data common to every request handed to a scheduler queue
+ client_id client; // client class the request is accounted against
+ Time started; // timestamp supplied at submission; SyncScheduler::handle_request_cb subtracts it from get_time() to get latency
+ Cost cost; // cost charged for the request; summed into the per-client totals when requests are cancelled
+};
+
+enum class ReqState { // progress of a blocking (sync) request
+ Wait, // initial state; SyncScheduler::add_request blocks while the state is Wait
+ Ready, // set by SyncScheduler::handle_request_cb
+ Cancelled // set by SyncScheduler::cancel; add_request then returns -ECONNABORTED
+};
+
+/// Invokes a callable when the Completer that currently owns it is destroyed.
+///
+/// F must be default-constructible, usable in a boolean context and callable
+/// with no arguments (std::function<void()> satisfies all three). A
+/// default-constructed Completer holds an empty callable and does nothing on
+/// destruction. Ownership of the callable can be moved but not copied.
+template <typename F>
+class Completer {
+public:
+  Completer(F &&f): f(std::move(f)) {}
+  // Default constructor is needed as we need to create an empty completer
+  // that'll be move assigned later in process request
+  Completer() = default;
+  ~Completer() {
+    if (f) {
+      f();
+    }
+  }
+  Completer(const Completer&) = delete;
+  Completer& operator=(const Completer&) = delete;
+  // A moved-from std::function is only guaranteed to be "valid but
+  // unspecified", so the source is explicitly emptied after the move;
+  // otherwise its destructor could run the callback a second time.
+  Completer(Completer&& other) : f(std::move(other.f)) {
+    other.f = F{};
+  }
+  // As with the defaulted operator this replaces, a callable already held by
+  // *this is dropped without being invoked.
+  Completer& operator=(Completer&& other) {
+    if (this != &other) {
+      f = std::move(other.f);
+      other.f = F{};
+    }
+    return *this;
+  }
+private:
+  F f;
+};
+
+using SchedulerCompleter = Completer<std::function<void()>>;
+
+/// Common front end of the request schedulers.
+///
+/// schedule_request() forwards to the subclass hook and pairs its return code
+/// with a SchedulerCompleter that calls request_complete() on this scheduler
+/// when the completer is destroyed -- whatever the return code was.
+class Scheduler {
+public:
+  auto schedule_request(const client_id& client, const ReqParams& params,
+                        const Time& time, const Cost& cost,
+                        optional_yield yield)
+  {
+    int result = schedule_request_impl(client, params, time, cost, yield);
+    return std::make_pair(result,
+                          SchedulerCompleter([this] { request_complete(); }));
+  }
+
+  /// hook run when a scheduled request finishes; does nothing by default
+  virtual void request_complete() {}
+
+  virtual ~Scheduler() {}
+private:
+  /// subclass hook that performs the actual scheduling; its int result is
+  /// returned to the caller of schedule_request() unchanged
+  virtual int schedule_request_impl(const client_id&, const ReqParams&,
+                                    const Time&, const Cost&,
+                                    optional_yield) = 0;
+};
+
+} // namespace rgw::dmclock
+
+#endif // RGW_DMCLOCK_SCHEDULER_H
diff --git a/src/rgw/rgw_dmclock_scheduler_ctx.cc b/src/rgw/rgw_dmclock_scheduler_ctx.cc
new file mode 100644
index 00000000..3ecc977f
--- /dev/null
+++ b/src/rgw/rgw_dmclock_scheduler_ctx.cc
@@ -0,0 +1,177 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ * (C) 2019 SUSE Linux LLC
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include "rgw_dmclock_scheduler_ctx.h"
+
+namespace rgw::dmclock {
+
+/// Fills the per-client table from the configuration held by cct.
+ClientConfig::ClientConfig(CephContext *cct)
+{
+  this->update(cct->_conf);
+}
+
+/// Returns the address of the table entry for the given client; the enum
+/// value is used directly as the index, without a bounds check.
+ClientInfo* ClientConfig::operator()(client_id client)
+{
+  const auto index = static_cast<size_t>(client);
+  return &clients[index];
+}
+
+/// Null-terminated list of the config options this observer is notified about.
+const char** ClientConfig::get_tracked_conf_keys() const
+{
+  static const char* tracked[] = {
+    // admin
+    "rgw_dmclock_admin_res", "rgw_dmclock_admin_wgt", "rgw_dmclock_admin_lim",
+    // auth
+    "rgw_dmclock_auth_res", "rgw_dmclock_auth_wgt", "rgw_dmclock_auth_lim",
+    // data
+    "rgw_dmclock_data_res", "rgw_dmclock_data_wgt", "rgw_dmclock_data_lim",
+    // metadata
+    "rgw_dmclock_metadata_res", "rgw_dmclock_metadata_wgt",
+    "rgw_dmclock_metadata_lim",
+    // not read by update(), but tracked all the same
+    "rgw_max_concurrent_requests",
+    nullptr
+  };
+  return tracked;
+}
+
+/// Rebuilds the per-client table from scratch. Entries are appended in
+/// client_id order, which the static_asserts pin down so that operator() can
+/// index the table by enum value.
+void ClientConfig::update(const ConfigProxy& conf)
+{
+  // appends one entry built from the three named double-valued options
+  auto append = [this, &conf](const char* res, const char* wgt, const char* lim) {
+    clients.emplace_back(conf.get_val<double>(res),
+                         conf.get_val<double>(wgt),
+                         conf.get_val<double>(lim));
+  };
+
+  clients.clear();
+
+  static_assert(0 == static_cast<int>(client_id::admin));
+  append("rgw_dmclock_admin_res", "rgw_dmclock_admin_wgt", "rgw_dmclock_admin_lim");
+
+  static_assert(1 == static_cast<int>(client_id::auth));
+  append("rgw_dmclock_auth_res", "rgw_dmclock_auth_wgt", "rgw_dmclock_auth_lim");
+
+  static_assert(2 == static_cast<int>(client_id::data));
+  append("rgw_dmclock_data_res", "rgw_dmclock_data_wgt", "rgw_dmclock_data_lim");
+
+  static_assert(3 == static_cast<int>(client_id::metadata));
+  append("rgw_dmclock_metadata_res", "rgw_dmclock_metadata_wgt",
+         "rgw_dmclock_metadata_lim");
+}
+
+/// Config-change callback: rebuilds the whole table regardless of which
+/// tracked key changed.
+void ClientConfig::handle_conf_change(const ConfigProxy& conf,
+                                      const std::set<std::string>& /* changed */)
+{
+  this->update(conf);
+}
+
+/// Builds one set of queue counters per client class, plus a set of throttle
+/// counters stored in the extra slot at index client_id::count.
+ClientCounters::ClientCounters(CephContext *cct)
+{
+  auto slot = [this](client_id id) -> PerfCountersRef& {
+    return clients[static_cast<size_t>(id)];
+  };
+
+  slot(client_id::admin) = queue_counters::build(cct, "dmclock-admin");
+  slot(client_id::auth) = queue_counters::build(cct, "dmclock-auth");
+  slot(client_id::data) = queue_counters::build(cct, "dmclock-data");
+  slot(client_id::metadata) = queue_counters::build(cct, "dmclock-metadata");
+  slot(client_id::count) = throttle_counters::build(cct, "dmclock-scheduler");
+}
+
+/// Adds one request of the given cost to the running totals of `client`.
+void inc(ClientSums& sums, client_id client, Cost cost)
+{
+  ClientSum& totals = sums[static_cast<size_t>(client)];
+  totals.cost += cost;
+  ++totals.count;
+}
+
+/// Moves the cancelled totals out of the queue gauges (qlen, cost) and into
+/// the cancel counters. Zero totals are skipped; `c` is dereferenced without
+/// a null check.
+void on_cancel(PerfCounters *c, const ClientSum& sum)
+{
+  const auto cancelled_requests = sum.count;
+  if (cancelled_requests != 0) {
+    c->dec(queue_counters::l_qlen, cancelled_requests);
+    c->inc(queue_counters::l_cancel, cancelled_requests);
+  }
+
+  const auto cancelled_cost = sum.cost;
+  if (cancelled_cost != 0) {
+    c->dec(queue_counters::l_cost, cancelled_cost);
+    c->inc(queue_counters::l_cancel_cost, cancelled_cost);
+  }
+}
+
+/// Accounts for processed requests: rsum is added to the reservation
+/// counters, psum to the priority counters, and their combined totals are
+/// taken off the queue gauges (qlen, cost). Zero amounts are skipped; `c` is
+/// dereferenced without a null check.
+void on_process(PerfCounters* c, const ClientSum& rsum, const ClientSum& psum)
+{
+  if (rsum.count != 0) {
+    c->inc(queue_counters::l_res, rsum.count);
+  }
+  if (rsum.cost != 0) {
+    c->inc(queue_counters::l_res_cost, rsum.cost);
+  }
+  if (psum.count != 0) {
+    c->inc(queue_counters::l_prio, psum.count);
+  }
+  if (psum.cost != 0) {
+    c->inc(queue_counters::l_prio_cost, psum.cost);
+  }
+
+  const auto dequeued_requests = rsum.count + psum.count;
+  if (dequeued_requests != 0) {
+    c->dec(queue_counters::l_qlen, dequeued_requests);
+  }
+
+  const auto dequeued_cost = rsum.cost + psum.cost;
+  if (dequeued_cost != 0) {
+    c->dec(queue_counters::l_cost, dequeued_cost);
+  }
+}
+} // namespace rgw::dmclock
+
+namespace queue_counters {
+
+PerfCountersRef build(CephContext *cct, const std::string& name) // builds the per-client queue counters under `name` and adds them to cct's collection
+{
+ if (!cct->_conf->throttler_perf_counter) { // counters switched off by config: the caller gets an empty ref
+ return {};
+ }
+
+ PerfCountersBuilder b(cct, name, l_first, l_last);
+ b.add_u64(l_qlen, "qlen", "Queue size"); // qlen and cost are both inc()'d and dec()'d elsewhere in this file
+ b.add_u64(l_cost, "cost", "Cost of queued requests");
+ b.add_u64_counter(l_res, "res", "Requests satisfied by reservation");
+ b.add_u64_counter(l_res_cost, "res_cost", "Cost satisfied by reservation");
+ b.add_u64_counter(l_prio, "prio", "Requests satisfied by priority");
+ b.add_u64_counter(l_prio_cost, "prio_cost", "Cost satisfied by priority");
+ b.add_u64_counter(l_limit, "limit", "Requests rejected by limit");
+ b.add_u64_counter(l_limit_cost, "limit_cost", "Cost rejected by limit");
+ b.add_u64_counter(l_cancel, "cancel", "Cancels");
+ b.add_u64_counter(l_cancel_cost, "cancel_cost", "Canceled cost");
+ b.add_time_avg(l_res_latency, "res latency", "Reservation latency"); // NOTE(review): name contains a space, unlike the underscore-separated names above -- confirm intended
+ b.add_time_avg(l_prio_latency, "prio latency", "Priority latency"); // NOTE(review): same as above
+
+ auto logger = PerfCountersRef{ b.create_perf_counters(), cct };
+ cct->get_perfcounters_collection()->add(logger.get()); // the collection is handed the raw pointer; the returned ref keeps ownership
+ return logger;
+}
+
+} // namespace queue_counters
+
+namespace throttle_counters {
+
+PerfCountersRef build(CephContext *cct, const std::string& name) // builds the throttle counter set under `name` and adds it to cct's collection
+{
+ if (!cct->_conf->throttler_perf_counter) { // counters switched off by config: the caller gets an empty ref
+ return {};
+ }
+
+ PerfCountersBuilder b(cct, name, l_first, l_last);
+ b.add_u64(l_throttle, "throttle", "Requests throttled"); // NOTE(review): registered with add_u64 while the queue rejection counts use add_u64_counter; SimpleThrottler only ever inc()s it -- confirm the type
+
+ auto logger = PerfCountersRef{ b.create_perf_counters(), cct };
+ cct->get_perfcounters_collection()->add(logger.get()); // the collection is handed the raw pointer; the returned ref keeps ownership
+ return logger;
+}
+
+} // namespace throttle_counters
diff --git a/src/rgw/rgw_dmclock_scheduler_ctx.h b/src/rgw/rgw_dmclock_scheduler_ctx.h
new file mode 100644
index 00000000..fe34e180
--- /dev/null
+++ b/src/rgw/rgw_dmclock_scheduler_ctx.h
@@ -0,0 +1,118 @@
+#ifndef RGW_DMCLOCK_SCHEDULER_CTX_H
+#define RGW_DMCLOCK_SCHEDULER_CTX_H
+
+#include "common/perf_counters.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "rgw_dmclock.h"
+
+namespace queue_counters { // perf counter ids and builder for the per-client dmclock queues
+
+ enum {
+ l_first = 427150, // lower bound passed to PerfCountersBuilder; not a counter itself
+ l_qlen, // queued requests
+ l_cost, // cost of queued requests
+ l_res, // requests satisfied by reservation
+ l_res_cost, // cost satisfied by reservation
+ l_prio, // requests satisfied by priority
+ l_prio_cost, // cost satisfied by priority
+ l_limit, // requests rejected by limit
+ l_limit_cost, // cost rejected by limit
+ l_cancel, // cancelled requests
+ l_cancel_cost, // cancelled cost
+ l_res_latency, // reservation latency
+ l_prio_latency, // priority latency
+ l_last, // upper bound passed to PerfCountersBuilder; not a counter itself
+ };
+
+ PerfCountersRef build(CephContext *cct, const std::string& name); // returns an empty ref when throttler_perf_counter is off
+
+} // namespace queue_counters
+
+namespace throttle_counters { // perf counter ids and builder for the request throttler
+ enum {
+ l_first = 437219, // lower bound passed to PerfCountersBuilder; not a counter itself
+ l_throttle, // requests throttled
+ l_last // upper bound passed to PerfCountersBuilder; not a counter itself
+ };
+
+ PerfCountersRef build(CephContext *cct, const std::string& name); // returns an empty ref when throttler_perf_counter is off
+} // namespace throttle_counters
+
+namespace rgw::dmclock {
+
+// the last client counter would be for global scheduler stats
+static constexpr auto counter_size = static_cast<size_t>(client_id::count) + 1;
+/// array of per-client counters to serve as GetClientCounters
+/// Fixed-size table of perf counter sets: one slot per client class plus one
+/// trailing slot (index client_id::count) for scheduler-wide stats.
+class ClientCounters {
+  std::array<PerfCountersRef, counter_size> clients;
+public:
+  ClientCounters(CephContext *cct);
+
+  /// raw counter pointer stored in the slot for `client`, as returned by
+  /// PerfCountersRef::get()
+  PerfCounters* operator()(client_id client) const {
+    const auto slot = static_cast<size_t>(client);
+    return clients[slot].get();
+  }
+};
+
+/// Holds the counter set produced by throttle_counters::build() for `name`.
+class ThrottleCounters {
+  PerfCountersRef counters;
+public:
+  ThrottleCounters(CephContext* const cct, const std::string& name)
+    : counters(throttle_counters::build(cct, name))
+  {}
+
+  /// raw counter pointer, as returned by PerfCountersRef::get()
+  PerfCounters* operator()() const {
+    PerfCounters* const raw = counters.get();
+    return raw;
+  }
+};
+
+
+struct ClientSum { // running totals for one client class
+ uint64_t count{0}; // number of requests
+ Cost cost{0}; // summed request cost
+};
+
+constexpr auto client_count = static_cast<size_t>(client_id::count); // number of client classes
+using ClientSums = std::array<ClientSum, client_count>; // one ClientSum per client class, indexed by client_id
+
+void inc(ClientSums& sums, client_id client, Cost cost); // add one request of `cost` to the totals of `client`
+void on_cancel(PerfCounters *c, const ClientSum& sum); // apply cancelled totals to the counters
+void on_process(PerfCounters* c, const ClientSum& rsum, const ClientSum& psum); // apply processed totals: rsum = reservation, psum = priority
+
+
+class ClientConfig : public md_config_obs_t { // config observer that keeps one ClientInfo per client class
+ std::vector<ClientInfo> clients; // indexed by client_id value
+
+ void update(const ConfigProxy &conf); // clears and refills `clients` from the rgw_dmclock_* options
+
+public:
+ ClientConfig(CephContext *cct);
+
+ ClientInfo* operator()(client_id client); // pointer into `clients`; update() clears and refills that vector
+
+ const char** get_tracked_conf_keys() const override;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string>& changed) override;
+};
+
+/// Holds the state that only the dmclock scheduler needs: the per-client
+/// configuration and the per-client perf counters. Both are created only when
+/// get_scheduler_t(cct) selects scheduler_t::dmclock; otherwise they stay
+/// empty.
+class SchedulerCtx {
+public:
+  SchedulerCtx(CephContext* const cct) : sched_t(get_scheduler_t(cct))
+  {
+    if (sched_t == scheduler_t::dmclock) {
+      dmc_client_config = std::make_shared<ClientConfig>(cct);
+      // we don't have a move only cref std::function yet
+      dmc_client_counters = std::make_optional<ClientCounters>(cct);
+    }
+  }
+  // We need to construct a std::function from a NonCopyable object
+  /// Uses std::optional::value(), so this throws std::bad_optional_access
+  /// when the counters were not created.
+  ClientCounters& get_dmc_client_counters() { return dmc_client_counters.value(); }
+  /// Null when the client config was not created. The top-level `const` that
+  /// used to qualify this by-value return was ignored by the language (and
+  /// triggers -Wignored-qualifiers), so it has been dropped.
+  ClientConfig* get_dmc_client_config() const { return dmc_client_config.get(); }
+private:
+  scheduler_t sched_t;
+  std::shared_ptr<ClientConfig> dmc_client_config {nullptr};
+  std::optional<ClientCounters> dmc_client_counters {std::nullopt};
+};
+
+} // namespace rgw::dmclock
+
+#endif /* RGW_DMCLOCK_SCHEDULER_CTX_H */
diff --git a/src/rgw/rgw_dmclock_sync_scheduler.cc b/src/rgw/rgw_dmclock_sync_scheduler.cc
new file mode 100644
index 00000000..650a995d
--- /dev/null
+++ b/src/rgw/rgw_dmclock_sync_scheduler.cc
@@ -0,0 +1,114 @@
+#include "rgw_dmclock_scheduler.h"
+#include "rgw_dmclock_sync_scheduler.h"
+#include "rgw_dmclock_scheduler_ctx.h"
+
+namespace rgw::dmclock {
+
+/// Cancels every request still queued before the scheduler is torn down.
+SyncScheduler::~SyncScheduler()
+{
+  this->cancel();
+}
+
+int SyncScheduler::add_request(const client_id& client, const ReqParams& params, // queues one request and blocks the caller until it is made Ready or Cancelled
+ const Time& time, Cost cost)
+{
+ std::mutex req_mtx; // stack-local wait state; the queued SyncRequest only holds references to these three objects
+ std::condition_variable req_cv;
+ ReqState rstate {ReqState::Wait};
+ auto req = SyncRequest{client, time, cost, req_mtx, req_cv, rstate, counters};
+ int r = queue.add_request_time(req, client, params, time, cost);
+ if (r == 0) {
+ if (auto c = counters(client)) { // counters may be null, in which case accounting is skipped
+ c->inc(queue_counters::l_qlen);
+ c->inc(queue_counters::l_cost, cost);
+ }
+ queue.request_completed(); // NOTE(review): called right after enqueueing, presumably to let the queue dispatch -- confirm against the dmclock queue API
+ // Perform a blocking wait until the request callback is called
+ {
+ std::unique_lock lock{req_mtx};
+ req_cv.wait(lock, [&rstate] {return rstate != ReqState::Wait;}); // woken by handle_request_cb (Ready) or cancel (Cancelled)
+ }
+ if (rstate == ReqState::Cancelled) {
+ //FIXME: decide on error code for cancelled request
+ r = -ECONNABORTED;
+ }
+ } else {
+ // post the error code
+ if (auto c = counters(client)) { // the queue refused the request: count it as limited
+ c->inc(queue_counters::l_limit);
+ c->inc(queue_counters::l_limit_cost, cost);
+ }
+ }
+ return r; // 0 once Ready, -ECONNABORTED when cancelled, otherwise the queue's error code
+}
+
+/// Queue callback for a request that is being dispatched.
+///
+/// Marks the request Ready and wakes the thread blocked in add_request(),
+/// then updates the perf counters of client `c`: the latency and the
+/// request/cost counters of the phase that satisfied the request, and the
+/// queue gauges.
+///
+/// @param c      client class the request belongs to
+/// @param req    the dispatched request
+/// @param phase  dmclock phase that satisfied the request
+/// @param cost   cost reported by the queue for the request
+void SyncScheduler::handle_request_cb(const client_id &c,
+                                      std::unique_ptr<SyncRequest> req,
+                                      PhaseType phase, Cost cost)
+{
+  {
+    std::lock_guard<std::mutex> lg(req->req_mtx);
+    req->req_state = ReqState::Ready;
+    req->req_cv.notify_one();
+  }
+
+  if (auto ctr = req->counters(c)) {
+    auto lat = Clock::from_double(get_time()) - Clock::from_double(req->started);
+    // The cost counters are adjusted by `cost`, not by 1: add_request() adds
+    // `cost` to l_cost when it queues the request, so removing a single unit
+    // here would leave the gauge drifting upwards.
+    if (phase == PhaseType::reservation) {
+      ctr->tinc(queue_counters::l_res_latency, lat);
+      ctr->inc(queue_counters::l_res);
+      if (cost) ctr->inc(queue_counters::l_res_cost, cost);
+    } else if (phase == PhaseType::priority) {
+      ctr->tinc(queue_counters::l_prio_latency, lat);
+      ctr->inc(queue_counters::l_prio);
+      if (cost) ctr->inc(queue_counters::l_prio_cost, cost);
+    }
+    ctr->dec(queue_counters::l_qlen);
+    if (cost) ctr->dec(queue_counters::l_cost, cost);
+  }
+}
+
+
+/// Removes every queued request of `client`, waking each waiter with
+/// ReqState::Cancelled, and books the removed totals as cancelled.
+void SyncScheduler::cancel(const client_id& client)
+{
+  ClientSum cancelled;
+
+  // tallies the request, then flips its state under the request's own mutex
+  auto cancel_one = [&cancelled](RequestRef&& request) {
+    ++cancelled.count;
+    cancelled.cost += request->cost;
+    {
+      std::lock_guard<std::mutex> guard(request->req_mtx);
+      request->req_state = ReqState::Cancelled;
+      request->req_cv.notify_one();
+    }
+  };
+  queue.remove_by_client(client, false, cancel_one);
+
+  PerfCounters* const c = counters(client);
+  if (c) {
+    on_cancel(c, cancelled);
+  }
+
+  queue.request_completed();
+}
+
+/// Removes every queued request of every client, waking each waiter with
+/// ReqState::Cancelled, and books the removed totals per client as cancelled.
+void SyncScheduler::cancel()
+{
+  ClientSums cancelled;
+
+  // the filter always returns true, after tallying the request and waking
+  // its waiter
+  auto cancel_one = [&cancelled](RequestRef&& request) -> bool {
+    inc(cancelled, request->client, request->cost);
+    {
+      std::lock_guard<std::mutex> guard(request->req_mtx);
+      request->req_state = ReqState::Cancelled;
+      request->req_cv.notify_one();
+    }
+    return true;
+  };
+  queue.remove_by_req_filter(cancel_one);
+
+  for (size_t idx = 0; idx != client_count; ++idx) {
+    PerfCounters* const c = counters(static_cast<client_id>(idx));
+    if (c) {
+      on_cancel(c, cancelled[idx]);
+    }
+  }
+}
+
+} // namespace rgw::dmclock
diff --git a/src/rgw/rgw_dmclock_sync_scheduler.h b/src/rgw/rgw_dmclock_sync_scheduler.h
new file mode 100644
index 00000000..ca7223f2
--- /dev/null
+++ b/src/rgw/rgw_dmclock_sync_scheduler.h
@@ -0,0 +1,79 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 SUSE Linux Gmbh
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_DMCLOCK_SYNC_SCHEDULER_H
+#define RGW_DMCLOCK_SYNC_SCHEDULER_H
+
+#include "rgw_dmclock_scheduler.h"
+#include "rgw_dmclock_scheduler_ctx.h"
+
+namespace rgw::dmclock {
+// For a blocking SyncRequest we hold a reference to a cv and the caller must
+// ensure the lifetime
+struct SyncRequest : public Request { // request record for the blocking scheduler; holds references only, owns none of the referenced objects
+ std::mutex& req_mtx; // guards req_state
+ std::condition_variable& req_cv; // notified when req_state leaves Wait
+ ReqState& req_state; // written by handle_request_cb / cancel, read by the waiter in add_request
+ GetClientCounters& counters; // used by handle_request_cb to look up the client's perf counters
+ explicit SyncRequest(client_id _id, Time started, Cost cost,
+ std::mutex& mtx, std::condition_variable& _cv,
+ ReqState& _state, GetClientCounters& counters):
+ Request{_id, started, cost}, req_mtx(mtx), req_cv(_cv), req_state(_state), counters(counters) {};
+};
+
+class SyncScheduler: public Scheduler { // Scheduler whose schedule_request_impl() blocks in add_request() until the request is dispatched or cancelled
+public:
+ template <typename ...Args>
+ SyncScheduler(CephContext *cct, GetClientCounters&& counters, // args are forwarded to the Queue constructor
+ Args&& ...args);
+ ~SyncScheduler(); // cancels all queued requests
+
+ // submit a blocking request for dmclock scheduling, this function waits until
+ // the request is ready.
+ int add_request(const client_id& client, const ReqParams& params,
+ const Time& time, Cost cost);
+
+
+ void cancel(); // cancel every queued request
+
+ void cancel(const client_id& client); // cancel the queued requests of one client
+
+ static void handle_request_cb(const client_id& c, std::unique_ptr<SyncRequest> req, // marks the request Ready, wakes its waiter, updates counters
+ PhaseType phase, Cost cost);
+private:
+ int schedule_request_impl(const client_id& client, const ReqParams& params,
+ const Time& time, const Cost& cost,
+ optional_yield _y [[maybe_unused]]) override // the yield context is not used
+ {
+ return add_request(client, params, time, cost);
+ }
+
+ static constexpr bool IsDelayed = false;
+ using Queue = crimson::dmclock::PushPriorityQueue<client_id, SyncRequest, IsDelayed>;
+ using RequestRef = typename Queue::RequestRef;
+ using Clock = ceph::coarse_real_clock; // clock used for the latency arithmetic in handle_request_cb
+
+ Queue queue;
+ CephContext const *cct; // stored by the constructor; not read by the member functions visible in this header
+ GetClientCounters counters; //< provides per-client perf counters
+};
+
+template <typename ...Args>
+SyncScheduler::SyncScheduler(CephContext *cct, GetClientCounters&& counters, // takes over `counters`; every remaining argument goes to the Queue
+ Args&& ...args):
+ queue(std::forward<Args>(args)...), cct(cct), counters(std::move(counters)) // parameter names shadow the members being initialised
+{}
+
+} // namespace rgw::dmclock
+#endif /* RGW_DMCLOCK_SYNC_SCHEDULER_H */
diff --git a/src/rgw/rgw_env.cc b/src/rgw/rgw_env.cc
new file mode 100644
index 00000000..95b6eeca
--- /dev/null
+++ b/src/rgw/rgw_env.cc
@@ -0,0 +1,141 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_log.h"
+
+#include <string>
+#include <map>
+#include "include/ceph_assert.h"
+#include "rgw_crypt_sanitize.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+void RGWEnv::init(CephContext *cct) // (re)loads the RGWConf member from cct; env_map is left untouched
+{
+ conf.init(cct);
+}
+
+/// Stores `val` under `name`, replacing any value already stored under it.
+void RGWEnv::set(std::string name, std::string val)
+{
+  env_map.insert_or_assign(std::move(name), std::move(val));
+}
+
+/// Replaces the stored environment with the entries of `envp` and then
+/// reloads the config-derived settings from `cct`.
+///
+/// @param cct   context passed on to init(CephContext*)
+/// @param envp  null-terminated array of "NAME=value" strings; a null
+///              pointer is treated as an empty environment
+///
+/// Entries that contain no '=' or that start with '=' (empty name) are
+/// skipped. The value is everything after the first '='.
+void RGWEnv::init(CephContext *cct, char **envp)
+{
+  env_map.clear();
+
+  for (int i = 0; envp != nullptr && envp[i] != nullptr; ++i) {
+    const string entry(envp[i]);
+    // keep the position in find()'s own unsigned type and test npos
+    // explicitly rather than narrowing it to int
+    const auto eq = entry.find('=');
+    if (eq == string::npos || eq == 0) // should never be 0
+      continue;
+    env_map[entry.substr(0, eq)] = entry.substr(eq + 1);
+  }
+
+  init(cct);
+}
+
+/// Looks up `name` in conf_map and returns a pointer to the stored value's
+/// C string, or `def_val` when there is no such entry. The returned pointer
+/// refers to the string held inside the map.
+const char *rgw_conf_get(const map<string, string, ltstr_nocase>& conf_map, const char *name, const char *def_val)
+{
+  const auto found = conf_map.find(name);
+  return found != conf_map.end() ? found->second.c_str() : def_val;
+}
+
+const char *RGWEnv::get(const char *name, const char *def_val) const // env_map lookup via rgw_conf_get(); def_val when name is absent
+{
+ return rgw_conf_get(env_map, name, def_val);
+}
+
+/// Looks up `name` in conf_map and returns its value converted with atoi(),
+/// or `def_val` when there is no such entry. atoi() reports no errors, so a
+/// stored value that is not numeric yields 0 rather than `def_val`.
+int rgw_conf_get_int(const map<string, string, ltstr_nocase>& conf_map, const char *name, int def_val)
+{
+  const auto found = conf_map.find(name);
+  if (found != conf_map.end()) {
+    return atoi(found->second.c_str());
+  }
+  return def_val;
+}
+
+int RGWEnv::get_int(const char *name, int def_val) const // env_map lookup via rgw_conf_get_int(); def_val when name is absent
+{
+ return rgw_conf_get_int(env_map, name, def_val);
+}
+
+/// Looks up `name` in conf_map and returns its value as interpreted by
+/// rgw_str_to_bool() (which is also handed `def_val`), or `def_val` when
+/// there is no such entry.
+bool rgw_conf_get_bool(const map<string, string, ltstr_nocase>& conf_map, const char *name, bool def_val)
+{
+  const auto found = conf_map.find(name);
+  if (found != conf_map.end()) {
+    return rgw_str_to_bool(found->second.c_str(), def_val);
+  }
+  return def_val;
+}
+
+bool RGWEnv::get_bool(const char *name, bool def_val) // env_map lookup via rgw_conf_get_bool(); unlike get()/get_int() this member is not const
+{
+ return rgw_conf_get_bool(env_map, name, def_val);
+}
+
+/// Returns the value stored under `name` parsed with stoull(), or `def_val`
+/// when the entry is missing or stoull() throws for any reason.
+size_t RGWEnv::get_size(const char *name, size_t def_val) const
+{
+  const auto found = env_map.find(name);
+  if (found == env_map.end())
+    return def_val;
+
+  try {
+    return stoull(found->second);
+  } catch (...) {
+    /* it is very unlikely that we'll ever encounter out_of_range, but let's
+       return the default either way */
+    return def_val;
+  }
+}
+
+/// True when env_map has an entry for `name`.
+bool RGWEnv::exists(const char *name) const
+{
+  return env_map.count(name) != 0;
+}
+
+/// True when the first key not ordered before `prefix` starts with `prefix`.
+/// A null prefix or an empty map yields false.
+// NOTE(review): the prefix test uses case-sensitive strncmp(), while the
+// map's comparator is named ltstr_nocase -- confirm whether a key that
+// differs from the prefix only in case is meant to be rejected.
+bool RGWEnv::exists_prefix(const char *prefix) const
+{
+  if (prefix == NULL || env_map.empty())
+    return false;
+
+  const auto candidate = env_map.lower_bound(prefix);
+  if (candidate == env_map.end())
+    return false;
+
+  const size_t prefix_len = strlen(prefix);
+  return strncmp(candidate->first.c_str(), prefix, prefix_len) == 0;
+}
+
+/// Removes the entry stored under `name`, if there is one.
+void RGWEnv::remove(const char *name)
+{
+  const auto found = env_map.find(name);
+  if (found == env_map.end())
+    return;
+  env_map.erase(found);
+}
+
+/// Copies the logging switches from the config and translates the
+/// rgw_defer_to_bucket_acls string into its numeric constant; any value other
+/// than "recurse" or "full_control" maps to 0.
+void RGWConf::init(CephContext *cct)
+{
+  enable_ops_log = cct->_conf->rgw_enable_ops_log;
+  enable_usage_log = cct->_conf->rgw_enable_usage_log;
+
+  const auto& acl_mode = cct->_conf->rgw_defer_to_bucket_acls;
+  if (acl_mode == "recurse") {
+    defer_to_bucket_acls = RGW_DEFER_TO_BUCKET_ACLS_RECURSE;
+  } else if (acl_mode == "full_control") {
+    defer_to_bucket_acls = RGW_DEFER_TO_BUCKET_ACLS_FULL_CONTROL;
+  } else {
+    defer_to_bucket_acls = 0; // default
+  }
+}
diff --git a/src/rgw/rgw_es_main.cc b/src/rgw/rgw_es_main.cc
new file mode 100644
index 00000000..5983dd91
--- /dev/null
+++ b/src/rgw/rgw_es_main.cc
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <list>
+#include <string>
+#include <iostream>
+
+#include "global/global_init.h"
+#include "global/global_context.h"
+
+#include "common/ceph_argparse.h"
+#include "common/ceph_json.h"
+#include "rgw_es_query.h"
+
+
+int main(int argc, char *argv[]) // compiles one query expression with ESQueryCompiler and prints the result as JSON
+{
+ vector<const char*> args;
+ argv_to_vec(argc, (const char **)argv, args);
+
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, // cct is not referenced again below
+ CODE_ENVIRONMENT_UTILITY, 0);
+
+ common_init_finish(g_ceph_context);
+
+ string expr;
+
+ if (argc > 1) { // the expression is taken from the raw argv[1], not from `args`
+ expr = argv[1];
+ } else {
+ expr = "age >= 30"; // built-in sample query
+ }
+
+ ESQueryCompiler es_query(expr, nullptr, "x-amz-meta-");
+
+ map<string, string, ltstr_nocase> aliases = { { "key", "name" }, // short field names mapped to the field names used in generic_map below
+ { "etag", "meta.etag" },
+ { "size", "meta.size" },
+ { "mtime", "meta.mtime" },
+ { "lastmodified", "meta.mtime" },
+ { "contenttype", "meta.contenttype" },
+ };
+ es_query.set_field_aliases(&aliases);
+
+ map<string, ESEntityTypeMap::EntityType> generic_map = { {"bucket", ESEntityTypeMap::ES_ENTITY_STR}, // value type of each generic field
+ {"name", ESEntityTypeMap::ES_ENTITY_STR},
+ {"instance", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.etag", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.contenttype", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.mtime", ESEntityTypeMap::ES_ENTITY_DATE},
+ {"meta.size", ESEntityTypeMap::ES_ENTITY_INT} };
+ ESEntityTypeMap gm(generic_map);
+ es_query.set_generic_type_map(&gm);
+
+ map<string, ESEntityTypeMap::EntityType> custom_map = { {"str", ESEntityTypeMap::ES_ENTITY_STR}, // value type of each custom (prefixed) field
+ {"int", ESEntityTypeMap::ES_ENTITY_INT},
+ {"date", ESEntityTypeMap::ES_ENTITY_DATE} };
+ ESEntityTypeMap em(custom_map);
+ es_query.set_custom_type_map(&em);
+
+ string err;
+
+ bool valid = es_query.compile(&err);
+ if (!valid) {
+ cout << "failed to compile query: " << err << std::endl;
+ return EINVAL; // process exit status is the positive errno value
+ }
+
+ JSONFormatter f;
+ encode_json("root", es_query, &f);
+
+ f.flush(cout);
+
+ return 0;
+}
+
diff --git a/src/rgw/rgw_es_query.cc b/src/rgw/rgw_es_query.cc
new file mode 100644
index 00000000..a2c460e9
--- /dev/null
+++ b/src/rgw/rgw_es_query.cc
@@ -0,0 +1,694 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <list>
+#include <map>
+#include <string>
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+
+#include "common/ceph_json.h"
+#include "rgw_common.h"
+#include "rgw_es_query.h"
+
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/// Removes the first element of `l` and stores it in `*s`.
+/// Returns false, leaving both arguments untouched, when `l` is empty.
+bool pop_front(std::list<std::string>& l, std::string *s)
+{
+  const bool has_item = !l.empty();
+  if (has_item) {
+    *s = l.front();
+    l.pop_front();
+  }
+  return has_item;
+}
+
+map<string, int> operator_map = { // recognised operators and the rank check_precedence() compares
+ { "or", 1 },
+ { "and", 2 },
+ { "<", 3 }, // all comparison operators share one rank
+ { "<=", 3 },
+ { "==", 3 },
+ { "!=", 3 },
+ { ">=", 3 },
+ { ">", 3 },
+};
+
+/// True when `s` is one of the keys of operator_map.
+bool is_operator(const string& s)
+{
+  return operator_map.count(s) != 0;
+}
+
+/// Rank stored for `op` in operator_map, or 0 when `op` is not in the map.
+int operand_value(const string& op)
+{
+  const auto entry = operator_map.find(op);
+  return entry != operator_map.end() ? entry->second : 0;
+}
+
+/// Rank of op1 minus rank of op2: positive when op1 ranks higher, zero when
+/// they rank equal, negative otherwise.
+int check_precedence(const string& op1, const string& op2)
+{
+  const int first_rank = operand_value(op1);
+  const int second_rank = operand_value(op2);
+  return first_rank - second_rank;
+}
+
+static bool infix_to_prefix(list<string>& source, list<string> *out) // reorders the infix token list with two stacks; false when the operator stack underflows or is left non-empty
+{
+ list<string> operator_stack;
+ list<string> operand_stack;
+
+ operator_stack.push_front("("); // wrap the whole expression in one pair of parentheses
+ source.push_back(")"); // note: this modifies the caller's list
+
+ for (string& entity : source) {
+ if (entity == "(") {
+ operator_stack.push_front(entity);
+ } else if (entity == ")") {
+ string popped_operator;
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+
+ while (popped_operator != "(") { // move operators to the output until the matching "(" is reached
+ operand_stack.push_front(popped_operator);
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+ }
+
+ } else if (is_operator(entity)) {
+ string popped_operator;
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+
+ int precedence = check_precedence(popped_operator, entity);
+
+ while (precedence >= 0) { // "(" is not in operator_map, so it ranks 0 and always ends this loop
+ operand_stack.push_front(popped_operator);
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+ precedence = check_precedence(popped_operator, entity);
+ }
+
+ operator_stack.push_front(popped_operator); // put back the lower-ranked entry, then push the new operator on top of it
+ operator_stack.push_front(entity);
+ } else {
+ operand_stack.push_front(entity); // anything else is an operand
+ }
+
+ }
+
+ if (!operator_stack.empty()) { // something left over: the parentheses did not balance
+ return false;
+ }
+
+ out->swap(operand_stack); // result is handed over; `out` keeps nothing of its old content
+ return true;
+}
+
+class ESQueryNode { // abstract base of the query tree nodes
+protected:
+ ESQueryCompiler *compiler; // not owned; stored as given to the constructor
+public:
+ ESQueryNode(ESQueryCompiler *_compiler) : compiler(_compiler) {}
+ virtual ~ESQueryNode() {}
+
+ virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) = 0; // false on failure (the implementations here set *perr); on success *pnode is the node to use
+
+ virtual void dump(Formatter *f) const = 0; // writes the node to the formatter
+};
+
+static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr);
+
+class ESQueryNode_Bool : public ESQueryNode { // boolean node combining two child nodes; owns both
+ string op; // dump() emits "must" when this is "and", "should" otherwise
+ ESQueryNode *first{nullptr};
+ ESQueryNode *second{nullptr};
+public:
+ explicit ESQueryNode_Bool(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
+ ESQueryNode_Bool(ESQueryCompiler *compiler, const string& _op, ESQueryNode *_first, ESQueryNode *_second) :ESQueryNode(compiler), op(_op), first(_first), second(_second) {}
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ bool valid = s->pop(&op);
+ if (!valid) {
+ *perr = "incorrect expression";
+ return false;
+ }
+ valid = alloc_node(compiler, s, &first, perr) && // second is only attempted when first succeeded
+ alloc_node(compiler, s, &second, perr);
+ if (!valid) {
+ return false;
+ }
+ *pnode = this;
+ return true;
+ }
+ virtual ~ESQueryNode_Bool() {
+ delete first;
+ delete second;
+ }
+
+ void dump(Formatter *f) const override { // dereferences both children without a null check
+ f->open_object_section("bool");
+ const char *section = (op == "and" ? "must" : "should");
+ f->open_array_section(section);
+ encode_json("entry", *first, f);
+ encode_json("entry", *second, f);
+ f->close_section();
+ f->close_section();
+ }
+
+};
+
+class ESQueryNodeLeafVal { // abstract typed value of a leaf comparison
+public:
+ ESQueryNodeLeafVal() = default;
+ virtual ~ESQueryNodeLeafVal() {}
+
+ virtual bool init(const string& str_val, string *perr) = 0; // parse str_val; the Int and Date implementations return false and set *perr on failure
+ virtual void encode_json(const string& field, Formatter *f) const = 0; // emit the value under the key `field`
+};
+
+class ESQueryNodeLeafVal_Str : public ESQueryNodeLeafVal { // string value, stored verbatim
+ string val;
+public:
+ ESQueryNodeLeafVal_Str() {}
+ bool init(const string& str_val, string *perr) override { // always succeeds; perr is not written
+ val = str_val;
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ ::encode_json(field.c_str(), val.c_str(), f);
+ }
+};
+
+class ESQueryNodeLeafVal_Int : public ESQueryNodeLeafVal { // 64-bit integer value
+ int64_t val{0};
+public:
+ ESQueryNodeLeafVal_Int() {}
+ bool init(const string& str_val, string *perr) override {
+ string err;
+ val = strict_strtoll(str_val.c_str(), 10, &err); // base 10; a non-empty err is treated as failure
+ if (!err.empty()) {
+ *perr = string("failed to parse integer: ") + err;
+ return false;
+ }
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ ::encode_json(field.c_str(), val, f);
+ }
+};
+
+class ESQueryNodeLeafVal_Date : public ESQueryNodeLeafVal { // timestamp value
+ ceph::real_time val;
+public:
+ ESQueryNodeLeafVal_Date() {}
+ bool init(const string& str_val, string *perr) override {
+ if (parse_time(str_val.c_str(), &val) < 0) { // a negative return is treated as failure
+ *perr = string("failed to parse date: ") + str_val;
+ return false;
+ }
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ string s;
+ rgw_to_iso8601(val, &s); // the timestamp is emitted as the string produced by rgw_to_iso8601()
+ ::encode_json(field.c_str(), s, f);
+ }
+};
+
+class ESQueryNode_Op : public ESQueryNode { // base of the leaf comparison nodes (field <op> value)
+protected:
+ string op;
+ string field;
+ string str_val; // value as written in the query, before typing
+ ESQueryNodeLeafVal *val{nullptr}; // typed value; owned, created by val_from_str()
+ ESEntityTypeMap::EntityType entity_type{ESEntityTypeMap::ES_ENTITY_NONE};
+ bool allow_restricted{false};
+
+ bool val_from_str(string *perr) { // allocates `val` according to entity_type and parses str_val into it
+ switch (entity_type) {
+ case ESEntityTypeMap::ES_ENTITY_DATE:
+ val = new ESQueryNodeLeafVal_Date;
+ break;
+ case ESEntityTypeMap::ES_ENTITY_INT:
+ val = new ESQueryNodeLeafVal_Int;
+ break;
+ default: // every other type is handled as a string
+ val = new ESQueryNodeLeafVal_Str;
+ }
+ return val->init(str_val, perr);
+ }
+ bool do_init(ESQueryNode **pnode, string *perr) { // unalias the field, run handle_nested(), then type the value
+ field = compiler->unalias_field(field);
+ ESQueryNode *effective_node;
+ if (!handle_nested(&effective_node, perr)) {
+ return false;
+ }
+ if (!val_from_str(perr)) {
+ return false;
+ }
+ *pnode = effective_node; // only written when both steps succeeded
+ return true;
+ }
+
+public:
+ ESQueryNode_Op(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
+ ~ESQueryNode_Op() {
+ delete val;
+ }
+ virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ bool valid = s->pop(&op) && // pop order: operator, value, field
+ s->pop(&str_val) &&
+ s->pop(&field);
+ if (!valid) {
+ *perr = "invalid expression";
+ return false;
+ }
+ return do_init(pnode, perr);
+ }
+ bool handle_nested(ESQueryNode **pnode, string *perr);
+
+ void set_allow_restricted(bool allow) {
+ allow_restricted = allow;
+ }
+
+ virtual void dump(Formatter *f) const override = 0;
+};
+
+class ESQueryNode_Op_Equal : public ESQueryNode_Op { // equality comparison, dumped as a "term" section
+public:
+ explicit ESQueryNode_Op_Equal(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+ ESQueryNode_Op_Equal(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) { // pre-filled form: init() will not read from the stack
+ op = "==";
+ field = f;
+ str_val = v;
+ }
+
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ if (op.empty()) { // not pre-filled: take op, value and field from the stack
+ return ESQueryNode_Op::init(s, pnode, perr);
+ }
+ return do_init(pnode, perr);
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("term");
+ val->encode_json(field, f);
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_NotEqual : public ESQueryNode_Op { // inequality comparison, dumped as bool / must_not / term
+public:
+ explicit ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+ ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) { // pre-filled form: init() will not read from the stack
+ op = "!=";
+ field = f;
+ str_val = v;
+ }
+
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ if (op.empty()) { // not pre-filled: take op, value and field from the stack
+ return ESQueryNode_Op::init(s, pnode, perr);
+ }
+ return do_init(pnode, perr);
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("bool");
+ f->open_object_section("must_not");
+ f->open_object_section("term");
+ val->encode_json(field, f);
+ f->close_section();
+ f->close_section();
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_Range : public ESQueryNode_Op { // range comparison, dumped as range / <field> / <range_str>: value
+ string range_str; // key under which the value is emitted inside the field section
+public:
+ ESQueryNode_Op_Range(ESQueryCompiler *compiler, const string& rs) : ESQueryNode_Op(compiler), range_str(rs) {}
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("range");
+ f->open_object_section(field.c_str());
+ val->encode_json(range_str, f);
+ f->close_section();
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_Nested_Parent : public ESQueryNode_Op { // non-template base of ESQueryNode_Op_Nested<T>
+public:
+ ESQueryNode_Op_Nested_Parent(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+
+ virtual string get_custom_leaf_field_name() = 0; // implemented by ESQueryNode_Op_Nested<T>
+};
+
+template <class T>
+class ESQueryNode_Op_Nested : public ESQueryNode_Op_Nested_Parent { // "nested" query on meta.custom-<type>: matches the attribute name and wraps the node `next`
+ string name; // attribute name matched against "<path>.name"
+ ESQueryNode *next; // owned; deleted in the destructor
+public:
+ ESQueryNode_Op_Nested(ESQueryCompiler *compiler, const string& _name, ESQueryNode *_next) : ESQueryNode_Op_Nested_Parent(compiler),
+ name(_name), next(_next) {}
+ ~ESQueryNode_Op_Nested() {
+ delete next;
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("nested");
+ string s = string("meta.custom-") + type_str();
+ encode_json("path", s.c_str(), f);
+ f->open_object_section("query");
+ f->open_object_section("bool");
+ f->open_array_section("must");
+ f->open_object_section("entry");
+ f->open_object_section("match");
+ string n = s + ".name";
+ encode_json(n.c_str(), name.c_str(), f);
+ f->close_section(); // match
+ f->close_section(); // entry
+ encode_json("entry", *next, f); // second element of the "must" array
+ f->close_section(); // must
+ f->close_section(); // bool
+ f->close_section(); // query
+ f->close_section(); // nested
+ }
+
+ string type_str() const; // defined per T by the explicit specializations
+ string get_custom_leaf_field_name() override {
+ return string("meta.custom-") + type_str() + ".value";
+ }
+};
+
+/* Suffix used for string-typed attributes. */
+template<>
+string ESQueryNode_Op_Nested<string>::type_str() const {
+  static const char suffix[] = "string";
+  return suffix;
+}
+
+/* Suffix used for integer-typed attributes. */
+template<>
+string ESQueryNode_Op_Nested<int64_t>::type_str() const {
+  static const char suffix[] = "int";
+  return suffix;
+}
+
+/* Suffix used for date-typed attributes. */
+template<>
+string ESQueryNode_Op_Nested<ceph::real_time>::type_str() const {
+  static const char suffix[] = "date";
+  return suffix;
+}
+
+/*
+ * Resolve the entity type of this node's field and decide which node ends
+ * up in *pnode.
+ *
+ * A field without the compiler's custom prefix must be present in the
+ * generic type map (and not restricted, unless allow_restricted is set);
+ * *pnode is then this node. A field with the prefix is wrapped in an
+ * ESQueryNode_Op_Nested<> of the matching type, which becomes *pnode, and
+ * `field` is rewritten to the wrapper's leaf field name.
+ *
+ * Returns false with *perr set on failure.
+ */
+bool ESQueryNode_Op::handle_nested(ESQueryNode **pnode, string *perr)
+{
+  const string& prefix = compiler->get_custom_prefix();
+
+  if (!boost::algorithm::starts_with(field, prefix)) {
+    *pnode = this;
+    auto generic_types = compiler->get_generic_type_map();
+    if (!generic_types) {
+      *perr = "query parser does not support generic types";
+      return false;
+    }
+    if (!generic_types->find(field, &entity_type) ||
+        (!allow_restricted && compiler->is_restricted(field))) {
+      *perr = string("unexpected generic field '") + field + "'";
+      return false;
+    }
+    return true;
+  }
+
+  const string attr_name = field.substr(prefix.size());
+  if (auto custom_types = compiler->get_custom_type_map()) {
+    /* ignoring returned bool, for now just treat it as string */
+    custom_types->find(attr_name, &entity_type);
+  }
+
+  ESQueryNode_Op_Nested_Parent *wrapper;
+  if (entity_type == ESEntityTypeMap::ES_ENTITY_INT) {
+    wrapper = new ESQueryNode_Op_Nested<int64_t>(compiler, attr_name, this);
+  } else if (entity_type == ESEntityTypeMap::ES_ENTITY_DATE) {
+    wrapper = new ESQueryNode_Op_Nested<ceph::real_time>(compiler, attr_name, this);
+  } else {
+    wrapper = new ESQueryNode_Op_Nested<string>(compiler, attr_name, this);
+  }
+
+  field = wrapper->get_custom_leaf_field_name();
+  *pnode = wrapper;
+  return true;
+}
+
+/* True when the token is one of the boolean connectives "and" / "or". */
+static bool is_bool_op(const string& str)
+{
+  if (str == "and") {
+    return true;
+  }
+  return str == "or";
+}
+
+/*
+ * Peek at the next token on the stack, allocate the node type matching
+ * that operator and run its init(). On init failure the node is deleted.
+ * Returns false with *perr set when the stack is empty, the operator is
+ * unknown, or init() fails.
+ */
+static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr)
+{
+  string op;
+  if (!s->peek(&op)) {
+    *perr = "incorrect expression";
+    return false;
+  }
+
+  ESQueryNode *node = nullptr;
+
+  if (op == "==") {
+    node = new ESQueryNode_Op_Equal(compiler);
+  } else if (op == "!=") {
+    node = new ESQueryNode_Op_NotEqual(compiler);
+  } else if (is_bool_op(op)) {
+    node = new ESQueryNode_Bool(compiler);
+  } else {
+    /* comparison operator -> range keyword */
+    static map<string, string> range_op_map = {
+      { "<", "lt"},
+      { "<=", "lte"},
+      { ">=", "gte"},
+      { ">", "gt"},
+    };
+
+    const auto range_op = range_op_map.find(op);
+    if (range_op == range_op_map.end()) {
+      *perr = string("invalid operator: ") + op;
+      return false;
+    }
+    node = new ESQueryNode_Op_Range(compiler, range_op->second);
+  }
+
+  const bool ok = node->init(s, pnode, perr);
+  if (!ok) {
+    delete node;
+  }
+  return ok;
+}
+
+
+/*
+ * True when c may appear in a condition key: any ASCII character except
+ * the separators listed below.
+ */
+bool is_key_char(char c)
+{
+  static const char separators[] = "()<>!@,;:\\\"/[]?={} \t";
+
+  /* search without the terminating NUL so that '\0' is not treated as a
+   * separator */
+  if (memchr(separators, c, sizeof(separators) - 1) != nullptr) {
+    return false;
+  }
+  return (isascii(c) > 0);
+}
+
+/* True for the characters operators are built from: ! < = > */
+static bool is_op_char(char c)
+{
+  return c == '!' || c == '<' || c == '=' || c == '>';
+}
+
+/*
+ * True when c may be part of a condition value: anything except
+ * whitespace and ')'.
+ */
+static bool is_val_char(char c)
+{
+  /* isspace() is only defined for EOF and values representable as
+   * unsigned char; a plain char holding a non-ASCII byte may be negative */
+  if (isspace(static_cast<unsigned char>(c))) {
+    return false;
+  }
+  return (c != ')');
+}
+
+/* Advance pos past any whitespace in str[pos..size). */
+void ESInfixQueryParser::skip_whitespace(const char *str, int size, int& pos) {
+  /* cast first: isspace() on a negative char value is undefined */
+  while (pos < size && isspace(static_cast<unsigned char>(str[pos]))) {
+    ++pos;
+  }
+}
+
+/*
+ * Skip leading whitespace, then consume the longest run of characters
+ * accepted by filter and append it to args. Returns false (appending
+ * nothing) when that run is empty.
+ */
+bool ESInfixQueryParser::get_next_token(bool (*filter)(char)) {
+  skip_whitespace(str, size, pos);
+
+  const int begin = pos;
+  for (; pos < size; ++pos) {
+    if (!filter(str[pos])) {
+      break;
+    }
+  }
+
+  const int len = pos - begin;
+  if (len == 0) {
+    return false;
+  }
+  args.push_back(string(str + begin, len));
+  return true;
+}
+
+/*
+ * Parse one condition: <key> <operator> <val>
+ *
+ *   key:      characters accepted by is_key_char
+ *   operator: one of < <= == != >= >
+ *   val:      terminated by whitespace, ')' or end of string
+ *
+ * Tokens are appended to args as they are read; parsing stops at the
+ * first missing token.
+ */
+bool ESInfixQueryParser::parse_condition() {
+  if (!get_next_token(is_key_char)) {
+    return false;
+  }
+  if (!get_next_token(is_op_char)) {
+    return false;
+  }
+  return get_next_token(is_val_char);
+}
+
+/*
+ * Skip whitespace and, if the input continues with "and" or "or",
+ * consume it and append it to args. Returns whether one was consumed.
+ */
+bool ESInfixQueryParser::parse_and_or() {
+  static const char * const connectives[] = { "and", "or" };
+
+  skip_whitespace(str, size, pos);
+  for (const char *word : connectives) {
+    const int len = strlen(word);
+    if (pos + len <= size && strncmp(str + pos, word, len) == 0) {
+      pos += len;
+      args.push_back(word);
+      return true;
+    }
+  }
+  return false;
+}
+
+/*
+ * Skip whitespace and consume one character if it equals the first
+ * character of pchar; on a match the whole pchar string is appended to
+ * args.
+ */
+bool ESInfixQueryParser::parse_specific_char(const char *pchar) {
+  skip_whitespace(str, size, pos);
+
+  const bool matched = (pos < size) && (str[pos] == *pchar);
+  if (matched) {
+    args.push_back(pchar);
+    ++pos;
+  }
+  return matched;
+}
+
+/* Consume an optional "(" token. */
+bool ESInfixQueryParser::parse_open_bracket() {
+  static const char open_paren[] = "(";
+  return parse_specific_char(open_paren);
+}
+
+/* Consume an optional ")" token. */
+bool ESInfixQueryParser::parse_close_bracket() {
+  static const char close_paren[] = ")";
+  return parse_specific_char(close_paren);
+}
+
+/*
+ * Tokenize the whole query:
+ *
+ *   expression: [(]<condition>[[and/or]<condition>][)][and/or]...
+ *
+ * Brackets and connectives are optional at this stage, so their parse
+ * results are ignored; only a malformed condition fails the parse. On
+ * success the collected tokens are swapped into *result.
+ */
+bool ESInfixQueryParser::parse(list<string> *result) {
+  for (;;) {
+    if (pos >= size) {
+      break;
+    }
+    parse_open_bracket();
+    if (!parse_condition()) {
+      return false;
+    }
+    parse_close_bracket();
+    parse_and_or();
+  }
+
+  result->swap(args);
+  return true;
+}
+
+/*
+ * Turn the infix token list into prefix order, load it into the stack and
+ * build the node tree rooted at query_root. Tokens left on the stack once
+ * the tree is built are an error.
+ */
+bool ESQueryCompiler::convert(list<string>& infix, string *perr) {
+  static const char invalid_query[] = "invalid query";
+
+  list<string> prefix;
+  if (!infix_to_prefix(infix, &prefix)) {
+    *perr = invalid_query;
+    return false;
+  }
+
+  stack.assign(prefix);
+  if (!alloc_node(this, &stack, &query_root, perr)) {
+    return false;
+  }
+
+  if (stack.done()) {
+    return true;
+  }
+  *perr = invalid_query;
+  return false;
+}
+
+ESQueryCompiler::~ESQueryCompiler() { // the compiler owns the node tree it built
+ delete query_root; // query_root is initialised to nullptr in the class definition, so this is safe even if nothing was compiled
+}
+
+/*
+ * Parse the query text, build the node tree, then AND every entry of
+ * eq_conds on top of the current root as an equality node. Those nodes
+ * are allowed to reference restricted fields. Returns false with *perr
+ * set on any failure.
+ */
+bool ESQueryCompiler::compile(string *perr) {
+  list<string> tokens;
+  if (!parser.parse(&tokens)) {
+    *perr = "failed to parse query";
+    return false;
+  }
+  if (!convert(tokens, perr)) {
+    return false;
+  }
+
+  for (auto cond = eq_conds.begin(); cond != eq_conds.end(); ++cond) {
+    ESQueryNode_Op_Equal *forced = new ESQueryNode_Op_Equal(this, cond->first, cond->second);
+    forced->set_allow_restricted(true); /* can access restricted fields */
+
+    ESQueryNode *resolved;
+    if (!forced->init(nullptr, &resolved, perr)) {
+      delete forced;
+      return false;
+    }
+    query_root = new ESQueryNode_Bool(this, "and", resolved, query_root);
+  }
+
+  return true;
+}
+
+void ESQueryCompiler::dump(Formatter *f) const { // writes the compiled tree to f under the key "query"
+ encode_json("query", *query_root, f); // NOTE(review): dereferences query_root unconditionally — presumably only called after compile() succeeded; confirm
+}
+
diff --git a/src/rgw/rgw_es_query.h b/src/rgw/rgw_es_query.h
new file mode 100644
index 00000000..b8421f4d
--- /dev/null
+++ b/src/rgw/rgw_es_query.h
@@ -0,0 +1,165 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ES_QUERY_H
+#define CEPH_RGW_ES_QUERY_H
+
+#include "rgw_string.h"
+
+/*
+ * Read-only token stack: a list of tokens plus a cursor. peek()/pop()
+ * read from the cursor position; pop() also advances it.
+ */
+class ESQueryStack {
+  list<string> l;               /* token storage */
+  list<string>::iterator iter;  /* cursor; l.end() once exhausted */
+
+public:
+  /* Takes over the contents of src (src is left holding the old list). */
+  explicit ESQueryStack(list<string>& src) {
+    assign(src);
+  }
+
+  /* Start with the cursor on the (empty) list so done()/peek()/pop()
+   * are well defined before assign() is called; a default-constructed
+   * iterator must not be compared. */
+  ESQueryStack() : iter(l.begin()) {}
+
+  /* Swap src into the stack and rewind the cursor. */
+  void assign(list<string>& src) {
+    l.swap(src);
+    iter = l.begin();
+  }
+
+  /* Copy the current token into *dest without consuming it;
+   * returns false when no tokens remain. */
+  bool peek(string *dest) const {
+    if (done()) {
+      return false;
+    }
+    *dest = *iter;
+    return true;
+  }
+
+  /* Like peek(), and advance past the token on success. */
+  bool pop(string *dest) {
+    bool valid = peek(dest);
+    if (!valid) {
+      return false;
+    }
+    ++iter;
+    return true;
+  }
+
+  /* True once every token has been consumed. */
+  bool done() const {
+    return (iter == l.end());
+  }
+};
+
+class ESInfixQueryParser { // splits an infix query string into a flat list of tokens
+ string query; // private copy of the query text; must be declared before size/str, which are initialised from it
+ int size; // query.size(), captured at construction
+ const char *str; // query.c_str(); NOTE(review): a copied parser would keep pointing into the source object's buffer — confirm instances are never copied
+ int pos{0}; // current scan offset into str
+ list<string> args; // tokens collected so far; swapped out to the caller by parse()
+
+ void skip_whitespace(const char *str, int size, int& pos); // advances pos past whitespace
+ bool get_next_token(bool (*filter)(char)); // appends the next run of characters accepted by filter to args
+
+ bool parse_condition(); // <key> <operator> <val>
+ bool parse_and_or(); // optional "and" / "or"
+ bool parse_specific_char(const char *pchar); // consumes one character equal to *pchar
+ bool parse_open_bracket(); // optional "("
+ bool parse_close_bracket(); // optional ")"
+
+public:
+ explicit ESInfixQueryParser(const string& _query) : query(_query), size(query.size()), str(query.c_str()) {}
+ bool parse(list<string> *result); // tokenizes the whole query; false on a malformed condition
+};
+
+class ESQueryNode;
+
+/* Maps field names to the value type used when building the query. */
+struct ESEntityTypeMap {
+  enum EntityType {
+    ES_ENTITY_NONE = 0,
+    ES_ENTITY_STR = 1,
+    ES_ENTITY_INT = 2,
+    ES_ENTITY_DATE = 3,
+  };
+
+  map<string, EntityType> m;
+
+  /* Stores a copy of _m. */
+  explicit ESEntityTypeMap(map<string, EntityType>& _m) : m(_m) {}
+
+  /*
+   * Look up entity. On a hit *ptype receives the mapped type and true is
+   * returned; on a miss *ptype is set to ES_ENTITY_NONE and false is
+   * returned.
+   */
+  bool find(const string& entity, EntityType *ptype) {
+    const auto pos = m.find(entity);
+    const bool known = (pos != m.end());
+    *ptype = known ? pos->second : ES_ENTITY_NONE;
+    return known;
+  }
+};
+
+class ESQueryCompiler { // parses a query string and builds the ESQueryNode tree that dump() serialises
+ ESInfixQueryParser parser; // tokenizer over the query text
+ ESQueryStack stack; // prefix-ordered tokens consumed while building nodes
+ ESQueryNode *query_root{nullptr}; // root of the compiled tree; deleted by the destructor
+
+ string custom_prefix; // field-name prefix that marks a custom attribute
+
+ bool convert(list<string>& infix, string *perr); // infix tokens -> node tree
+
+ list<pair<string, string> > eq_conds; // (field, value) equality conditions ANDed onto the query by compile()
+
+ ESEntityTypeMap *generic_type_map{nullptr}; // not owned by this class as far as this file shows; set via set_generic_type_map()
+ ESEntityTypeMap *custom_type_map{nullptr}; // likewise, set via set_custom_type_map()
+
+ map<string, string, ltstr_nocase> *field_aliases = nullptr; // optional alias -> field table used by unalias_field()
+ set<string> *restricted_fields = nullptr; // optional set consulted by is_restricted()
+
+public:
+ ESQueryCompiler(const string& query, list<pair<string, string> > *prepend_eq_conds, const string& _custom_prefix) : parser(query), custom_prefix(_custom_prefix) {
+ if (prepend_eq_conds) { // nullptr means no extra conditions
+ eq_conds = std::move(*prepend_eq_conds); // moves out of the caller's list
+ }
+ }
+ ~ESQueryCompiler();
+
+ bool compile(string *perr); // false with *perr set on failure
+ void dump(Formatter *f) const; // emits the compiled tree under "query"
+
+ void set_generic_type_map(ESEntityTypeMap *entity_map) {
+ generic_type_map = entity_map;
+ }
+
+ ESEntityTypeMap *get_generic_type_map() {
+ return generic_type_map;
+ }
+ const string& get_custom_prefix() { return custom_prefix; }
+
+ void set_custom_type_map(ESEntityTypeMap *entity_map) {
+ custom_type_map = entity_map;
+ }
+
+ ESEntityTypeMap *get_custom_type_map() {
+ return custom_type_map;
+ }
+
+ void set_field_aliases(map<string, string, ltstr_nocase> *fa) {
+ field_aliases = fa;
+ }
+
+ string unalias_field(const string& field) { // returns the aliased target, or field itself when there is no table or no entry
+ if (!field_aliases) {
+ return field;
+ }
+ auto i = field_aliases->find(field);
+ if (i == field_aliases->end()) {
+ return field;
+ }
+
+ return i->second;
+ }
+
+ void set_restricted_fields(set<string> *rf) {
+ restricted_fields = rf;
+ }
+
+ bool is_restricted(const string& f) { // false when no restricted set has been installed
+ return (restricted_fields && restricted_fields->find(f) != restricted_fields->end());
+ }
+};
+
+
+#endif
diff --git a/src/rgw/rgw_etag_verifier.cc b/src/rgw/rgw_etag_verifier.cc
new file mode 100644
index 00000000..6a9d5cc3
--- /dev/null
+++ b/src/rgw/rgw_etag_verifier.cc
@@ -0,0 +1,185 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_etag_verifier.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw::putobj {
+
+int create_etag_verifier(CephContext* cct, DataProcessor* filter, // picks and constructs the verifier matching the object's manifest; 0 on success, -EIO otherwise
+ const bufferlist& manifest_bl,
+ const std::optional<RGWCompressionInfo>& compression,
+ etag_verifier_ptr& verifier)
+{
+ RGWObjManifest manifest;
+
+ try {
+ auto miter = manifest_bl.cbegin();
+ decode(manifest, miter);
+ } catch (buffer::error& err) { // an undecodable manifest is reported as -EIO
+ ldout(cct, 0) << "ERROR: couldn't decode manifest" << dendl;
+ return -EIO;
+ }
+
+ RGWObjManifestRule rule;
+ bool found = manifest.get_rule(0, &rule);
+ if (!found) {
+ lderr(cct) << "ERROR: manifest->get_rule() could not find rule" << dendl;
+ return -EIO;
+ }
+
+ if (rule.start_part_num == 0) { // treated as "not a multipart upload"
+ /* Atomic object */
+ verifier.emplace<ETagVerifier_Atomic>(cct, filter);
+ return 0;
+ }
+
+ uint64_t cur_part_ofs = UINT64_MAX; // sentinel so the first part offset is always recorded
+ std::vector<uint64_t> part_ofs;
+
+ /*
+ * We must store the offset of each part to calculate the ETAGs for each
+ * MPU part. These part ETags then become the input for the MPU object
+ * Etag.
+ */
+ for (auto mi = manifest.obj_begin(); mi != manifest.obj_end(); ++mi) {
+ if (cur_part_ofs == mi.get_part_ofs()) // same part as the previous manifest entry
+ continue;
+ cur_part_ofs = mi.get_part_ofs();
+ ldout(cct, 20) << "MPU Part offset:" << cur_part_ofs << dendl;
+ part_ofs.push_back(cur_part_ofs);
+ }
+
+ if (compression) {
+ // if the source object was compressed, the manifest is storing
+ // compressed part offsets. transform the compressed offsets back to
+ // their original offsets by finding the first block of each part
+ const auto& blocks = compression->blocks;
+ auto block = blocks.begin(); // search resumes from the previous match on each iteration
+ for (auto& ofs : part_ofs) {
+ // find the compression_block with new_ofs == ofs
+ constexpr auto less = [] (const compression_block& block, uint64_t ofs) {
+ return block.new_ofs < ofs;
+ };
+ block = std::lower_bound(block, blocks.end(), ofs, less); // assumes blocks is ordered by new_ofs — TODO confirm
+ if (block == blocks.end() || block->new_ofs != ofs) {
+ ldout(cct, 4) << "no match for compressed offset " << ofs
+ << ", disabling etag verification" << dendl;
+ return -EIO;
+ }
+ ofs = block->old_ofs; // rewrites the entry in part_ofs in place
+ ldout(cct, 20) << "MPU Part uncompressed offset:" << ofs << dendl;
+ }
+ }
+
+ verifier.emplace<ETagVerifier_MPU>(cct, std::move(part_ofs), filter);
+ return 0;
+}
+
+/*
+ * Fold the incoming data into the running MD5 and forward it unchanged
+ * to the next stage of the pipe. Returns whatever Pipe::process returns.
+ */
+int ETagVerifier_Atomic::process(bufferlist&& in, uint64_t logical_offset)
+{
+  /* empty buffers contribute nothing to the hash */
+  if (in.length() > 0)
+    hash.Update((const unsigned char *)in.c_str(), in.length());
+
+  return Pipe::process(std::move(in), logical_offset);
+}
+
+/*
+ * Finalise the MD5 and store its hex form in calculated_etag. Does
+ * nothing when an ETag has already been stored.
+ */
+void ETagVerifier_Atomic::calculate_etag()
+{
+  /* Return early if ETag has already been calculated */
+  if (!calculated_etag.empty()) {
+    return;
+  }
+
+  unsigned char digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  char hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+
+  hash.Final(digest);
+  buf_to_hex(digest, CEPH_CRYPTO_MD5_DIGESTSIZE, hex);
+  calculated_etag = hex;
+
+  ldout(cct, 20) << "Single part object: " << " etag:" << calculated_etag
+                 << dendl;
+}
+
+/*
+ * Close the current part: feed its MD5 digest into the MPU-level hash,
+ * restart the per-part hash and advance both part indexes.
+ */
+void ETagVerifier_MPU::process_end_of_MPU_part()
+{
+  unsigned char digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+
+  hash.Final(digest);
+  mpu_etag_hash.Update((const unsigned char *)digest, sizeof(digest));
+  hash.Restart();
+
+  /* the hex form is only needed for the debug message */
+  if (cct->_conf->subsys.should_gather(dout_subsys, 20)) {
+    char hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+    buf_to_hex(digest, CEPH_CRYPTO_MD5_DIGESTSIZE, hex);
+    const std::string part_etag = hex;
+    ldout(cct, 20) << "Part etag: " << part_etag << dendl;
+  }
+
+  ++cur_part_index;
+  ++next_part_index;
+}
+
+/*
+ * Fold one buffer of object data into the per-part MD5, closing the
+ * current part whenever the buffer reaches or crosses the next part
+ * offset, then forward the data to the next stage of the pipe.
+ *
+ * The buffer covers object bytes [logical_offset, logical_offset +
+ * in.length()). A buffer is expected to touch at most two parts.
+ * Returns whatever Pipe::process returns.
+ */
+int ETagVerifier_MPU::process(bufferlist&& in, uint64_t logical_offset)
+{
+  const uint64_t bl_end = in.length() + logical_offset;
+  const size_t num_parts = part_ofs.size();
+
+  /* Handle the last MPU part */
+  if (static_cast<size_t>(next_part_index) == num_parts) {
+    hash.Update((const unsigned char *)in.c_str(), in.length());
+    goto done;
+  }
+
+  /* Incoming bufferlist spans two MPU parts. Calculate separate ETags */
+  if (bl_end > part_ofs[next_part_index]) {
+
+    uint64_t part_one_len = part_ofs[next_part_index] - logical_offset;
+    hash.Update((const unsigned char *)in.c_str(), part_one_len);
+    process_end_of_MPU_part();
+
+    /* cur_part_index now refers to the part that starts inside this buffer */
+    hash.Update((const unsigned char *)in.c_str() + part_one_len,
+      bl_end - part_ofs[cur_part_index]);
+    /*
+     * If we've moved to the last part of the MPU, avoid usage of
+     * part_ofs[next_part_index] as it will lead to out-of-range access.
+     */
+    if (static_cast<size_t>(next_part_index) == num_parts)
+      goto done;
+  } else {
+    hash.Update((const unsigned char *)in.c_str(), in.length());
+  }
+
+  /*
+   * Update the MPU Etag if the current part has ended, i.e. the first
+   * byte after this buffer is the first byte of the next part. (Testing
+   * bl_end + 1 instead would close the part one byte early whenever a
+   * buffer stops a single byte short of the boundary.)
+   */
+  if (bl_end == part_ofs[next_part_index])
+    process_end_of_MPU_part();
+
+done:
+  return Pipe::process(std::move(in), logical_offset);
+}
+
+/*
+ * Produce the multipart ETag: the MD5 over all per-part digests, in hex,
+ * followed by "-<number of parts>". Does nothing when an ETag has already
+ * been stored.
+ */
+void ETagVerifier_MPU::calculate_etag()
+{
+  /* Return early if ETag has already been calculated */
+  if (!calculated_etag.empty()) {
+    return;
+  }
+
+  constexpr size_t hex_len = CEPH_CRYPTO_MD5_DIGESTSIZE * 2;
+  unsigned char part_digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  unsigned char mpu_digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+
+  /* the part still in progress has not been folded in yet */
+  hash.Final(part_digest);
+  mpu_etag_hash.Update((const unsigned char *)part_digest, sizeof(part_digest));
+
+  /* Refer RGWCompleteMultipart::execute() for ETag calculation for MPU object */
+  mpu_etag_hash.Final(mpu_digest);
+  buf_to_hex(mpu_digest, CEPH_CRYPTO_MD5_DIGESTSIZE, etag_buf);
+  snprintf(etag_buf + hex_len, sizeof(etag_buf) - hex_len,
+           "-%lld", (long long)(part_ofs.size()));
+
+  calculated_etag = etag_buf;
+  ldout(cct, 20) << "MPU calculated ETag:" << calculated_etag << dendl;
+}
+
+} // namespace rgw::putobj
diff --git a/src/rgw/rgw_etag_verifier.h b/src/rgw/rgw_etag_verifier.h
new file mode 100644
index 00000000..7e2579b9
--- /dev/null
+++ b/src/rgw/rgw_etag_verifier.h
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * RGW Etag Verifier is an RGW filter which enables the objects copied using
+ * multisite sync to be verified using their ETag from source i.e. the MD5
+ * checksum of the object is computed at the destination and is verified to be
+ * identical to the ETag stored in the object HEAD at source cluster.
+ *
+ * For MPU objects, a different filter named ETagVerifier_MPU is applied which
+ * re-computes the ETag using RGWObjManifest. This computes the ETag using the
+ * same algorithm used at the source cluster, i.e. the MD5 sum of the
+ * individual ETags of the MPU parts.
+ */
+#ifndef CEPH_RGW_ETAG_VERIFIER_H
+#define CEPH_RGW_ETAG_VERIFIER_H
+
+#include "rgw_putobj.h"
+#include "rgw_op.h"
+#include "common/static_ptr.h"
+
+namespace rgw::putobj {
+
+/* Common base of the ETag-computing put-object filters. */
+class ETagVerifier : public rgw::putobj::Pipe
+{
+protected:
+  CephContext* cct;
+  MD5 hash;                /* running MD5 state, fed by the subclasses */
+  string calculated_etag;  /* empty until calculate_etag() stores a value */
+
+public:
+  ETagVerifier(CephContext* cct_, rgw::putobj::DataProcessor *next)
+    : Pipe(next),
+      cct(cct_)
+  {}
+
+  /* Finalise the hash state and store the result in calculated_etag. */
+  virtual void calculate_etag() = 0;
+
+  /* Returns a copy of the stored ETag. */
+  string get_calculated_etag() {
+    return calculated_etag;
+  }
+
+}; /* ETagVerifier */
+
+/* Verifier that hashes the whole data stream with a single MD5. */
+class ETagVerifier_Atomic : public ETagVerifier
+{
+public:
+  ETagVerifier_Atomic(CephContext* cct_, rgw::putobj::DataProcessor *next)
+    : ETagVerifier(cct_, next)
+  {}
+
+  /* Hashes data, then passes it down the pipe. */
+  int process(bufferlist&& data, uint64_t logical_offset) override;
+
+  /* Stores the hex MD5 of everything processed so far. */
+  void calculate_etag() override;
+
+}; /* ETagVerifier_Atomic */
+
+/*
+ * Verifier for multipart objects: hashes each part separately and feeds
+ * the per-part digests into a second MD5.
+ */
+class ETagVerifier_MPU : public ETagVerifier
+{
+  std::vector<uint64_t> part_ofs;  /* offset at which each part begins */
+  int cur_part_index{0};           /* part currently being hashed */
+  int next_part_index{1};          /* part whose offset ends the current one */
+  MD5 mpu_etag_hash;               /* MD5 over the per-part digests */
+
+  /* Closes the current part and moves on to the next. */
+  void process_end_of_MPU_part();
+
+public:
+  ETagVerifier_MPU(CephContext* cct,
+                   std::vector<uint64_t> part_ofs,
+                   rgw::putobj::DataProcessor *next)
+    : ETagVerifier(cct, next), part_ofs(std::move(part_ofs))
+  {}
+
+  /* Hashes data per part, then passes it down the pipe. */
+  int process(bufferlist&& data, uint64_t logical_offset) override;
+
+  /* Stores "<hex md5 of part digests>-<number of parts>". */
+  void calculate_etag() override;
+
+}; /* ETagVerifier_MPU */
+
+constexpr auto max_etag_verifier_size = std::max( // size in bytes of the larger of the two concrete verifiers
+ sizeof(ETagVerifier_Atomic),
+ sizeof(ETagVerifier_MPU)
+ );
+using etag_verifier_ptr = ceph::static_ptr<ETagVerifier, max_etag_verifier_size>; // sized so either verifier type fits; storage semantics are defined in common/static_ptr.h
+
+int create_etag_verifier(CephContext* cct, DataProcessor* next, // emplaces the verifier matching manifest_bl into verifier; returns 0 or -EIO
+ const bufferlist& manifest_bl,
+ const std::optional<RGWCompressionInfo>& compression,
+ etag_verifier_ptr& verifier);
+
+} // namespace rgw::putobj
+
+#endif /* CEPH_RGW_ETAG_VERIFIER_H */
diff --git a/src/rgw/rgw_fcgi.cc b/src/rgw/rgw_fcgi.cc
new file mode 100644
index 00000000..a52ea509
--- /dev/null
+++ b/src/rgw/rgw_fcgi.cc
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_fcgi.h"
+#include "acconfig.h"
+
+/*
+ * Write len bytes from buf to the FastCGI output stream.
+ * Throws rgw::io::Exception when FCGX_PutStr reports an error.
+ */
+size_t RGWFCGX::write_data(const char* const buf, const size_t len)
+{
+  /* According to the documentation of FCGX_PutStr if there is no error
+   * (signalised by negative return value), then always ret == len. */
+  const auto written = FCGX_PutStr(buf, len, fcgx->out);
+  if (written >= 0) {
+    return written;
+  }
+  throw rgw::io::Exception(-written, std::system_category());
+}
+
+/*
+ * Read up to len bytes from the FastCGI input stream into buf and return
+ * the count reported by FCGX_GetStr.
+ * Throws rgw::io::Exception when that count is negative.
+ */
+size_t RGWFCGX::read_data(char* const buf, const size_t len)
+{
+  const auto got = FCGX_GetStr(buf, len, fcgx->in);
+  if (got >= 0) {
+    return got;
+  }
+  throw rgw::io::Exception(-got, std::system_category());
+}
+
+void RGWFCGX::flush() // pushes buffered output out to the FastCGI stream
+{
+ txbuf.pubsync(); // first sync the local output buffer ...
+ FCGX_FFlush(fcgx->out); // ... then flush the FastCGI stream itself
+}
+
+int RGWFCGX::init_env(CephContext* const cct) // initialises env from the request's envp array
+{
+ env.init(cct, (char **)fcgx->envp);
+ return 0; // always reports success
+}
+
+/*
+ * Queue the CGI "Status: <code> <name>\r\n" line in the output buffer.
+ * Returns the number of bytes accepted by the buffer, or 0 when the line
+ * could not be formatted.
+ */
+size_t RGWFCGX::send_status(const int status, const char* const status_name)
+{
+  static constexpr size_t STATUS_BUF_SIZE = 128;
+
+  char statusbuf[STATUS_BUF_SIZE];
+  const int written = snprintf(statusbuf, sizeof(statusbuf),
+                               "Status: %d %s\r\n", status, status_name);
+  if (written < 0) {
+    /* formatting error: nothing usable in the buffer */
+    return 0;
+  }
+
+  /* snprintf returns the length the full string would have had; when the
+   * line was truncated only sizeof(statusbuf) - 1 bytes are valid */
+  size_t statuslen = static_cast<size_t>(written);
+  if (statuslen >= sizeof(statusbuf)) {
+    statuslen = sizeof(statusbuf) - 1;
+    /* keep the line terminated so the following headers stay parseable */
+    statusbuf[statuslen - 2] = '\r';
+    statusbuf[statuslen - 1] = '\n';
+  }
+
+  return txbuf.sputn(statusbuf, statuslen);
+}
+
+/* Queue a "100 Continue" status line and flush it out immediately. */
+size_t RGWFCGX::send_100_continue()
+{
+  const size_t queued = send_status(100, "Continue");
+  flush();
+  return queued;
+}
+
+/*
+ * Queue one "<name>: <value>\r\n" header line in the output buffer and
+ * return the total number of bytes accepted.
+ */
+size_t RGWFCGX::send_header(const boost::string_ref& name,
+                            const boost::string_ref& value)
+{
+  static constexpr char HEADER_SEP[] = ": ";
+  static constexpr char HEADER_END[] = "\r\n";
+
+  /* sizeof - 1: do not emit the terminating NUL of the literals */
+  size_t sent = txbuf.sputn(name.data(), name.length());
+  sent += txbuf.sputn(HEADER_SEP, sizeof(HEADER_SEP) - 1);
+  sent += txbuf.sputn(value.data(), value.length());
+  sent += txbuf.sputn(HEADER_END, sizeof(HEADER_END) - 1);
+  return sent;
+}
+
+/* Queue a "Content-Length: <len>\r\n" header line in the output buffer. */
+size_t RGWFCGX::send_content_length(const uint64_t len)
+{
+  static constexpr size_t CONLEN_BUF_SIZE = 128;
+  char line[CONLEN_BUF_SIZE];
+
+  const auto line_len = snprintf(line, sizeof(line),
+                                 "Content-Length: %" PRIu64 "\r\n", len);
+  return txbuf.sputn(line, line_len);
+}
+
+/* Terminate the header section with an empty line and flush it out. */
+size_t RGWFCGX::complete_header()
+{
+  static constexpr char HEADER_END[] = "\r\n";
+  constexpr size_t end_len = sizeof(HEADER_END) - 1;
+
+  const size_t sent = txbuf.sputn(HEADER_END, end_len);
+  flush();
+  return sent;
+}
diff --git a/src/rgw/rgw_fcgi.h b/src/rgw/rgw_fcgi.h
new file mode 100644
index 00000000..7f8e61a3
--- /dev/null
+++ b/src/rgw/rgw_fcgi.h
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_FCGI_H
+#define CEPH_RGW_FCGI_H
+
+#include "acconfig.h"
+#include <fcgiapp.h>
+
+#include "rgw_client_io.h"
+
+struct FCGX_Request;
+
+class RGWFCGX : public rgw::io::RestfulClient, // RestfulClient implementation on top of one FCGX_Request
+ public rgw::io::BuffererSink {
+ FCGX_Request *fcgx; // request being served; not released by this class in the code shown here
+ RGWEnv env; // filled from fcgx->envp by init_env()
+
+ rgw::io::StaticOutputBufferer<> txbuf; // output buffer constructed over *this; status and header lines are written through it
+
+ size_t read_data(char* buf, size_t len); // reads from fcgx->in; throws rgw::io::Exception on error
+ size_t write_data(const char* buf, size_t len) override; // writes to fcgx->out; throws rgw::io::Exception on error
+
+public:
+ explicit RGWFCGX(FCGX_Request* const fcgx)
+ : fcgx(fcgx),
+ txbuf(*this) {
+ }
+
+ int init_env(CephContext* cct) override;
+ size_t send_status(int status, const char* status_name) override;
+ size_t send_100_continue() override;
+ size_t send_header(const boost::string_ref& name,
+ const boost::string_ref& value) override;
+ size_t send_content_length(uint64_t len) override;
+ size_t complete_header() override;
+
+ size_t recv_body(char* buf, size_t max) override { // body bytes are read straight from the FastCGI stream
+ return read_data(buf, max);
+ }
+
+ size_t send_body(const char* buf, size_t len) override { // body bytes bypass txbuf and go straight to the FastCGI stream
+ return write_data(buf, len);
+ }
+
+ void flush() override;
+
+ RGWEnv& get_env() noexcept override {
+ return env;
+ }
+
+ size_t complete_request() override { // nothing to send at the end of a request
+ return 0;
+ }
+};
+
+#endif
diff --git a/src/rgw/rgw_fcgi_process.cc b/src/rgw/rgw_fcgi_process.cc
new file mode 100644
index 00000000..757fd3ea
--- /dev/null
+++ b/src/rgw/rgw_fcgi_process.cc
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "common/WorkQueue.h"
+
+#include "rgw_rados.h"
+#include "rgw_rest.h"
+#include "rgw_frontend.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_loadgen.h"
+#include "rgw_client_io.h"
+#include "rgw_client_io_filters.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+void RGWFCGXProcess::run() // opens the FastCGI listening socket, then accepts requests and queues them until accept fails
+{
+ string socket_path;
+ string socket_port;
+ string socket_host;
+ int socket_backlog;
+
+ conf->get_val("socket_path", "", &socket_path); // frontend-level settings, with the global options as defaults
+ conf->get_val("socket_port", g_conf()->rgw_port, &socket_port);
+ conf->get_val("socket_host", g_conf()->rgw_host, &socket_host);
+ socket_backlog = g_conf()->rgw_fcgi_socket_backlog;
+
+ if (socket_path.empty() && socket_port.empty() && socket_host.empty()) { // nothing configured: fall back to the global socket path
+ socket_path = g_conf()->rgw_socket_path;
+ if (socket_path.empty()) {
+ dout(0) << "ERROR: no socket server point defined, cannot "
+ "start fcgi frontend" << dendl;
+ return;
+ }
+ }
+
+ if (!socket_path.empty()) { // a unix domain socket path takes precedence over host:port
+ string path_str = socket_path;
+
+ /* this is necessary, as FCGX_OpenSocket might not return an
+ * error, but rather ungracefully exit */
+ int fd = open(path_str.c_str(), O_CREAT, 0644); // probe only: checks the path can be created/opened
+ if (fd < 0) {
+ int err = errno;
+ /* ENXIO is actually expected, we'll get that if we try to open
+ * a unix domain socket */
+ if (err != ENXIO) {
+ dout(0) << "ERROR: cannot create socket: path=" << path_str
+ << " error=" << cpp_strerror(err) << dendl;
+ return;
+ }
+ } else {
+ close(fd); // the probe descriptor itself is not used
+ }
+
+ const char *path = path_str.c_str();
+ sock_fd = FCGX_OpenSocket(path, socket_backlog);
+ if (sock_fd < 0) {
+ dout(0) << "ERROR: FCGX_OpenSocket (" << path << ") returned "
+ << sock_fd << dendl;
+ return;
+ }
+ if (chmod(path, 0777) < 0) { // failure here is only logged
+ dout(0) << "WARNING: couldn't set permissions on unix domain socket"
+ << dendl;
+ }
+ } else if (!socket_port.empty()) {
+ string bind = socket_host + ":" + socket_port;
+ sock_fd = FCGX_OpenSocket(bind.c_str(), socket_backlog);
+ if (sock_fd < 0) {
+ dout(0) << "ERROR: FCGX_OpenSocket (" << bind.c_str() << ") returned "
+ << sock_fd << dendl;
+ return;
+ }
+ } // NOTE(review): with only socket_host set (no path, no port) neither branch opens a socket and sock_fd keeps its previous value — confirm this cannot happen
+
+ m_tp.start();
+
+ FCGX_Request fcgx_reqs[max_connections]; // NOTE(review): array sized by a runtime member is a compiler extension in C++ if max_connections is not a constant — confirm
+
+ QueueRing<FCGX_Request*> qr(max_connections); // pool of initialised request slots
+ for (int i = 0; i < max_connections; i++) {
+ FCGX_Request* fcgx = &fcgx_reqs[i];
+ FCGX_InitRequest(fcgx, sock_fd, 0);
+ qr.enqueue(fcgx);
+ }
+
+ for (;;) { // accept loop; only left when FCGX_Accept_r fails
+ RGWFCGXRequest* req = new RGWFCGXRequest(store->get_new_req_id(), &qr); // presumably takes a slot from qr — verify against RGWFCGXRequest
+ dout(10) << "allocated request req=" << hex << req << dec << dendl;
+ req_throttle.get(1);
+ int ret = FCGX_Accept_r(req->fcgx);
+ if (ret < 0) {
+ delete req;
+ dout(0) << "ERROR: FCGX_Accept_r returned " << ret << dendl;
+ req_throttle.put(1); // give back the throttle unit taken above
+ break;
+ }
+ req_wq.queue(req); // ownership passes to the work queue; handle_request() deletes it
+ }
+
+ m_tp.drain(&req_wq); // drain queued requests before stopping the pool
+ m_tp.stop();
+
+ dout(20) << "cleaning up fcgx connections" << dendl;
+
+ for (int i = 0; i < max_connections; i++) {
+ FCGX_Finish_r(&fcgx_reqs[i]);
+ }
+} /* RGWFCGXProcess::run */
+
+void RGWFCGXProcess::handle_request(RGWRequest* r) // serves one accepted request, finishes it and deletes it
+{
+ RGWFCGXRequest* const req = static_cast<RGWFCGXRequest*>(r); // requests queued by run() are RGWFCGXRequest objects
+
+ RGWFCGX fcgxfe(req->fcgx);
+ auto real_client_io = rgw::io::add_reordering( // filter stack layered over the raw FastCGI client
+ rgw::io::add_buffering(cct,
+ rgw::io::add_chunking(
+ &fcgxfe)));
+ RGWRestfulIO client_io(cct, &real_client_io);
+
+
+ int ret = process_request(store, rest, req, uri_prefix,
+ *auth_registry, &client_io, olog,
+ null_yield, nullptr);
+ if (ret < 0) {
+ /* we don't really care about return code */
+ dout(20) << "process_request() returned " << ret << dendl;
+ }
+
+ FCGX_Finish_r(req->fcgx); // completes the FastCGI request before the wrapper object is destroyed
+
+ delete req;
+} /* RGWFCGXProcess::handle_request */
diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc
new file mode 100644
index 00000000..5ccc01c3
--- /dev/null
+++ b/src/rgw/rgw_file.cc
@@ -0,0 +1,2436 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "include/rados/rgw_file.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "rgw_lib.h"
+#include "rgw_rados.h"
+#include "rgw_resolve.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_frontend.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_rest_user.h"
+#include "rgw_rest_s3.h"
+#include "rgw_os_lib.h"
+#include "rgw_auth_s3.h"
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_zone.h"
+#include "rgw_file.h"
+#include "rgw_lib_frontend.h"
+#include "rgw_perf_counters.h"
+#include "common/errno.h"
+
+#include <atomic>
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw;
+
+namespace rgw {
+
+ /* RGWLib instance used to reach the frontend; only declared here */
+ extern RGWLib rgwlib;
+
+ /* name given to the root handle */
+ const string RGWFileHandle::root_name = "/";
+
+ /* NOTE(review): presumably numbers RGWLibFS instances -- confirm
+ * against its users */
+ std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
+
+ /* NOTE(review): the _s suffix suggests a value in seconds -- confirm */
+ uint32_t RGWLibFS::write_completion_interval_s = 10;
+
+ /* timer shared by all RGWLibFS instances, constructed suspended */
+ ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
+ ceph::construct_suspended};
+
+ /* Check a bucket name for file-system use. Returns 0 when
+ * valid_s3_bucket_name() accepts it (non-relaxed rules); otherwise
+ * -ENAMETOOLONG if the name is longer than 255 characters, else
+ * -EINVAL. */
+ inline int valid_fs_bucket_name(const string& name) {
+ if (valid_s3_bucket_name(name, false /* relaxed */) == 0) {
+ return 0;
+ }
+ return (name.size() > 255) ? -ENAMETOOLONG : -EINVAL;
+ }
+
+ /* Check an object name for file-system use. Returns 0 when
+ * valid_s3_object_name() accepts it; otherwise -ENAMETOOLONG if the
+ * name is longer than 1024 characters, else -EINVAL. */
+ inline int valid_fs_object_name(const string& name) {
+ if (valid_s3_object_name(name) == 0) {
+ return 0;
+ }
+ return (name.size() > 1024) ? -ENAMETOOLONG : -EINVAL;
+ }
+
+ /* Stat the bucket named by path and, when the request succeeds and
+ * reports a match, look up (creating if needed) its handle under
+ * parent, then refresh the handle's times and stored Unix attributes.
+ *
+ * flags: if RGWFileHandle::FLAG_LOCKED is set it is forwarded to
+ * lookup_fh() and the handle mutex is neither taken nor released
+ * here; otherwise the mutex is held around the update.
+ *
+ * bs is handed to the stat request. Returns the lookup_fh() result,
+ * or {nullptr, 0} if the request failed or did not match. */
+ LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
+ RGWLibFS::BucketStats& bs,
+ uint32_t flags)
+ {
+ LookupFHResult fhr{nullptr, 0};
+ std::string bucket_name{path};
+ RGWStatBucketRequest req(cct, get_user(), bucket_name, bs);
+
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ if ((rc == 0) &&
+ (req.get_ret() == 0) &&
+ (req.matched())) {
+ fhr = lookup_fh(parent, path,
+ (flags & RGWFileHandle::FLAG_LOCKED)|
+ RGWFileHandle::FLAG_CREATE|
+ RGWFileHandle::FLAG_BUCKET);
+ if (get<0>(fhr)) {
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+ /* lock only when the caller did not pass FLAG_LOCKED */
+ if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
+ rgw_fh->mtx.lock();
+ }
+ rgw_fh->set_times(req.get_ctime());
+ /* restore attributes */
+ auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
+ auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
+ if (ux_key && ux_attrs) {
+ DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
+ /* write the attrs back when either decode result flag is set */
+ if (get<0>(dar) || get<1>(dar)) {
+ update_fh(rgw_fh);
+ }
+ }
+ if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
+ rgw_fh->mtx.unlock();
+ }
+ }
+ }
+ return fhr;
+ }
+
+ /* Synthesize a minimal handle for path under parent without
+ * consulting the store. FLAG_CREATE is always added to flags, and
+ * FLAG_DIRECTORY as well when type is RGW_FS_TYPE_DIRECTORY. When
+ * a handle is obtained and st is non-null, the size and/or times
+ * selected by st_mask are copied from st under the handle's mutex.
+ * Returns the lookup_fh() result. */
+ LookupFHResult RGWLibFS::fake_leaf(RGWFileHandle* parent,
+ const char *path,
+ enum rgw_fh_type type,
+ struct stat *st, uint32_t st_mask,
+ uint32_t flags)
+ {
+ using std::get;
+
+ flags |= RGWFileHandle::FLAG_CREATE;
+ if (type == RGW_FS_TYPE_DIRECTORY) {
+ flags |= RGWFileHandle::FLAG_DIRECTORY;
+ } /* anything else is treated as a file */
+
+ LookupFHResult fhr = lookup_fh(parent, path, flags);
+ RGWFileHandle* leaf = get<0>(fhr);
+ if (leaf && st) {
+ lock_guard guard(leaf->mtx);
+ if (st_mask & RGW_SETATTR_SIZE) {
+ leaf->set_size(st->st_size);
+ }
+ if (st_mask & RGW_SETATTR_MTIME) {
+ leaf->set_times(st->st_mtim);
+ }
+ }
+ return fhr;
+ } /* RGWLibFS::fake_leaf */
+
+ /* Find the leaf named path under parent and return its handle.
+ *
+ * Up to three probes run in order:
+ * 0: stat the object name built by parent->format_child_name()
+ * (skipped when type is RGW_FS_TYPE_DIRECTORY)
+ * 1: stat the same name with "/" appended
+ * (skipped when type is RGW_FS_TYPE_FILE)
+ * 2: an RGWStatLeafRequest on path
+ * A successful stat in probe 0 or 1 ends the search. In probe 2,
+ * FLAG_EXACT_MATCH in flags rejects a match that is not exact.
+ *
+ * Returns the lookup_fh() result for the probe that succeeded, or
+ * {nullptr, 0} if none did. */
+ LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
+ const char *path,
+ enum rgw_fh_type type,
+ uint32_t flags)
+ {
+ /* find either-of <object_name>, <object_name/>, only one of
+ * which should exist; atomicity? */
+ using std::get;
+
+ LookupFHResult fhr{nullptr, 0};
+
+ /* XXX the need for two round-trip operations to identify file or
+ * directory leaf objects is unnecessary--the current proposed
+ * mechanism to avoid this is to store leaf object names with an
+ * object locator w/o trailing slash */
+
+ std::string obj_path = parent->format_child_name(path, false);
+
+ for (auto ix : { 0, 1, 2 }) {
+ switch (ix) {
+ case 0:
+ {
+ /* type hint */
+ if (type == RGW_FS_TYPE_DIRECTORY)
+ continue;
+
+ RGWStatObjRequest req(cct, get_user(),
+ parent->bucket_name(), obj_path,
+ RGWStatObjRequest::FLAG_NONE);
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ if ((rc == 0) &&
+ (req.get_ret() == 0)) {
+ fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
+ if (get<0>(fhr)) {
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+ lock_guard guard(rgw_fh->mtx);
+ rgw_fh->set_size(req.get_size());
+ rgw_fh->set_times(req.get_mtime());
+ /* restore attributes */
+ auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
+ auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
+ /* NOTE(review): the ETAG/ACL lookups are dereferenced without
+ * the null check applied to ux_key/ux_attrs below -- confirm
+ * both attrs are always present on a successful stat */
+ rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
+ rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
+ if (ux_key && ux_attrs) {
+ DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
+ /* write the attrs back when either decode result flag is set */
+ if (get<0>(dar) || get<1>(dar)) {
+ update_fh(rgw_fh);
+ }
+ }
+ }
+ goto done;
+ }
+ }
+ break;
+ case 1:
+ {
+ /* try dir form */
+ /* type hint */
+ if (type == RGW_FS_TYPE_FILE)
+ continue;
+
+ obj_path += "/";
+ RGWStatObjRequest req(cct, get_user(),
+ parent->bucket_name(), obj_path,
+ RGWStatObjRequest::FLAG_NONE);
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ if ((rc == 0) &&
+ (req.get_ret() == 0)) {
+ /* NOTE(review): unlike probes 0 and 2, FLAG_CREATE is not
+ * passed here -- confirm this is intended */
+ fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
+ if (get<0>(fhr)) {
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+ lock_guard guard(rgw_fh->mtx);
+ rgw_fh->set_size(req.get_size());
+ rgw_fh->set_times(req.get_mtime());
+ /* restore attributes */
+ auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
+ auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
+ /* NOTE(review): same unchecked dereference as in probe 0 */
+ rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
+ rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
+ if (ux_key && ux_attrs) {
+ DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
+ if (get<0>(dar) || get<1>(dar)) {
+ update_fh(rgw_fh);
+ }
+ }
+ }
+ goto done;
+ }
+ }
+ break;
+ case 2:
+ {
+ std::string object_name{path};
+ RGWStatLeafRequest req(cct, get_user(), parent, object_name);
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ if ((rc == 0) &&
+ (req.get_ret() == 0)) {
+ if (req.matched) {
+ /* we need rgw object's key name equal to file name, if
+ * not return NULL */
+ if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
+ !req.exact_matched) {
+ lsubdout(get_context(), rgw, 15)
+ << __func__
+ << ": stat leaf not exact match file name = "
+ << path << dendl;
+ goto done;
+ }
+ fhr = lookup_fh(parent, path,
+ RGWFileHandle::FLAG_CREATE|
+ ((req.is_dir) ?
+ RGWFileHandle::FLAG_DIRECTORY :
+ RGWFileHandle::FLAG_NONE));
+ /* XXX we don't have an object--in general, there need not
+ * be one (just a path segment in some other object). In
+ * actual leaf an object exists, but we'd need another round
+ * trip to get attrs */
+ if (get<0>(fhr)) {
+ /* for now use the parent object's mtime */
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+ lock_guard guard(rgw_fh->mtx);
+ rgw_fh->set_mtime(parent->get_mtime());
+ }
+ }
+ }
+ }
+ break;
+ default:
+ /* not reached */
+ break;
+ }
+ }
+ done:
+ return fhr;
+ } /* RGWLibFS::stat_leaf */
+
+ /* Read up to length bytes at offset from the file behind rgw_fh
+ * into buffer.
+ *
+ * On success returns 0, stores the byte count in *bytes_read and
+ * updates the handle's atime under its mutex. Returns -EINVAL if
+ * rgw_fh is not a file, -ESTALE if it is marked deleted, otherwise
+ * the error from execute_req() or, failing that, from the request
+ * itself. *bytes_read is left untouched on any error. flags is
+ * unused. */
+ int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
+ size_t* bytes_read, void* buffer, uint32_t flags)
+ {
+ if (! rgw_fh->is_file())
+ return -EINVAL;
+
+ if (rgw_fh->deleted())
+ return -ESTALE;
+
+ RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
+ buffer);
+
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ /* fold the request's own status into rc, so a failed read is not
+ * reported as success with *bytes_read unset */
+ if ((rc == 0) &&
+ ((rc = req.get_ret()) == 0)) {
+ lock_guard guard(rgw_fh->mtx);
+ rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
+ *bytes_read = req.nread;
+ }
+
+ return rc;
+ }
+
+ /* Read the content of the symbolic link behind rgw_fh into buffer
+ * (up to length bytes starting at offset).
+ *
+ * On success returns 0, stores the byte count in *bytes_read and
+ * updates the handle's atime under its mutex. Returns -EINVAL if
+ * rgw_fh is not a link, -ESTALE if it is marked deleted, otherwise
+ * the error from execute_req() or, failing that, from the request
+ * itself. *bytes_read is left untouched on any error. flags is
+ * unused. */
+ int RGWLibFS::readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
+ size_t* bytes_read, void* buffer, uint32_t flags)
+ {
+ if (! rgw_fh->is_link())
+ return -EINVAL;
+
+ if (rgw_fh->deleted())
+ return -ESTALE;
+
+ RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
+ buffer);
+
+ int rc = rgwlib.get_fe()->execute_req(&req);
+ /* fold the request's own status into rc, so a failed read is not
+ * reported as success with *bytes_read unset */
+ if ((rc == 0) &&
+ ((rc = req.get_ret()) == 0)) {
+ /* the guard must be a named object: an unnamed temporary would
+ * release the mutex at the end of its own statement */
+ lock_guard guard(rgw_fh->mtx);
+ rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
+ *bytes_read = req.nread;
+ }
+
+ return rc;
+ }
+
+ /* Remove the bucket or leaf object called name.
+ *
+ * With RGWFileHandle::FLAG_UNLINK_THIS in flags, rgw_fh is the
+ * object to remove (the in-line comments mark it as already LOCKED)
+ * and its parent is taken from rgw_fh->get_parent(). Otherwise
+ * rgw_fh is the parent directory and the target is looked up under
+ * it with FLAG_LOCK.
+ *
+ * When the parent is the root the target is a bucket: it must hold
+ * at most one entry, the "/" attribute object is deleted first, then
+ * the bucket itself. Otherwise the leaf object is deleted, with "/"
+ * appended for directories, which must have no children.
+ *
+ * Returns 0 on success, -ENOENT or -ENOTEMPTY from the checks, or
+ * the error of the lookup/delete request. On the common exit path
+ * the target's mutex is released and the handle is unref'd. */
+ int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
+ {
+ int rc = 0;
+ BucketStats bs;
+ RGWFileHandle* parent = nullptr;
+ RGWFileHandle* bkt_fh = nullptr;
+
+ if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
+ /* LOCKED */
+ parent = rgw_fh->get_parent();
+ } else {
+ /* atomicity */
+ parent = rgw_fh;
+ LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
+ rgw_fh = get<0>(fhr);
+ /* LOCKED */
+ }
+
+ if (parent->is_root()) {
+ /* a bucket may have an object storing Unix attributes, check
+ * for and delete it */
+ LookupFHResult fhr;
+ fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
+ RGWFileHandle::FLAG_LOCKED :
+ RGWFileHandle::FLAG_NONE);
+ bkt_fh = get<0>(fhr);
+ if (unlikely(! bkt_fh)) {
+ /* implies !rgw_fh, so also !LOCKED */
+ return -ENOENT;
+ }
+
+ if (bs.num_entries > 1) {
+ unref(bkt_fh); /* return stat_bucket ref */
+ if (likely(!! rgw_fh)) { /* return lock and ref from
+ * lookup_fh (or caller in the
+ * special case of
+ * RGWFileHandle::FLAG_UNLINK_THIS) */
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+ }
+ return -ENOTEMPTY;
+ } else {
+ /* delete object w/key "<bucket>/" (uxattrs), if any */
+ string oname{"/"};
+ RGWDeleteObjRequest req(cct, get_user(), bkt_fh->bucket_name(), oname);
+ rc = rgwlib.get_fe()->execute_req(&req);
+ /* don't care if ENOENT */
+ unref(bkt_fh);
+ }
+
+ /* NOTE(review): if lookup_fh() above returned no handle but
+ * stat_bucket() created one, rgw_fh is still null here and is
+ * dereferenced on the common exit path below -- confirm whether
+ * that combination is reachable */
+ string bname{name};
+ RGWDeleteBucketRequest req(cct, get_user(), bname);
+ rc = rgwlib.get_fe()->execute_req(&req);
+ if (! rc) {
+ rc = req.get_ret();
+ }
+ } else {
+ /*
+ * leaf object
+ */
+ if (! rgw_fh) {
+ /* XXX for now, perform a hard lookup to deduce the type of
+ * object to be deleted ("foo" vs. "foo/")--also, ensures
+ * atomicity at this endpoint */
+ struct rgw_file_handle *fh;
+ rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
+ nullptr /* st */, 0 /* mask */,
+ RGW_LOOKUP_FLAG_NONE);
+ if (!! rc)
+ return rc;
+
+ /* rgw_fh ref+ */
+ rgw_fh = get_rgwfh(fh);
+ rgw_fh->mtx.lock(); /* LOCKED */
+ }
+
+ std::string oname = rgw_fh->relative_object_name();
+ if (rgw_fh->is_dir()) {
+ /* for the duration of our cache timer, trust positive
+ * child cache */
+ if (rgw_fh->has_children()) {
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+ return(-ENOTEMPTY);
+ }
+ oname += "/";
+ }
+ RGWDeleteObjRequest req(cct, get_user(), parent->bucket_name(),
+ oname);
+ rc = rgwlib.get_fe()->execute_req(&req);
+ if (! rc) {
+ rc = req.get_ret();
+ }
+ }
+
+ /* ENOENT when raced with other s3 gateway */
+ if (! rc || rc == -ENOENT) {
+ /* mark the handle deleted and drop it from the handle cache */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ }
+
+ if (! rc) {
+ /* the parent's content changed: bump its mtime and ctime */
+ real_time t = real_clock::now();
+ parent->set_mtime(real_clock::to_timespec(t));
+ parent->set_ctime(real_clock::to_timespec(t));
+ }
+
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+
+ return rc;
+ } /* RGWLibFS::unlink */
+
+ /* Rename _src_name under src_fh to _dst_name under dst_fh, done as
+ * copy-then-delete:
+ * step 0: copy the source object to the destination
+ * step 1: unlink the source (which also releases its lock and ref)
+ *
+ * Directories and open files are refused with -EPERM. Returns 0 on
+ * success, -EINVAL if the source handle cannot be found, otherwise
+ * the error of the step that failed. If step 1 fails after a
+ * successful copy, the destination object is left in place. */
+ int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
+ const char *_src_name, const char *_dst_name)
+
+ {
+ /* XXX initial implementation: try-copy, and delete if copy
+ * succeeds */
+ int rc = -EINVAL;
+
+ /* declared up front so the gotos below cross no initialization */
+ real_time t;
+
+ std::string src_name{_src_name};
+ std::string dst_name{_dst_name};
+
+ /* atomicity */
+ LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+
+ /* should not happen */
+ if (! rgw_fh) {
+ ldout(get_context(), 0) << __func__
+ << " BUG no such src renaming path="
+ << src_name
+ << dendl;
+ goto out;
+ }
+
+ /* forbid renaming of directories (unreasonable at scale) */
+ if (rgw_fh->is_dir()) {
+ ldout(get_context(), 12) << __func__
+ << " rejecting attempt to rename directory path="
+ << rgw_fh->full_object_name()
+ << dendl;
+ rc = -EPERM;
+ goto unlock;
+ }
+
+ /* forbid renaming open files (violates intent, for now) */
+ if (rgw_fh->is_open()) {
+ ldout(get_context(), 12) << __func__
+ << " rejecting attempt to rename open file path="
+ << rgw_fh->full_object_name()
+ << dendl;
+ rc = -EPERM;
+ goto unlock;
+ }
+
+ t = real_clock::now();
+
+ for (int ix : {0, 1}) {
+ switch (ix) {
+ case 0:
+ {
+ RGWCopyObjRequest req(cct, get_user(), src_fh, dst_fh, src_name,
+ dst_name);
+ /* assign the function-level rc (no shadowing local) so that a
+ * failed copy returns its real error rather than the initial
+ * -EINVAL */
+ rc = rgwlib.get_fe()->execute_req(&req);
+ if ((rc != 0) ||
+ ((rc = req.get_ret()) != 0)) {
+ ldout(get_context(), 1)
+ << __func__
+ << " rename step 0 failed src="
+ << src_fh->full_object_name() << " " << src_name
+ << " dst=" << dst_fh->full_object_name()
+ << " " << dst_name
+ << " rc " << rc
+ << dendl;
+ goto unlock;
+ }
+ ldout(get_context(), 12)
+ << __func__
+ << " rename step 0 success src="
+ << src_fh->full_object_name() << " " << src_name
+ << " dst=" << dst_fh->full_object_name()
+ << " " << dst_name
+ << " rc " << rc
+ << dendl;
+ /* update dst change id */
+ dst_fh->set_times(t);
+ }
+ break;
+ case 1:
+ {
+ rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
+ RGWFileHandle::FLAG_UNLINK_THIS);
+ /* !LOCKED, -ref */
+ if (! rc) {
+ ldout(get_context(), 12)
+ << __func__
+ << " rename step 1 success src="
+ << src_fh->full_object_name() << " " << src_name
+ << " dst=" << dst_fh->full_object_name()
+ << " " << dst_name
+ << " rc " << rc
+ << dendl;
+ /* update src change id */
+ src_fh->set_times(t);
+ } else {
+ ldout(get_context(), 1)
+ << __func__
+ << " rename step 1 failed src="
+ << src_fh->full_object_name() << " " << src_name
+ << " dst=" << dst_fh->full_object_name()
+ << " " << dst_name
+ << " rc " << rc
+ << dendl;
+ }
+ }
+ /* unlink() already dropped the lock and ref: skip "unlock" */
+ goto out;
+ default:
+ ceph_abort();
+ } /* switch */
+ } /* ix */
+ unlock:
+ rgw_fh->mtx.unlock(); /* !LOCKED */
+ unref(rgw_fh); /* -ref */
+
+ out:
+ return rc;
+ } /* RGWLibFS::rename */
+
+ /* Create directory name under parent: a bucket when parent is the
+ * root, otherwise an object whose name comes from
+ * parent->format_child_name(name, true).
+ *
+ * A handle is created and locked first, its stat data is filled
+ * from st/mask and its encoded Unix attributes are attached to the
+ * create request. If st is non-null it also receives the new
+ * handle's stat data.
+ *
+ * Returns {handle, 0} on success. On failure the handle slot is
+ * nullptr and the code is -EEXIST (name already resolves), -EIO
+ * (no handle), the name-validation error, or the request error.
+ * flags is unused. */
+ MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
+ struct stat *st, uint32_t mask, uint32_t flags)
+ {
+ int rc, rc2;
+ rgw_file_handle *lfh;
+
+ /* a successful lookup means the name is already taken */
+ rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
+ nullptr /* st */, 0 /* mask */,
+ RGW_LOOKUP_FLAG_NONE);
+ if (! rc) {
+ /* conflict! */
+ rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
+ return MkObjResult{nullptr, -EEXIST};
+ }
+
+ MkObjResult mkr{nullptr, -EINVAL};
+ LookupFHResult fhr;
+ RGWFileHandle* rgw_fh = nullptr;
+ buffer::list ux_key, ux_attrs;
+
+ fhr = lookup_fh(parent, name,
+ RGWFileHandle::FLAG_CREATE|
+ RGWFileHandle::FLAG_DIRECTORY|
+ RGWFileHandle::FLAG_LOCK);
+ rgw_fh = get<0>(fhr);
+ if (rgw_fh) {
+ rgw_fh->create_stat(st, mask);
+ rgw_fh->set_times(real_clock::now());
+ /* save attrs */
+ rgw_fh->encode_attrs(ux_key, ux_attrs);
+ if (st)
+ rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
+ get<0>(mkr) = rgw_fh;
+ } else {
+ get<1>(mkr) = -EIO;
+ return mkr;
+ }
+
+ if (parent->is_root()) {
+ /* bucket */
+ string bname{name};
+ /* enforce S3 name restrictions */
+ rc = valid_fs_bucket_name(bname);
+ if (rc != 0) {
+ /* undo the handle created above */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+ get<0>(mkr) = nullptr;
+ get<1>(mkr) = rc;
+ return mkr;
+ }
+
+ RGWCreateBucketRequest req(get_context(), get_user(), bname);
+
+ /* save attrs */
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+ } else {
+ /* create an object representing the directory */
+ buffer::list bl;
+ string dir_name = parent->format_child_name(name, true);
+
+ /* need valid S3 name (characters, length <= 1024, etc) */
+ rc = valid_fs_object_name(dir_name);
+ if (rc != 0) {
+ /* undo the handle created above */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+ get<0>(mkr) = nullptr;
+ get<1>(mkr) = rc;
+ return mkr;
+ }
+
+ RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
+ dir_name, bl);
+
+ /* save attrs */
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+ }
+
+ if (! ((rc == 0) &&
+ (rc2 == 0))) {
+ /* op failed */
+ /* NOTE(review): unlike the validation failures above, this path
+ * does not call fh_cache.remove() -- confirm this is intended */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ rgw_fh->mtx.unlock(); /* !LOCKED */
+ unref(rgw_fh);
+ get<0>(mkr) = nullptr;
+ /* fixup rc */
+ if (!rc)
+ rc = rc2;
+ } else {
+ /* the parent gained an entry: bump its mtime and ctime */
+ real_time t = real_clock::now();
+ parent->set_mtime(real_clock::to_timespec(t));
+ parent->set_ctime(real_clock::to_timespec(t));
+ rgw_fh->mtx.unlock(); /* !LOCKED */
+ }
+
+ get<1>(mkr) = rc;
+
+ return mkr;
+ } /* RGWLibFS::mkdir */
+
+ /* Create an empty object for file name under parent.
+ *
+ * The object is written first; only when that succeeds is a handle
+ * looked up (created if needed) and locked. Stat data and times
+ * are filled in only when lookup_fh() reports FLAG_CREATE in its
+ * result flags. If st is non-null it receives the handle's stat
+ * data.
+ *
+ * Returns {handle, 0} on success. On failure the handle slot is
+ * nullptr and the code is -EEXIST (name already resolves), the
+ * name-validation error, -EIO (no handle), or the request error.
+ * flags is unused. */
+ MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
+ struct stat *st, uint32_t mask, uint32_t flags)
+ {
+ int rc, rc2;
+
+ using std::get;
+
+ /* a successful lookup means the name is already taken */
+ rgw_file_handle *lfh;
+ rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
+ nullptr /* st */, 0 /* mask */,
+ RGW_LOOKUP_FLAG_NONE);
+ if (! rc) {
+ /* conflict! */
+ rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
+ return MkObjResult{nullptr, -EEXIST};
+ }
+
+ /* expand and check name */
+ std::string obj_name = parent->format_child_name(name, false);
+ rc = valid_fs_object_name(obj_name);
+ if (rc != 0) {
+ return MkObjResult{nullptr, rc};
+ }
+
+ /* create it */
+ buffer::list bl;
+ RGWPutObjRequest req(cct, get_user(), parent->bucket_name(), obj_name, bl);
+ MkObjResult mkr{nullptr, -EINVAL};
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+
+ if ((rc == 0) &&
+ (rc2 == 0)) {
+ /* XXX atomicity */
+ LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
+ RGWFileHandle::FLAG_LOCK);
+ RGWFileHandle* rgw_fh = get<0>(fhr);
+ if (rgw_fh) {
+ if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
+ /* fill in stat data */
+ real_time t = real_clock::now();
+ rgw_fh->create_stat(st, mask);
+ rgw_fh->set_times(t);
+
+ parent->set_mtime(real_clock::to_timespec(t));
+ parent->set_ctime(real_clock::to_timespec(t));
+ }
+ if (st)
+ (void) rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
+
+ /* NOTE(review): both get_attr() results are dereferenced
+ * without a null check -- confirm the put request always
+ * carries ETAG and ACL attrs on success */
+ rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
+ rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
+
+ get<0>(mkr) = rgw_fh;
+ rgw_fh->mtx.unlock();
+ } else
+ rc = -EIO;
+ }
+
+ get<1>(mkr) = rc;
+
+ /* case like : quota exceed will be considered as fail too*/
+ if(rc2 < 0)
+ get<1>(mkr) = rc2;
+
+ return mkr;
+ } /* RGWLibFS::create */
+
+ /* Create symbolic link name under parent, stored as an object whose
+ * body is the bytes of link_path.
+ *
+ * A handle is created and locked first, its stat data is filled
+ * from st/mask and its encoded Unix attributes are attached to the
+ * put request. If st is non-null it also receives the new handle's
+ * stat data.
+ *
+ * Returns {handle, 0} on success. On failure the handle slot is
+ * nullptr and the code is -EEXIST (name already resolves), -EIO
+ * (no handle), the name-validation error, or the request error.
+ * flags is unused. */
+ MkObjResult RGWLibFS::symlink(RGWFileHandle* parent, const char *name,
+ const char* link_path, struct stat *st, uint32_t mask, uint32_t flags)
+ {
+ int rc, rc2;
+
+ using std::get;
+
+ /* a successful lookup means the name is already taken */
+ rgw_file_handle *lfh;
+ rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
+ nullptr /* st */, 0 /* mask */,
+ RGW_LOOKUP_FLAG_NONE);
+ if (! rc) {
+ /* conflict! */
+ rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
+ return MkObjResult{nullptr, -EEXIST};
+ }
+
+ MkObjResult mkr{nullptr, -EINVAL};
+ LookupFHResult fhr;
+ RGWFileHandle* rgw_fh = nullptr;
+ buffer::list ux_key, ux_attrs;
+
+ fhr = lookup_fh(parent, name,
+ RGWFileHandle::FLAG_CREATE|
+ RGWFileHandle::FLAG_SYMBOLIC_LINK|
+ RGWFileHandle::FLAG_LOCK);
+ rgw_fh = get<0>(fhr);
+ if (rgw_fh) {
+ rgw_fh->create_stat(st, mask);
+ rgw_fh->set_times(real_clock::now());
+ /* save attrs */
+ rgw_fh->encode_attrs(ux_key, ux_attrs);
+ /* NOTE(review): mkdir() and create() pass FLAG_LOCKED to stat()
+ * in the same locked state; here it is omitted -- confirm */
+ if (st)
+ rgw_fh->stat(st);
+ get<0>(mkr) = rgw_fh;
+ } else {
+ get<1>(mkr) = -EIO;
+ return mkr;
+ }
+
+ /* need valid S3 name (characters, length <= 1024, etc) */
+ rc = valid_fs_object_name(name);
+ if (rc != 0) {
+ /* undo the handle created above */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ rgw_fh->mtx.unlock();
+ unref(rgw_fh);
+ get<0>(mkr) = nullptr;
+ get<1>(mkr) = rc;
+ return mkr;
+ }
+
+ /* NOTE(review): the object name is the bare name, not
+ * parent->format_child_name() as in mkdir()/create() -- confirm
+ * behaviour for links below the bucket's top level */
+ string obj_name = std::string(name);
+ /* create an object representing the symbolic link; its body is
+ * the link target path */
+ buffer::list bl;
+
+ /* XXXX */
+#if 0
+ bl.push_back(
+ buffer::create_static(len, static_cast<char*>(buffer)));
+#else
+
+ bl.push_back(
+ buffer::copy(link_path, strlen(link_path)));
+#endif
+
+ RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
+ obj_name, bl);
+
+ /* save attrs */
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+ if (! ((rc == 0) &&
+ (rc2 == 0))) {
+ /* op failed */
+ rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
+ rgw_fh->mtx.unlock(); /* !LOCKED */
+ unref(rgw_fh);
+ get<0>(mkr) = nullptr;
+ /* fixup rc */
+ if (!rc)
+ rc = rc2;
+ } else {
+ /* the parent gained an entry: bump its mtime and ctime */
+ real_time t = real_clock::now();
+ parent->set_mtime(real_clock::to_timespec(t));
+ parent->set_ctime(real_clock::to_timespec(t));
+ rgw_fh->mtx.unlock(); /* !LOCKED */
+ }
+
+ get<1>(mkr) = rc;
+
+ return mkr;
+ } /* RGWLibFS::symlink */
+
+ /* Fill st from rgw_fh. A regular file that is marked deleted
+ * yields -ESTALE; every other case returns whatever
+ * rgw_fh->stat() returns. */
+ int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
+ {
+ /* only regular files are checked for staleness */
+ const bool stale_file =
+ (rgw_fh->fh.fh_type == RGW_FS_TYPE_FILE) && rgw_fh->deleted();
+ if (stale_file) {
+ return -ESTALE;
+ }
+
+ /* if rgw_fh is a directory, mtime will be advanced */
+ return rgw_fh->stat(st);
+ } /* RGWLibFS::getattr */
+
+ /* Apply the attributes selected by mask from st to rgw_fh and
+ * persist them with an RGWSetAttrsRequest, re-sending the handle's
+ * cached etag and ACL attrs. The handle's mutex is held for the
+ * whole operation.
+ *
+ * If execute_req() returns -ENOENT, an empty object is written
+ * instead, carrying only the Unix attrs.
+ *
+ * Returns 0 on success (ctime is then set to now), -ESTALE for a
+ * deleted regular file, and -EIO for any request failure. flags is
+ * unused. */
+ int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
+ uint32_t flags)
+ {
+ int rc, rc2;
+ buffer::list ux_key, ux_attrs;
+ /* NOTE(review): etag and acls are read before the mutex is taken
+ * below -- confirm that is safe */
+ buffer::list etag = rgw_fh->get_etag();
+ buffer::list acls = rgw_fh->get_acls();
+
+ lock_guard guard(rgw_fh->mtx);
+
+ switch(rgw_fh->fh.fh_type) {
+ case RGW_FS_TYPE_FILE:
+ {
+ if (rgw_fh->deleted())
+ return -ESTALE;
+ }
+ break;
+ default:
+ break;
+ };
+
+ string obj_name{rgw_fh->relative_object_name()};
+
+ /* non-bucket directories get a trailing slash */
+ if (rgw_fh->is_dir() &&
+ (likely(! rgw_fh->is_bucket()))) {
+ obj_name += "/";
+ }
+
+ RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), obj_name);
+
+ rgw_fh->create_stat(st, mask);
+ rgw_fh->encode_attrs(ux_key, ux_attrs);
+
+ /* save attrs */
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+ req.emplace_attr(RGW_ATTR_ETAG, std::move(etag));
+ req.emplace_attr(RGW_ATTR_ACL, std::move(acls));
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+
+ if (rc == -ENOENT) {
+ /* special case: materialize placeholder dir */
+ buffer::list bl;
+ /* this req shadows the set-attrs request above */
+ RGWPutObjRequest req(get_context(), get_user(), rgw_fh->bucket_name(),
+ obj_name, bl);
+
+ rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
+
+ /* save attrs */
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+
+ rc = rgwlib.get_fe()->execute_req(&req);
+ rc2 = req.get_ret();
+ }
+
+ /* every failure is reported as -EIO */
+ if ((rc != 0) || (rc2 != 0)) {
+ return -EIO;
+ }
+
+ rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
+
+ return 0;
+ } /* RGWLibFS::setattr */
+
+ /* Re-encode rgw_fh's Unix attributes and write them back with an
+ * RGWSetAttrsRequest. A failure is only logged. The caller must
+ * already hold rgw_fh->mtx. */
+ void RGWLibFS::update_fh(RGWFileHandle *rgw_fh)
+ {
+ string target{rgw_fh->relative_object_name()};
+ buffer::list key_bl, attrs_bl;
+
+ /* non-bucket directories get a trailing slash */
+ const bool plain_dir =
+ rgw_fh->is_dir() && (likely(! rgw_fh->is_bucket()));
+ if (plain_dir) {
+ target += "/";
+ }
+
+ lsubdout(get_context(), rgw, 17)
+ << __func__
+ << " update old versioned fh : " << target
+ << dendl;
+
+ RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), target);
+
+ rgw_fh->encode_attrs(key_bl, attrs_bl);
+ req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(key_bl));
+ req.emplace_attr(RGW_ATTR_UNIX1, std::move(attrs_bl));
+
+ const int exec_rc = rgwlib.get_fe()->execute_req(&req);
+ const int op_rc = req.get_ret();
+
+ if (! ((exec_rc == 0) && (op_rc == 0))) {
+ lsubdout(get_context(), rgw, 17)
+ << __func__
+ << " update fh failed : " << target
+ << dendl;
+ }
+ } /* RGWLibFS::update_fh */
+
+ /* Shut this file system instance down: set FLAG_CLOSED, drain the
+ * handle cache (unref'ing each drained handle), unregister from the
+ * frontend's process and finally call rele(). */
+ void RGWLibFS::close()
+ {
+ state.flags |= FLAG_CLOSED;
+
+ /* functor given to fh_cache.drain(): logs, then unrefs, each
+ * handle it is called with */
+ class ObjUnref
+ {
+ RGWLibFS* fs;
+ public:
+ explicit ObjUnref(RGWLibFS* _fs) : fs(_fs) {}
+ void operator()(RGWFileHandle* fh) const {
+ lsubdout(fs->get_context(), rgw, 5)
+ << __func__
+ << fh->name
+ << " before ObjUnref refs=" << fh->get_refcnt()
+ << dendl;
+ fs->unref(fh);
+ }
+ };
+
+ /* force cache drain, forces objects to evict */
+ fh_cache.drain(ObjUnref(this),
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ rgwlib.get_fe()->get_process()->unregister_fs(this);
+ rele();
+ } /* RGWLibFS::close */
+
+ /* Print an fh_key as "<fh_key: bucket=B; object=O>". */
+ inline std::ostream& operator<<(std::ostream &os, fh_key const &fhk) {
+ const auto& hk = fhk.fh_hk;
+ os << "<fh_key: bucket=" << hk.bucket
+ << "; object=" << hk.object
+ << ">";
+ return os;
+ }
+
+ /* Print a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
+ inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
+ os << "<timespec: tv_sec=" << ts.tv_sec
+ << "; tv_nsec=" << ts.tv_nsec
+ << ">";
+ return os;
+ }
+
+ /* Print an event as "<event:type=...;fid=B:O;ts=...>"; any type
+ * other than READDIR is shown as UNKNOWN. */
+ std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
+ const char* type_tag =
+ (ev.t == RGWLibFS::event::type::READDIR) ? "type=READDIR;"
+ : "type=UNKNOWN;";
+ os << "<event:";
+ os << type_tag;
+ os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
+ << ";ts=" << ev.ts << ">";
+ return os;
+ }
+
+ /* Expire cached directory state.
+ *
+ * Each cycle takes a batch of events from the front of state.events
+ * (under state.mtx) whose timestamp plus rgw_nfs_namespace_expire_secs
+ * is not later than now. Then, without that lock, it clears and
+ * invalidates the directory behind every READDIR event in the batch,
+ * unless a readdir newer than the event has been recorded on the
+ * directory. The batch size is rgw_nfs_max_gc, or a quarter of the
+ * queue once it holds 500 or more events.
+ *
+ * Returns when the queue is empty; the loop also ends at the first
+ * unexpired event or when shutdown is set. */
+ void RGWLibFS::gc()
+ {
+ using std::get;
+ using directory = RGWFileHandle::directory;
+
+ /* dirent invalidate timeout--basically, the upper-bound on
+ * inconsistency with the S3 namespace */
+ auto expire_s
+ = get_context()->_conf->rgw_nfs_namespace_expire_secs;
+
+ /* max events to gc in one cycle */
+ uint32_t max_ev = get_context()->_conf->rgw_nfs_max_gc;
+
+ struct timespec now, expire_ts;
+ event_vector ve;
+ bool stop = false;
+ std::deque<event> &events = state.events;
+
+ do {
+ (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
+ lsubdout(get_context(), rgw, 15)
+ << "GC: top of expire loop"
+ << " now=" << now
+ << " expire_s=" << expire_s
+ << dendl;
+ {
+ lock_guard guard(state.mtx); /* LOCKED */
+ lsubdout(get_context(), rgw, 15)
+ << "GC: processing"
+ << " count=" << events.size()
+ << " events"
+ << dendl;
+ /* just return if no events */
+ if (events.empty()) {
+ return;
+ }
+ uint32_t _max_ev =
+ (events.size() < 500) ? max_ev : (events.size() / 4);
+ for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
+ event& ev = events.front();
+ expire_ts = ev.ts;
+ expire_ts.tv_sec += expire_s;
+ /* stop at the first event that has not expired yet */
+ if (expire_ts > now) {
+ stop = true;
+ break;
+ }
+ ve.push_back(ev);
+ events.pop_front();
+ }
+ } /* anon */
+ /* !LOCKED */
+ for (auto& ev : ve) {
+ lsubdout(get_context(), rgw, 15)
+ << "try-expire ev: " << ev << dendl;
+ if (likely(ev.t == event::type::READDIR)) {
+ RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
+ lsubdout(get_context(), rgw, 15)
+ << "ev rgw_fh: " << rgw_fh << dendl;
+ if (rgw_fh) {
+ RGWFileHandle::directory* d;
+ if (unlikely(! rgw_fh->is_dir())) {
+ lsubdout(get_context(), rgw, 0)
+ << __func__
+ << " BUG non-directory found with READDIR event "
+ << "(" << rgw_fh->bucket_name() << ","
+ << rgw_fh->object_name() << ")"
+ << dendl;
+ goto rele;
+ }
+ /* maybe clear state */
+ d = get<directory>(&rgw_fh->variant_type);
+ if (d) {
+ struct timespec ev_ts = ev.ts;
+ lock_guard guard(rgw_fh->mtx);
+ struct timespec d_last_readdir = d->last_readdir;
+ if (unlikely(ev_ts < d_last_readdir)) {
+ /* readdir cycle in progress, don't invalidate */
+ lsubdout(get_context(), rgw, 15)
+ << "GC: delay expiration for "
+ << rgw_fh->object_name()
+ << " ev.ts=" << ev_ts
+ << " last_readdir=" << d_last_readdir
+ << dendl;
+ /* leave through rele so the lookup_handle() ref is
+ * returned; a bare continue would skip the unref below.
+ * Leaving this scope releases the guard first. */
+ goto rele;
+ } else {
+ lsubdout(get_context(), rgw, 15)
+ << "GC: expiring "
+ << rgw_fh->object_name()
+ << dendl;
+ rgw_fh->clear_state();
+ rgw_fh->invalidate();
+ }
+ }
+ rele:
+ unref(rgw_fh);
+ } /* rgw_fh */
+ } /* event::type::READDIR */
+ } /* ev */
+ ve.clear();
+ } while (! (stop || shutdown));
+ } /* RGWLibFS::gc */
+
+ /* Print a one-line summary of a handle: address, type, key (fid),
+ * object name and reference count. */
+ std::ostream& operator<<(std::ostream &os,
+ RGWFileHandle const &rgw_fh)
+ {
+ const auto& fhk = rgw_fh.get_key();
+ const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
+
+ /* types other than directory and file are shown as UNKNOWN */
+ const char* type_tag = "type=UNKNOWN;";
+ if (fh->fh_type == RGW_FS_TYPE_DIRECTORY) {
+ type_tag = "type=DIRECTORY;";
+ } else if (fh->fh_type == RGW_FS_TYPE_FILE) {
+ type_tag = "type=FILE;";
+ }
+
+ os << "<RGWFileHandle:";
+ os << "addr=" << &rgw_fh << ";";
+ os << type_tag;
+ os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
+ os << "name=" << rgw_fh.object_name() << ";";
+ os << "refcnt=" << rgw_fh.get_refcnt() << ";";
+ os << ">";
+ return os;
+ }
+
+ /* Destructor: remove the handle from the handle cache if it is
+ * still linked there, then drop the reference held on the parent
+ * unless the parent is the mount handle. */
+ RGWFileHandle::~RGWFileHandle() {
+ /* !recycle case, handle may STILL be in handle table, BUT
+ * the partition lock is not held in this path */
+ if (fh_hook.is_linked()) {
+ fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
+ }
+ /* cond-unref parent */
+ if (parent && (! parent->is_mount())) {
+ /* safe because if parent->unref causes its deletion,
+ * there are a) by refcnt, no other objects/paths pointing
+ * to it and b) by the semantics of valid iteration of
+ * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
+ * no unsafe iterators reaching it either--n.b., this constraint
+ * is binding on code which may in future attempt to e.g.,
+ * cause the eviction of objects in LRU order */
+ (void) get_fs()->unref(parent);
+ }
+ }
+
+ /* Build the key for the child called name. At depth 0 the child
+ * is a bucket and name serves as both key components; deeper down
+ * the key combines this handle's bucket with make_key_name(name).
+ * The third component is always the user id string. */
+ fh_key RGWFileHandle::make_fhk(const std::string& name)
+ {
+ std::string tenant_id = get_fs()->get_user()->user_id.to_str();
+ if (depth != 0) {
+ std::string child_key = make_key_name(name.c_str());
+ return fh_key(fhk.fh_hk.bucket, child_key.c_str(), tenant_id);
+ }
+ /* S3 bucket -- assert mount-at-bucket case reaches here */
+ return fh_key(name, name, tenant_id);
+ }
+
+ /* Serialize this handle for storage as object attributes: a key
+ * built from fh.fh_hk goes to ux_key1, the handle itself to
+ * ux_attrs1. */
+ void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
+ ceph::buffer::list& ux_attrs1)
+ {
+ using ceph::encode;
+ fh_key hk_key(this->fh.fh_hk);
+ encode(hk_key, ux_key1);
+ encode(*this, ux_attrs1);
+ } /* RGWFileHandle::encode_attrs */
+
+ /* Restore this handle from stored attributes. ux_key1 is decoded
+ * into a local key that is then discarded; ux_attrs1 is decoded
+ * into *this. Both pointers are dereferenced unchecked, so callers
+ * must pass non-null lists.
+ *
+ * In the returned pair, the first flag is always true once the key
+ * has decoded and the second is true when the decoded
+ * state.version is below 2. */
+ DecodeAttrsResult RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
+ const ceph::buffer::list* ux_attrs1)
+ {
+ using ceph::decode;
+ DecodeAttrsResult dar { false, false };
+ fh_key fhk;
+ auto bl_iter_key1 = ux_key1->cbegin();
+ decode(fhk, bl_iter_key1);
+ get<0>(dar) = true;
+
+ auto bl_iter_unix1 = ux_attrs1->cbegin();
+ decode(*this, bl_iter_unix1);
+ if (this->state.version < 2) {
+ get<1>(dar) = true;
+ }
+
+ return dar;
+ } /* RGWFileHandle::decode_attrs */
+
+ /* Decide whether this handle may be recycled for the object that
+ * newobj_fac is about to create. Returns false if the factory is
+ * not an RGWFileHandle::Factory or if its key falls in a different
+ * handle-cache partition than this handle. Otherwise the handle is
+ * removed from the cache, if still linked, and true is returned. */
+ bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory* newobj_fac) {
+ lsubdout(fs->get_context(), rgw, 17)
+ << __func__ << " " << *this
+ << dendl;
+ auto factory = dynamic_cast<const RGWFileHandle::Factory*>(newobj_fac);
+ if (factory == nullptr) {
+ return false;
+ }
+ /* make sure the object being reclaimed is in the same partition as
+ * the new-object factory; only then can we recycle the object and
+ * replace it with the new one */
+ if (!fs->fh_cache.is_same_partition(factory->fhk.fh_hk.object, fh.fh_hk.object)) {
+ return false;
+ }
+ /* in the non-delete case, handle may still be in handle table */
+ if (fh_hook.is_linked()) {
+ /* in this case, we are being called from a context which holds
+ * the partition lock */
+ fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_NONE);
+ }
+ return true;
+ } /* RGWFileHandle::reclaim */
+
+ /* Report whether this directory has children, using an
+ * RGWRMdirCheck request. Returns false for non-directories and
+ * whenever the request fails or its result is not valid. */
+ bool RGWFileHandle::has_children() const
+ {
+ if (unlikely(! is_dir())) {
+ return false;
+ }
+
+ RGWRMdirCheck req(fs->get_context(), fs->get_user(), this);
+ const int rc = rgwlib.get_fe()->execute_req(&req);
+ return (rc == 0) && req.valid && req.has_children;
+ }
+
+ /* Print a readdir offset: the integer behind the uint64_t*
+ * alternative, or the marker string behind the const char*
+ * alternative. A null marker prints nothing. */
+ std::ostream& operator<<(std::ostream &os,
+ RGWFileHandle::readdir_offset const &offset)
+ {
+ using boost::get;
+ if (unlikely(!! get<uint64_t*>(&offset))) {
+ uint64_t* ioff = get<uint64_t*>(offset);
+ os << *ioff;
+ }
+ else {
+ /* readdir() treats a null marker as the initial offset, so one
+ * can reach this point; streaming a null C string is undefined
+ * behaviour, so skip it */
+ const char* marker = get<const char*>(offset);
+ if (marker)
+ os << marker;
+ }
+ return os;
+ }
+
+ /* Enumerate this directory, passing entries to rcb (with cb_arg)
+ * from offset onwards. The root is listed with an
+ * RGWListBucketsRequest, everything else with an RGWReaddirRequest.
+ *
+ * offset is either a marker string (null for the initial call) or a
+ * pointer to an integer offset (0 for the initial call). When
+ * execute_req() succeeds, atime is updated, the link count is reset
+ * to 2 on an initial call and then increased by the request's
+ * d_count, and *eof is set from the request. *eof is not written
+ * on failure.
+ *
+ * A READDIR event is queued on the file system on every call.
+ * Returns the execute_req() result. flags is unused. */
+ int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg,
+ readdir_offset offset,
+ bool *eof, uint32_t flags)
+ {
+ using event = RGWLibFS::event;
+ using boost::get;
+ int rc = 0;
+ struct timespec now;
+ CephContext* cct = fs->get_context();
+
+ lsubdout(cct, rgw, 10)
+ << __func__ << " readdir called on "
+ << object_name()
+ << dendl;
+
+ /* record when this readdir started; gc() compares event
+ * timestamps against last_readdir */
+ directory* d = get<directory>(&variant_type);
+ if (d) {
+ (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
+ lock_guard guard(mtx);
+ d->last_readdir = now;
+ }
+
+ bool initial_off;
+ char* mk{nullptr};
+
+ /* an initial call has a null marker or a zero integer offset */
+ if (likely(!! get<const char*>(&offset))) {
+ mk = const_cast<char*>(get<const char*>(offset));
+ initial_off = !mk;
+ } else {
+ initial_off = (*get<uint64_t*>(offset) == 0);
+ }
+
+ if (is_root()) {
+ RGWListBucketsRequest req(cct, fs->get_user(), this, rcb, cb_arg,
+ offset);
+ rc = rgwlib.get_fe()->execute_req(&req);
+ if (! rc) {
+ (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
+ lock_guard guard(mtx);
+ state.atime = now;
+ if (initial_off)
+ set_nlink(2);
+ inc_nlink(req.d_count);
+ *eof = req.eof();
+ }
+ } else {
+ RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset);
+ rc = rgwlib.get_fe()->execute_req(&req);
+ if (! rc) {
+ (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
+ lock_guard guard(mtx);
+ state.atime = now;
+ if (initial_off)
+ set_nlink(2);
+ inc_nlink(req.d_count);
+ *eof = req.eof();
+ }
+ }
+
+ /* queue a READDIR event stamped with the current atime; this is
+ * done whether or not the request succeeded */
+ event ev(event::type::READDIR, get_key(), state.atime);
+ lock_guard sguard(fs->state.mtx);
+ fs->state.push_event(ev);
+
+ lsubdout(fs->get_context(), rgw, 15)
+ << __func__
+ << " final link count=" << state.nlink
+ << dendl;
+
+ return rc;
+ } /* RGWFileHandle::readdir */
+
+ /* Append len bytes from buffer to this file's write transaction,
+  * starting a new transaction (an RGWWriteRequest) when none is active.
+  *
+  * off            position of the data; a new transaction must begin
+  *                at offset 0
+  * len            number of bytes in buffer
+  * bytes_written  out: set once the continuation step has run, to
+  *                len plus any skipped overlap on success and to 0 on
+  *                failure; untouched on the earlier error returns
+  * buffer         caller's data, copied before it is queued
+  *
+  * Returns 0 on success, -EISDIR when the handle is not a file, -ESTALE
+  * when the object is marked deleted, and -EIO when the transaction
+  * cannot be started or continued.  A failed transaction is deleted. */
+ int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
+                          void *buffer)
+ {
+   using std::get;
+   using WriteCompletion = RGWLibFS::WriteCompletion;
+
+   lock_guard guard(mtx);
+
+   int rc = 0;
+
+   file* f = get<file>(&variant_type);
+   if (! f)
+     return -EISDIR;
+
+   if (deleted()) {
+     lsubdout(fs->get_context(), rgw, 5)
+       << __func__
+       << " write attempted on deleted object "
+       << this->object_name()
+       << dendl;
+     /* zap write transaction, if any */
+     if (f->write_req) {
+       delete f->write_req;
+       f->write_req = nullptr;
+     }
+     return -ESTALE;
+   }
+
+   if (! f->write_req) {
+     /* guard--we do not support (e.g., COW-backed) partial writes */
+     if (off != 0) {
+       lsubdout(fs->get_context(), rgw, 5)
+         << __func__
+         << " " << object_name()
+         << " non-0 initial write position " << off
+         << " (mounting with -o sync required)"
+         << dendl;
+       return -EIO;
+     }
+
+     /* start */
+     std::string object_name = relative_object_name();
+     f->write_req =
+       new RGWWriteRequest(fs->get_context(), fs->get_user(), this,
+                           bucket_name(), object_name);
+     rc = rgwlib.get_fe()->start_req(f->write_req);
+     if (rc < 0) {
+       lsubdout(fs->get_context(), rgw, 5)
+         << __func__
+         << " " << this->object_name()
+         << " write start failed " << off
+         << " (" << rc << ")"
+         << dendl;
+       /* zap failed write transaction */
+       delete f->write_req;
+       f->write_req = nullptr;
+       return -EIO;
+     } else {
+       if (stateless_open()) {
+         /* start write timer */
+         f->write_req->timer_id =
+           RGWLibFS::write_timer.add_event(
+             std::chrono::seconds(RGWLibFS::write_completion_interval_s),
+             WriteCompletion(*this));
+       }
+     }
+   }
+
+   /* if the caller re-sends a range that ends at or beyond the current
+    * write position, skip the part that was already written */
+   int overlap = 0;
+   if ((static_cast<off_t>(off) < f->write_req->real_ofs) &&
+       ((f->write_req->real_ofs - off) <= len)) {
+     overlap = f->write_req->real_ofs - off;
+     off = f->write_req->real_ofs;
+     buffer = static_cast<char*>(buffer) + overlap;
+     len -= overlap;
+   }
+
+   buffer::list bl;
+   /* XXXX */
+#if 0
+   bl.push_back(
+     buffer::create_static(len, static_cast<char*>(buffer)));
+#else
+   bl.push_back(
+     buffer::copy(static_cast<char*>(buffer), len));
+#endif
+
+   f->write_req->put_data(off, bl);
+   rc = f->write_req->exec_continue();
+
+   if (rc == 0) {
+     size_t min_size = off + len;
+     if (min_size > get_size())
+       set_size(min_size);
+     if (stateless_open()) {
+       /* bump write timer, using the same interval it was started with */
+       RGWLibFS::write_timer.adjust_event(
+         f->write_req->timer_id,
+         std::chrono::seconds(RGWLibFS::write_completion_interval_s));
+     }
+   } else {
+     /* continuation failed (e.g., non-contiguous write position) */
+     lsubdout(fs->get_context(), rgw, 5)
+       << __func__
+       << " " << object_name()
+       << " failed write at position " << off
+       << " (fails write transaction) "
+       << dendl;
+     /* zap failed write transaction */
+     delete f->write_req;
+     f->write_req = nullptr;
+     rc = -EIO;
+   }
+
+   *bytes_written = (rc == 0) ? (len + overlap) : 0;
+   return rc;
+ } /* RGWFileHandle::write */
+
+ /* Finish and discard the file's write transaction, if there is one.
+  * The handle mutex is taken here unless the caller passes FLAG_LOCKED.
+  *
+  * Returns 0 when there is nothing to finish; otherwise the result of
+  * finish_req(), or, when that is 0, the request's own return code. */
+ int RGWFileHandle::write_finish(uint32_t flags)
+ {
+   unique_lock guard{mtx, std::defer_lock};
+   if ((flags & FLAG_LOCKED) == 0) {
+     guard.lock();
+   }
+
+   file* f = get<file>(&variant_type);
+   if ((! f) || (! f->write_req)) {
+     return 0;
+   }
+
+   lsubdout(fs->get_context(), rgw, 10)
+     << __func__
+     << " finishing write trans on " << object_name()
+     << dendl;
+
+   int rc = rgwlib.get_fe()->finish_req(f->write_req);
+   if (rc == 0) {
+     rc = f->write_req->get_ret();
+   }
+
+   /* the transaction is over either way */
+   delete f->write_req;
+   f->write_req = nullptr;
+
+   return rc;
+ } /* RGWFileHandle::write_finish */
+
+ /* Close the handle: finish any write transaction while holding the
+  * handle mutex, then clear both open flags.  Returns the result of
+  * write_finish(). */
+ int RGWFileHandle::close()
+ {
+   lock_guard guard(mtx);
+
+   /* mtx is already held, so tell write_finish() not to take it */
+   const int finish_rc = write_finish(FLAG_LOCKED);
+
+   flags &= ~(FLAG_OPEN | FLAG_STATELESS_OPEN);
+
+   return finish_rc;
+ } /* RGWFileHandle::close */
+
+ RGWFileHandle::file::~file()
+ { /* release the write transaction still attached to this file, if
+    * any (deleting a null pointer is a no-op) */
+ delete write_req;
+ }
+
+ /* Reset the enumeration state of a directory handle: link count back
+  * to 2 and an empty last_marker.  Does nothing for other handle
+  * types. */
+ void RGWFileHandle::clear_state()
+ {
+   if (directory* dir = get<directory>(&variant_type)) {
+     state.nlink = 2;
+     dir->last_marker = rgw_obj_key{};
+   }
+ }
+
+ /* Intended for use on directories: fast-forward mtime so as to ensure
+  * a new, higher value for the change attribute.  The handle mutex is
+  * taken here unless the caller passes FLAG_LOCKED. */
+ void RGWFileHandle::advance_mtime(uint32_t flags) {
+   unique_lock uniq(mtx, std::defer_lock);
+   if (likely(! (flags & RGWFileHandle::FLAG_LOCKED))) {
+     uniq.lock();
+   }
+
+   auto now = real_clock::now();
+
+   /* the stored mtime is only advanced once it is older than the
+    * configured namespace expiration */
+   auto expiry = state.mtime;
+   expiry.tv_sec +=
+     fs->get_context()->_conf->rgw_nfs_namespace_expire_secs;
+
+   if (expiry < real_clock::to_timespec(now)) {
+     /* sets ctime as well as mtime, to avoid masking updates should
+      * ctime inexplicably hold a higher value */
+     set_times(now);
+   }
+ }
+
+ /* Call the filesystem's invalidate callback, if one is registered,
+  * with the registered argument and this handle's hash key. */
+ void RGWFileHandle::invalidate() {
+   RGWLibFS *owner = get_fs();
+   if (! owner->invalidate_cb) {
+     return;
+   }
+   owner->invalidate_cb(owner->invalidate_arg, get_key().fh_hk);
+ }
+
+ /* Begin a write transaction: validate the request, fetch parameters
+  * and versioning info, then build the aio throttle, the put-object
+  * processor and, when the placement rule's compression type is not
+  * "none" and its plugin loads, a compression filter on top of it.
+  *
+  * Returns op_ret: -EINVAL for an empty object name, the first negative
+  * result of get_params(), get_system_versioning_params() or
+  * processor->prepare(), otherwise the result of prepare(). */
+ int RGWWriteRequest::exec_start() {
+   struct req_state* s = get_state();
+
+   auto compression_type =
+     get_store()->svc.zone->get_zone_params().get_compression_type(
+       s->bucket_info.placement_rule);
+
+   /* not obviously supportable */
+   ceph_assert(! dlo_manifest);
+   ceph_assert(! slo_info);
+
+   perfcounter->inc(l_rgw_put);
+   op_ret = -EINVAL;
+   rgw_obj obj{s->bucket, s->object};
+
+   if (s->object.empty()) {
+     ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
+     return op_ret;
+   }
+
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return op_ret;
+   }
+
+   op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+   if (op_ret < 0) {
+     return op_ret;
+   }
+
+   /* user-supplied MD5 check skipped (not supplied) */
+   /* early quota check skipped--we don't have size yet */
+   /* skipping user-supplied etag--we might have one in future, but
+    * like data it and other attrs would arrive after open */
+
+   aio.emplace(s->cct->_conf->rgw_put_obj_min_window_size);
+
+   if (s->bucket_info.versioning_enabled()) {
+     if (version_id.empty()) {
+       /* no instance requested: generate one and remember it */
+       get_store()->gen_rand_obj_instance_name(&obj);
+       version_id = obj.key.instance;
+     } else {
+       obj.key.set_instance(version_id);
+     }
+   }
+   processor.emplace(&*aio, get_store(), s->bucket_info,
+                     &s->dest_placement,
+                     s->bucket_owner.get_id(),
+                     *static_cast<RGWObjectCtx *>(s->obj_ctx),
+                     obj, olh_epoch, s->req_id);
+
+   op_ret = processor->prepare();
+   if (op_ret < 0) {
+     ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
+                       << dendl;
+     return op_ret;
+   }
+
+   /* data goes straight to the processor unless a compressor can be
+    * stacked in front of it */
+   filter = &*processor;
+   if (compression_type != "none") {
+     plugin = Compressor::create(s->cct, compression_type);
+     if (plugin) {
+       compressor.emplace(s->cct, plugin, filter);
+       filter = &*compressor;
+     } else {
+       ldout(s->cct, 1) << "Cannot load plugin for rgw_compression_type "
+                        << compression_type << dendl;
+     }
+   }
+
+   return op_ret;
+ } /* exec_start */
+
+ /* Push the currently buffered data through the filter chain.
+  *
+  * Returns -EIO when the eio guard is set or the quota check fails, 0
+  * when there is no data to process, and otherwise the result of
+  * filter->process(); bytes_written grows by the data length only when
+  * processing succeeded. */
+ int RGWWriteRequest::exec_continue()
+ {
+   struct req_state* s = get_state();
+   op_ret = 0;
+
+   /* check guards (e.g., contig write) */
+   if (eio) {
+     ldout(s->cct, 5)
+       << " chunks arrived in wrong order"
+       << " (mounting with -o sync required)"
+       << dendl;
+     return -EIO;
+   }
+
+   op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
+                                     user_quota, bucket_quota, real_ofs, true);
+   /* max_size exceed */
+   if (op_ret < 0) {
+     return -EIO;
+   }
+
+   const size_t chunk_len = data.length();
+   if (chunk_len == 0) {
+     return 0;
+   }
+
+   /* fold the chunk into the running digest before handing it off */
+   hash.Update(reinterpret_cast<const unsigned char*>(data.c_str()),
+               data.length());
+   op_ret = filter->process(std::move(data), ofs);
+   if (op_ret >= 0) {
+     bytes_written += chunk_len;
+   }
+
+   return op_ret;
+ } /* exec_continue */
+
+ int RGWWriteRequest::exec_finish()
+ { /* Complete the write transaction: flush the filter chain, re-check
+    * quota and bucket shards, build the object's attributes
+    * (compression info, etag, ACL, unix key/attrs, generic and request
+    * metadata, delete-at) and hand them to processor->complete().
+    * Returns op_ret; every path that reaches `done` also updates the
+    * l_rgw_put_lat counter. */
+ buffer::list bl, aclbl, ux_key, ux_attrs;
+ map<string, string>::iterator iter;
+ char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+ unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ struct req_state* s = get_state();
+
+ size_t osize = rgw_fh->get_size(); /* old size/ctime/mtime are saved so
+                                      * they can be restored if
+                                      * complete() fails */
+ struct timespec octime = rgw_fh->get_ctime();
+ struct timespec omtime = rgw_fh->get_mtime();
+ real_time appx_t = real_clock::now();
+
+ s->obj_size = bytes_written;
+ perfcounter->inc(l_rgw_put_b, s->obj_size);
+
+ // flush data in filters
+ op_ret = filter->process({}, s->obj_size);
+ if (op_ret < 0) {
+ goto done;
+ }
+
+ op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
+ user_quota, bucket_quota, s->obj_size, true);
+ /* max_size exceed */
+ if (op_ret < 0) {
+ goto done;
+ }
+
+ op_ret = get_store()->check_bucket_shards(s->bucket_info, s->bucket,
+ bucket_quota);
+ if (op_ret < 0) {
+ goto done;
+ }
+
+ hash.Final(m); /* finalize the running digest into m */
+
+ if (compressor && compressor->is_compressed()) {
+ bufferlist tmp;
+ RGWCompressionInfo cs_info;
+ cs_info.compression_type = plugin->get_type_name();
+ cs_info.orig_size = s->obj_size;
+ cs_info.blocks = std::move(compressor->get_compression_blocks());
+ encode(cs_info, tmp);
+ attrs[RGW_ATTR_COMPRESSION] = tmp;
+ ldout(s->cct, 20) << "storing " << RGW_ATTR_COMPRESSION
+ << " with type=" << cs_info.compression_type
+ << ", orig_size=" << cs_info.orig_size
+ << ", blocks=" << cs_info.blocks.size() << dendl;
+ }
+
+ buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+ etag = calc_md5;
+
+ bl.append(etag.c_str(), etag.size() + 1); /* +1 stores the terminating NUL too */
+ emplace_attr(RGW_ATTR_ETAG, std::move(bl));
+
+ policy.encode(aclbl);
+ emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+
+ /* unix attrs: the handle is updated first so that the encoded
+  * attributes carry the new times and size */
+ rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
+ rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
+ rgw_fh->set_size(bytes_written);
+ rgw_fh->encode_attrs(ux_key, ux_attrs);
+
+ emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+ emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
+
+ for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
+ ++iter) {
+ buffer::list& attrbl = attrs[iter->first];
+ const string& val = iter->second;
+ attrbl.append(val.c_str(), val.size() + 1);
+ }
+
+ op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
+ if (op_ret < 0) {
+ goto done; /* NOTE(review): the handle's times and size set above are
+             * not restored on this path -- confirm this is intended */
+ }
+ encode_delete_at_attr(delete_at, attrs);
+
+ /* Add a custom metadata to expose the information whether an object
+ * is an SLO or not. Appending the attribute must be performed AFTER
+ * processing any input from user in order to prohibit overwriting. */
+ if (unlikely(!! slo_info)) {
+ buffer::list slo_userindicator_bl;
+ using ceph::encode;
+ encode("True", slo_userindicator_bl);
+ emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
+ }
+
+ op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
+ (delete_at ? *delete_at : real_time()),
+ if_match, if_nomatch, nullptr, nullptr, nullptr);
+ if (op_ret != 0) {
+ /* revert attr updates */
+ rgw_fh->set_mtime(omtime);
+ rgw_fh->set_ctime(octime);
+ rgw_fh->set_size(osize);
+ }
+
+ done:
+ perfcounter->tinc(l_rgw_put_lat, s->time_elapsed());
+ return op_ret;
+ } /* exec_finish */
+
+} /* namespace rgw */
+
+/* librgw */
+extern "C" {
+
+/*
+ report the librgw_file version; each output pointer may be null, in
+ which case that component is skipped
+*/
+void rgwfile_version(int *major, int *minor, int *extra)
+{
+  auto store = [](int *out, int value) {
+    if (out != nullptr) {
+      *out = value;
+    }
+  };
+
+  store(major, LIBRGW_FILE_VER_MAJOR);
+  store(minor, LIBRGW_FILE_VER_MINOR);
+  store(extra, LIBRGW_FILE_VER_EXTRA);
+}
+
+/*
+ attach rgw namespace
+
+ Creates an RGWLibFS rooted at "/" for the given credentials and
+ authorizes it against the store.  On success the filesystem is
+ registered for shared gc, its rgw_fs is stored in *rgw_fs and 0 is
+ returned; when authorization fails the filesystem is destroyed and
+ -EINVAL is returned.
+*/
+ int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
+               const char *sec_key, struct rgw_fs **rgw_fs,
+               uint32_t flags)
+{
+  /* stash access data for "mount" */
+  auto* cct = static_cast<CephContext*>(rgw);
+  RGWLibFS* libfs = new RGWLibFS(cct, uid, acc_key, sec_key, "/");
+  ceph_assert(libfs);
+
+  const int auth_rc = libfs->authorize(rgwlib.get_store());
+  if (auth_rc != 0) {
+    delete libfs;
+    return -EINVAL;
+  }
+
+  /* register fs for shared gc */
+  rgwlib.get_fe()->get_process()->register_fs(libfs);
+
+  struct rgw_fs *mounted = libfs->get_fs();
+  mounted->rgw = rgw;
+
+  /* XXX we no longer assume "/" is unique, but we aren't tracking the
+   * roots atm */
+
+  *rgw_fs = mounted;
+
+  return 0;
+}
+
+/*
+ attach rgw namespace at a caller-supplied root
+
+ Same sequence as rgw_mount, except that the RGWLibFS is created with
+ root instead of "/".  Returns 0 on success and -EINVAL when
+ authorization fails.
+*/
+int rgw_mount2(librgw_t rgw, const char *uid, const char *acc_key,
+               const char *sec_key, const char *root, struct rgw_fs **rgw_fs,
+               uint32_t flags)
+{
+  /* stash access data for "mount" */
+  auto* cct = static_cast<CephContext*>(rgw);
+  RGWLibFS* libfs = new RGWLibFS(cct, uid, acc_key, sec_key, root);
+  ceph_assert(libfs);
+
+  const int auth_rc = libfs->authorize(rgwlib.get_store());
+  if (auth_rc != 0) {
+    delete libfs;
+    return -EINVAL;
+  }
+
+  /* register fs for shared gc */
+  rgwlib.get_fe()->get_process()->register_fs(libfs);
+
+  struct rgw_fs *mounted = libfs->get_fs();
+  mounted->rgw = rgw;
+
+  /* XXX we no longer assume "/" is unique, but we aren't tracking the
+   * roots atm */
+
+  *rgw_fs = mounted;
+
+  return 0;
+}
+
+/*
+ register invalidate callbacks
+
+ Forwards cb, arg and flags to the RGWLibFS behind rgw_fs and returns
+ its result.
+*/
+int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
+                            void *arg, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  const int rc = libfs->register_invalidate(cb, arg, flags);
+  return rc;
+}
+
+/*
+ detach rgw namespace
+
+ Closes the RGWLibFS behind rgw_fs; always returns 0.
+*/
+int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  libfs->close();
+  return 0;
+}
+
+/*
+ get filesystem attributes
+
+ Fills vfs_st from cluster-wide statistics obtained with an
+ RGWGetClusterStatReq.  Sizes are reported in 1M blocks, converted from
+ the KB counts in the cluster stats.  Returns 0 on success or the
+ negative result of the request.
+*/
+int rgw_statfs(struct rgw_fs *rgw_fs,
+               struct rgw_file_handle *parent_fh,
+               struct rgw_statvfs *vfs_st, uint32_t flags)
+{
+  RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  struct rados_cluster_stat_t stats;
+
+  RGWGetClusterStatReq req(fs->get_context(), fs->get_user(), stats);
+  int rc = rgwlib.get_fe()->execute_req(&req);
+  if (rc < 0) {
+    /* keep the message and the error text apart in the log */
+    lderr(fs->get_context()) << "ERROR: getting total cluster usage: "
+                             << cpp_strerror(-rc) << dendl;
+    return rc;
+  }
+
+  //Set block size to 1M.
+  constexpr uint32_t CEPH_BLOCK_SHIFT = 20;
+  vfs_st->f_bsize = 1 << CEPH_BLOCK_SHIFT;
+  vfs_st->f_frsize = 1 << CEPH_BLOCK_SHIFT;
+  /* KB -> blocks: shift by the block shift less the 10 bits of a KB */
+  vfs_st->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
+  vfs_st->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
+  vfs_st->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
+  vfs_st->f_files = stats.num_objects;
+  vfs_st->f_ffree = -1;
+  vfs_st->f_fsid[0] = fs->get_fsid();
+  vfs_st->f_fsid[1] = fs->get_fsid();
+  vfs_st->f_flag = 0;
+  vfs_st->f_namemax = 4096;
+  return 0;
+}
+
+/*
+ generic create -- create an empty regular file
+
+ The parent must exist and be neither the root nor a file, otherwise
+ -EINVAL is returned.  When a handle is produced it is stored in *fh;
+ the return value is the status reported by RGWLibFS::create().
+*/
+int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
+               const char *name, struct stat *st, uint32_t mask,
+               struct rgw_file_handle **fh, uint32_t posix_flags,
+               uint32_t flags)
+{
+  using std::get;
+
+  RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* parent = get_rgwfh(parent_fh);
+
+  const bool bad_parent =
+    (! parent) || parent->is_root() || parent->is_file();
+  if (bad_parent) {
+    return -EINVAL;
+  }
+
+  MkObjResult result = fs->create(parent, name, st, mask, flags);
+
+  /* the handle slot is nullptr if !success */
+  if (RGWFileHandle *created = get<0>(result)) {
+    *fh = created->get_fh();
+  }
+
+  return get<1>(result);
+} /* rgw_create */
+
+/*
+ create a symbolic link
+
+ The parent must exist and be neither the root nor a file, otherwise
+ -EINVAL is returned.  When a handle is produced it is stored in *fh;
+ the return value is the status reported by RGWLibFS::symlink().
+ */
+int rgw_symlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
+                const char *name, const char *link_path, struct stat *st, uint32_t mask,
+                struct rgw_file_handle **fh, uint32_t posix_flags,
+                uint32_t flags)
+{
+  using std::get;
+
+  RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* parent = get_rgwfh(parent_fh);
+
+  const bool bad_parent =
+    (! parent) || parent->is_root() || parent->is_file();
+  if (bad_parent) {
+    return -EINVAL;
+  }
+
+  MkObjResult result = fs->symlink(parent, name, link_path, st, mask, flags);
+
+  /* the handle slot is nullptr if !success */
+  if (RGWFileHandle *created = get<0>(result)) {
+    *fh = created->get_fh();
+  }
+
+  return get<1>(result);
+} /* rgw_symlink */
+
+/*
+ create a new directory
+
+ Returns -EINVAL when the parent handle is missing.  When a handle is
+ produced it is stored in *fh; the return value is the status reported
+ by RGWLibFS::mkdir().
+*/
+int rgw_mkdir(struct rgw_fs *rgw_fs,
+              struct rgw_file_handle *parent_fh,
+              const char *name, struct stat *st, uint32_t mask,
+              struct rgw_file_handle **fh, uint32_t flags)
+{
+  using std::get;
+
+  RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* parent = get_rgwfh(parent_fh);
+
+  if (parent == nullptr) {
+    /* bad parent */
+    return -EINVAL;
+  }
+
+  MkObjResult result = fs->mkdir(parent, name, st, mask, flags);
+
+  /* the handle slot is nullptr if !success */
+  if (RGWFileHandle *created = get<0>(result)) {
+    *fh = created->get_fh();
+  }
+
+  return get<1>(result);
+} /* rgw_mkdir */
+
+/*
+ rename object
+
+ Resolves both directory handles and forwards to RGWLibFS::rename(),
+ returning its result.
+*/
+int rgw_rename(struct rgw_fs *rgw_fs,
+               struct rgw_file_handle *src, const char* src_name,
+               struct rgw_file_handle *dst, const char* dst_name,
+               uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+
+  RGWFileHandle* from_dir = get_rgwfh(src);
+  RGWFileHandle* to_dir = get_rgwfh(dst);
+
+  const int rc = libfs->rename(from_dir, to_dir, src_name, dst_name);
+  return rc;
+}
+
+/*
+ remove file or directory
+
+ Resolves the parent handle and forwards to RGWLibFS::unlink(),
+ returning its result.
+*/
+int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
+               const char *name, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* dir = get_rgwfh(parent_fh);
+
+  const int rc = libfs->unlink(dir, name);
+  return rc;
+}
+
+/*
+ lookup object by name (POSIX style)
+
+ Resolves path relative to parent_fh and stores the resulting handle
+ in *fh.  Returns -EINVAL when the parent is missing or is not a
+ directory, -ENOENT when nothing is found (for a non-root parent, only
+ if RGW_LOOKUP_FLAG_CREATE was not given), and 0 otherwise.
+*/
+int rgw_lookup(struct rgw_fs *rgw_fs,
+ struct rgw_file_handle *parent_fh, const char* path,
+ struct rgw_file_handle **fh,
+ struct stat *st, uint32_t mask, uint32_t flags)
+{
+ //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
+ RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+
+ RGWFileHandle* parent = get_rgwfh(parent_fh);
+ if ((! parent) ||
+ (! parent->is_dir())) {
+ /* bad parent */
+ return -EINVAL;
+ }
+
+ RGWFileHandle* rgw_fh; /* assigned on every path that reaches `done` */
+ LookupFHResult fhr;
+
+ if (parent->is_root()) {
+ /* special: parent lookup--note lack of ref()! */
+ if (unlikely((strcmp(path, "..") == 0) ||
+ (strcmp(path, "/") == 0))) {
+ rgw_fh = parent;
+ } else {
+ RGWLibFS::BucketStats bstat; /* children of the root are looked up
+                                * with stat_bucket() */
+ fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
+ rgw_fh = get<0>(fhr);
+ if (! rgw_fh)
+ return -ENOENT;
+ }
+ } else {
+ /* special: after readdir--note extra ref()! */
+ if (unlikely((strcmp(path, "..") == 0))) {
+ rgw_fh = parent;
+ lsubdout(fs->get_context(), rgw, 17)
+ << __func__ << " BANG"<< *rgw_fh
+ << dendl;
+ fs->ref(rgw_fh);
+ } else {
+ enum rgw_fh_type fh_type = fh_type_of(flags);
+
+ uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
+ ? RGWFileHandle::FLAG_NONE
+ : RGWFileHandle::FLAG_EXACT_MATCH; /* exact match is requested
+                                      * unless RGW_LOOKUP_FLAG_RCB
+                                      * is set */
+
+ bool fast_attrs= fs->get_context()->_conf->rgw_nfs_s3_fast_attrs;
+
+ if ((flags & RGW_LOOKUP_FLAG_RCB) && fast_attrs) {
+ /* FAKE STAT--this should mean, interpolate special
+ * owner, group, and perms masks */
+ fhr = fs->fake_leaf(parent, path, fh_type, st, mask, sl_flags);
+ } else {
+ if ((fh_type == RGW_FS_TYPE_DIRECTORY) && fast_attrs) {
+ /* trust cached dir, if present */
+ fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
+ if (get<0>(fhr)) {
+ rgw_fh = get<0>(fhr);
+ goto done;
+ }
+ }
+ fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
+ }
+ if (! get<0>(fhr)) {
+ if (! (flags & RGW_LOOKUP_FLAG_CREATE))
+ return -ENOENT;
+ else
+ fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
+ }
+ rgw_fh = get<0>(fhr);
+ }
+ } /* !root */
+
+done:
+ struct rgw_file_handle *rfh = rgw_fh->get_fh();
+ *fh = rfh;
+
+ return 0;
+} /* rgw_lookup */
+
+/*
+ lookup object by handle (NFS style)
+
+ Stores the handle found for *fh_hk in *fh and returns 0, or returns
+ -ENOENT when RGWLibFS::lookup_handle() finds nothing.
+*/
+int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
+                      struct rgw_file_handle **fh, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+
+  RGWFileHandle* found = libfs->lookup_handle(*fh_hk);
+  if (found == nullptr) {
+    /* not found */
+    return -ENOENT;
+  }
+
+  *fh = found->get_fh();
+
+  return 0;
+}
+
+/*
+ * release file handle
+ *
+ * Logs the handle and drops one reference on it via RGWLibFS::unref();
+ * always returns 0.
+ */
+int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
+                uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  lsubdout(libfs->get_context(), rgw, 17)
+    << __func__ << " " << *handle
+    << dendl;
+
+  libfs->unref(handle);
+  return 0;
+}
+
+/*
+ get unix attributes for object
+
+ Forwards to RGWLibFS::getattr() and returns its result.
+*/
+int rgw_getattr(struct rgw_fs *rgw_fs,
+                struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int rc = libfs->getattr(handle, st);
+  return rc;
+}
+
+/*
+ set unix attributes for object
+
+ Forwards to RGWLibFS::setattr() and returns its result.
+*/
+int rgw_setattr(struct rgw_fs *rgw_fs,
+                struct rgw_file_handle *fh, struct stat *st,
+                uint32_t mask, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int rc = libfs->setattr(handle, st, mask, flags);
+  return rc;
+}
+
+/*
+ truncate file
+
+ Currently a no-op: none of the arguments is used and 0 is returned
+ unconditionally.
+*/
+int rgw_truncate(struct rgw_fs *rgw_fs,
+ struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
+{
+ return 0;
+}
+
+/*
+ open file
+
+ Returns -EISDIR when the handle is not a file, otherwise the result
+ of RGWFileHandle::open().
+*/
+int rgw_open(struct rgw_fs *rgw_fs,
+             struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
+{
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  /* XXX
+   * need to track specific opens--at least read opens and
+   * a write open; we need to know when a write open is returned,
+   * that closes a write transaction
+   *
+   * for now, we will support single-open only, it's preferable to
+   * anything we can otherwise do without access to the NFS state
+   */
+  return handle->is_file() ? handle->open(flags) : -EISDIR;
+}
+
+/*
+ close file
+
+ Closes the handle and, when RGW_CLOSE_FLAG_RELE is set, also drops a
+ reference on it.  Returns the result of RGWFileHandle::close().
+*/
+int rgw_close(struct rgw_fs *rgw_fs,
+              struct rgw_file_handle *fh, uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int close_rc = handle->close(/* XXX */);
+
+  const bool release = (flags & RGW_CLOSE_FLAG_RELE) != 0;
+  if (release) {
+    libfs->unref(handle);
+  }
+
+  return close_rc;
+}
+
+/*
+ enumerate a directory, continuing from an integer offset
+
+ Returns -EINVAL when the parent handle is missing.  When *offset is 0
+ and RGW_READDIR_FLAG_DOTDOT is set, "." and ".." are reported first.
+ The return value is the result of RGWFileHandle::readdir().
+*/
+int rgw_readdir(struct rgw_fs *rgw_fs,
+                struct rgw_file_handle *parent_fh, uint64_t *offset,
+                rgw_readdir_cb rcb, void *cb_arg, bool *eof,
+                uint32_t flags)
+{
+  RGWFileHandle* dir = get_rgwfh(parent_fh);
+  if (dir == nullptr) {
+    /* bad parent */
+    return -EINVAL;
+  }
+
+  lsubdout(dir->get_fs()->get_context(), rgw, 15)
+    << __func__
+    << " offset=" << *offset
+    << dendl;
+
+  const bool send_dots =
+    (*offset == 0) && (flags & RGW_READDIR_FLAG_DOTDOT);
+  if (send_dots) {
+    /* send '.' and '..' with their NFS-defined offsets */
+    rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
+    rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
+  }
+
+  return dir->readdir(rcb, cb_arg, offset, eof, flags);
+} /* rgw_readdir */
+
+/* enumeration continuing from name
+ *
+ * Returns -EINVAL when the parent handle is missing.  When name is null
+ * and RGW_READDIR_FLAG_DOTDOT is set, "." and ".." are reported first.
+ * The return value is the result of RGWFileHandle::readdir(). */
+int rgw_readdir2(struct rgw_fs *rgw_fs,
+                 struct rgw_file_handle *parent_fh, const char *name,
+                 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
+                 uint32_t flags)
+{
+  RGWFileHandle* dir = get_rgwfh(parent_fh);
+  if (dir == nullptr) {
+    /* bad parent */
+    return -EINVAL;
+  }
+
+  lsubdout(dir->get_fs()->get_context(), rgw, 15)
+    << __func__
+    << " offset=" << ((name) ? name : "(nil)")
+    << dendl;
+
+  const bool send_dots =
+    (! name) && (flags & RGW_READDIR_FLAG_DOTDOT);
+  if (send_dots) {
+    /* send '.' and '..' with their NFS-defined offsets */
+    rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
+    rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
+  }
+
+  return dir->readdir(rcb, cb_arg, name, eof, flags);
+} /* rgw_readdir2 */
+
+/* project offset of dirent name
+ *
+ * Returns -EINVAL when the parent handle or name is missing (building
+ * a std::string from a null pointer is undefined behavior), otherwise
+ * the result of RGWFileHandle::offset_of(). */
+int rgw_dirent_offset(struct rgw_fs *rgw_fs,
+                      struct rgw_file_handle *parent_fh,
+                      const char *name, int64_t *offset,
+                      uint32_t flags)
+{
+  RGWFileHandle* parent = get_rgwfh(parent_fh);
+  if ((! parent) || (! name)) {
+    /* bad parent or name */
+    return -EINVAL;
+  }
+  std::string sname{name};
+  int rc = parent->offset_of(sname, offset, flags);
+  return rc;
+}
+
+/*
+ read data from file
+
+ Forwards to RGWLibFS::read() and returns its result.
+*/
+int rgw_read(struct rgw_fs *rgw_fs,
+             struct rgw_file_handle *fh, uint64_t offset,
+             size_t length, size_t *bytes_read, void *buffer,
+             uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int rc =
+    libfs->read(handle, offset, length, bytes_read, buffer, flags);
+  return rc;
+}
+
+/*
+ read symbolic link
+
+ Forwards to RGWLibFS::readlink() and returns its result.
+*/
+int rgw_readlink(struct rgw_fs *rgw_fs,
+                 struct rgw_file_handle *fh, uint64_t offset,
+                 size_t length, size_t *bytes_read, void *buffer,
+                 uint32_t flags)
+{
+  auto* libfs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int rc =
+    libfs->readlink(handle, offset, length, bytes_read, buffer, flags);
+  return rc;
+}
+
+/*
+ write data to file
+
+ *bytes_written is zeroed first.  Returns -EISDIR when the handle is
+ not a file.  A handle that is not open is opened implicitly only when
+ RGW_OPEN_FLAG_V3 is set (a failed open is returned as is); otherwise
+ -EPERM is returned.  The final result is that of
+ RGWFileHandle::write().
+*/
+int rgw_write(struct rgw_fs *rgw_fs,
+              struct rgw_file_handle *fh, uint64_t offset,
+              size_t length, size_t *bytes_written, void *buffer,
+              uint32_t flags)
+{
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  *bytes_written = 0;
+
+  if (! handle->is_file()) {
+    return -EISDIR;
+  }
+
+  if (! handle->is_open()) {
+    if (! (flags & RGW_OPEN_FLAG_V3)) {
+      return -EPERM;
+    }
+    const int open_rc = handle->open(flags);
+    if (open_rc != 0) {
+      return open_rc;
+    }
+  }
+
+  return handle->write(offset, length, bytes_written, buffer);
+}
+
+/*
+ read data from file (vector)
+*/
+/* Holder for the result of a vectored read: it takes over the contents
+ * of a buffer list and keeps a pointer to the rgw_vio array that
+ * describes them. */
+class RGWReadV
+{
+  buffer::list bl;
+  struct rgw_vio* vio;
+
+public:
+  /* claims the contents of src; vio_array is stored, not owned here */
+  RGWReadV(buffer::list& src, rgw_vio* vio_array)
+    : vio(vio_array)
+  {
+    bl.claim(src);
+  }
+
+  struct rgw_vio* get_vio() {
+    return vio;
+  }
+
+  const auto& buffers() {
+    return bl.buffers();
+  }
+
+  unsigned /* XXX */ length() {
+    return bl.length();
+  }
+
+};
+
+/* Release callback for a vectored read: destroys the RGWReadV stored
+ * in uio->uio_p1 and frees its storage with ::operator delete. */
+void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
+{
+  auto* holder = static_cast<RGWReadV*>(uio->uio_p1);
+
+  /* destroy and deallocate in two explicit steps */
+  holder->~RGWReadV();
+  ::operator delete(holder);
+}
+
+/* Vectored read.  The implementation is compiled out (#if 0), so the
+ * function currently returns 0 without touching uio.
+ *
+ * The disabled code allocates an RGWReadV followed by one rgw_vio per
+ * buffer in a single block; the vio array therefore begins at rdv + 1,
+ * directly after the RGWReadV object. */
+int rgw_readv(struct rgw_fs *rgw_fs,
+              struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
+{
+#if 0 /* XXX */
+  CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
+  RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+  RGWFileHandle* rgw_fh = get_rgwfh(fh);
+
+  if (! rgw_fh->is_file())
+    return -EINVAL;
+
+  int rc = 0;
+
+  buffer::list bl;
+  RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
+                       rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
+                       bl);
+  req.do_hexdump = false;
+
+  rc = rgwlib.get_fe()->execute_req(&req);
+
+  if (! rc) {
+    RGWReadV* rdv = static_cast<RGWReadV*>(
+      ::operator new(sizeof(RGWReadV) +
+                     (bl.buffers().size() * sizeof(struct rgw_vio))));
+
+    /* rdv is an RGWReadV*, so rdv + 1 is the first byte past the
+     * object; adding sizeof(RGWReadV) would step that many objects */
+    (void) new (rdv)
+      RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv + 1));
+
+    uio->uio_p1 = rdv;
+    uio->uio_cnt = rdv->buffers().size();
+    uio->uio_resid = rdv->length();
+    uio->uio_vio = rdv->get_vio();
+    uio->uio_rele = rgw_readv_rele;
+
+    int ix = 0;
+    auto& buffers = rdv->buffers();
+    for (auto& bp : buffers) {
+      rgw_vio *vio = &(uio->uio_vio[ix]);
+      vio->vio_base = const_cast<char*>(bp.c_str());
+      vio->vio_len = bp.length();
+      vio->vio_u1 = nullptr;
+      vio->vio_p1 = nullptr;
+      ++ix;
+    }
+  }
+
+  return rc;
+#else
+  return 0;
+#endif
+}
+
+/*
+ write data to file (vector)
+*/
+int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
+ rgw_uio *uio, uint32_t flags)
+{
+
+ return -ENOTSUP;
+
+ CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
+ RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
+ RGWFileHandle* rgw_fh = get_rgwfh(fh);
+
+ if (! rgw_fh->is_file())
+ return -EINVAL;
+
+ buffer::list bl;
+ for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
+ rgw_vio *vio = &(uio->uio_vio[ix]);
+ bl.push_back(
+ buffer::create_static(vio->vio_len,
+ static_cast<char*>(vio->vio_base)));
+ }
+
+ std::string oname = rgw_fh->relative_object_name();
+ RGWPutObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
+ oname, bl);
+
+ int rc = rgwlib.get_fe()->execute_req(&req);
+
+ /* XXX update size (in request) */
+
+ return rc;
+}
+
+/*
+ sync written data
+
+ Currently a no-op: none of the arguments is used and 0 is returned
+ unconditionally.
+*/
+int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
+ uint32_t flags)
+{
+ return 0;
+}
+
+/*
+ commit a written range
+
+ Forwards offset and length to RGWFileHandle::commit() with FLAG_NONE
+ and returns its result; the caller's flags are not passed on.
+*/
+int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
+               uint64_t offset, uint64_t length, uint32_t flags)
+{
+  RGWFileHandle* handle = get_rgwfh(fh);
+
+  const int rc = handle->commit(offset, length, RGWFileHandle::FLAG_NONE);
+  return rc;
+}
+
+} /* extern "C" */
diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h
new file mode 100644
index 00000000..13680eee
--- /dev/null
+++ b/src/rgw/rgw_file.h
@@ -0,0 +1,2806 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_FILE_H
+#define RGW_FILE_H
+
+#include "include/rados/rgw_file.h"
+
+/* internal header */
+#include <string.h>
+#include <sys/stat.h>
+#include <stdint.h>
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <mutex>
+#include <vector>
+#include <deque>
+#include <algorithm>
+#include <functional>
+#include <boost/intrusive_ptr.hpp>
+#include <boost/range/adaptor/reversed.hpp>
+#include <boost/container/flat_map.hpp>
+#include <boost/variant.hpp>
+#include <boost/utility/string_ref.hpp>
+#include <boost/optional.hpp>
+#include "xxhash.h"
+#include "include/buffer.h"
+#include "common/cohort_lru.h"
+#include "common/ceph_timer.h"
+#include "rgw_common.h"
+#include "rgw_user.h"
+#include "rgw_lib.h"
+#include "rgw_ldap.h"
+#include "rgw_token.h"
+#include "rgw_putobj_processor.h"
+#include "rgw_aio_throttle.h"
+#include "rgw_compression.h"
+
+
+/* XXX
+ * ASSERT_H somehow not defined after all the above (which bring
+ * in common/debug.h [e.g., dout])
+ */
+#include "include/ceph_assert.h"
+
+
+#define RGW_RWXMODE (S_IRWXU | S_IRWXG | S_IRWXO)
+
+#define RGW_RWMODE (RGW_RWXMODE & \
+ ~(S_IXUSR | S_IXGRP | S_IXOTH))
+
+
+namespace rgw {
+
+ template <typename T>
+ static inline void ignore(T &&) {} // accepts any argument and does nothing with it
+
+
+ namespace bi = boost::intrusive;
+
+ class RGWLibFS;
+ class RGWFileHandle;
+ class RGWWriteRequest;
+
+ /* Orders two timespec values: seconds decide unless they are equal,
+  * in which case nanoseconds decide. */
+ static inline bool operator <(const struct timespec& lhs,
+                               const struct timespec& rhs) {
+   if (lhs.tv_sec != rhs.tv_sec)
+     return lhs.tv_sec < rhs.tv_sec;
+   return lhs.tv_nsec < rhs.tv_nsec;
+ }
+
+ /* Two timespec values are equal when both the seconds and the
+  * nanoseconds fields match. */
+ static inline bool operator ==(const struct timespec& lhs,
+                                const struct timespec& rhs) {
+   if (lhs.tv_sec != rhs.tv_sec)
+     return false;
+   return lhs.tv_nsec == rhs.tv_nsec;
+ }
+
+ /*
+ * XXX
+ * The current 64-bit, non-cryptographic hash used here is intended
+ * for prototyping only.
+ *
+ * However, the invariant being prototyped is that objects be
+ * identifiable by their hash components alone. We believe this can
+ * be legitimately implemented using 128-bit hash values for bucket and
+ * object components, together with a cluster-resident cryptographic
+ * key. Since an MD5 or SHA-1 key is 128 bits and the (fast),
+ * non-cryptographic CityHash128 hash algorithm takes a 128-bit seed,
+ * speculatively we could use that for the final hash computations.
+ *
+ * fh_key pairs the (bucket, object) hash values of a file handle with
+ * a version number.  The name-based constructors hash the text
+ * "<tenant>:<name>" with XXH64 and the fixed seed declared below.
+ */
+ struct fh_key
+ {
+ rgw_fh_hk fh_hk {}; // bucket and object hash values
+ uint32_t version; // 0 after every constructor; decode() overwrites it when struct_v >= 2
+
+ static constexpr uint64_t seed = 8675309; // seed passed to every XXH64 call in this header
+
+ fh_key() : version(0) {}
+
+ fh_key(const rgw_fh_hk& _hk) // not explicit: rgw_fh_hk converts implicitly to fh_key
+ : fh_hk(_hk), version(0) {
+ // nothing
+ }
+
+ fh_key(const uint64_t bk, const uint64_t ok) // both hash values supplied by the caller
+ : version(0) {
+ fh_hk.bucket = bk;
+ fh_hk.object = ok;
+ }
+
+ fh_key(const uint64_t bk, const char *_o, const std::string& _t) // bucket hash given, object hashed here
+ : version(0) {
+ fh_hk.bucket = bk;
+ std::string to = _t + ":" + _o; // hashed text is "<tenant>:<object>"
+ fh_hk.object = XXH64(to.c_str(), to.length(), seed);
+ }
+
+ fh_key(const std::string& _b, const std::string& _o,
+ const std::string& _t /* tenant */)
+ : version(0) {
+ std::string tb = _t + ":" + _b; // "<tenant>:<bucket>"
+ std::string to = _t + ":" + _o; // "<tenant>:<object>"
+ fh_hk.bucket = XXH64(tb.c_str(), tb.length(), seed);
+ fh_hk.object = XXH64(to.c_str(), to.length(), seed);
+ }
+
+ void encode(buffer::list& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(fh_hk.bucket, bl);
+ encode(fh_hk.object, bl);
+ encode((uint32_t)2, bl); // NOTE(review): writes the literal 2, not the `version` member - confirm intended
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(fh_hk.bucket, bl);
+ decode(fh_hk.object, bl);
+ if (struct_v >= 2) { // the trailing uint32 is only read from version-2 encodings
+ decode(version, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ friend std::ostream& operator<<(std::ostream &os, fh_key const &fhk);
+
+ }; /* fh_key */
+
+ WRITE_CLASS_ENCODER(fh_key);
+
+ /* Key ordering: bucket hash first, object hash second.  The version
+  * field takes no part in any of the comparisons below. */
+ inline bool operator<(const fh_key& lhs, const fh_key& rhs)
+ {
+   if (lhs.fh_hk.bucket != rhs.fh_hk.bucket)
+     return lhs.fh_hk.bucket < rhs.fh_hk.bucket;
+   return lhs.fh_hk.object < rhs.fh_hk.object;
+ }
+
+ /* lhs > rhs is rhs < lhs. */
+ inline bool operator>(const fh_key& lhs, const fh_key& rhs)
+ {
+   return rhs < lhs;
+ }
+
+ /* Equal when both hash components match. */
+ inline bool operator==(const fh_key& lhs, const fh_key& rhs)
+ {
+   if (lhs.fh_hk.bucket != rhs.fh_hk.bucket)
+     return false;
+   return lhs.fh_hk.object == rhs.fh_hk.object;
+ }
+
+ /* Different when either hash component differs. */
+ inline bool operator!=(const fh_key& lhs, const fh_key& rhs)
+ {
+   return (lhs.fh_hk.bucket != rhs.fh_hk.bucket) ||
+     (lhs.fh_hk.object != rhs.fh_hk.object);
+ }
+
+ /* lhs <= rhs holds exactly when rhs does not order before lhs. */
+ inline bool operator<=(const fh_key& lhs, const fh_key& rhs)
+ {
+   return !(rhs < lhs);
+ }
+
+ using boost::variant;
+ using boost::container::flat_map;
+
+ typedef std::tuple<bool, bool> DecodeAttrsResult;
+
+ class RGWFileHandle : public cohort::lru::Object
+ {
+ struct rgw_file_handle fh;
+ std::mutex mtx;
+
+ RGWLibFS* fs;
+ RGWFileHandle* bucket;
+ RGWFileHandle* parent;
+ /* const */ std::string name; /* XXX file or bucket name */
+ /* const */ fh_key fhk;
+
+ using lock_guard = std::lock_guard<std::mutex>;
+ using unique_lock = std::unique_lock<std::mutex>;
+
+ /* TODO: keeping just the last marker is sufficient for
+ * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will
+ * be able to hint the name of the next dirent required,
+ * from which we can directly synthesize a RADOS marker.
+ * using marker_cache_t = flat_map<uint64_t, rgw_obj_key>;
+ */
+
+ struct State { /* attribute values held per handle; stat() copies them
+   into a struct stat and create_stat() updates them from one */
+ uint64_t dev; // reported as st_dev by stat()
+ uint64_t size;
+ uint64_t nlink; // starts at 1
+ uint32_t owner_uid; /* XXX need Unix attr */
+ uint32_t owner_gid; /* XXX need Unix attr */
+ mode_t unix_mode; // not initialized by State(); assigned by the RGWFileHandle constructors
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+ uint32_t version; // written by RGWFileHandle::decode() when struct_v >= 2
+ State() : dev(0), size(0), nlink(1), owner_uid(0), owner_gid(0),
+ ctime{0,0}, mtime{0,0}, atime{0,0}, version(0) {}
+ } state;
+
+ struct file {
+ RGWWriteRequest* write_req;
+ file() : write_req(nullptr) {}
+ ~file();
+ };
+
+ struct directory {
+
+ static constexpr uint32_t FLAG_NONE = 0x0000;
+
+ uint32_t flags;
+ rgw_obj_key last_marker;
+ struct timespec last_readdir;
+
+ directory() : flags(FLAG_NONE), last_readdir{0,0} {}
+ };
+
+ void clear_state();
+ void advance_mtime(uint32_t flags = FLAG_NONE);
+
+ boost::variant<file, directory> variant_type;
+
+ uint16_t depth;
+ uint32_t flags;
+
+ ceph::buffer::list etag;
+ ceph::buffer::list acls;
+
+ public:
+ const static std::string root_name;
+
+ static constexpr uint16_t MAX_DEPTH = 256;
+
+ static constexpr uint32_t FLAG_NONE = 0x0000;
+ static constexpr uint32_t FLAG_OPEN = 0x0001; // set by open()
+ static constexpr uint32_t FLAG_ROOT = 0x0002; // set by init_rootfs() for a non-bucket mount
+ static constexpr uint32_t FLAG_CREATE = 0x0004; // reported in LookupFHResult when lookup_fh() made a new handle
+ static constexpr uint32_t FLAG_CREATING = 0x0008; // set by open_for_create(), cleared by clear_creating()
+ static constexpr uint32_t FLAG_SYMBOLIC_LINK = 0x0009; // NOTE(review): 0x0009 == FLAG_OPEN|FLAG_CREATING, not a distinct bit, so (flags & FLAG_SYMBOLIC_LINK) is also non-zero when either of those is set - confirm
+ static constexpr uint32_t FLAG_DIRECTORY = 0x0010;
+ static constexpr uint32_t FLAG_BUCKET = 0x0020;
+ static constexpr uint32_t FLAG_LOCK = 0x0040; // lookup_fh(): return with the handle mutex held
+ static constexpr uint32_t FLAG_DELETED = 0x0080;
+ static constexpr uint32_t FLAG_UNLINK_THIS = 0x0100;
+ static constexpr uint32_t FLAG_LOCKED = 0x0200; // lookup_fh(): do not acquire the handle mutex
+ static constexpr uint32_t FLAG_STATELESS_OPEN = 0x0400; // set by open() when RGW_OPEN_FLAG_V3 is given
+ static constexpr uint32_t FLAG_EXACT_MATCH = 0x0800;
+ static constexpr uint32_t FLAG_MOUNT = 0x1000; // set by init_rootfs(); ref()/unref() in RGWLibFS skip such handles
+
+#define CREATE_FLAGS(x) \
+ ((x) & ~(RGWFileHandle::FLAG_CREATE|RGWFileHandle::FLAG_LOCK))
+
+ static constexpr uint32_t RCB_MASK = \
+ RGW_SETATTR_MTIME|RGW_SETATTR_CTIME|RGW_SETATTR_ATIME|RGW_SETATTR_SIZE;
+
+ friend class RGWLibFS;
+
+ private:
+ explicit RGWFileHandle(RGWLibFS* _fs)
+ : fs(_fs), bucket(nullptr), parent(nullptr), variant_type{directory()},
+ depth(0), flags(FLAG_NONE)
+ { /* Private constructor used for the root handle of a filesystem
+   (RGWLibFS constructs its root_fh member with it).  Produces a
+   directory handle with a zeroed hash key and no parent; init_rootfs()
+   fills in key, name, device id and flags afterwards. */
+ fh.fh_hk.bucket = 0;
+ fh.fh_hk.object = 0;
+ /* root */
+ fh.fh_type = RGW_FS_TYPE_DIRECTORY;
+ variant_type = directory(); // repeats the member initializer above
+ /* stat */
+ state.unix_mode = RGW_RWXMODE|S_IFDIR;
+ /* pointer to self */
+ fh.fh_private = this;
+ }
+
+ uint64_t init_fsid(std::string& uid) {
+ return XXH64(uid.c_str(), uid.length(), fh_key::seed);
+ }
+
+ /* Finish setting up a root handle: derive its hash key from the fsid
+  * and the root object name, record the name and device id, and mark
+  * the handle as a mount - either the namespace root or, when
+  * is_bucket is true, a bucket that acts as its own containing bucket
+  * at depth 1. */
+ void init_rootfs(std::string& fsid, const std::string& object_name,
+                  bool is_bucket) {
+   name = object_name;
+
+   /* fh_key */
+   fh.fh_hk.bucket = XXH64(fsid.c_str(), fsid.length(), fh_key::seed);
+   fh.fh_hk.object = XXH64(name.c_str(), name.length(), fh_key::seed);
+   fhk = fh.fh_hk;
+
+   state.dev = init_fsid(fsid);
+
+   flags |= RGWFileHandle::FLAG_MOUNT;
+   if (! is_bucket) {
+     flags |= RGWFileHandle::FLAG_ROOT;
+     return;
+   }
+
+   flags |= RGWFileHandle::FLAG_BUCKET;
+   bucket = this;
+   depth = 1;
+ }
+
+ public:
+ RGWFileHandle(RGWLibFS* _fs, RGWFileHandle* _parent,
+ const fh_key& _fhk, std::string& _name, uint32_t _flags)
+ : fs(_fs), bucket(nullptr), parent(_parent), name(std::move(_name)),
+ fhk(_fhk), flags(_flags) { /* Builds a non-root handle under _parent.
+   _name is moved from, so the caller's string is left in a moved-from
+   state.  _parent is dereferenced unconditionally and must not be
+   null.  The handle type is derived from the parent (children of the
+   root are buckets) or from _flags. */
+
+ if (parent->is_root()) {
+ fh.fh_type = RGW_FS_TYPE_DIRECTORY;
+ variant_type = directory();
+ flags |= FLAG_BUCKET;
+ } else {
+ bucket = parent->is_bucket() ? parent
+ : parent->bucket;
+ if (flags & FLAG_DIRECTORY) {
+ fh.fh_type = RGW_FS_TYPE_DIRECTORY;
+ variant_type = directory();
+ } else if(flags & FLAG_SYMBOLIC_LINK) { // NOTE(review): FLAG_SYMBOLIC_LINK is 0x0009, so this also matches when only the FLAG_OPEN or FLAG_CREATING bit is set - confirm
+ fh.fh_type = RGW_FS_TYPE_SYMBOLIC_LINK;
+ variant_type = file();
+ } else {
+ fh.fh_type = RGW_FS_TYPE_FILE;
+ variant_type = file();
+ }
+ }
+
+ depth = parent->depth + 1;
+
+ /* save constant fhk */
+ fh.fh_hk = fhk.fh_hk; /* XXX redundant in fh_hk */
+
+ /* inherits parent's fsid */
+ state.dev = parent->state.dev;
+
+ switch (fh.fh_type) {
+ case RGW_FS_TYPE_DIRECTORY:
+ state.unix_mode = RGW_RWXMODE|S_IFDIR;
+ /* virtual directories are always invalid */
+ advance_mtime();
+ break;
+ case RGW_FS_TYPE_FILE:
+ state.unix_mode = RGW_RWMODE|S_IFREG; // files get rw but no execute bits
+ break;
+ case RGW_FS_TYPE_SYMBOLIC_LINK:
+ state.unix_mode = RGW_RWMODE|S_IFLNK;
+ break;
+ default:
+ break;
+ }
+
+ /* pointer to self */
+ fh.fh_private = this;
+ }
+
+ const fh_key& get_key() const {
+ return fhk;
+ }
+
+ directory* get_directory() {
+ return get<directory>(&variant_type);
+ }
+
+ size_t get_size() const { return state.size; }
+
+ const char* stype() {
+ return is_dir() ? "DIR" : "FILE";
+ }
+
+ uint16_t get_depth() const { return depth; }
+
+ struct rgw_file_handle* get_fh() { return &fh; }
+
+ RGWLibFS* get_fs() { return fs; }
+
+ RGWFileHandle* get_parent() { return parent; }
+
+ uint32_t get_owner_uid() const { return state.owner_uid; }
+ uint32_t get_owner_gid() const { return state.owner_gid; }
+
+ struct timespec get_ctime() const { return state.ctime; }
+ struct timespec get_mtime() const { return state.mtime; }
+
+ const ceph::buffer::list& get_etag() const { return etag; }
+ const ceph::buffer::list& get_acls() const { return acls; }
+
+ /* Copy the attributes selected by mask (RGW_SETATTR_* bits) from st
+  * into the cached state of this handle. */
+ void create_stat(struct stat* st, uint32_t mask) {
+   /* ownership */
+   if (mask & RGW_SETATTR_UID)
+     state.owner_uid = st->st_uid;
+   if (mask & RGW_SETATTR_GID)
+     state.owner_gid = st->st_gid;
+
+   /* mode: the file-type bit always follows the handle type; an
+    * unrecognized handle type leaves the mode untouched */
+   if (mask & RGW_SETATTR_MODE) {
+     if (fh.fh_type == RGW_FS_TYPE_DIRECTORY)
+       state.unix_mode = st->st_mode|S_IFDIR;
+     else if (fh.fh_type == RGW_FS_TYPE_FILE)
+       state.unix_mode = st->st_mode|S_IFREG;
+     else if (fh.fh_type == RGW_FS_TYPE_SYMBOLIC_LINK)
+       state.unix_mode = st->st_mode|S_IFLNK;
+   }
+
+   /* timestamps; a directory's mtime is never taken from st */
+   if (mask & RGW_SETATTR_ATIME)
+     state.atime = st->st_atim;
+   if ((mask & RGW_SETATTR_MTIME) &&
+       (fh.fh_type != RGW_FS_TYPE_DIRECTORY))
+     state.mtime = st->st_mtim;
+   if (mask & RGW_SETATTR_CTIME)
+     state.ctime = st->st_ctim;
+ }
+
+ /* Fill *st from the cached state of this handle.  Always returns 0.
+  * For directories advance_mtime(flags) is called before the
+  * timestamps are copied out. */
+ int stat(struct stat* st, uint32_t flags = FLAG_NONE) {
+   /* partial Unix attrs */
+   /* FIPS zeroization audit 20191115: this memset is not security
+    * related. */
+   memset(st, 0, sizeof(struct stat));
+
+   st->st_dev = state.dev;
+   st->st_ino = fh.fh_hk.object; // XXX
+   st->st_mode = state.unix_mode;
+   st->st_uid = state.owner_uid;
+   st->st_gid = state.owner_gid;
+
+   switch (fh.fh_type) {
+   case RGW_FS_TYPE_DIRECTORY:
+     /* virtual directories are always invalid */
+     advance_mtime(flags);
+     st->st_nlink = state.nlink;
+     break;
+   case RGW_FS_TYPE_FILE:
+   case RGW_FS_TYPE_SYMBOLIC_LINK:
+     /* files and symlinks report identical link and size fields */
+     st->st_nlink = 1;
+     st->st_blksize = 4096;
+     st->st_size = state.size;
+     st->st_blocks = (state.size) / 512;
+     break;
+   default:
+     break;
+   }
+
+#ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
+   st->st_atimespec = state.atime;
+   st->st_mtimespec = state.mtime;
+   st->st_ctimespec = state.ctime;
+#else
+   st->st_atim = state.atime;
+   st->st_mtim = state.mtime;
+   st->st_ctim = state.ctime;
+#endif
+
+   return 0;
+ }
+
+ /* Name of the bucket this handle belongs to: root_name for the root,
+  * the handle's own name for a bucket, otherwise the name of the
+  * containing bucket handle. */
+ const std::string& bucket_name() const {
+   return is_root() ? root_name
+     : (is_bucket() ? name : bucket->object_name());
+ }
+
+ const std::string& object_name() const { return name; }
+
+ /* Join the names on the parent chain, outermost first, with '/'.
+  * The walk stops at the root, or at the bucket when omit_bucket is
+  * true.  A leading '/' is emitted only when omit_bucket is false. */
+ std::string full_object_name(bool omit_bucket = false) const {
+   /* collect components leaf-first */
+   std::vector<const std::string*> segments;
+   int reserve = 0;
+   for (const RGWFileHandle* tfh = this;
+        tfh && !tfh->is_root() && !(tfh->is_bucket() && omit_bucket);
+        tfh = tfh->parent) {
+     const std::string& seg = tfh->object_name();
+     segments.push_back(&seg);
+     reserve += (1 + seg.length());
+   }
+
+   /* emit them in reverse, i.e. outermost first */
+   std::string path;
+   path.reserve(reserve);
+   bool first = true;
+   for (auto it = segments.rbegin(); it != segments.rend(); ++it) {
+     if (!first || !omit_bucket)
+       path += "/";
+     path += **it;
+     first = false;
+   }
+   return path;
+ }
+
+ inline std::string relative_object_name() const {
+ return full_object_name(true /* omit_bucket */);
+ }
+
+ /* Bucket-relative name of a child called cbasename: this handle's
+  * relative name, a '/' separator when that name is non-empty and does
+  * not already end in one, the child name, and a trailing '/' when
+  * the child is a directory. */
+ inline std::string format_child_name(const std::string& cbasename,
+                                      bool is_dir) const {
+   std::string child_name = relative_object_name();
+   if (!child_name.empty() && (child_name.back() != '/'))
+     child_name.push_back('/');
+   child_name.append(cbasename);
+   if (is_dir)
+     child_name.push_back('/');
+   return child_name;
+ }
+
+ /* Full path of this handle followed by '/' (when the path is
+  * non-empty) and the given name. */
+ inline std::string make_key_name(const char *name) const {
+   std::string key_name = full_object_name();
+   if (!key_name.empty())
+     key_name.push_back('/');
+   key_name.append(name);
+   return key_name;
+ }
+
+ fh_key make_fhk(const std::string& name);
+
+ void add_marker(uint64_t off, const rgw_obj_key& marker,
+ uint8_t obj_type) { /* Remember marker as the directory's last
+   marker.  off and obj_type are not used.  Does nothing when this
+   handle does not hold a directory. */
+ using std::get;
+ directory* d = get<directory>(&variant_type);
+ if (d) {
+ unique_lock guard(mtx); // the store is done under the handle mutex
+ d->last_marker = marker;
+ }
+ }
+
+ const rgw_obj_key* find_marker(uint64_t off) const { /* Return the
+   stored last marker for any non-zero offset, nullptr for offset 0 or
+   when this handle does not hold a directory.  Unlike add_marker(),
+   no lock is taken here. */
+ using std::get;
+ if (off > 0) {
+ const directory* d = get<directory>(&variant_type);
+ if (d ) {
+ return &d->last_marker;
+ }
+ }
+ return nullptr;
+ }
+
+ int offset_of(const std::string& name, int64_t *offset, uint32_t flags) {
+ if (unlikely(! is_dir())) {
+ return -EINVAL;
+ }
+ *offset = XXH64(name.c_str(), name.length(), fh_key::seed);
+ return 0;
+ }
+
+ bool is_open() const { return flags & FLAG_OPEN; }
+ bool is_root() const { return flags & FLAG_ROOT; }
+ bool is_mount() const { return flags & FLAG_MOUNT; }
+ bool is_bucket() const { return flags & FLAG_BUCKET; }
+ bool is_object() const { return !is_bucket(); }
+ bool is_file() const { return (fh.fh_type == RGW_FS_TYPE_FILE); }
+ bool is_dir() const { return (fh.fh_type == RGW_FS_TYPE_DIRECTORY); }
+ bool is_link() const { return (fh.fh_type == RGW_FS_TYPE_SYMBOLIC_LINK); }
+ bool creating() const { return flags & FLAG_CREATING; }
+ bool deleted() const { return flags & FLAG_DELETED; }
+ bool stateless_open() const { return flags & FLAG_STATELESS_OPEN; }
+ bool has_children() const;
+
+ /* Mark the handle open.  Returns 0 on success, or -EPERM when the
+  * handle is already open.  RGW_OPEN_FLAG_V3 in gsh_flags additionally
+  * marks the open as stateless. */
+ int open(uint32_t gsh_flags) {
+   lock_guard guard(mtx);
+   if (is_open())
+     return -EPERM;
+   if (gsh_flags & RGW_OPEN_FLAG_V3)
+     flags |= FLAG_STATELESS_OPEN;
+   flags |= FLAG_OPEN;
+   return 0;
+ }
+
+ typedef boost::variant<uint64_t*, const char*> readdir_offset;
+
+ int readdir(rgw_readdir_cb rcb, void *cb_arg, readdir_offset offset,
+ bool *eof, uint32_t flags);
+
+ int write(uint64_t off, size_t len, size_t *nbytes, void *buffer);
+
+ int commit(uint64_t offset, uint64_t length, uint32_t flags) {
+ /* NFS3 and NFSv4 COMMIT implementation
+ * the current atomic update strategy doesn't actually permit
+ * clients to read-stable until either CLOSE (NFSv4+) or the
+ * expiration of the active write timer (NFS3). In the
+ * interim, the client may send an arbitrary number of COMMIT
+ * operations which must return a success result */
+ return 0;
+ }
+
+ int write_finish(uint32_t flags = FLAG_NONE);
+ int close();
+
+ void open_for_create() {
+ lock_guard guard(mtx);
+ flags |= FLAG_CREATING;
+ }
+
+ void clear_creating() {
+ lock_guard guard(mtx);
+ flags &= ~FLAG_CREATING;
+ }
+
+ void inc_nlink(const uint64_t n) {
+ state.nlink += n;
+ }
+
+ void set_nlink(const uint64_t n) {
+ state.nlink = n;
+ }
+
+ void set_size(const size_t size) {
+ state.size = size;
+ }
+
+ /* Set ctime, mtime and atime to the same timestamp. */
+ void set_times(const struct timespec &ts) {
+   state.ctime = state.mtime = state.atime = ts;
+ }
+
+ void set_times(real_time t) {
+ set_times(real_clock::to_timespec(t));
+ }
+
+ void set_ctime(const struct timespec &ts) {
+ state.ctime = ts;
+ }
+
+ void set_mtime(const struct timespec &ts) {
+ state.mtime = ts;
+ }
+
+ void set_atime(const struct timespec &ts) {
+ state.atime = ts;
+ }
+
+ void set_etag(const ceph::buffer::list& _etag ) {
+ etag = _etag;
+ }
+
+ void set_acls(const ceph::buffer::list& _acls ) {
+ acls = _acls;
+ }
+
+ void encode(buffer::list& bl) const { /* Serialize the handle type and
+   the cached attributes.  Field order here must match decode(). */
+ ENCODE_START(2, 1, bl);
+ encode(uint32_t(fh.fh_type), bl);
+ encode(state.dev, bl);
+ encode(state.size, bl);
+ encode(state.nlink, bl);
+ encode(state.owner_uid, bl);
+ encode(state.owner_gid, bl);
+ encode(state.unix_mode, bl);
+ for (const auto& t : { state.ctime, state.mtime, state.atime }) {
+ encode(real_clock::from_timespec(t), bl);
+ }
+ encode((uint32_t)2, bl); // NOTE(review): writes the literal 2 rather than state.version; decode() reads this slot into state.version - confirm intended
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { /* Inverse of encode().
+   Asserts that the decoded handle type equals the in-memory one,
+   after first switching the in-memory type to symbolic link when the
+   decoded type says so. */
+ DECODE_START(2, bl);
+ uint32_t fh_type;
+ decode(fh_type, bl);
+ if ((fh.fh_type != fh_type) &&
+ (fh_type == RGW_FS_TYPE_SYMBOLIC_LINK))
+ fh.fh_type = RGW_FS_TYPE_SYMBOLIC_LINK;
+ ceph_assert(fh.fh_type == fh_type);
+ decode(state.dev, bl);
+ decode(state.size, bl);
+ decode(state.nlink, bl);
+ decode(state.owner_uid, bl);
+ decode(state.owner_gid, bl);
+ decode(state.unix_mode, bl);
+ ceph::real_time enc_time;
+ for (auto t : { &(state.ctime), &(state.mtime), &(state.atime) }) { // same order as in encode()
+ decode(enc_time, bl);
+ *t = real_clock::to_timespec(enc_time);
+ }
+ if (struct_v >= 2) { // the trailing uint32 exists only in version-2 encodings
+ decode(state.version, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void encode_attrs(ceph::buffer::list& ux_key1,
+ ceph::buffer::list& ux_attrs1);
+
+ DecodeAttrsResult decode_attrs(const ceph::buffer::list* ux_key1,
+ const ceph::buffer::list* ux_attrs1);
+
+ void invalidate();
+
+ bool reclaim(const cohort::lru::ObjectFactory* newobj_fac) override;
+
+ typedef cohort::lru::LRU<std::mutex> FhLRU;
+
+ struct FhLT
+ {
+ // for internal ordering
+ bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
+ { return (lhs.get_key() < rhs.get_key()); }
+
+ // for external search by fh_key
+ bool operator()(const fh_key& k, const RGWFileHandle& fh) const
+ { return k < fh.get_key(); }
+
+ bool operator()(const RGWFileHandle& fh, const fh_key& k) const
+ { return fh.get_key() < k; }
+ };
+
+ struct FhEQ
+ {
+ bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
+ { return (lhs.get_key() == rhs.get_key()); }
+
+ bool operator()(const fh_key& k, const RGWFileHandle& fh) const
+ { return k == fh.get_key(); }
+
+ bool operator()(const RGWFileHandle& fh, const fh_key& k) const
+ { return fh.get_key() == k; }
+ };
+
+ typedef bi::link_mode<bi::safe_link> link_mode; /* XXX normal */
+#if defined(FHCACHE_AVL)
+ typedef bi::avl_set_member_hook<link_mode> tree_hook_type;
+#else
+ /* RBT */
+ typedef bi::set_member_hook<link_mode> tree_hook_type;
+#endif
+ tree_hook_type fh_hook;
+
+ typedef bi::member_hook<
+ RGWFileHandle, tree_hook_type, &RGWFileHandle::fh_hook> FhHook;
+
+#if defined(FHCACHE_AVL)
+ typedef bi::avltree<RGWFileHandle, bi::compare<FhLT>, FhHook> FHTree;
+#else
+ typedef bi::rbtree<RGWFileHandle, bi::compare<FhLT>, FhHook> FhTree;
+#endif
+ typedef cohort::lru::TreeX<RGWFileHandle, FhTree, FhLT, FhEQ, fh_key,
+ std::mutex> FHCache;
+
+ ~RGWFileHandle() override;
+
+ friend std::ostream& operator<<(std::ostream &os,
+ RGWFileHandle const &rgw_fh);
+
+ class Factory : public cohort::lru::ObjectFactory
+ { /* Carries the constructor arguments for one RGWFileHandle so that
+   the LRU can either allocate a new handle or rebuild one in the
+   storage of an existing object.  fhk and name are held by
+   reference, so the referenced objects must outlive the factory. */
+ public:
+ RGWLibFS* fs;
+ RGWFileHandle* parent;
+ const fh_key& fhk;
+ std::string& name; // the RGWFileHandle constructor moves from this string, so it is left moved-from after the first alloc()/recycle()
+ uint32_t flags;
+
+ Factory() = delete;
+
+ Factory(RGWLibFS* _fs, RGWFileHandle* _parent,
+ const fh_key& _fhk, std::string& _name, uint32_t _flags)
+ : fs(_fs), parent(_parent), fhk(_fhk), name(_name),
+ flags(_flags) {}
+
+ void recycle (cohort::lru::Object* o) override {
+ /* re-use an existing object */
+ o->~Object(); // call lru::Object virtual dtor
+ // placement new!
+ new (o) RGWFileHandle(fs, parent, fhk, name, flags); // assumes o's storage was allocated for an RGWFileHandle - TODO confirm
+ }
+
+ cohort::lru::Object* alloc() override {
+ return new RGWFileHandle(fs, parent, fhk, name, flags);
+ }
+ }; /* Factory */
+
+ }; /* RGWFileHandle */
+
+ WRITE_CLASS_ENCODER(RGWFileHandle);
+
+ static inline RGWFileHandle* get_rgwfh(struct rgw_file_handle* fh) {
+ return static_cast<RGWFileHandle*>(fh->fh_private);
+ }
+
+ /* Map the type bits of a lookup flag word to a handle type:
+  * directory, file, or RGW_FS_TYPE_NIL for anything else. */
+ static inline enum rgw_fh_type fh_type_of(uint32_t flags) {
+   const auto type_bits = flags & RGW_LOOKUP_TYPE_FLAGS;
+   if (type_bits == RGW_LOOKUP_FLAG_DIR)
+     return RGW_FS_TYPE_DIRECTORY;
+   if (type_bits == RGW_LOOKUP_FLAG_FILE)
+     return RGW_FS_TYPE_FILE;
+   return RGW_FS_TYPE_NIL;
+ }
+
+ typedef std::tuple<RGWFileHandle*, uint32_t> LookupFHResult;
+ typedef std::tuple<RGWFileHandle*, int> MkObjResult;
+
+ class RGWLibFS
+ {
+ CephContext* cct;
+ struct rgw_fs fs{};
+ RGWFileHandle root_fh;
+ rgw_fh_callback_t invalidate_cb;
+ void *invalidate_arg;
+ bool shutdown;
+
+ mutable std::atomic<uint64_t> refcnt;
+
+ RGWFileHandle::FHCache fh_cache;
+ RGWFileHandle::FhLRU fh_lru;
+
+ std::string uid; // should match user.user_id, iiuc
+
+ RGWUserInfo user;
+ RGWAccessKey key; // XXXX acc_key
+
+ static std::atomic<uint32_t> fs_inst_counter;
+
+ static uint32_t write_completion_interval_s;
+
+ using lock_guard = std::lock_guard<std::mutex>;
+ using unique_lock = std::unique_lock<std::mutex>;
+
+ struct event
+ {
+ enum class type : uint8_t { READDIR } ;
+ type t;
+ const fh_key fhk;
+ struct timespec ts;
+ event(type t, const fh_key& k, const struct timespec& ts)
+ : t(t), fhk(k), ts(ts) {}
+ };
+
+ friend std::ostream& operator<<(std::ostream &os,
+ RGWLibFS::event const &ev);
+
+ using event_vector = /* boost::small_vector<event, 16> */
+ std::vector<event>;
+
+ struct WriteCompletion
+ { /* Callable that closes a file handle.  It takes a reference on the
+   handle through the owning RGWLibFS when constructed and drops it
+   after close() when invoked. */
+ RGWFileHandle& rgw_fh;
+
+ explicit WriteCompletion(RGWFileHandle& _fh) : rgw_fh(_fh) {
+ rgw_fh.get_fs()->ref(&rgw_fh);
+ }
+
+ void operator()() {
+ rgw_fh.close(); /* will finish in-progress write */
+ rgw_fh.get_fs()->unref(&rgw_fh); // balances the ref() taken in the constructor; a never-invoked or copied completion would not balance
+ }
+ };
+
+ static ceph::timer<ceph::mono_clock> write_timer;
+
+ struct State {
+ std::mutex mtx;
+ std::atomic<uint32_t> flags;
+ std::deque<event> events;
+
+ State() : flags(0) {}
+
+ void push_event(const event& ev) {
+ events.push_back(ev);
+ }
+ } state;
+
+ uint32_t new_inst() {
+ return ++fs_inst_counter;
+ }
+
+ friend class RGWFileHandle;
+ friend class RGWLibProcess;
+
+ public:
+
+ static constexpr uint32_t FLAG_NONE = 0x0000;
+ static constexpr uint32_t FLAG_CLOSED = 0x0001;
+
+ struct BucketStats {
+ size_t size;
+ size_t size_rounded;
+ real_time creation_time;
+ uint64_t num_entries;
+ };
+
+ RGWLibFS(CephContext* _cct, const char *_uid, const char *_user_id,
+ const char* _key, const char *root)
+ : cct(_cct), root_fh(this), invalidate_cb(nullptr),
+ invalidate_arg(nullptr), shutdown(false), refcnt(1),
+ fh_cache(cct->_conf->rgw_nfs_fhcache_partitions,
+ cct->_conf->rgw_nfs_fhcache_size),
+ fh_lru(cct->_conf->rgw_nfs_lru_lanes,
+ cct->_conf->rgw_nfs_lru_lane_hiwat),
+ uid(_uid), key(_user_id, _key) { /* Creates a filesystem instance
+   with one reference.  The handle cache and LRU are sized from the
+   rgw_nfs_* configuration values.  A null root or "/" mounts the
+   whole namespace; any other root string is treated as a bucket
+   name. */
+
+ if (!root || !strcmp(root, "/")) {
+ root_fh.init_rootfs(uid, RGWFileHandle::root_name, false);
+ } else {
+ root_fh.init_rootfs(uid, root, true);
+ }
+
+ /* pointer to self */
+ fs.fs_private = this;
+
+ /* expose public root fh */
+ fs.root_fh = root_fh.get_fh();
+
+ new_inst(); // increments the static instance counter; the returned value is discarded
+ }
+
+ friend void intrusive_ptr_add_ref(const RGWLibFS* fs) { // reference-count hook: increments refcnt
+ fs->refcnt.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ friend void intrusive_ptr_release(const RGWLibFS* fs) { // reference-count hook: decrements refcnt and may delete fs
+ if (fs->refcnt.fetch_sub(1, std::memory_order_release) == 0) { // NOTE(review): fetch_sub returns the value held BEFORE the decrement, so the object is deleted only when the count was already 0 (the constructor starts it at 1); the usual idiom compares against 1 - confirm this is intended
+ std::atomic_thread_fence(std::memory_order_acquire);
+ delete fs;
+ }
+ }
+
+ RGWLibFS* ref() {
+ intrusive_ptr_add_ref(this);
+ return this;
+ }
+
+ inline void rele() {
+ intrusive_ptr_release(this);
+ }
+
+ void stop() { shutdown = true; }
+
+ void release_evict(RGWFileHandle* fh) {
+ /* remove from cache, releases sentinel ref */
+ fh_cache.remove(fh->fh.fh_hk.object, fh,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ /* release call-path ref */
+ (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
+ }
+
+ int authorize(RGWRados* store) { /* Authenticate the access key held
+   in `key`.  If a user is found for key.id: returns -EINVAL when the
+   stored secret does not match, -ERR_USER_SUSPENDED when the user is
+   suspended, 0 otherwise.  If no user is found, key.id is decoded as
+   a base64 token and checked against LDAP. */
+ int ret = rgw_get_user_info_by_access_key(store, key.id, user);
+ if (ret == 0) {
+ RGWAccessKey* k = user.get_key(key.id);
+ if (!k || (k->key != key.key))
+ return -EINVAL;
+ if (user.suspended)
+ return -ERR_USER_SUSPENDED;
+ } else {
+ /* try external authenticators (ldap for now) */
+ rgw::LDAPHelper* ldh = rgwlib.get_ldh(); /* !nullptr */
+ RGWToken token;
+ /* boost filters and/or string_ref may throw on invalid input */
+ try {
+ token = rgw::from_base64(key.id);
+ } catch(...) {
+ token = std::string(""); // any decode failure falls back to an empty token string
+ }
+ if (token.valid() && (ldh->auth(token.id, token.key) == 0)) {
+ /* try to store user if it doesn't already exist */
+ if (rgw_get_user_info_by_uid(store, token.id, user) < 0) {
+ int ret = rgw_store_user_info(store, user, NULL, NULL, real_time(),
+ true); // NOTE(review): this inner `ret` shadows the outer one, so the outer `ret` is never updated on the LDAP path
+ if (ret < 0) {
+ lsubdout(get_context(), rgw, 10)
+ << "NOTICE: failed to store new user's info: ret=" << ret
+ << dendl;
+ }
+ }
+ } /* auth success */
+ }
+ return ret; // NOTE(review): on the LDAP path this is still the non-zero result of the failed access-key lookup, even when ldh->auth() succeeded - confirm intended
+ } /* authorize */
+
+ int register_invalidate(rgw_fh_callback_t cb, void *arg, uint32_t flags) {
+ invalidate_cb = cb;
+ invalidate_arg = arg;
+ return 0;
+ }
+
+ /* find RGWFileHandle by id
+  *
+  * Looks fhk up in fh_cache and, on a hit, takes an initial LRU
+  * reference on the handle.  When that reference cannot be taken the
+  * lookup is retried.
+  *
+  * flags:
+  *   FLAG_LOCKED - fh->mtx is neither acquired nor released here (the
+  *                 flag name indicates the caller already holds it)
+  *   FLAG_LOCK   - return with fh->mtx still held
+  *
+  * Returns {handle, FLAG_NONE}; handle is nullptr when the key is not
+  * cached.
+  */
+ LookupFHResult lookup_fh(const fh_key& fhk,
+                          const uint32_t flags = RGWFileHandle::FLAG_NONE) {
+   using std::get;
+
+   // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
+   // the cast transfers a lvalue into a rvalue in the ctor
+   // check the commit message for the full details
+   LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };
+
+   RGWFileHandle::FHCache::Latch lat;
+   bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;
+
+ retry:
+   RGWFileHandle* fh =
+     fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
+                         fhk /* key */, lat /* serializer */,
+                         RGWFileHandle::FHCache::FLAG_LOCK);
+   /* LATCHED */
+   if (fh) {
+     if (likely(! fh_locked))
+       fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
+     /* need initial ref from LRU (fast path) */
+     if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
+       lat.lock->unlock();
+       if (likely(! fh_locked))
+         fh->mtx.unlock();
+       goto retry; /* !LATCHED */
+     }
+     /* LATCHED, LOCKED */
+     /* Only release a lock this function took itself: without the
+      * fh_locked guard a FLAG_LOCKED caller that did not also pass
+      * FLAG_LOCK would have its own mutex released.  This matches the
+      * name-based lookup_fh() overload. */
+     if (! (flags & RGWFileHandle::FLAG_LOCK))
+       if (likely(! fh_locked))
+         fh->mtx.unlock(); /* ! LOCKED */
+   }
+   lat.lock->unlock(); /* !LATCHED */
+   get<0>(fhr) = fh;
+   if (fh) {
+     lsubdout(get_context(), rgw, 17)
+       << __func__ << " 1 " << *fh
+       << dendl;
+   }
+   return fhr;
+ } /* lookup_fh(const fh_key&) */
+
+ /* find or create an RGWFileHandle
+  *
+  * Looks up the child `name` of `parent` in fh_cache.  On a hit an
+  * initial LRU reference is taken; on a miss a handle is created (or
+  * recycled) through fh_lru and inserted into the cache, and
+  * FLAG_CREATE is set in the second element of the result.  Returns
+  * {nullptr, FLAG_NONE} immediately when the filesystem is marked
+  * FLAG_CLOSED.
+  *
+  * flags: FLAG_LOCKED - fh->mtx is not acquired or released on a cache
+  * hit; FLAG_LOCK - return with fh->mtx held.  Both bits are stripped
+  * (via CREATE_FLAGS, together with FLAG_CREATE) before the remaining
+  * flags are passed to a newly constructed handle.
+  */
+ LookupFHResult lookup_fh(RGWFileHandle* parent, const char *name,
+ const uint32_t flags = RGWFileHandle::FLAG_NONE) {
+ using std::get;
+
+ // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
+ // the cast transfers a lvalue into a rvalue in the ctor
+ // check the commit message for the full details
+ LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };
+
+ /* mount is stale? */
+ if (state.flags & FLAG_CLOSED)
+ return fhr;
+
+ RGWFileHandle::FHCache::Latch lat;
+ bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;
+
+ std::string obj_name{name};
+ std::string key_name{parent->make_key_name(name)}; // used only in the log line below
+ fh_key fhk = parent->make_fhk(obj_name);
+
+ lsubdout(get_context(), rgw, 10)
+ << __func__ << " called on "
+ << parent->object_name() << " for " << key_name
+ << " (" << obj_name << ")"
+ << " -> " << fhk
+ << dendl;
+
+ retry:
+ RGWFileHandle* fh =
+ fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
+ fhk /* key */, lat /* serializer */,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ /* LATCHED */
+ if (fh) {
+ if (likely(! fh_locked))
+ fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
+ if (fh->flags & RGWFileHandle::FLAG_DELETED) {
+ /* for now, delay briefly and retry */
+ lat.lock->unlock();
+ if (likely(! fh_locked))
+ fh->mtx.unlock();
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
+ goto retry; /* !LATCHED */
+ }
+ /* need initial ref from LRU (fast path) */
+ if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
+ lat.lock->unlock();
+ if (likely(! fh_locked))
+ fh->mtx.unlock();
+ goto retry; /* !LATCHED */
+ }
+ /* LATCHED, LOCKED */
+ if (! (flags & RGWFileHandle::FLAG_LOCK))
+ if (likely(! fh_locked))
+ fh->mtx.unlock(); /* ! LOCKED */
+ } else {
+ /* make or re-use handle */
+ RGWFileHandle::Factory prototype(this, parent, fhk,
+ obj_name, CREATE_FLAGS(flags)); // the factory holds obj_name by reference; the handle constructor moves from it
+ uint32_t iflags{cohort::lru::FLAG_INITIAL};
+ fh = static_cast<RGWFileHandle*>(
+ fh_lru.insert(&prototype,
+ cohort::lru::Edge::MRU,
+ iflags));
+ if (fh) {
+ /* lock fh (LATCHED) */
+ if (flags & RGWFileHandle::FLAG_LOCK)
+ fh->mtx.lock();
+ if (likely(! (iflags & cohort::lru::FLAG_RECYCLE))) {
+ /* inserts at cached insert iterator, releasing latch */
+ fh_cache.insert_latched(
+ fh, lat, RGWFileHandle::FHCache::FLAG_UNLOCK);
+ } else {
+ /* recycle step invalidates Latch */
+ fh_cache.insert(
+ fhk.fh_hk.object, fh, RGWFileHandle::FHCache::FLAG_NONE);
+ lat.lock->unlock(); /* !LATCHED */
+ }
+ get<1>(fhr) |= RGWFileHandle::FLAG_CREATE;
+ /* ref parent (non-initial ref cannot fail on valid object) */
+ if (! parent->is_mount()) {
+ (void) fh_lru.ref(parent, cohort::lru::FLAG_NONE);
+ }
+ goto out; /* !LATCHED */
+ } else {
+ lat.lock->unlock();
+ goto retry; /* !LATCHED */
+ }
+ }
+ lat.lock->unlock(); /* !LATCHED */
+ out:
+ get<0>(fhr) = fh;
+ if (fh) {
+ lsubdout(get_context(), rgw, 17)
+ << __func__ << " 2 " << *fh
+ << dendl;
+ }
+ return fhr;
+ } /* lookup_fh(RGWFileHandle*, const char *, const uint32_t) */
+
+ inline void unref(RGWFileHandle* fh) {
+ if (likely(! fh->is_mount())) {
+ (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
+ }
+ }
+
+ inline RGWFileHandle* ref(RGWFileHandle* fh) {
+ if (likely(! fh->is_mount())) {
+ fh_lru.ref(fh, cohort::lru::FLAG_NONE);
+ }
+ return fh;
+ }
+
+ int getattr(RGWFileHandle* rgw_fh, struct stat* st);
+
+ int setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
+ uint32_t flags);
+
+ void update_fh(RGWFileHandle *rgw_fh);
+
+ LookupFHResult stat_bucket(RGWFileHandle* parent, const char *path,
+ RGWLibFS::BucketStats& bs,
+ uint32_t flags);
+
+ LookupFHResult fake_leaf(RGWFileHandle* parent, const char *path,
+ enum rgw_fh_type type = RGW_FS_TYPE_NIL,
+ struct stat *st = nullptr, uint32_t mask = 0,
+ uint32_t flags = RGWFileHandle::FLAG_NONE);
+
+ LookupFHResult stat_leaf(RGWFileHandle* parent, const char *path,
+ enum rgw_fh_type type = RGW_FS_TYPE_NIL,
+ uint32_t flags = RGWFileHandle::FLAG_NONE);
+
+ int read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
+ size_t* bytes_read, void* buffer, uint32_t flags);
+
+ int readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
+ size_t* bytes_read, void* buffer, uint32_t flags);
+
+ int rename(RGWFileHandle* old_fh, RGWFileHandle* new_fh,
+ const char *old_name, const char *new_name);
+
+ MkObjResult create(RGWFileHandle* parent, const char *name, struct stat *st,
+ uint32_t mask, uint32_t flags);
+
+ MkObjResult symlink(RGWFileHandle* parent, const char *name,
+ const char *link_path, struct stat *st, uint32_t mask, uint32_t flags);
+
+ MkObjResult mkdir(RGWFileHandle* parent, const char *name, struct stat *st,
+ uint32_t mask, uint32_t flags);
+
+ int unlink(RGWFileHandle* rgw_fh, const char *name,
+ uint32_t flags = FLAG_NONE);
+
+ /* find existing RGWFileHandle
+  *
+  * Returns the cached handle for fh_hk with an initial LRU reference
+  * taken, &root_fh when the key is that of the root handle, or nullptr
+  * when the key is unknown or the filesystem is marked FLAG_CLOSED.
+  * A handle flagged FLAG_DELETED causes a 20 ms sleep and a retry.
+  */
+ RGWFileHandle* lookup_handle(struct rgw_fh_hk fh_hk) {
+
+ if (state.flags & FLAG_CLOSED)
+ return nullptr;
+
+ RGWFileHandle::FHCache::Latch lat;
+ fh_key fhk(fh_hk);
+
+ retry:
+ RGWFileHandle* fh =
+ fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
+ fhk /* key */, lat /* serializer */,
+ RGWFileHandle::FHCache::FLAG_LOCK);
+ /* LATCHED */
+ if (! fh) {
+ if (unlikely(fhk == root_fh.fh.fh_hk)) {
+ /* lookup for root of this fs */
+ fh = &root_fh; // root_fh is returned without taking an LRU reference
+ goto out;
+ }
+ lsubdout(get_context(), rgw, 0)
+ << __func__ << " handle lookup failed " << fhk
+ << dendl;
+ goto out;
+ }
+ fh->mtx.lock();
+ if (fh->flags & RGWFileHandle::FLAG_DELETED) {
+ /* for now, delay briefly and retry */
+ lat.lock->unlock();
+ fh->mtx.unlock(); /* !LOCKED */
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
+ goto retry; /* !LATCHED */
+ }
+ if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
+ lat.lock->unlock();
+ fh->mtx.unlock();
+ goto retry; /* !LATCHED */
+ }
+ /* LATCHED */
+ fh->mtx.unlock(); /* !LOCKED */
+ out:
+ lat.lock->unlock(); /* !LATCHED */
+
+ /* special case: lookup root_fh */
+ if (! fh) { // NOTE(review): the same root-key comparison already ran in the miss branch above, so this looks unreachable with a true condition - confirm
+ if (unlikely(fh_hk == root_fh.fh.fh_hk)) {
+ fh = &root_fh;
+ }
+ }
+
+ return fh;
+ }
+
+ CephContext* get_context() { // returns the cct member; used as the lsubdout() context in lookup_handle()
+ return cct;
+ }
+ /* address of the fs member */
+ struct rgw_fs* get_fs() { return &fs; }
+ /* filesystem id: the dev value held in root_fh.state */
+ uint64_t get_fsid() { return root_fh.state.dev; }
+ /* address of the user member (the object update_user() refreshes) */
+ RGWUserInfo* get_user() { return &user; }
+
+ /* Re-read `user` from the store using key.id.  The lookup writes
+  * directly into `user`, so a copy is taken first and restored when
+  * the call returns non-zero. */
+ void update_user() {
+   RGWUserInfo saved_user = user;
+   const int rc =
+     rgw_get_user_info_by_access_key(rgwlib.get_store(), key.id, user);
+   if (rc == 0)
+     return;
+   user = saved_user;
+ }
+
+ void close();
+ void gc();
+ }; /* RGWLibFS */
+
+/* Build "/<bucket_name>/<object_name>"; both separators are always
+ * emitted, even when either component is empty. */
+static inline std::string make_uri(const std::string& bucket_name,
+                                   const std::string& object_name) {
+  std::string path("/");
+  path.reserve(bucket_name.length() + object_name.length() + 2);
+  path.append(bucket_name).append("/").append(object_name);
+  return path;
+}
+
+/*
+ read directory content (buckets)
+*/
+
+/* RGWListBuckets op that reports every bucket it receives to a readdir
+ * callback (rcb) as a directory entry. */
+class RGWListBucketsRequest : public RGWLibRequest,
+                              public RGWListBuckets /* RGWOp */
+{
+public:
+  RGWFileHandle* rgw_fh;
+  RGWFileHandle::readdir_offset offset;
+  void* cb_arg;
+  rgw_readdir_cb rcb;
+  uint64_t* ioff;
+  size_t ix;
+  uint32_t d_count;
+  bool rcb_eof; // caller forced early stop in readdir cycle
+
+  /* The start marker is derived from _offset: a uint64_t* cookie is
+   * resolved through rgw_fh->find_marker(), a const char* is used as
+   * the marker directly (when non-null). */
+  RGWListBucketsRequest(CephContext* _cct, RGWUserInfo *_user,
+                        RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
+                        void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
+    : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
+      cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0),
+      rcb_eof(false) {
+
+    using boost::get;
+
+    uint64_t** const cookie = get<uint64_t*>(&offset);
+    if (unlikely(cookie != nullptr)) {
+      ioff = *cookie;
+      const auto& cached = rgw_fh->find_marker(*ioff);
+      if (cached) {
+        marker = cached->name;
+      }
+    } else {
+      const char* start_after = get<const char*>(offset);
+      if (start_after) {
+        marker = start_after;
+      }
+    }
+    op = this;
+  }
+
+  bool only_bucket() override { return false; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  /* Fill req_state as for "GET /". */
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    /* XXX derp derp derp */
+    state->relative_uri = "/";
+    info.request_uri = "/"; // XXX
+    info.effective_uri = "/";
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  int get_params() override {
+    limit = -1; /* no limit */
+    return 0;
+  }
+
+  void send_response_begin(bool has_buckets) override {
+    sent_data = true;
+  }
+
+  /* Hand each bucket to operator(); stop and set rcb_eof as soon as the
+   * callback returns 0. */
+  void send_response_data(RGWUserBuckets& buckets) override {
+    if (!sent_data)
+      return;
+    for (const auto& kv : buckets.get_buckets()) {
+      const RGWBucketEnt& ent = kv.second;
+      const boost::string_ref key_ref{kv.first};
+      const int accepted = (*this)(ent.bucket.name, key_ref);
+      if (! accepted) {
+        /* caller cannot accept more */
+        lsubdout(cct, rgw, 5) << "ListBuckets rcb failed"
+                              << " dirent=" << ent.bucket.name
+                              << " call count=" << ix
+                              << dendl;
+        rcb_eof = true;
+        return;
+      }
+      ++ix;
+    }
+  } /* send_response_data */
+
+  void send_response_end() override {
+    // do nothing
+  }
+
+  /* Emit one entry: hash the name into a cookie, publish it through
+   * ioff (if set), record it in rgw_fh's marker cache, count it, and
+   * return whatever rcb returns. */
+  int operator()(const boost::string_ref& name,
+                 const boost::string_ref& marker) {
+    const uint64_t cookie = XXH64(name.data(), name.length(), fh_key::seed);
+    if (ioff != nullptr) {
+      *ioff = cookie;
+    }
+    /* update traversal cache */
+    rgw_fh->add_marker(cookie, rgw_obj_key{marker.data(), ""},
+                       RGW_FS_TYPE_DIRECTORY);
+    ++d_count;
+    return rcb(name.data(), cb_arg, cookie, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
+  }
+
+  /* true when the listing was not truncated and the callback did not
+   * stop the cycle early */
+  bool eof() {
+    if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
+      bool have_offset =
+        unlikely(! get<const char*>(&offset)) ||
+        !! get<const char*>(offset);
+      lsubdout(cct, rgw, 15) << "READDIR offset: " <<
+        ((have_offset) ? offset : "(nil)")
+                             << " is_truncated: " << is_truncated
+                             << dendl;
+    }
+    return !(is_truncated || rcb_eof);
+  }
+
+}; /* RGWListBucketsRequest */
+
+/*
+ read directory content (bucket objects)
+*/
+
+/* RGWListBucket op that lists one directory level of a bucket (prefix =
+ * rgw_fh's relative object name + "/", delimiter '/', up to 1000 keys
+ * per request) and reports every object and common prefix to a readdir
+ * callback (rcb). */
+class RGWReaddirRequest : public RGWLibRequest,
+                          public RGWListBucket /* RGWOp */
+{
+public:
+  RGWFileHandle* rgw_fh;
+  RGWFileHandle::readdir_offset offset;
+  void* cb_arg;
+  rgw_readdir_cb rcb;
+  uint64_t* ioff;
+  size_t ix;
+  uint32_t d_count;
+  bool rcb_eof; // caller forced early stop in readdir cycle
+
+  /* The start marker is derived from _offset: a uint64_t* cookie is
+   * resolved through rgw_fh->find_marker(); a non-null const char* is
+   * treated as a leaf name and qualified with the directory's relative
+   * object name. */
+  RGWReaddirRequest(CephContext* _cct, RGWUserInfo *_user,
+                    RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
+                    void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
+    : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
+      cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0),
+      rcb_eof(false) {
+
+    using boost::get;
+
+    if (unlikely(!! get<uint64_t*>(&offset))) {
+      ioff = get<uint64_t*>(offset);
+      const auto& mk = rgw_fh->find_marker(*ioff);
+      if (mk) {
+        marker = *mk;
+      }
+    } else {
+      const char* mk = get<const char*>(offset);
+      if (mk) {
+        std::string tmark{rgw_fh->relative_object_name()};
+        if (tmark.length() > 0)
+          tmark += "/";
+        tmark += mk;
+        marker = rgw_obj_key{std::move(tmark), "", ""};
+      }
+    }
+
+    default_max = 1000; // XXX was being omitted
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    RGWObjectCtx* rados_ctx
+      = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(rados_ctx);
+    RGWOp::init(rados_ctx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  /* Fill req_state as for "GET /<bucket>/" and set the listing prefix
+   * and delimiter. */
+  int header_init() override {
+    struct req_state* s = get_state();
+    s->info.method = "GET";
+    s->op = OP_GET;
+
+    /* XXX derp derp derp */
+    std::string uri = "/" + rgw_fh->bucket_name() + "/";
+    s->relative_uri = uri;
+    s->info.request_uri = uri; // XXX
+    s->info.effective_uri = uri;
+    s->info.request_params = "";
+    s->info.domain = ""; /* XXX ? */
+
+    // woo
+    s->user = user;
+    s->bucket_tenant = user->user_id.tenant;
+
+    prefix = rgw_fh->relative_object_name();
+    if (prefix.length() > 0)
+      prefix += "/";
+    delimiter = '/';
+
+    return 0;
+  }
+
+  /* Emit one directory entry.
+   *
+   * name   - leaf name; name.data() is passed to rcb as a C string, so
+   *          it must be NUL-terminated where the view ends
+   * marker - key recorded in rgw_fh's marker cache for this entry
+   * t      - time copied into st_atim/st_mtim/st_ctim
+   * fsz    - size copied into st_size
+   * type   - RGW_FS_TYPE_DIRECTORY selects RGW_LOOKUP_FLAG_DIR, anything
+   *          else RGW_LOOKUP_FLAG_FILE
+   *
+   * Returns the callback's result; callers treat 0 as "stop". */
+  int operator()(const boost::string_ref name, const rgw_obj_key& marker,
+                 const ceph::real_time& t, const uint64_t fsz, uint8_t type) {
+
+    assert(name.length() > 0); // all cases handled in callers
+
+    /* hash offset of name in parent (short name) for NFS readdir cookie */
+    uint64_t off = XXH64(name.data(), name.length(), fh_key::seed);
+    if (unlikely(!! ioff)) {
+      *ioff = off;
+    }
+
+    /* update traversal cache */
+    rgw_fh->add_marker(off, marker, type);
+    ++d_count;
+
+    /* set c/mtime and size from bucket index entry */
+    struct stat st = {};
+#ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
+    st.st_atimespec = ceph::real_clock::to_timespec(t);
+    st.st_mtimespec = st.st_atimespec;
+    st.st_ctimespec = st.st_atimespec;
+#else
+    st.st_atim = ceph::real_clock::to_timespec(t);
+    st.st_mtim = st.st_atim;
+    st.st_ctim = st.st_atim;
+#endif
+    st.st_size = fsz;
+
+    return rcb(name.data(), cb_arg, off, &st, RGWFileHandle::RCB_MASK,
+               (type == RGW_FS_TYPE_DIRECTORY) ?
+               RGW_LOOKUP_FLAG_DIR :
+               RGW_LOOKUP_FLAG_FILE);
+  }
+
+  int get_params() override {
+    max = default_max;
+    return 0;
+  }
+
+  /* Walk objs and common_prefixes together in name order and emit each
+   * one through operator(); sets rcb_eof and returns as soon as the
+   * callback asks to stop. */
+  void send_response() override {
+    struct req_state* s = get_state();
+    auto cnow = real_clock::now();
+
+    /* enumerate objs and common_prefixes in parallel,
+     * avoiding increment on an end iterator, which is
+     * undefined */
+
+    class DirIterator
+    {
+      vector<rgw_bucket_dir_entry>& objs;
+      vector<rgw_bucket_dir_entry>::iterator obj_iter;
+
+      map<string, bool>& common_prefixes;
+      map<string, bool>::iterator cp_iter;
+
+      /* leaf names of the current object / common prefix; cp_sref is
+       * NOT refreshed for a prefix that parse_cp() marks as skipped */
+      boost::optional<boost::string_ref> obj_sref;
+      boost::optional<boost::string_ref> cp_sref;
+      bool _skip_cp;
+
+    public:
+
+      DirIterator(vector<rgw_bucket_dir_entry>& objs,
+                  map<string, bool>& common_prefixes)
+        : objs(objs), common_prefixes(common_prefixes), _skip_cp(false)
+        {
+          obj_iter = objs.begin();
+          parse_obj();
+          cp_iter = common_prefixes.begin();
+          parse_cp();
+        }
+
+      bool is_obj() {
+        return (obj_iter != objs.end());
+      }
+
+      bool is_cp(){
+        return (cp_iter != common_prefixes.end());
+      }
+
+      bool eof() {
+        return ((!is_obj()) && (!is_cp()));
+      }
+
+      /* obj_sref := part of the current key after its last '/' */
+      void parse_obj() {
+        if (is_obj()) {
+          boost::string_ref sref{obj_iter->key.name};
+          size_t last_del = sref.find_last_of('/');
+          if (last_del != string::npos)
+            sref.remove_prefix(last_del+1);
+          obj_sref = sref;
+        }
+      } /* parse_obj */
+
+      void next_obj() {
+        ++obj_iter;
+        parse_obj();
+      }
+
+      /* cp_sref := last path segment of the current common prefix, with
+       * the trailing '/' removed; the prefix "/" is only flagged as
+       * skipped */
+      void parse_cp() {
+        if (is_cp()) {
+          /* leading-/ skip case */
+          if (cp_iter->first == "/") {
+            _skip_cp = true;
+            return;
+          } else
+            _skip_cp = false;
+
+          /* it's safest to modify the element in place--a suffix-modifying
+           * string_ref operation is problematic since ULP rgw_file callers
+           * will ultimately need a c-string */
+          /* empty() guard: back() on an empty string is undefined */
+          if (!cp_iter->first.empty() && cp_iter->first.back() == '/')
+            const_cast<std::string&>(cp_iter->first).pop_back();
+
+          boost::string_ref sref{cp_iter->first};
+          size_t last_del = sref.find_last_of('/');
+          if (last_del != string::npos)
+            sref.remove_prefix(last_del+1);
+          cp_sref = sref;
+        } /* is_cp */
+      } /* parse_cp */
+
+      void next_cp() {
+        ++cp_iter;
+        parse_cp();
+      }
+
+      bool skip_cp() {
+        return _skip_cp;
+      }
+
+      /* true when the next entry to emit is the current object.
+       *
+       * A common prefix flagged by parse_cp() as skipped has no valid
+       * cp_sref (it is either unset or left over from the previous
+       * prefix), so it must not be compared: it is routed to the
+       * common-prefix branch, where the caller skips it and advances. */
+      bool entry_is_obj() {
+        if (! is_obj())
+          return false;
+        if (! is_cp())
+          return true;
+        if (_skip_cp)
+          return false;
+        return (obj_sref.get() < cp_sref.get());
+      }
+
+      boost::string_ref get_obj_sref() {
+        return obj_sref.get();
+      }
+
+      boost::string_ref get_cp_sref() {
+        return cp_sref.get();
+      }
+
+      vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() {
+        return obj_iter;
+      }
+
+      map<string, bool>::iterator& get_cp_iter() {
+        return cp_iter;
+      }
+
+    }; /* DirIterator */
+
+    DirIterator di{objs, common_prefixes};
+
+    for (;;) {
+
+      if (di.eof()) {
+        break; // done
+      }
+
+      /* assert: one of is_obj() || is_cp() holds */
+      if (di.entry_is_obj()) {
+        auto sref = di.get_obj_sref();
+        if (sref.empty()) {
+          /* recursive list of a leaf dir (iirc), do nothing */
+        } else {
+          /* send a file entry; the entry is only read, and the iterator
+           * is not advanced until next_obj() below */
+          const auto& obj_entry = *(di.get_obj_iter());
+
+          lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                 << __func__ << " "
+                                 << "list uri=" << s->relative_uri << " "
+                                 << " prefix=" << prefix << " "
+                                 << " obj path=" << obj_entry.key.name
+                                 << " (" << sref << ")" << ""
+                                 << " mtime="
+                                 << real_clock::to_time_t(obj_entry.meta.mtime)
+                                 << " size=" << obj_entry.meta.accounted_size
+                                 << dendl;
+
+          if (! this->operator()(sref, next_marker, obj_entry.meta.mtime,
+                                 obj_entry.meta.accounted_size,
+                                 RGW_FS_TYPE_FILE)) {
+            /* caller cannot accept more */
+            lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                  << " dirent=" << sref.data()
+                                  << " call count=" << ix
+                                  << dendl;
+            rcb_eof = true;
+            return;
+          }
+        }
+        di.next_obj(); // and advance object
+      } else {
+        /* send a dir entry */
+        if (! di.skip_cp()) {
+          auto sref = di.get_cp_sref();
+
+          lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
+                                 << __func__ << " "
+                                 << "list uri=" << s->relative_uri << " "
+                                 << " prefix=" << prefix << " "
+                                 << " cpref=" << sref
+                                 << dendl;
+
+          if (sref.empty()) {
+            /* null path segment--could be created in S3 but has no NFS
+             * interpretation */
+          } else {
+            if (! this->operator()(sref, next_marker, cnow, 0,
+                                   RGW_FS_TYPE_DIRECTORY)) {
+              /* caller cannot accept more */
+              lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop"
+                                    << " dirent=" << sref.data()
+                                    << " call count=" << ix
+                                    << dendl;
+              rcb_eof = true;
+              return;
+            }
+          }
+        }
+        di.next_cp(); // and advance common_prefixes
+      } /* ! di.entry_is_obj() */
+    } /* for (;;) */
+  }
+
+  virtual void send_versioned_response() {
+    send_response();
+  }
+
+  /* true when the listing was not truncated and the callback did not
+   * stop the cycle early */
+  bool eof() {
+    if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
+      bool is_offset =
+        unlikely(! get<const char*>(&offset)) ||
+        !! get<const char*>(offset);
+      lsubdout(cct, rgw, 15) << "READDIR offset: " <<
+        ((is_offset) ? offset : "(nil)")
+                             << " next marker: " << next_marker
+                             << " is_truncated: " << is_truncated
+                             << dendl;
+    }
+    return !is_truncated && !rcb_eof;
+  }
+
+}; /* RGWReaddirRequest */
+
+/*
+ dir has-children predicate (bucket objects)
+*/
+
+/* RGWListBucket op used as a "directory has children" predicate: lists
+ * at most two keys under the directory prefix and sets has_children. */
+class RGWRMdirCheck : public RGWLibRequest,
+                      public RGWListBucket /* RGWOp */
+{
+public:
+  const RGWFileHandle* rgw_fh;
+  bool valid;        // set once send_response() has run
+  bool has_children;
+
+  RGWRMdirCheck (CephContext* _cct, RGWUserInfo *_user,
+                 const RGWFileHandle* _rgw_fh)
+    : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), valid(false),
+      has_children(false) {
+    default_max = 2;
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  /* Fill req_state as for "GET /<bucket>/" and set prefix/delimiter. */
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    const std::string bucket_uri = "/" + rgw_fh->bucket_name() + "/";
+    state->relative_uri = bucket_uri;
+    info.request_uri = bucket_uri;
+    info.effective_uri = bucket_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    prefix = rgw_fh->relative_object_name();
+    if (! prefix.empty())
+      prefix += "/";
+    delimiter = '/';
+
+    return 0;
+  }
+
+  int get_params() override {
+    max = default_max;
+    return 0;
+  }
+
+  /* has_children becomes true when more than one object came back, when
+   * the only object is not the prefix key itself, or when any common
+   * prefix other than "/" is present. */
+  void send_response() override {
+    valid = true;
+
+    const bool sole_obj_is_other =
+      !objs.empty() && (objs.front().key.name != prefix);
+    if ((objs.size() > 1) || sole_obj_is_other) {
+      has_children = true;
+      return;
+    }
+
+    for (auto& cp : common_prefixes) {
+      /* readdir never produces a name for the "/" case */
+      if (cp.first != "/") {
+        has_children = true;
+        break;
+      }
+    }
+  }
+
+  virtual void send_versioned_response() {
+    send_response();
+  }
+
+}; /* RGWRMdirCheck */
+
+/*
+ create bucket
+*/
+
+/* RGWCreateBucket op issued as "PUT /<bucket_name>" with a canned ACL. */
+class RGWCreateBucketRequest : public RGWLibRequest,
+                               public RGWCreateBucket /* RGWOp */
+{
+public:
+  const std::string& bucket_name; // reference to the caller's string
+
+  RGWCreateBucketRequest(CephContext* _cct, RGWUserInfo *_user,
+                         std::string& _bname)
+    : RGWLibRequest(_cct, _user), bucket_name(_bname) {
+    op = this;
+  }
+
+  bool only_bucket() override { return false; }
+
+  int read_permissions(RGWOp* op_obj) override {
+    /* we ARE a 'create bucket' request (cf. rgw_rest.cc, ll. 1305-6) */
+    return 0;
+  }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "PUT";
+    state->op = OP_PUT;
+
+    const string bucket_uri = "/" + bucket_name;
+    /* XXX derp derp derp */
+    state->relative_uri = bucket_uri;
+    info.request_uri = bucket_uri; // XXX
+    info.effective_uri = bucket_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  /* Build a canned S3 policy from the request state and store it in
+   * `policy`; returns create_canned()'s result. */
+  int get_params() override {
+    struct req_state* const state = get_state();
+    RGWAccessControlPolicy_S3 canned(state->cct);
+    /* we don't have (any) headers, so just create canned ACLs */
+    const int rc = canned.create_canned(state->owner, state->bucket_owner,
+                                        state->canned_acl);
+    policy = canned;
+    return rc;
+  }
+
+  void send_response() override {
+    /* TODO: something (maybe) */
+  }
+}; /* RGWCreateBucketRequest */
+
+/*
+ delete bucket
+*/
+
+/* RGWDeleteBucket op issued as "DELETE /<bucket_name>". */
+class RGWDeleteBucketRequest : public RGWLibRequest,
+                               public RGWDeleteBucket /* RGWOp */
+{
+public:
+  const std::string& bucket_name; // reference to the caller's string
+
+  RGWDeleteBucketRequest(CephContext* _cct, RGWUserInfo *_user,
+                         std::string& _bname)
+    : RGWLibRequest(_cct, _user), bucket_name(_bname) {
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "DELETE";
+    state->op = OP_DELETE;
+
+    const string bucket_uri = "/" + bucket_name;
+    /* XXX derp derp derp */
+    state->relative_uri = bucket_uri;
+    info.request_uri = bucket_uri; // XXX
+    info.effective_uri = bucket_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  void send_response() override {}
+
+}; /* RGWDeleteBucketRequest */
+
+/*
+ put object
+*/
+/* RGWPutObj op that uploads the caller's buffer::list as
+ * "PUT /<bucket_name>/<obj_name>". */
+class RGWPutObjRequest : public RGWLibRequest,
+                         public RGWPutObj /* RGWOp */
+{
+public:
+  const std::string& bucket_name;
+  const std::string& obj_name;
+  buffer::list& bl; /* XXX */
+  size_t bytes_written;
+
+  RGWPutObjRequest(CephContext* _cct, RGWUserInfo *_user,
+                   const std::string& _bname, const std::string& _oname,
+                   buffer::list& _bl)
+    : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
+      bl(_bl), bytes_written(0) {
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  /* Besides the usual op wiring, returns valid_s3_object_name()'s
+   * result for obj_name. */
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+
+    return valid_s3_object_name(obj_name);
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "PUT";
+    state->op = OP_PUT;
+
+    /* XXX derp derp derp */
+    const std::string obj_uri = make_uri(bucket_name, obj_name);
+    state->relative_uri = obj_uri;
+    info.request_uri = obj_uri; // XXX
+    info.effective_uri = obj_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    /* XXX required in RGWOp::execute() */
+    state->content_length = bl.length();
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  /* Build a canned S3 policy from the request state and store it in
+   * `policy`; returns create_canned()'s result. */
+  int get_params() override {
+    struct req_state* const state = get_state();
+    RGWAccessControlPolicy_S3 canned(state->cct);
+    /* we don't have (any) headers, so just create canned ACLs */
+    const int rc = canned.create_canned(state->owner, state->bucket_owner,
+                                        state->canned_acl);
+    policy = canned;
+    return rc;
+  }
+
+  /* Move the payload into _bl, add its length to bytes_written and
+   * return that length. */
+  int get_data(buffer::list& _bl) override {
+    /* XXX for now, use sharing semantics */
+    _bl.claim(bl);
+    const uint32_t claimed = _bl.length();
+    bytes_written += claimed;
+    return claimed;
+  }
+
+  void send_response() override {}
+
+  int verify_params() override {
+    const bool too_large = (bl.length() > cct->_conf->rgw_max_put_size);
+    return too_large ? -ERR_TOO_LARGE : 0;
+  }
+
+  /* pointer to the attribute stored under k, or nullptr if absent */
+  buffer::list* get_attr(const std::string& k) {
+    auto found = attrs.find(k);
+    if (found == attrs.end())
+      return nullptr;
+    return &(found->second);
+  }
+
+}; /* RGWPutObjRequest */
+
+/*
+ get object
+*/
+
+/* RGWGetObj op that reads a byte range of rgw_fh's object into a
+ * caller-supplied buffer. */
+class RGWReadRequest : public RGWLibRequest,
+                       public RGWGetObj /* RGWOp */
+{
+public:
+  RGWFileHandle* rgw_fh;
+  void *ulp_buffer;
+  size_t nread;      /* bytes copied into ulp_buffer so far */
+  size_t read_resid; /* initialize to len, <= sizeof(ulp_buffer) */
+  bool do_hexdump = false;
+
+  RGWReadRequest(CephContext* _cct, RGWUserInfo *_user,
+                 RGWFileHandle* _rgw_fh, uint64_t off, uint64_t len,
+                 void *_ulp_buffer)
+    : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), ulp_buffer(_ulp_buffer),
+      nread(0), read_resid(len) {
+    op = this;
+
+    /* fixup RGWGetObj (already know range parameters) */
+    RGWGetObj::ofs = off;
+    RGWGetObj::end = off + len;
+    RGWGetObj::range_parsed = true;
+    RGWGetObj::partial_content = true;
+    RGWGetObj::get_data = true; // XXX
+  }
+
+  bool only_bucket() override { return false; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    /* XXX derp derp derp */
+    state->relative_uri = make_uri(rgw_fh->bucket_name(),
+                                   rgw_fh->relative_object_name());
+    info.request_uri = state->relative_uri; // XXX
+    info.effective_uri = state->relative_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  int get_params() override {
+    return 0;
+  }
+
+  /* Append data from bl (starting bl_off bytes in) to ulp_buffer, never
+   * copying more than read_resid bytes in total.  Note that bl_len is
+   * not consulted by this implementation. */
+  int send_response_data(ceph::buffer::list& bl, off_t bl_off,
+                         off_t bl_len) override {
+    char* const dest = static_cast<char*>(ulp_buffer);
+    for (auto& bp : bl.buffers()) {
+      /* if for some reason bl_off indicates the start-of-data is not at
+       * the current buffer::ptr, skip it and account */
+      if (bl_off > bp.length()) {
+        bl_off -= bp.length();
+        continue;
+      }
+      /* read no more than read_resid */
+      const size_t chunk = std::min(read_resid, size_t(bp.length()-bl_off));
+      memcpy(dest+nread, bp.c_str()+bl_off, chunk);
+      nread += chunk;
+      read_resid -= chunk; /* reduce read_resid by bytes read */
+      bl_off = 0;
+      /* stop if we have no residual ulp_buffer */
+      if (read_resid == 0)
+        break;
+    }
+    return 0;
+  }
+
+  int send_response_data_error() override {
+    /* S3 implementation just sends nothing--there is no side effect
+     * to simulate here */
+    return 0;
+  }
+
+}; /* RGWReadRequest */
+
+/*
+ delete object
+*/
+
+/* RGWDeleteObj op issued as "DELETE /<bucket_name>/<obj_name>". */
+class RGWDeleteObjRequest : public RGWLibRequest,
+                            public RGWDeleteObj /* RGWOp */
+{
+public:
+  const std::string& bucket_name;
+  const std::string& obj_name;
+
+  RGWDeleteObjRequest(CephContext* _cct, RGWUserInfo *_user,
+                      const std::string& _bname, const std::string& _oname)
+    : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "DELETE";
+    state->op = OP_DELETE;
+
+    /* XXX derp derp derp */
+    const std::string obj_uri = make_uri(bucket_name, obj_name);
+    state->relative_uri = obj_uri;
+    info.request_uri = obj_uri; // XXX
+    info.effective_uri = obj_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  void send_response() override {}
+
+}; /* RGWDeleteObjRequest */
+
+/* RGWGetObj op configured with get_data = false: it runs the GET path
+ * for "/<bucket_name>/<obj_name>" and keeps only size, times and attrs. */
+class RGWStatObjRequest : public RGWLibRequest,
+                          public RGWGetObj /* RGWOp */
+{
+public:
+  const std::string& bucket_name;
+  const std::string& obj_name;
+  uint64_t _size; // filled from req_state::obj_size by execute()
+  uint32_t flags;
+
+  static constexpr uint32_t FLAG_NONE = 0x000;
+
+  RGWStatObjRequest(CephContext* _cct, RGWUserInfo *_user,
+                    const std::string& _bname, const std::string& _oname,
+                    uint32_t _flags)
+    : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
+      _size(0), flags(_flags) {
+    op = this;
+
+    /* fixup RGWGetObj (already know range parameters) */
+    RGWGetObj::ofs = 0;
+    RGWGetObj::end = UINT64_MAX;
+    RGWGetObj::range_parsed = true;
+    RGWGetObj::partial_content = true;
+    RGWGetObj::get_data = false; // XXX
+  }
+
+  const char* name() const override { return "stat_obj"; }
+  RGWOpType get_type() override { return RGW_OP_STAT_OBJ; }
+
+  real_time get_mtime() const {
+    return lastmod;
+  }
+
+  /* attributes */
+  uint64_t get_size() { return _size; }
+  real_time ctime() { return mod_time; } // XXX
+  real_time mtime() { return mod_time; }
+  std::map<string, bufferlist>& get_attrs() { return attrs; }
+
+  /* pointer to the attribute stored under k, or nullptr if absent */
+  buffer::list* get_attr(const std::string& k) {
+    auto found = attrs.find(k);
+    if (found == attrs.end())
+      return nullptr;
+    return &(found->second);
+  }
+
+  bool only_bucket() override { return false; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    /* XXX derp derp derp */
+    state->relative_uri = make_uri(bucket_name, obj_name);
+    info.request_uri = state->relative_uri; // XXX
+    info.effective_uri = state->relative_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  int get_params() override {
+    return 0;
+  }
+
+  int send_response_data(ceph::buffer::list& _bl, off_t s_off,
+                         off_t e_off) override {
+    /* NOP */
+    /* XXX save attrs? */
+    return 0;
+  }
+
+  int send_response_data_error() override {
+    /* NOP */
+    return 0;
+  }
+
+  /* Run the base GET, then capture the object size from req_state. */
+  void execute() override {
+    RGWGetObj::execute();
+    struct req_state* const state = get_state();
+    _size = state->obj_size;
+  }
+
+}; /* RGWStatObjRequest */
+
+/* RGWStatBucket op issued as "GET /<path>"; copies the bucket's stats
+ * into the caller's BucketStats and takes over its attrs. */
+class RGWStatBucketRequest : public RGWLibRequest,
+                             public RGWStatBucket /* RGWOp */
+{
+public:
+  std::string uri;
+  std::map<std::string, buffer::list> attrs;
+  RGWLibFS::BucketStats& bs;
+
+  RGWStatBucketRequest(CephContext* _cct, RGWUserInfo *_user,
+                       const std::string& _path,
+                       RGWLibFS::BucketStats& _stats)
+    : RGWLibRequest(_cct, _user), uri("/" + _path), bs(_stats) {
+    op = this;
+  }
+
+  /* pointer to the attribute stored under k, or nullptr if absent */
+  buffer::list* get_attr(const std::string& k) {
+    auto found = attrs.find(k);
+    if (found == attrs.end())
+      return nullptr;
+    return &(found->second);
+  }
+
+  real_time get_ctime() const {
+    return bucket.creation_time;
+  }
+
+  bool only_bucket() override { return false; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    /* XXX derp derp derp */
+    state->relative_uri = uri;
+    info.request_uri = uri; // XXX
+    info.effective_uri = uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  virtual int get_params() {
+    return 0;
+  }
+
+  /* Publish size, rounded size, creation time and entry count to bs,
+   * then swap the bucket attrs out of req_state into `attrs`. */
+  void send_response() override {
+    struct req_state* const state = get_state();
+    bucket.creation_time = state->bucket_info.creation_time;
+
+    bs.creation_time = bucket.creation_time;
+    bs.num_entries = bucket.count;
+    bs.size = bucket.size;
+    bs.size_rounded = bucket.size_rounded;
+
+    std::swap(attrs, get_state()->bucket_attrs);
+  }
+
+  /* true when the op produced a bucket with a non-empty name */
+  bool matched() {
+    return ! bucket.bucket.name.empty();
+  }
+
+}; /* RGWStatBucketRequest */
+
+/* RGWListBucket op that probes for a leaf called `path` under rgw_fh by
+ * listing with prefix = <dir>/<path>; the first object or, failing
+ * that, the first common prefix decides matched / is_dir. */
+class RGWStatLeafRequest : public RGWLibRequest,
+                           public RGWListBucket /* RGWOp */
+{
+public:
+  RGWFileHandle* rgw_fh;
+  std::string path;
+  bool matched;
+  bool is_dir;
+  bool exact_matched;
+
+  RGWStatLeafRequest(CephContext* _cct, RGWUserInfo *_user,
+                     RGWFileHandle* _rgw_fh, const std::string& _path)
+    : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), path(_path),
+      matched(false), is_dir(false), exact_matched(false) {
+    default_max = 1000; // logical max {"foo", "foo/"}
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    struct req_state* const state = get_state();
+    RGWObjectCtx* const octx = static_cast<RGWObjectCtx*>(state->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(octx);
+    RGWOp::init(octx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+    struct req_state* const state = get_state();
+    auto& info = state->info;
+
+    info.method = "GET";
+    state->op = OP_GET;
+
+    /* XXX derp derp derp */
+    const std::string bucket_uri = "/" + rgw_fh->bucket_name() + "/";
+    state->relative_uri = bucket_uri;
+    info.request_uri = bucket_uri; // XXX
+    info.effective_uri = bucket_uri;
+    info.request_params = "";
+    info.domain = ""; /* XXX ? */
+
+    // woo
+    state->user = user;
+    state->bucket_tenant = user->user_id.tenant;
+
+    prefix = rgw_fh->relative_object_name();
+    if (! prefix.empty())
+      prefix += "/";
+    prefix += path;
+    delimiter = '/';
+
+    return 0;
+  }
+
+  int get_params() override {
+    max = default_max;
+    return 0;
+  }
+
+  /* Only the first listed object is examined; if there is none, only
+   * the first common prefix is. */
+  void send_response() override {
+    struct req_state* s = get_state();
+
+    // try objects
+    if (! objs.empty()) {
+      auto& name = objs.front().key.name;
+      lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
+                             << __func__ << " "
+                             << "list uri=" << s->relative_uri << " "
+                             << " prefix=" << prefix << " "
+                             << " obj path=" << name << ""
+                             << " target = " << path << ""
+                             << dendl;
+      /* XXX is there a missing match-dir case (trailing '/')? */
+      matched = true;
+      if (name == path)
+        exact_matched = true;
+      return;
+    }
+
+    // try prefixes
+    if (! common_prefixes.empty()) {
+      auto& name = common_prefixes.begin()->first;
+      lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
+                             << __func__ << " "
+                             << "list uri=" << s->relative_uri << " "
+                             << " prefix=" << prefix << " "
+                             << " pref path=" << name << " (not chomped)"
+                             << " target = " << path << ""
+                             << dendl;
+      matched = true;
+      /* match-dir case (trailing '/') */
+      if (name == prefix + "/")
+        exact_matched = true;
+      is_dir = true;
+    }
+  }
+
+  virtual void send_versioned_response() {
+    send_response();
+  }
+}; /* RGWStatLeafRequest */
+
+/*
+ put object
+*/
+
+/* Continued (multi-call) RGWPutObj op for "PUT /<bucket_name>/<obj_name>":
+ * data is staged with put_data() and handed to the op through
+ * get_data(); exec_start / exec_continue / exec_finish are defined
+ * elsewhere. */
+class RGWWriteRequest : public RGWLibContinuedReq,
+                        public RGWPutObj /* RGWOp */
+{
+public:
+  const std::string& bucket_name;
+  const std::string& obj_name;
+  RGWFileHandle* rgw_fh;
+  std::optional<rgw::AioThrottle> aio;
+  std::optional<rgw::putobj::AtomicObjectProcessor> processor;
+  rgw::putobj::DataProcessor* filter;
+  boost::optional<RGWPutObj_Compress> compressor;
+  CompressorRef plugin;
+  buffer::list data;  /* chunk staged by put_data(), drained by get_data() */
+  uint64_t timer_id;
+  MD5 hash;
+  off_t real_ofs;     /* offset expected by the next put_data() call */
+  size_t bytes_written;
+  bool eio;           /* set when put_data() saw an unexpected offset */
+
+  /* Runs header_init() and, if that succeeds, init_from_header() on the
+   * request state.  timer_id is zero-initialised so that it is never
+   * read as an indeterminate value. */
+  RGWWriteRequest(CephContext* _cct, RGWUserInfo *_user, RGWFileHandle* _fh,
+                  const std::string& _bname, const std::string& _oname)
+    : RGWLibContinuedReq(_cct, _user),
+      bucket_name(_bname), obj_name(_oname),
+      rgw_fh(_fh), filter(nullptr), timer_id(0), real_ofs(0),
+      bytes_written(0), eio(false) {
+
+    int ret = header_init();
+    if (ret == 0) {
+      ret = init_from_header(get_state());
+    }
+    /* NOTE(review): ret is not inspected after this point */
+    op = this;
+  }
+
+  bool only_bucket() override { return true; }
+
+  int op_init() override {
+    // assign store, s, and dialect_handler
+    RGWObjectCtx* rados_ctx
+      = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
+    // framework promises to call op_init after parent init
+    ceph_assert(rados_ctx);
+    RGWOp::init(rados_ctx->get_store(), get_state(), this);
+    op = this; // assign self as op: REQUIRED
+    return 0;
+  }
+
+  int header_init() override {
+
+    struct req_state* s = get_state();
+    s->info.method = "PUT";
+    s->op = OP_PUT;
+
+    /* XXX derp derp derp */
+    std::string uri = make_uri(bucket_name, obj_name);
+    s->relative_uri = uri;
+    s->info.request_uri = uri; // XXX
+    s->info.effective_uri = uri;
+    s->info.request_params = "";
+    s->info.domain = ""; /* XXX ? */
+
+    // woo
+    s->user = user;
+    s->bucket_tenant = user->user_id.tenant;
+
+    return 0;
+  }
+
+  /* Build a canned S3 policy from the request state and store it in
+   * `policy`; returns create_canned()'s result. */
+  int get_params() override {
+    struct req_state* s = get_state();
+    RGWAccessControlPolicy_S3 s3policy(s->cct);
+    /* we don't have (any) headers, so just create canned ACLs */
+    int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
+    policy = s3policy;
+    return ret;
+  }
+
+  /* Move the staged chunk into _bl, add its length to bytes_written and
+   * return that length. */
+  int get_data(buffer::list& _bl) override {
+    /* XXX for now, use sharing semantics */
+    uint32_t len = data.length();
+    _bl.claim(data);
+    bytes_written += len;
+    return len;
+  }
+
+  /* Stage _bl as the next chunk.  A chunk whose offset differs from
+   * real_ofs sets eio; the chunk is staged and the offsets are updated
+   * regardless. */
+  void put_data(off_t off, buffer::list& _bl) {
+    if (off != real_ofs) {
+      eio = true;
+    }
+    data.claim(_bl);
+    real_ofs += data.length();
+    ofs = off; /* consumed in exec_continue() */
+  }
+
+  int exec_start() override;
+  int exec_continue() override;
+  int exec_finish() override;
+
+  void send_response() override {}
+
+  int verify_params() override {
+    return 0;
+  }
+}; /* RGWWriteRequest */
+
+/*
+ copy object
+*/
+ /* Copy-object request built from a source and a destination parent
+  * file handle plus the child names beneath them. */
+ class RGWCopyObjRequest : public RGWLibRequest,
+                           public RGWCopyObj /* RGWOp */
+ {
+ public:
+   RGWFileHandle* src_parent;
+   RGWFileHandle* dst_parent;
+   // held by reference: the caller's strings must outlive this request
+   const std::string& src_name;
+   const std::string& dst_name;
+
+   RGWCopyObjRequest(CephContext* _cct, RGWUserInfo *_user,
+                     RGWFileHandle* _src_parent, RGWFileHandle* _dst_parent,
+                     const std::string& _src_name, const std::string& _dst_name)
+     : RGWLibRequest(_cct, _user), src_parent(_src_parent),
+       dst_parent(_dst_parent), src_name(_src_name), dst_name(_dst_name) {
+     /* all requests have this */
+     op = this;
+
+     /* allow this request to replace selected attrs */
+     attrs_mod = RGWRados::ATTRSMOD_MERGE;
+   }
+
+   bool only_bucket() override { return true; }
+
+   int op_init() override {
+     // assign store, s, and dialect_handler
+     // framework promises to call op_init after parent init
+     auto* obj_ctx = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
+     ceph_assert(obj_ctx);
+     RGWOp::init(obj_ctx->get_store(), get_state(), this);
+     op = this; // assign self as op: REQUIRED
+
+     return 0;
+   }
+
+   int header_init() override {
+     struct req_state* s = get_state();
+     s->op = OP_PUT;
+     s->info.method = "PUT"; // XXX check
+
+     // source side
+     src_bucket_name = src_parent->bucket_name();
+     // need s->src_bucket_name?
+     src_object.name = src_parent->format_child_name(src_name, false);
+     // need s->src_object?
+
+     // destination side
+     dest_bucket_name = dst_parent->bucket_name();
+     // need s->bucket.name?
+     dest_object = dst_parent->format_child_name(dst_name, false);
+     // need s->object_name?
+
+     const int name_rc = valid_s3_object_name(dest_object);
+     if (name_rc != 0) {
+       return name_rc;
+     }
+
+     /* XXX and fixup key attr (could optimize w/string ref and
+      * dest_object) */
+     buffer::list ux_key;
+     fh_key fhk = dst_parent->make_fhk(dst_name);
+     rgw::encode(fhk, ux_key);
+     emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
+
+ #if 0 /* XXX needed? */
+     s->relative_uri = uri;
+     s->info.request_uri = uri; // XXX
+     s->info.effective_uri = uri;
+     s->info.request_params = "";
+     s->info.domain = ""; /* XXX ? */
+ #endif
+
+     // woo
+     s->user = user;
+     s->bucket_tenant = user->user_id.tenant;
+
+     return 0;
+   }
+
+   int get_params() override {
+     struct req_state* s = get_state();
+     /* we don't have (any) headers, so just create canned ACLs */
+     RGWAccessControlPolicy_S3 canned(s->cct);
+     const int rc = canned.create_canned(s->owner, s->bucket_owner,
+                                         s->canned_acl);
+     dest_policy = canned;
+     return rc;
+   }
+
+   void send_response() override {}
+   void send_partial_response(off_t ofs) override {}
+
+ }; /* RGWCopyObjRequest */
+
+ /* Set-attributes request addressed by bucket and object name. */
+ class RGWSetAttrsRequest : public RGWLibRequest,
+                            public RGWSetAttrs /* RGWOp */
+ {
+ public:
+   // held by reference: the caller's strings must outlive this request
+   const std::string& bucket_name;
+   const std::string& obj_name;
+
+   RGWSetAttrsRequest(CephContext* _cct, RGWUserInfo *_user,
+                      const std::string& _bname, const std::string& _oname)
+     : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
+     op = this;
+   }
+
+   bool only_bucket() override { return false; }
+
+   int op_init() override {
+     // assign store, s, and dialect_handler
+     // framework promises to call op_init after parent init
+     auto* obj_ctx = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
+     ceph_assert(obj_ctx);
+     RGWOp::init(obj_ctx->get_store(), get_state(), this);
+     op = this; // assign self as op: REQUIRED
+     return 0;
+   }
+
+   int header_init() override {
+     struct req_state* state = get_state();
+
+     state->op = OP_PUT;
+     state->info.method = "PUT";
+
+     /* XXX derp derp derp */
+     const std::string req_uri = make_uri(bucket_name, obj_name);
+     state->relative_uri = req_uri;
+     state->info.request_uri = req_uri; // XXX
+     state->info.effective_uri = req_uri;
+     state->info.request_params = "";
+     state->info.domain = ""; /* XXX ? */
+
+     // woo
+     state->user = user;
+     state->bucket_tenant = user->user_id.tenant;
+
+     return 0;
+   }
+
+   int get_params() override { return 0; }
+
+   void send_response() override {}
+
+ }; /* RGWSetAttrsRequest */
+
+/*
+ * Send request to get the rados cluster stats
+ */
+ /* Request whose response step copies the collected cluster statistics
+  * into the caller-supplied rados_cluster_stat_t. */
+ class RGWGetClusterStatReq : public RGWLibRequest,
+                              public RGWGetClusterStat {
+ public:
+   // caller-owned destination, filled in by send_response()
+   struct rados_cluster_stat_t& stats_req;
+
+   RGWGetClusterStatReq(CephContext* _cct, RGWUserInfo *_user,
+                        rados_cluster_stat_t& _stats)
+     : RGWLibRequest(_cct, _user), stats_req(_stats) {
+     op = this;
+   }
+
+   int op_init() override {
+     // assign store, s, and dialect_handler
+     // framework promises to call op_init after parent init
+     auto* obj_ctx = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
+     ceph_assert(obj_ctx);
+     RGWOp::init(obj_ctx->get_store(), get_state(), this);
+     op = this; // assign self as op: REQUIRED
+     return 0;
+   }
+
+   int header_init() override {
+     struct req_state* state = get_state();
+     state->op = OP_GET;
+     state->info.method = "GET";
+     state->user = user;
+     return 0;
+   }
+
+   int get_params() override { return 0; }
+   bool only_bucket() override { return false; }
+
+   void send_response() override {
+     // field-by-field copy from the op's result into the caller's struct
+     stats_req.num_objects = stats_op.num_objects;
+     stats_req.kb_used = stats_op.kb_used;
+     stats_req.kb_avail = stats_op.kb_avail;
+     stats_req.kb = stats_op.kb;
+   }
+ }; /* RGWGetClusterStatReq */
+
+
+} /* namespace rgw */
+
+#endif /* RGW_FILE_H */
diff --git a/src/rgw/rgw_formats.cc b/src/rgw/rgw_formats.cc
new file mode 100644
index 00000000..f8abf72f
--- /dev/null
+++ b/src/rgw/rgw_formats.cc
@@ -0,0 +1,374 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <boost/format.hpp>
+
+#include "common/escape.h"
+#include "common/Formatter.h"
+#include "rgw/rgw_common.h"
+#include "rgw/rgw_formats.h"
+#include "rgw/rgw_rest.h"
+
+#define LARGE_SIZE 8192
+
+#define dout_subsys ceph_subsys_rgw
+
+ // ukv selects the "name: value" (key/value) output mode.
+ RGWFormatter_Plain::RGWFormatter_Plain(const bool ukv)
+   : use_kv(ukv)
+ {}
+
+ RGWFormatter_Plain::~RGWFormatter_Plain()
+ {
+   // buf comes from malloc/realloc; free(nullptr) is a no-op
+   free(buf);
+ }
+
+ // Write the accumulated text to os, then drop the buffer.
+ void RGWFormatter_Plain::flush(ostream& os)
+ {
+   if (buf == nullptr) {
+     return;
+   }
+
+   if (len != 0) {
+     os << buf;
+     os.flush();
+   }
+
+   reset_buf();
+ }
+
+ // Release the output buffer and zero its length/capacity bookkeeping.
+ void RGWFormatter_Plain::reset_buf()
+ {
+   free(buf);
+   buf = nullptr;
+   max_len = 0;
+   len = 0;
+ }
+
+ // Discard buffered output and forget all open sections.
+ void RGWFormatter_Plain::reset()
+ {
+   reset_buf();
+   min_stack_level = 0;
+   stack.clear();
+ }
+
+ // Push an array level.  In key/value mode, once output has started, an
+ // array nested directly inside an object first emits its name with an
+ // empty value.
+ void RGWFormatter_Plain::open_array_section(const char *name)
+ {
+   const bool name_needed =
+     use_kv && min_stack_level > 0 && !stack.empty() && !stack.back().is_array;
+   if (name_needed) {
+     dump_format(name, "");
+   }
+
+   struct plain_stack_entry level;
+   level.size = 0;
+   level.is_array = true;
+   stack.push_back(level);
+ }
+
+ // The namespace is folded into the section name as "<name> <ns>".
+ void RGWFormatter_Plain::open_array_section_in_ns(const char *name, const char *ns)
+ {
+   ostringstream qualified;
+   qualified << name << " " << ns;
+   const auto section_name = qualified.str();
+   open_array_section(section_name.c_str());
+ }
+
+ // Push an object level.  In key/value mode, once output has started,
+ // the section name is emitted first with an empty value.
+ void RGWFormatter_Plain::open_object_section(const char *name)
+ {
+   if (use_kv && min_stack_level > 0) {
+     dump_format(name, "");
+   }
+
+   struct plain_stack_entry level;
+   level.size = 0;
+   level.is_array = false;
+   stack.push_back(level);
+ }
+
+ // The namespace is folded into the section name as "<name> <ns>".
+ void RGWFormatter_Plain::open_object_section_in_ns(const char *name,
+                                                    const char *ns)
+ {
+   ostringstream qualified;
+   qualified << name << " " << ns;
+   const auto section_name = qualified.str();
+   open_object_section(section_name.c_str());
+ }
+
+ void RGWFormatter_Plain::close_section()
+ {
+   // NOTE(review): no emptiness check — callers must pair this with an open_*_section()
+   stack.pop_back();
+ }
+
+ void RGWFormatter_Plain::dump_unsigned(const char *name, uint64_t u)
+ {
+   // PRIu64 is the printf conversion matching uint64_t
+   dump_value_int(name, "%" PRIu64, u);
+ }
+
+ void RGWFormatter_Plain::dump_int(const char *name, int64_t u)
+ {
+   // PRId64 is the printf conversion matching int64_t
+   dump_value_int(name, "%" PRId64, u);
+ }
+
+ void RGWFormatter_Plain::dump_float(const char *name, double d)
+ {
+   // shares the varargs path used for integers; "%f" consumes the double
+   dump_value_int(name, "%f", d);
+ }
+
+ /* Dump a string value under the given name.
+  *
+  * A std::string_view is not guaranteed to be NUL-terminated, so the text
+  * is formatted with an explicit precision ("%.*s") rather than "%s",
+  * which would read until some later NUL byte beyond the view. */
+ void RGWFormatter_Plain::dump_string(const char *name, std::string_view s)
+ {
+   // the precision argument of "%.*s" must be an int
+   dump_format(name, "%.*s", static_cast<int>(s.size()), s.data());
+ }
+
+ // Not supported by the plain formatter: calling this aborts.
+ std::ostream& RGWFormatter_Plain::dump_stream(const char *name)
+ {
+   // TODO: implement this!
+   ceph_abort();
+ }
+
+ /* Format one value and append it to the output.
+  *
+  * Outside key/value mode only the first value seen at the outermost
+  * dumped level is emitted; every call still counts toward the current
+  * section's size.  ns and quoted are not used. */
+ void RGWFormatter_Plain::dump_format_va(const char *name, const char *ns, bool quoted, const char *fmt, va_list ap)
+ {
+   struct plain_stack_entry& top = stack.back();
+
+   // the level of the first dumped value becomes the reference level
+   if (min_stack_level == 0)
+     min_stack_level = stack.size();
+
+   // evaluated before the counter is bumped
+   const bool first_at_min_level =
+     (stack.size() == min_stack_level) && (top.size == 0);
+   top.size++;
+
+   if (!first_at_min_level && !use_kv)
+     return;
+
+   char text[LARGE_SIZE];
+   vsnprintf(text, LARGE_SIZE, fmt, ap);
+
+   // separator placed before this value: nothing for the very first
+   // output, ", " between array elements in key/value mode, else newline
+   const char *sep = "";
+   if (wrote_something) {
+     sep = (use_kv && top.is_array && top.size > 1) ? ", " : "\n";
+   }
+   wrote_something = true;
+
+   if (use_kv && !top.is_array) {
+     write_data("%s%s: %s", sep, name, text);
+   } else {
+     write_data("%s%s", sep, text);
+   }
+ }
+
+ // Number of buffered characters, excluding the terminating NUL that
+ // len accounts for.
+ int RGWFormatter_Plain::get_len() const
+ {
+   if (len == 0) {
+     return 0;
+   }
+   // don't include null termination in length
+   return len - 1;
+ }
+
+ void RGWFormatter_Plain::write_raw_data(const char *data)
+ {
+   // routed through "%s" so '%' characters in data are not interpreted
+   write_data("%s", data);
+ }
+
+ /* printf-style append to the formatter's output buffer.
+  *
+  * The text is first rendered into a scratch buffer (on the stack when it
+  * fits, otherwise on the heap), then copied to the end of buf, which is
+  * grown as needed.  len counts the stored characters plus the
+  * terminating NUL; max_len is the allocated size of buf.
+  *
+  * On allocation failure the text is dropped.  buf/max_len are updated
+  * only after an allocation succeeds, so a failed grow cannot leave
+  * max_len larger than the real buffer. */
+ void RGWFormatter_Plain::write_data(const char *fmt, ...)
+ {
+ #define LARGE_ENOUGH_LEN 128
+   int n, size = LARGE_ENOUGH_LEN;
+   char s[LARGE_ENOUGH_LEN + 8]; // constant bound instead of a non-standard VLA
+   char *p, *np;
+   bool p_on_stack;
+   va_list ap;
+   int pos;
+
+   p = s;
+   p_on_stack = true;
+
+   while (1) {
+     va_start(ap, fmt);
+     n = vsnprintf(p, size, fmt, ap);
+     va_end(ap);
+
+     if (n > -1 && n < size)
+       goto done;
+     /* Else try again with more space. */
+     if (n > -1)    /* glibc 2.1 */
+       size = n+1;  /* precisely what is needed */
+     else           /* glibc 2.0 */
+       size *= 2;   /* twice the old size */
+     if (p_on_stack)
+       np = (char *)malloc(size + 8);
+     else
+       np = (char *)realloc(p, size + 8);
+     if (!np)
+       goto done_free; // on realloc failure p is still valid and freed below
+     p = np;
+     p_on_stack = false;
+   }
+ done:
+ #define LARGE_ENOUGH_BUF 4096
+   if (!buf) {
+     const int initial_len = std::max(LARGE_ENOUGH_BUF, size);
+     buf = (char *)malloc(initial_len);
+     if (!buf) {
+       cerr << "ERROR: RGWFormatter_Plain::write_data: failed allocating " << initial_len << " bytes" << std::endl;
+       goto done_free;
+     }
+     max_len = initial_len;
+   }
+
+   if (len + size > max_len) {
+     const int grown_len = len + size + LARGE_ENOUGH_BUF;
+     void *grown = realloc(buf, grown_len);
+     if (grown == NULL) {
+       // buf and max_len still describe the old, valid allocation
+       cerr << "ERROR: RGWFormatter_Plain::write_data: failed allocating " << grown_len << " bytes" << std::endl;
+       goto done_free;
+     }
+     buf = (char *)grown;
+     max_len = grown_len;
+   }
+
+   pos = len;
+   if (len)
+     pos--; // squash null termination
+   strcpy(buf + pos, p);
+   len = pos + strlen(p) + 1;
+ done_free:
+   if (!p_on_stack)
+     free(p);
+ }
+
+ /* Varargs counterpart of dump_format_va() used by the numeric dumpers.
+  * Unlike dump_format_va(), the separator between values is always a
+  * newline. */
+ void RGWFormatter_Plain::dump_value_int(const char *name, const char *fmt, ...)
+ {
+   // the level of the first dumped value becomes the reference level
+   if (min_stack_level == 0)
+     min_stack_level = stack.size();
+
+   struct plain_stack_entry& top = stack.back();
+
+   // evaluated before the counter is bumped
+   const bool first_at_min_level =
+     (stack.size() == min_stack_level) && (top.size == 0);
+   top.size++;
+
+   if (!first_at_min_level && !use_kv)
+     return;
+
+   char text[LARGE_SIZE];
+   va_list args;
+   va_start(args, fmt);
+   vsnprintf(text, LARGE_SIZE, fmt, args);
+   va_end(args);
+
+   const char *sep = wrote_something ? "\n" : "";
+   wrote_something = true;
+
+   if (use_kv && !top.is_array) {
+     write_data("%s%s: %s", sep, name, text);
+   } else {
+     write_data("%s%s", sep, text);
+   }
+ }
+
+
+ /* A utility class that serves as a means to access the protected static
+  * methods of XMLFormatter. */
+ class HTMLHelper : public XMLFormatter {
+ public:
+   /* Return unescaped_str with XML attribute escaping applied.
+    *
+    * The scratch string is sized with escape_xml_attr_len(), which
+    * presumably counts the terminating NUL that escape_xml_attr() writes
+    * (TODO confirm against common/escape.h).  The result is therefore cut
+    * at the first NUL so no NUL byte reaches the generated HTML; if the
+    * length excludes the terminator, the cut is a no-op. */
+   static std::string escape(const std::string& unescaped_str) {
+     int len = escape_xml_attr_len(unescaped_str.c_str());
+     std::string escaped(len, 0);
+     escape_xml_attr(unescaped_str.c_str(), escaped.data());
+     const auto nul_pos = escaped.find('\0');
+     if (nul_pos != std::string::npos) {
+       escaped.resize(nul_pos);
+     }
+     return escaped;
+   }
+ };
+
+ /* Emit the HTML prologue of a listing page: doctype, title, style sheet
+  * (a link to css_path, or an embedded default when css_path is empty),
+  * the table heading and, when a prefix is set, a "../" parent row. */
+ void RGWSwiftWebsiteListingFormatter::generate_header(
+   const std::string& dir_path,
+   const std::string& css_path)
+ {
+   ss << R"(<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 )"
+      << R"(Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">)";
+
+   ss << "<html><head><title>Listing of " << xml_stream_escaper(dir_path)
+      << "</title>";
+
+   if (css_path.empty()) {
+     // no external sheet supplied: embed the default rules
+     ss << R"(<style type="text/css">)"
+        << R"(h1 {font-size: 1em; font-weight: bold;})"
+        << R"(th {text-align: left; padding: 0px 1em 0px 1em;})"
+        << R"(td {padding: 0px 1em 0px 1em;})"
+        << R"(a {text-decoration: none;})"
+        << R"(</style>)";
+   } else {
+     ss << boost::format(R"(<link rel="stylesheet" type="text/css" href="%s" />)")
+           % url_encode(css_path);
+   }
+
+   ss << "</head><body>";
+
+   ss << R"(<h1 id="title">Listing of )" << xml_stream_escaper(dir_path) << "</h1>"
+      << R"(<table id="listing">)"
+      << R"(<tr id="heading">)"
+      << R"(<th class="colname">Name</th>)"
+      << R"(<th class="colsize">Size</th>)"
+      << R"(<th class="coldate">Date</th>)"
+      << R"(</tr>)";
+
+   const bool has_parent = !prefix.empty();
+   if (has_parent) {
+     ss << R"(<tr id="parent" class="item">)"
+        << R"(<td class="colname"><a href="../">../</a></td>)"
+        << R"(<td class="colsize">&nbsp;</td>)"
+        << R"(<td class="coldate">&nbsp;</td>)"
+        << R"(</tr>)";
+   }
+ }
+
+ // Close the table, body and document opened by generate_header().
+ void RGWSwiftWebsiteListingFormatter::generate_footer()
+ {
+   ss << R"(</table></body></html>)";
+ }
+
+ // Strip the first prefix.length() characters from item_name.  As with
+ // std::string::substr, a name shorter than the prefix throws
+ // std::out_of_range.
+ std::string RGWSwiftWebsiteListingFormatter::format_name(
+   const std::string& item_name) const
+ {
+   const auto skip = prefix.length();
+   return item_name.substr(skip);
+ }
+
+ // Emit one table row for an object: linked name, size and mtime.
+ void RGWSwiftWebsiteListingFormatter::dump_object(const rgw_bucket_dir_entry& objent)
+ {
+   const auto shown_name = format_name(objent.key.name);
+   const auto href = url_encode(shown_name);           // link target
+   const auto label = HTMLHelper::escape(shown_name);  // visible text
+
+   ss << boost::format(R"(<tr class="item %s">)")
+         % "default"
+      << boost::format(R"(<td class="colname"><a href="%s">%s</a></td>)")
+         % href
+         % label
+      << boost::format(R"(<td class="colsize">%lld</td>)") % objent.meta.size
+      << boost::format(R"(<td class="coldate">%s</td>)")
+         % dump_time_to_str(objent.meta.mtime)
+      << R"(</tr>)";
+ }
+
+ // Emit one table row for a sub-directory: linked name, blank size/date.
+ void RGWSwiftWebsiteListingFormatter::dump_subdir(const std::string& name)
+ {
+   const auto shown_name = format_name(name);
+   const auto href = url_encode(shown_name);           // link target
+   const auto label = HTMLHelper::escape(shown_name);  // visible text
+
+   ss << R"(<tr class="item subdir">)"
+      << boost::format(R"(<td class="colname"><a href="%s">%s</a></td>)")
+         % href
+         % label
+      << R"(<td class="colsize">&nbsp;</td>)"
+      << R"(<td class="coldate">&nbsp;</td>)"
+      << R"(</tr>)";
+ }
diff --git a/src/rgw/rgw_formats.h b/src/rgw/rgw_formats.h
new file mode 100644
index 00000000..10cc0deb
--- /dev/null
+++ b/src/rgw/rgw_formats.h
@@ -0,0 +1,136 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_FORMATS_H
+#define CEPH_RGW_FORMATS_H
+
+#include "common/Formatter.h"
+
+#include <list>
+#include <stdint.h>
+#include <string>
+#include <ostream>
+
+ struct plain_stack_entry { // one open section on RGWFormatter_Plain's stack
+ int size; // number of values dumped into this section so far
+ bool is_array; // true for an array section, false for an object section
+ };
+
+/* FIXME: this class is mis-named.
+ * FIXME: This was a hack to send certain swift messages.
+ * There is a much better way to do this.
+ */
+ class RGWFormatter_Plain : public Formatter {
+   // free the output buffer and zero its bookkeeping
+   void reset_buf();
+ public:
+   explicit RGWFormatter_Plain(bool use_kv = false);
+   ~RGWFormatter_Plain() override;
+
+   // Formatter hooks that do nothing in the plain format
+   void set_status(int status, const char* status_name) override {}
+   void output_header() override {}
+   void output_footer() override {}
+   void enable_line_break() override {}
+
+   void flush(ostream& os) override;
+   void reset() override;
+
+   // section handling
+   void open_array_section(const char *name) override;
+   void open_array_section_in_ns(const char *name, const char *ns) override;
+   void open_object_section(const char *name) override;
+   void open_object_section_in_ns(const char *name, const char *ns) override;
+   void close_section() override;
+
+   // value dumpers
+   void dump_unsigned(const char *name, uint64_t u) override;
+   void dump_int(const char *name, int64_t u) override;
+   void dump_float(const char *name, double d) override;
+   void dump_string(const char *name, std::string_view s) override;
+   std::ostream& dump_stream(const char *name) override;
+   void dump_format_va(const char *name, const char *ns, bool quoted, const char *fmt, va_list ap) override;
+
+   int get_len() const override;
+   void write_raw_data(const char *data) override;
+
+ private:
+   // printf-style append to buf
+   void write_data(const char *fmt, ...);
+   // printf-style value dump shared by the numeric dumpers
+   void dump_value_int(const char *name, const char *fmt, ...);
+
+   char *buf = nullptr;  // malloc'ed, NUL-terminated output text
+   int len = 0;          // bytes used in buf, including the NUL
+   int max_len = 0;      // allocated size of buf
+
+   std::list<struct plain_stack_entry> stack;  // currently open sections
+   size_t min_stack_level = 0;   // stack depth at the first dumped value; 0 = unset
+   bool use_kv;                  // emit "name: value" pairs
+   bool wrote_something = false; // true once any value has been written
+ };
+
+
+/* This is a presentation layer. No logic inside, please. */
+ class RGWSwiftWebsiteListingFormatter {
+   std::ostream& ss;          // destination stream for the generated HTML
+   const std::string prefix;  // leading part stripped from listed names
+
+ protected:
+   // item_name with the first prefix.length() characters removed
+   std::string format_name(const std::string& item_name) const;
+
+ public:
+   RGWSwiftWebsiteListingFormatter(std::ostream& ss,
+                                   std::string prefix)
+     : ss(ss),
+       prefix(std::move(prefix)) {
+   }
+
+   /* The supplied css_path can be empty. In such a situation a default,
+    * embedded style sheet will be generated. */
+   void generate_header(const std::string& dir_path,
+                        const std::string& css_path);
+   void generate_footer();
+
+   // one table row per listed object / sub-directory
+   void dump_object(const rgw_bucket_dir_entry& objent);
+   void dump_subdir(const std::string& name);
+ };
+
+
+ /* Base class pairing a Formatter with "started"/"flushed" bookkeeping;
+  * subclasses supply the actual flush through do_flush(). */
+ class RGWFormatterFlusher {
+ protected:
+   Formatter *formatter;
+   bool flushed;  // set once flush() has been called
+   bool started;  // set once start() has been called
+
+   virtual void do_flush() = 0;
+   virtual void do_start(int ret) {}
+
+   void set_formatter(Formatter *f) { formatter = f; }
+
+ public:
+   explicit RGWFormatterFlusher(Formatter *f)
+     : formatter(f), flushed(false), started(false) {}
+   virtual ~RGWFormatterFlusher() = default;
+
+   void flush() {
+     do_flush();
+     flushed = true;
+   }
+
+   // do_start() runs on the first call only
+   virtual void start(int client_ret) {
+     if (!started) {
+       do_start(client_ret);
+       started = true;
+     }
+   }
+
+   Formatter *get_formatter() { return formatter; }
+   bool did_flush() { return flushed; }
+   bool did_start() { return started; }
+ };
+
+ // Flusher that writes the formatter's output to a caller-supplied stream.
+ class RGWStreamFlusher : public RGWFormatterFlusher {
+   ostream& os;
+
+ protected:
+   void do_flush() override { formatter->flush(os); }
+
+ public:
+   RGWStreamFlusher(Formatter *f, ostream& _os)
+     : RGWFormatterFlusher(f), os(_os) {}
+ };
+
+ // Flusher with no formatter; flushing does nothing.
+ class RGWNullFlusher : public RGWFormatterFlusher {
+ protected:
+   void do_flush() override {}
+
+ public:
+   RGWNullFlusher() : RGWFormatterFlusher(nullptr) {}
+ };
+
+#endif
diff --git a/src/rgw/rgw_frontend.cc b/src/rgw/rgw_frontend.cc
new file mode 100644
index 00000000..f22ec124
--- /dev/null
+++ b/src/rgw/rgw_frontend.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <signal.h>
+
+#include "rgw_frontend.h"
+#include "include/str_list.h"
+
+#include "include/ceph_assert.h"
+
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+ /* Split config on spaces.  The first token becomes the framework name
+  * (when none is set yet); every later token is stored in config_map,
+  * either as "key=value" or, without '=', as a key with an empty value.
+  * Returns 0, or the negative result of parse_key_value() on failure. */
+ int RGWFrontendConfig::parse_config(const string& config,
+                                     std::multimap<string, string>& config_map)
+ {
+   for (auto& token : get_str_vec(config, " ")) {
+     if (framework.empty()) {
+       framework = token;
+       dout(0) << "framework: " << framework << dendl;
+       continue;
+     }
+
+     // bare key, no value
+     if (token.find('=') == string::npos) {
+       dout(0) << "framework conf key: " << token << dendl;
+       config_map.emplace(std::move(token), "");
+       continue;
+     }
+
+     string key;
+     string val;
+     const int rc = parse_key_value(token, key, val);
+     if (rc < 0) {
+       cerr << "ERROR: can't parse " << token << std::endl;
+       return rc;
+     }
+
+     dout(0) << "framework conf key: " << key << ", val: " << val << dendl;
+     config_map.emplace(std::move(key), std::move(val));
+   }
+
+   return 0;
+ }
+
+ // Look up key; *out receives the first stored value, or def_val when the
+ // key is absent.  Returns whether the key was found.
+ bool RGWFrontendConfig::get_val(const string& key, const string& def_val,
+                                 string *out)
+ {
+   const auto hit = config_map.find(key);
+   const bool found = (hit != config_map.end());
+   *out = found ? hit->second : def_val;
+   return found;
+ }
+
+ /* Integer lookup.
+  *
+  * Returns true and stores the parsed value in *out when key is present
+  * and its value is a valid base-10 integer.  Otherwise *out is set to
+  * def_val and false is returned; a malformed value is also reported on
+  * cerr.
+  *
+  * The previous code returned -EINVAL (which converts to true) on a parse
+  * error and 0 (false) on success, inverting the result. */
+ bool RGWFrontendConfig::get_val(const string& key, int def_val, int *out)
+ {
+   string str;
+   if (!get_val(key, "", &str)) {
+     *out = def_val;
+     return false;
+   }
+
+   string err;
+   const int parsed = strict_strtol(str.c_str(), 10, &err);
+   if (!err.empty()) {
+     cerr << "error parsing int: " << str << ": " << err << std::endl;
+     // fall back to the default rather than the failed parse result
+     *out = def_val;
+     return false;
+   }
+
+   *out = parsed;
+   return true;
+ }
+
+ // Close the process's descriptor, then signal the control thread.
+ // NOTE(review): pprocess and thread are dereferenced unchecked — presumably
+ // init() and run() have already been called; confirm at call sites.
+ void RGWProcessFrontend::stop()
+ {
+   pprocess->close_fd();
+   thread->kill(SIGUSR1);
+ }
diff --git a/src/rgw/rgw_frontend.h b/src/rgw/rgw_frontend.h
new file mode 100644
index 00000000..c797e4d5
--- /dev/null
+++ b/src/rgw/rgw_frontend.h
@@ -0,0 +1,285 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_FRONTEND_H
+#define RGW_FRONTEND_H
+
+#include <map>
+#include <string>
+
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_realm_reloader.h"
+
+#include "rgw_civetweb.h"
+#include "rgw_civetweb_log.h"
+#include "civetweb/civetweb.h"
+#include "rgw_auth_registry.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw::dmclock {
+ class SyncScheduler;
+ class ClientConfig;
+ class SchedulerCtx;
+}
+
+ /* Parsed form of one frontend configuration string: a framework name
+  * followed by space-separated "key[=value]" options. */
+ class RGWFrontendConfig {
+   std::string config;                                  // raw string
+   std::multimap<std::string, std::string> config_map;  // parsed options
+   std::string framework;                               // first token
+
+   int parse_config(const std::string& config,
+                    std::multimap<std::string, std::string>& config_map);
+
+ public:
+   explicit RGWFrontendConfig(const std::string& config)
+     : config(config) {
+   }
+
+   // Parse the stored string; negative results are passed through, any
+   // other result is reported as 0.
+   int init() {
+     const int rc = parse_config(config, config_map);
+     if (rc < 0) {
+       return rc;
+     }
+     return 0;
+   }
+
+   bool get_val(const std::string& key,
+                const std::string& def_val,
+                std::string* out);
+   bool get_val(const std::string& key, int def_val, int *out);
+
+   // convenience form returning the value (or def_val) directly
+   std::string get_val(const std::string& key,
+                       const std::string& def_val) {
+     std::string value;
+     get_val(key, def_val, &value);
+     return value;
+   }
+
+   const std::string& get_config() { return config; }
+
+   std::multimap<std::string, std::string>& get_config_map() {
+     return config_map;
+   }
+
+   std::string get_framework() const { return framework; }
+ };
+
+ /* Abstract interface implemented by every frontend. */
+ class RGWFrontend {
+ public:
+   virtual ~RGWFrontend() = default;
+
+   // lifecycle
+   virtual int init() = 0;
+   virtual int run() = 0;
+   virtual void stop() = 0;
+   virtual void join() = 0;
+
+   // reconfiguration: pause, then resume with a new store/auth registry
+   virtual void pause_for_new_config() = 0;
+   virtual void unpause_with_new_config(RGWRados* store,
+                                        rgw_auth_registry_ptr_t auth_registry) = 0;
+ };
+
+
+ /* Process environment extended with the reader/writer lock the civetweb
+  * frontend takes for writing while paused. */
+ struct RGWMongooseEnv : public RGWProcessEnv {
+   // every request holds a read lock, so we need to prioritize write locks to
+   // avoid starving pause_for_new_config()
+   static constexpr bool prioritize_write = true;
+   RWLock mutex;
+
+   explicit RGWMongooseEnv(const RGWProcessEnv &env)
+     : RGWProcessEnv(env),
+       mutex("RGWCivetWebFrontend", false, true, prioritize_write) {}
+ };
+
+
+ /* Frontend backed by a civetweb (mg_context) server. */
+ class RGWCivetWebFrontend : public RGWFrontend {
+   RGWFrontendConfig* conf;
+   struct mg_context* ctx;
+   RGWMongooseEnv env;
+
+   std::unique_ptr<rgw::dmclock::SyncScheduler> scheduler;
+   std::unique_ptr<rgw::dmclock::ClientConfig> client_config;
+
+   // insert (key, def_val) only when no entry for key exists yet
+   void set_conf_default(std::multimap<std::string, std::string>& m,
+                         const std::string& key,
+                         const std::string& def_val) {
+     if (m.count(key) == 0) {
+       m.emplace(key, def_val);
+     }
+   }
+
+   CephContext* cct() const { return env.store->ctx(); }
+
+ public:
+   RGWCivetWebFrontend(RGWProcessEnv& env,
+                       RGWFrontendConfig *conf,
+                       rgw::dmclock::SchedulerCtx& sched_ctx);
+
+   int init() override { return 0; }
+
+   int run() override;
+
+   int process(struct mg_connection* conn);
+
+   // stops the server only if a context exists
+   void stop() override {
+     if (ctx) {
+       mg_stop(ctx);
+     }
+   }
+
+   void join() override {}
+
+   void pause_for_new_config() override {
+     // block callbacks until unpause
+     env.mutex.get_write();
+   }
+
+   void unpause_with_new_config(RGWRados* const store,
+                                rgw_auth_registry_ptr_t auth_registry) override {
+     env.store = store;
+     env.auth_registry = std::move(auth_registry);
+     // unpause callbacks
+     env.mutex.put_write();
+   }
+ }; /* RGWCivetWebFrontend */
+
+ /* Base for frontends driven by an RGWProcess running on its own control
+  * thread.  Subclasses create pprocess in init(); this class owns and
+  * deletes both pprocess and thread. */
+ class RGWProcessFrontend : public RGWFrontend {
+ protected:
+   RGWFrontendConfig* conf;
+   RGWProcess* pprocess;             // created by the subclass's init()
+   RGWProcessEnv env;
+   RGWProcessControlThread* thread;  // created by run()
+
+ public:
+   RGWProcessFrontend(RGWProcessEnv& pe, RGWFrontendConfig* _conf)
+     : conf(_conf), pprocess(nullptr), env(pe), thread(nullptr) {}
+
+   ~RGWProcessFrontend() override {
+     delete thread;
+     delete pprocess;
+   }
+
+   int run() override {
+     ceph_assert(pprocess); /* should have initialized by init() */
+     thread = new RGWProcessControlThread(pprocess);
+     thread->create("rgw_frontend");
+     return 0;
+   }
+
+   void stop() override;
+
+   void join() override { thread->join(); }
+
+   void pause_for_new_config() override { pprocess->pause(); }
+
+   void unpause_with_new_config(RGWRados* const store,
+                                rgw_auth_registry_ptr_t auth_registry) override {
+     env.store = store;
+     // copy first: the registry is moved into the process afterwards
+     env.auth_registry = auth_registry;
+     pprocess->unpause_with_new_config(store, std::move(auth_registry));
+   }
+ }; /* RGWProcessFrontend */
+
+ // Process frontend whose init() creates an RGWFCGXProcess.
+ class RGWFCGXFrontend : public RGWProcessFrontend {
+ public:
+   RGWFCGXFrontend(RGWProcessEnv& pe, RGWFrontendConfig* _conf)
+     : RGWProcessFrontend(pe, _conf) {}
+
+   int init() override {
+     // thread count comes from rgw_thread_pool_size
+     pprocess = new RGWFCGXProcess(g_ceph_context, &env,
+                                   g_conf()->rgw_thread_pool_size, conf);
+     return 0;
+   }
+ }; /* RGWFCGXFrontend */
+
+ /* Process frontend whose init() creates an RGWLoadGenProcess and gives
+  * it the first access key of the user named by the "uid" option. */
+ class RGWLoadGenFrontend : public RGWProcessFrontend {
+ public:
+   RGWLoadGenFrontend(RGWProcessEnv& pe, RGWFrontendConfig *_conf)
+     : RGWProcessFrontend(pe, _conf) {}
+
+   /* Returns 0 on success, -EINVAL when "uid" is missing or the user has
+    * no access key, or the error from the user lookup. */
+   int init() override {
+     int num_threads;
+     conf->get_val("num_threads", g_conf()->rgw_thread_pool_size, &num_threads);
+
+     // assigned to pprocess straight away, so the base-class destructor
+     // deletes it even when a later step fails
+     RGWLoadGenProcess *loadgen = new RGWLoadGenProcess(g_ceph_context, &env,
+                                                        num_threads, conf);
+     pprocess = loadgen;
+
+     string uid_str;
+     conf->get_val("uid", "", &uid_str);
+     if (uid_str.empty()) {
+       derr << "ERROR: uid param must be specified for loadgen frontend"
+            << dendl;
+       return -EINVAL;
+     }
+
+     rgw_user uid(uid_str);
+
+     RGWUserInfo user_info;
+     const int rc = rgw_get_user_info_by_uid(env.store, uid, user_info, NULL);
+     if (rc < 0) {
+       derr << "ERROR: failed reading user info: uid=" << uid << " ret="
+            << rc << dendl;
+       return rc;
+     }
+
+     const auto first_key = user_info.access_keys.begin();
+     if (first_key == user_info.access_keys.end()) {
+       derr << "ERROR: user has no S3 access keys set" << dendl;
+       return -EINVAL;
+     }
+
+     loadgen->set_access_key(first_key->second);
+
+     return 0;
+   }
+ }; /* RGWLoadGenFrontend */
+
+// FrontendPauser implementation for RGWRealmReloader
+ /* Pauses and resumes every registered frontend, then forwards the call
+  * to an optional chained pauser. */
+ class RGWFrontendPauser : public RGWRealmReloader::Pauser {
+   std::list<RGWFrontend*> &frontends;
+   RGWRealmReloader::Pauser* pauser;  // optional, may be nullptr
+   rgw::auth::ImplicitTenants& implicit_tenants;
+
+  public:
+   RGWFrontendPauser(std::list<RGWFrontend*> &frontends,
+                     rgw::auth::ImplicitTenants& implicit_tenants,
+                     RGWRealmReloader::Pauser* pauser = nullptr)
+     : frontends(frontends),
+       pauser(pauser),
+       implicit_tenants(implicit_tenants) {
+   }
+
+   void pause() override {
+     for (auto* fe : frontends) {
+       fe->pause_for_new_config();
+     }
+     if (pauser) {
+       pauser->pause();
+     }
+   }
+
+   void resume(RGWRados *store) override {
+     /* Initialize the registry of auth strategies which will coordinate
+      * the dynamic reconfiguration. */
+     auto auth_registry =
+       rgw::auth::StrategyRegistry::create(g_ceph_context, implicit_tenants, store);
+
+     // every frontend receives the same registry object
+     for (auto* fe : frontends) {
+       fe->unpause_with_new_config(store, auth_registry);
+     }
+     if (pauser) {
+       pauser->resume(store);
+     }
+   }
+ };
+
+#endif /* RGW_FRONTEND_H */
diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc
new file mode 100644
index 00000000..0b99e087
--- /dev/null
+++ b/src/rgw/rgw_gc.cc
@@ -0,0 +1,528 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_gc.h"
+
+#include "include/scope_guard.h"
+#include "rgw_tools.h"
+#include "include/rados/librados.hpp"
+#include "cls/rgw/cls_rgw_client.h"
+#include "cls/refcount/cls_refcount_client.h"
+#include "rgw_perf_counters.h"
+#include "cls/lock/cls_lock_client.h"
+#include "include/random.h"
+
+#include <list> // XXX
+#include <sstream>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace librados;
+
+static string gc_oid_prefix = "gc";
+static string gc_index_lock_name = "gc_process";
+
+
+/* Remember the context/store and build the table of gc shard object
+ * names ("gc.0", "gc.1", ...). The shard count is the configured
+ * rgw_gc_max_objs capped by rgw_shards_max(). */
+void RGWGC::initialize(CephContext *_cct, RGWRados *_store) {
+  cct = _cct;
+  store = _store;
+
+  const int configured = static_cast<int>(cct->_conf->rgw_gc_max_objs);
+  max_objs = min(configured, rgw_shards_max());
+
+  obj_names = new string[max_objs];
+
+  for (int shard = 0; shard < max_objs; ++shard) {
+    string& name = obj_names[shard];
+    name = gc_oid_prefix;
+    name += '.';
+    name += std::to_string(shard);
+  }
+}
+
+/* Release the shard-name table. The pointer is reset so that calling
+ * finalize() more than once (the destructor calls it as well) cannot
+ * free the array twice. */
+void RGWGC::finalize()
+{
+  delete[] obj_names;
+  obj_names = nullptr;
+}
+
+/* Map a gc tag onto one of the max_objs gc shards. */
+int RGWGC::tag_index(const string& tag)
+{
+  const int shard = rgw_shard_id(tag, max_objs);
+  return shard;
+}
+
+/* Append to `op` a gc "set entry" call for the given object chain and tag,
+ * using rgw_gc_obj_min_wait as the expiration argument. */
+void RGWGC::add_chain(ObjectWriteOperation& op, cls_rgw_obj_chain& chain, const string& tag)
+{
+  cls_rgw_gc_obj_info entry;
+  entry.tag = tag;
+  entry.chain = chain;
+
+  cls_rgw_gc_set_entry(op, cct->_conf->rgw_gc_obj_min_wait, entry);
+}
+
+/* Record `chain` under `tag` in the gc shard selected by the tag, either
+ * synchronously or through the asynchronous gc operate path. */
+int RGWGC::send_chain(cls_rgw_obj_chain& chain, const string& tag, bool sync)
+{
+  ObjectWriteOperation write_op;
+  add_chain(write_op, chain, tag);
+
+  string& shard_obj = obj_names[tag_index(tag)];
+
+  return sync ? store->gc_operate(shard_obj, &write_op)
+              : store->gc_aio_operate(shard_obj, &write_op);
+}
+
+/* Issue a gc "defer entry" call for `tag` (with rgw_gc_obj_min_wait) on
+ * the shard selected by the tag, synchronously or asynchronously. */
+int RGWGC::defer_chain(const string& tag, bool sync)
+{
+  ObjectWriteOperation defer_op;
+  cls_rgw_gc_defer_entry(defer_op, cct->_conf->rgw_gc_obj_min_wait, tag);
+
+  string& shard_obj = obj_names[tag_index(tag)];
+
+  return sync ? store->gc_operate(shard_obj, &defer_op)
+              : store->gc_aio_operate(shard_obj, &defer_op);
+}
+
+/* Asynchronously remove `tags` from gc shard `index`; the completion is
+ * handed back through `pc`. */
+int RGWGC::remove(int index, const std::vector<string>& tags, AioCompletion **pc)
+{
+  ObjectWriteOperation rm_op;
+  cls_rgw_gc_remove(rm_op, tags);
+
+  string& shard_obj = obj_names[index];
+  return store->gc_aio_operate(shard_obj, &rm_op, pc);
+}
+
+/* List up to `max` gc entries into `result`, walking the gc shard objects
+ * starting at shard *index and position `marker`.
+ *
+ * On return *index and `marker` describe where a following call should
+ * continue, and *truncated reports whether more entries may remain.
+ * Returns 0 on success, or the negative error from cls_rgw_gc_list()
+ * (a missing shard object, -ENOENT, is skipped rather than reported).
+ */
+int RGWGC::list(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated)
+{
+ result.clear();
+ string next_marker;
+
+ /* advancing to the next shard (loop increment) also resets the marker */
+ for (; *index < max_objs && result.size() < max; (*index)++, marker.clear()) {
+ std::list<cls_rgw_gc_obj_info> entries;
+ int ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, max - result.size(), expired_only, entries, truncated, next_marker);
+ if (ret == -ENOENT)
+ continue;
+ if (ret < 0)
+ return ret;
+
+ std::list<cls_rgw_gc_obj_info>::iterator iter;
+ for (iter = entries.begin(); iter != entries.end(); ++iter) {
+ result.push_back(*iter);
+ }
+
+ /* remember the position inside the current shard */
+ marker = next_marker;
+
+ if (*index == max_objs - 1) {
+ /* we cut short here, truncated will hold the correct value */
+ return 0;
+ }
+
+ if (result.size() == max) {
+ /* close approximation, it might be that the next of the objects don't hold
+ * anything, in this case truncated should have been false, but we can find
+ * that out on the next iteration
+ */
+ *truncated = true;
+ return 0;
+ }
+
+ }
+ /* ran past the last shard (or max was 0): nothing more to report */
+ *truncated = false;
+
+ return 0;
+}
+
+/* Tracks the asynchronous operations issued by one gc pass: the per-object
+ * refcount puts (TailIO) and the removal of finished tags from the gc
+ * shards (IndexIO). Completions are consumed in FIFO order.
+ */
+class RGWGCIOManager {
+  const DoutPrefixProvider* dpp;
+  CephContext *cct;
+  RGWGC *gc;
+
+  /* one in-flight asynchronous operation */
+  struct IO {
+    enum Type {
+      UnknownIO = 0,
+      TailIO = 1,   /* operation on an object of a gc chain */
+      IndexIO = 2,  /* removal of tags from a gc shard */
+    } type{UnknownIO};
+    librados::AioCompletion *c{nullptr};
+    string oid;
+    int index{-1};
+    string tag;
+  };
+
+  deque<IO> ios;
+  /* per gc shard: tags whose objects are done and which await removal */
+  vector<std::vector<string> > remove_tags;
+  /* tracks the number of remaining shadow objects for a given tag in order to
+   * only remove the tag once all shadow objects have themselves been removed
+   */
+  vector<map<string, size_t> > tag_io_size;
+
+#define MAX_AIO_DEFAULT 10
+  size_t max_aio{MAX_AIO_DEFAULT};
+
+public:
+  RGWGCIOManager(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp),
+                                                                                cct(_cct),
+                                                                                gc(_gc),
+                                                                                remove_tags(cct->_conf->rgw_gc_max_objs),
+                                                                                tag_io_size(cct->_conf->rgw_gc_max_objs) {
+    max_aio = cct->_conf->rgw_gc_max_concurrent_io;
+  }
+
+  /* Drop our reference on every completion that was never waited for. */
+  ~RGWGCIOManager() {
+    for (auto& io : ios) {
+      io.c->release();
+    }
+  }
+
+  /* Queue `op` on `oid` as a TailIO for (index, tag). Before issuing, wait
+   * for older completions while more than max_aio are outstanding; if gc
+   * is going down while waiting, return 0 without issuing anything.
+   * Returns 0 on success or the negative error from aio_operate().
+   */
+  int schedule_io(IoCtx *ioctx, const string& oid, ObjectWriteOperation *op,
+                  int index, const string& tag) {
+    while (ios.size() > max_aio) {
+      if (gc->going_down()) {
+        return 0;
+      }
+      handle_next_completion();
+    }
+
+    AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+    int ret = ioctx->aio_operate(oid, c, op);
+    if (ret < 0) {
+      /* the operation was never queued, so this completion will not be
+       * tracked in `ios`; release it here or it is leaked */
+      c->release();
+      return ret;
+    }
+    ios.push_back(IO{IO::TailIO, c, oid, index, tag});
+
+    return 0;
+  }
+
+  /* Wait for the oldest outstanding operation and act on its result:
+   * a successful TailIO counts towards removal of its tag, errors are
+   * only logged (-ENOENT is treated as success).
+   */
+  void handle_next_completion() {
+    ceph_assert(!ios.empty());
+    IO& io = ios.front();
+    io.c->wait_for_safe();
+    int ret = io.c->get_return_value();
+    io.c->release();
+
+    if (ret == -ENOENT) {
+      ret = 0;
+    }
+
+    if (io.type == IO::IndexIO) {
+      if (ret < 0) {
+        ldpp_dout(dpp, 0) << "WARNING: gc cleanup of tags on gc shard index=" <<
+          io.index << " returned error, ret=" << ret << dendl;
+      }
+      goto done;
+    }
+
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: gc could not remove oid=" << io.oid <<
+        ", ret=" << ret << dendl;
+      goto done;
+    }
+
+    schedule_tag_removal(io.index, io.tag);
+
+  done:
+    ios.pop_front();
+  }
+
+  /* This is a request to schedule a tag removal. It will be called once when
+   * there are no shadow objects. But it will also be called for every shadow
+   * object when there are any. Since we do not want the tag to be removed
+   * until all shadow objects have been successfully removed, the scheduling
+   * will not happen until the shadow object count goes down to zero
+   */
+  void schedule_tag_removal(int index, string tag) {
+    auto& ts = tag_io_size[index];
+    auto ts_it = ts.find(tag);
+    if (ts_it != ts.end()) {
+      auto& size = ts_it->second;
+      --size;
+      // wait all shadow obj delete return
+      if (size != 0)
+        return;
+
+      ts.erase(ts_it);
+    }
+
+    auto& rt = remove_tags[index];
+
+    rt.push_back(tag);
+    /* flush once a full trim chunk of tags has accumulated */
+    if (rt.size() >= (size_t)cct->_conf->rgw_gc_max_trim_chunk) {
+      flush_remove_tags(index, rt);
+    }
+  }
+
+  /* Register how many object operations must succeed before `tag` may be
+   * removed from shard `index`. */
+  void add_tag_io_size(int index, string tag, size_t size) {
+    auto& ts = tag_io_size[index];
+    ts.emplace(tag, size);
+  }
+
+  /* Consume outstanding completions until none remain or gc goes down. */
+  void drain_ios() {
+    while (!ios.empty()) {
+      if (gc->going_down()) {
+        return;
+      }
+      handle_next_completion();
+    }
+  }
+
+  void drain() {
+    drain_ios();
+    flush_remove_tags();
+    /* the tags draining might have generated more ios, drain those too */
+    drain_ios();
+  }
+
+  /* Send the removal of the tags collected in `rt` to gc shard `index`.
+   * `rt` is always emptied on return, whether or not the request could
+   * be issued.
+   */
+  void flush_remove_tags(int index, vector<string>& rt) {
+    IO index_io;
+    index_io.type = IO::IndexIO;
+    index_io.index = index;
+
+    ldpp_dout(dpp, 20) << __func__ <<
+      " removing entries from gc log shard index=" << index << ", size=" <<
+      rt.size() << ", entries=" << rt << dendl;
+
+    auto rt_guard = make_scope_guard(
+      [&]
+      {
+        rt.clear();
+      }
+      );
+
+    int ret = gc->remove(index, rt, &index_io.c);
+    if (ret < 0) {
+      /* the scope guard clears the list of tags, this prevents us from
+       * ballooning in case of a persistent problem
+       */
+      ldpp_dout(dpp, 0) << "WARNING: failed to remove tags on gc shard index=" <<
+        index << " ret=" << ret << dendl;
+      return;
+    }
+    if (perfcounter) {
+      /* log the count of tags retired for rate estimation */
+      perfcounter->inc(l_rgw_gc_retire, rt.size());
+    }
+    ios.push_back(index_io);
+  }
+
+  /* Flush the pending tag list of every gc shard. */
+  void flush_remove_tags() {
+    int index = 0;
+    for (auto& rt : remove_tags) {
+      flush_remove_tags(index, rt);
+      ++index;
+    }
+  }
+}; // class RGWGCIOManager
+
+/* Process one gc shard: take the shard's exclusive "gc_process" lock for
+ * max_secs seconds, list its entries in batches of 100 and, for every
+ * object of every entry's chain, schedule a refcount put through
+ * io_manager. Entries with an empty chain have their tag scheduled for
+ * removal directly.
+ *
+ * Returns -EAGAIN when max_secs <= 0, a negative error when taking the
+ * lock fails (except -EBUSY, which returns 0), and 0 otherwise.
+ * NOTE(review): errors from cls_rgw_gc_list() jump to `done` and are not
+ * propagated -- the function still returns 0; confirm this is intended.
+ */
+int RGWGC::process(int index, int max_secs, bool expired_only,
+ RGWGCIOManager& io_manager)
+{
+ ldpp_dout(this, 20) << "RGWGC::process entered with GC index_shard=" <<
+ index << ", max_secs=" << max_secs << ", expired_only=" <<
+ expired_only << dendl;
+
+ rados::cls::lock::Lock l(gc_index_lock_name);
+ utime_t end = ceph_clock_now();
+
+ /* max_secs should be greater than zero. We don't want a zero max_secs
+ * to be translated as no timeout, since we'd then need to break the
+ * lock and that would require a manual intervention. In this case
+ * we can just wait it out. */
+ if (max_secs <= 0)
+ return -EAGAIN;
+
+ /* `end` is the deadline for this shard; the lock lasts just as long */
+ end += max_secs;
+ utime_t time(max_secs, 0);
+ l.set_duration(time);
+
+ int ret = l.lock_exclusive(&store->gc_pool_ctx, obj_names[index]);
+ if (ret == -EBUSY) { /* already locked by another gc processor */
+ ldpp_dout(this, 10) << "RGWGC::process failed to acquire lock on " <<
+ obj_names[index] << dendl;
+ return 0;
+ }
+ if (ret < 0)
+ return ret;
+
+ string marker;
+ string next_marker;
+ bool truncated;
+ /* ioctx for the pool of the object being handled; freed at `done` */
+ IoCtx *ctx = new IoCtx;
+ do {
+ int max = 100;
+ std::list<cls_rgw_gc_obj_info> entries;
+
+ ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, max,
+ expired_only, entries, &truncated, next_marker);
+ ldpp_dout(this, 20) <<
+ "RGWGC::process cls_rgw_gc_list returned with returned:" << ret <<
+ ", entries.size=" << entries.size() << ", truncated=" << truncated <<
+ ", next_marker='" << next_marker << "'" << dendl;
+
+ if (ret == -ENOENT) {
+ ret = 0;
+ goto done;
+ }
+ if (ret < 0)
+ goto done;
+
+ marker = next_marker;
+
+ /* pool the current ioctx was opened for; empty means "none" */
+ string last_pool;
+ std::list<cls_rgw_gc_obj_info>::iterator iter;
+ for (iter = entries.begin(); iter != entries.end(); ++iter) {
+ cls_rgw_gc_obj_info& info = *iter;
+
+ ldpp_dout(this, 20) << "RGWGC::process iterating over entry tag='" <<
+ info.tag << "', time=" << info.time << ", chain.objs.size()=" <<
+ info.chain.objs.size() << dendl;
+
+ std::list<cls_rgw_obj>::iterator liter;
+ cls_rgw_obj_chain& chain = info.chain;
+
+ /* stop once the time budget (and thus the lock) has run out */
+ utime_t now = ceph_clock_now();
+ if (now >= end) {
+ goto done;
+ }
+
+ if (chain.objs.empty()) {
+ io_manager.schedule_tag_removal(index, info.tag);
+ } else {
+ /* the tag is only removed after all of its objects completed */
+ io_manager.add_tag_io_size(index, info.tag, chain.objs.size());
+ for (liter = chain.objs.begin(); liter != chain.objs.end(); ++liter) {
+ cls_rgw_obj& obj = *liter;
+
+ /* switch the ioctx only when the pool changes */
+ if (obj.pool != last_pool) {
+ delete ctx;
+ ctx = new IoCtx;
+ ret = rgw_init_ioctx(store->get_rados_handle(), obj.pool, *ctx);
+ if (ret < 0) {
+ last_pool = "";
+ ldpp_dout(this, 0) << "ERROR: failed to create ioctx pool=" <<
+ obj.pool << dendl;
+ continue;
+ }
+ last_pool = obj.pool;
+ }
+
+ ctx->locator_set_key(obj.loc);
+
+ const string& oid = obj.key.name; /* just stored raw oid there */
+
+ ldpp_dout(this, 5) << "RGWGC::process removing " << obj.pool <<
+ ":" << obj.key.name << dendl;
+ ObjectWriteOperation op;
+ cls_refcount_put(op, info.tag, true);
+
+ ret = io_manager.schedule_io(ctx, oid, &op, index, info.tag);
+ if (ret < 0) {
+ ldpp_dout(this, 0) <<
+ "WARNING: failed to schedule deletion for oid=" << oid << dendl;
+ }
+ if (going_down()) {
+ // leave early, even if tag isn't removed, it's ok since it
+ // will be picked up next time around
+ goto done;
+ }
+ } // chains loop
+ } // else -- chains not empty
+ } // entries loop
+ } while (truncated);
+
+done:
+ /* we don't drain here, because if we're going down we don't want to
+ * hold the system if backend is unresponsive
+ */
+ l.unlock(&store->gc_pool_ctx, obj_names[index]);
+ delete ctx;
+
+ return 0;
+}
+
+/* Run one gc pass over all shards, starting at a randomly chosen shard.
+ * A single I/O manager is shared by all shards and drained at the end
+ * unless gc is going down. Stops at the first shard reporting an error.
+ */
+int RGWGC::process(bool expired_only)
+{
+  const int max_secs = cct->_conf->rgw_gc_processor_max_time;
+
+  const int first_shard = ceph::util::generate_random_number(0, max_objs - 1);
+
+  RGWGCIOManager io_manager(this, store->ctx(), this);
+
+  for (int step = 0; step < max_objs; ++step) {
+    const int shard = (step + first_shard) % max_objs;
+    const int r = process(shard, max_secs, expired_only, io_manager);
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  if (going_down()) {
+    return 0;
+  }
+  io_manager.drain();
+
+  return 0;
+}
+
+/* True once stop_processor() has raised the shutdown flag. */
+bool RGWGC::going_down()
+{
+  return down_flag.load();
+}
+
+/* Create the gc worker and start its thread under the name "rgw_gc". */
+void RGWGC::start_processor()
+{
+  GCWorker *w = new GCWorker(this, cct, this);
+  worker = w;
+  w->create("rgw_gc");
+}
+
+/* Raise the shutdown flag, then wake up, join and destroy the worker
+ * thread if one was started. */
+void RGWGC::stop_processor()
+{
+  down_flag.store(true);
+
+  if (worker != nullptr) {
+    worker->stop();
+    worker->join();
+    delete worker;
+  }
+  worker = NULL;
+}
+
+/* Logging subsystem used for messages emitted through this provider. */
+unsigned RGWGC::get_subsys() const
+{
+  const unsigned subsys = dout_subsys;
+  return subsys;
+}
+
+/* Write the log-line prefix for gc messages to `out` and return `out`. */
+std::ostream& RGWGC::gen_prefix(std::ostream& out) const
+{
+  out << "garbage collection: ";
+  return out;
+}
+
+/* Worker thread body: run a gc pass over expired entries, then sleep for
+ * whatever is left of rgw_gc_processor_period (the sleep can be cut short
+ * by stop()), until the owning RGWGC reports that it is going down.
+ */
+void *RGWGC::GCWorker::entry() {
+ do {
+ utime_t start = ceph_clock_now();
+ ldpp_dout(dpp, 2) << "garbage collection: start" << dendl;
+ int r = gc->process(true);
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: garbage collection process() returned error r=" << r << dendl;
+ }
+ ldpp_dout(dpp, 2) << "garbage collection: stop" << dendl;
+
+ if (gc->going_down())
+ break;
+
+ /* `end` becomes the time the pass took */
+ utime_t end = ceph_clock_now();
+ end -= start;
+ int secs = cct->_conf->rgw_gc_processor_period;
+
+ /* the pass used up the whole period: start the next one right away
+ * (`continue` still evaluates the loop condition below) */
+ if (secs <= end.sec())
+ continue; // next round
+
+ secs -= end.sec();
+
+ lock.Lock();
+ cond.WaitInterval(lock, utime_t(secs, 0));
+ lock.Unlock();
+ } while (!gc->going_down());
+
+ return NULL;
+}
+
+/* Signal the worker's condition variable so a sleeping entry() wakes up. */
+void RGWGC::GCWorker::stop()
+{
+  Mutex::Locker guard(lock);
+  cond.Signal();
+}
diff --git a/src/rgw/rgw_gc.h b/src/rgw/rgw_gc.h
new file mode 100644
index 00000000..f8f24e97
--- /dev/null
+++ b/src/rgw/rgw_gc.h
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_GC_H
+#define CEPH_RGW_GC_H
+
+
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "common/Thread.h"
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "cls/rgw/cls_rgw_types.h"
+
+#include <atomic>
+
+class RGWGCIOManager;
+
+/* Garbage collector for RGW: records object chains under tags in a set
+ * of gc shard objects and later processes them from a worker thread.
+ */
+class RGWGC : public DoutPrefixProvider {
+ CephContext *cct;
+ RGWRados *store;
+ /* number of gc shards; also the length of obj_names */
+ int max_objs;
+ /* array of shard object names, allocated by initialize() and released
+ * by finalize() */
+ string *obj_names;
+ /* set by stop_processor(), polled through going_down() */
+ std::atomic<bool> down_flag = { false };
+
+ int tag_index(const string& tag);
+
+ /* background thread that runs process() periodically */
+ class GCWorker : public Thread {
+ const DoutPrefixProvider *dpp;
+ CephContext *cct;
+ RGWGC *gc;
+ /* lock/cond implement the interruptible sleep between passes */
+ Mutex lock;
+ Cond cond;
+
+ public:
+ GCWorker(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp), cct(_cct), gc(_gc), lock("GCWorker") {}
+ void *entry() override;
+ void stop();
+ };
+
+ GCWorker *worker;
+public:
+ RGWGC() : cct(NULL), store(NULL), max_objs(0), obj_names(NULL), worker(NULL) {}
+ /* stops the worker thread (if any) before releasing obj_names */
+ ~RGWGC() {
+ stop_processor();
+ finalize();
+ }
+
+ void add_chain(librados::ObjectWriteOperation& op, cls_rgw_obj_chain& chain, const string& tag);
+ int send_chain(cls_rgw_obj_chain& chain, const string& tag, bool sync);
+ int defer_chain(const string& tag, bool sync);
+ int remove(int index, const std::vector<string>& tags, librados::AioCompletion **pc);
+
+ void initialize(CephContext *_cct, RGWRados *_store);
+ void finalize();
+
+ int list(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
+ /* reset a list() cursor to the first shard */
+ void list_init(int *index) { *index = 0; }
+ /* process a single shard, sharing io_manager across shards */
+ int process(int index, int process_max_secs, bool expired_only,
+ RGWGCIOManager& io_manager);
+ /* process all shards */
+ int process(bool expired_only);
+
+ bool going_down();
+ void start_processor();
+ void stop_processor();
+
+ /* NOTE(review): dereferences `store`, which is NULL until initialize()
+ * has been called */
+ CephContext *get_cct() const override { return store->ctx(); }
+ unsigned get_subsys() const;
+
+ std::ostream& gen_prefix(std::ostream& out) const;
+
+};
+
+
+#endif
diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc
new file mode 100644
index 00000000..18f7a4ad
--- /dev/null
+++ b/src/rgw/rgw_http_client.cc
@@ -0,0 +1,1255 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include "common/errno.h"
+
+#include <boost/utility/string_ref.hpp>
+
+#include <curl/curl.h>
+#include <curl/easy.h>
+#include <curl/multi.h>
+
+#include "rgw_common.h"
+#include "rgw_http_client.h"
+#include "rgw_http_errors.h"
+#include "common/async/completion.h"
+#include "common/RefCountedObj.h"
+
+#include "rgw_coroutine.h"
+
+#include <atomic>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+RGWHTTPManager *rgw_http_manager;
+
+struct RGWCurlHandle;
+
+static void do_curl_easy_cleanup(RGWCurlHandle *curl_handle);
+
+/* Per-request state shared between an RGWHTTPClient, the RGWHTTPManager
+ * and the libcurl callbacks. Reference counted; `lock` guards the fields
+ * that are updated when the request finishes.
+ */
+struct rgw_http_req_data : public RefCountedObject {
+  RGWCurlHandle *curl_handle{nullptr};
+  curl_slist *h{nullptr};
+  uint64_t id;
+  int ret{0};
+  std::atomic<bool> done = { false };
+  RGWHTTPClient *client{nullptr};
+  rgw_io_id control_io_id;
+  void *user_info{nullptr};
+  bool registered{false};
+  RGWHTTPManager *mgr{nullptr};
+  char error_buf[CURL_ERROR_SIZE];
+  bool write_paused{false};
+  bool read_paused{false};
+
+  Mutex lock;
+  Cond cond;
+
+  using Signature = void(boost::system::error_code);
+  using Completion = ceph::async::Completion<Signature>;
+  std::unique_ptr<Completion> completion;
+
+  rgw_http_req_data() : id(-1), lock("rgw_http_req_data::lock") {
+    // FIPS zeroization audit 20191115: this memset is not security related.
+    memset(error_buf, 0, sizeof(error_buf));
+  }
+
+  /* Register an asio completion that finish() will post to. */
+  template <typename ExecutionContext, typename CompletionToken>
+  auto async_wait(ExecutionContext& ctx, CompletionToken&& token) {
+    boost::asio::async_completion<CompletionToken, Signature> init(token);
+    auto& handler = init.completion_handler;
+    {
+      std::unique_lock l{lock};
+      completion = Completion::create(ctx.get_executor(), std::move(handler));
+    }
+    return init.result.get();
+  }
+
+  /* Wait until finish() has been called and return the request's result.
+   * With a yield context the wait is asynchronous; otherwise the calling
+   * thread blocks on `cond`.
+   */
+  int wait(optional_yield y) {
+    if (done) {
+      return ret;
+    }
+#ifdef HAVE_BOOST_CONTEXT
+    if (y) {
+      auto& context = y.get_io_context();
+      auto& yield = y.get_yield_context();
+      boost::system::error_code ec;
+      async_wait(context, yield[ec]);
+      return -ec.value();
+    }
+#endif
+    Mutex::Locker l(lock);
+    /* finish() sets `done` and signals while holding `lock`, so re-check
+     * the flag under the lock: this closes the window in which finish()
+     * could run between the unlocked check above and the wait (a lost
+     * wakeup), and it also tolerates spurious wakeups */
+    while (!done) {
+      cond.Wait(lock);
+    }
+    return ret;
+  }
+
+  void set_state(int bitmask);
+
+  /* Record the result `r` (and the HTTP status, unless it is -1), give
+   * the curl handle and header list back, mark the request done and wake
+   * up whoever is waiting for it.
+   */
+  void finish(int r, long http_status = -1) {
+    Mutex::Locker l(lock);
+    if (http_status != -1) {
+      if (client) {
+        client->set_http_status(http_status);
+      }
+    }
+    ret = r;
+    if (curl_handle)
+      do_curl_easy_cleanup(curl_handle);
+
+    if (h)
+      curl_slist_free_all(h);
+
+    curl_handle = NULL;
+    h = NULL;
+    done = true;
+    if (completion) {
+      boost::system::error_code ec(-ret, boost::system::system_category());
+      Completion::post(std::move(completion), ec);
+    } else {
+      cond.Signal();
+    }
+  }
+
+  bool is_done() {
+    return done;
+  }
+
+  int get_retcode() {
+    Mutex::Locker l(lock);
+    return ret;
+  }
+
+  RGWHTTPManager *get_manager() {
+    Mutex::Locker l(lock);
+    return mgr;
+  }
+
+  CURL *get_easy_handle() const;
+};
+
+/* A libcurl easy handle together with the bookkeeping used by the handle
+ * cache (`lastuse` is stamped when the handle is returned to the cache). */
+struct RGWCurlHandle {
+  int uses;
+  mono_time lastuse;
+  CURL* h;
+
+  explicit RGWCurlHandle(CURL* h) : uses(0), h(h) {}
+
+  /* dereferencing yields the wrapped easy handle */
+  CURL* operator*() {
+    return h;
+  }
+};
+
+/* Apply a pause/resume bitmask to the request's easy handle; a failure is
+ * only logged. */
+void rgw_http_req_data::set_state(int bitmask) {
+  /* no need to lock here, moreover curl_easy_pause() might trigger
+   * the data receive callback :/
+   */
+  CURL *easy = **curl_handle;
+  const CURLcode rc = curl_easy_pause(easy, bitmask);
+  if (rc == CURLE_OK) {
+    return;
+  }
+  dout(0) << "ERROR: curl_easy_pause() returned rc=" << rc << dendl;
+}
+
+#define MAXIDLE 5
+/* Cache of idle curl easy handles plus the thread that reaps handles
+ * which have been idle for MAXIDLE seconds. */
+class RGWCurlHandles : public Thread {
+public:
+  Mutex cleaner_lock;
+  /* idle handles, most recently released first */
+  std::vector<RGWCurlHandle*> saved_curl;
+  int cleaner_shutdown = 0;
+  Cond cleaner_cond;
+
+  RGWCurlHandles()
+    : cleaner_lock("RGWCurlHandles::cleaner_lock") {
+  }
+
+  RGWCurlHandle* get_curl_handle();
+  void release_curl_handle_now(RGWCurlHandle* curl);
+  void release_curl_handle(RGWCurlHandle* curl);
+  void flush_curl_handles();
+  void* entry();
+  void stop();
+};
+
+/* Return a cached idle handle if there is one, otherwise a freshly
+ * initialised one. Returns a null pointer when curl_easy_init() fails. */
+RGWCurlHandle* RGWCurlHandles::get_curl_handle() {
+  RGWCurlHandle* cached = nullptr;
+  {
+    Mutex::Locker lock(cleaner_lock);
+    if (!saved_curl.empty()) {
+      cached = saved_curl.front();
+      saved_curl.erase(saved_curl.begin());
+    }
+  }
+  if (cached != nullptr) {
+    return cached;
+  }
+
+  CURL* fresh = curl_easy_init();
+  if (fresh == nullptr) {
+    return nullptr;
+  }
+  return new RGWCurlHandle{fresh};
+}
+
+/* Destroy the easy handle and its wrapper immediately. */
+void RGWCurlHandles::release_curl_handle_now(RGWCurlHandle* curl)
+{
+  CURL* easy = **curl;
+  curl_easy_cleanup(easy);
+  delete curl;
+}
+
+/* Give a handle back: during shutdown it is destroyed right away,
+ * otherwise it is reset, time-stamped and put at the front of the cache. */
+void RGWCurlHandles::release_curl_handle(RGWCurlHandle* curl)
+{
+  if (cleaner_shutdown) {
+    release_curl_handle_now(curl);
+    return;
+  }
+
+  curl_easy_reset(**curl);
+  Mutex::Locker lock(cleaner_lock);
+  curl->lastuse = mono_clock::now();
+  saved_curl.insert(saved_curl.begin(), curl);
+}
+
+/* Reaper thread body. Holds cleaner_lock except while waiting on the
+ * condition variable. Every MAXIDLE seconds (or when signalled) it frees
+ * cached handles from the back of saved_curl that have been idle for at
+ * least MAXIDLE seconds; once shutdown is requested it frees all of them
+ * and exits when the cache is empty.
+ */
+void* RGWCurlHandles::entry()
+{
+ RGWCurlHandle* curl;
+ Mutex::Locker lock(cleaner_lock);
+
+ for (;;) {
+ if (cleaner_shutdown) {
+ if (saved_curl.empty())
+ break;
+ } else {
+ utime_t release = ceph_clock_now() + utime_t(MAXIDLE,0);
+ cleaner_cond.WaitUntil(cleaner_lock, release);
+ }
+ mono_time now = mono_clock::now();
+ while (!saved_curl.empty()) {
+ /* handles are inserted at the front, so the back is the oldest */
+ auto cend = saved_curl.end();
+ --cend;
+ curl = *cend;
+ /* stop at the first handle that is still fresh (unless shutting down) */
+ if (!cleaner_shutdown && now - curl->lastuse < std::chrono::seconds(MAXIDLE))
+ break;
+ saved_curl.erase(cend);
+ release_curl_handle_now(curl);
+ }
+ }
+ return nullptr;
+}
+
+/* Request shutdown of the reaper thread and wake it up. */
+void RGWCurlHandles::stop()
+{
+  Mutex::Locker guard(cleaner_lock);
+  cleaner_shutdown = 1;
+  cleaner_cond.Signal();
+}
+
+/* Stop and join the reaper thread, complain if any cached handle
+ * survived, and release the cache's storage. */
+void RGWCurlHandles::flush_curl_handles()
+{
+  stop();
+  join();
+
+  const bool leftovers = !saved_curl.empty();
+  if (leftovers) {
+    dout(0) << "ERROR: " << __func__ << " failed final cleanup" << dendl;
+  }
+  saved_curl.shrink_to_fit();
+}
+
+/* The raw easy handle wrapped by curl_handle (which must not be null). */
+CURL *rgw_http_req_data::get_easy_handle() const
+{
+  RGWCurlHandle& wrapper = *curl_handle;
+  return *wrapper;
+}
+
+static RGWCurlHandles *handles;
+
+/* Obtain an easy handle from the global handle cache. */
+static RGWCurlHandle *do_curl_easy_init()
+{
+  RGWCurlHandle *handle = handles->get_curl_handle();
+  return handle;
+}
+
+/* Hand an easy handle back to the global handle cache. */
+static void do_curl_easy_cleanup(RGWCurlHandle *curl_handle)
+{
+  RGWCurlHandles& cache = *handles;
+  cache.release_curl_handle(curl_handle);
+}
+
+// XXX make this part of the token cache? (but that's swift-only,
+// and this especially needs to integrate with s3...)
+
+/* Create the global handle cache and start its reaper thread
+ * ("rgw_curl"). */
+void rgw_setup_saved_curl_handles()
+{
+  RGWCurlHandles *cache = new RGWCurlHandles();
+  handles = cache;
+  cache->create("rgw_curl");
+}
+
+/* Flush the global handle cache (stopping its thread) and destroy it. */
+void rgw_release_all_curl_handles()
+{
+  RGWCurlHandles *cache = handles;
+  cache->flush_curl_handles();
+  delete cache;
+}
+
+/* Take an id from the provider unless one was already assigned.
+ * `io_type` is not used here. */
+void RGWIOProvider::assign_io(RGWIOIDProvider& io_id_provider, int io_type)
+{
+  if (id != 0) {
+    return;
+  }
+  id = io_id_provider.get_next();
+}
+
+/*
+ * the following set of callbacks will be called either on RGWHTTPManager::process(),
+ * or via the RGWHTTPManager async processing.
+ */
+/* libcurl header callback: forwards one header line to the client while
+ * holding the request lock. Always reports the whole buffer as consumed;
+ * a client error is only logged. */
+size_t RGWHTTPClient::receive_http_header(void * const ptr,
+                                          const size_t size,
+                                          const size_t nmemb,
+                                          void * const _info)
+{
+  rgw_http_req_data *req_data = static_cast<rgw_http_req_data *>(_info);
+  const size_t len = size * nmemb;
+
+  Mutex::Locker l(req_data->lock);
+
+  if (req_data->registered) {
+    const int ret = req_data->client->receive_header(ptr, size * nmemb);
+    if (ret < 0) {
+      dout(0) << "WARNING: client->receive_header() returned ret=" << ret << dendl;
+    }
+  }
+
+  return len;
+}
+
+/* libcurl write callback: hands received body data to the client.
+ * Returns the number of bytes consumed, or CURL_WRITEFUNC_PAUSE when the
+ * client asked to pause. client->receive_pause_skip is set to the buffer
+ * length when pausing and is used on later calls to skip that many
+ * leading bytes instead of passing them to the client again.
+ */
+size_t RGWHTTPClient::receive_http_data(void * const ptr,
+ const size_t size,
+ const size_t nmemb,
+ void * const _info)
+{
+ rgw_http_req_data *req_data = static_cast<rgw_http_req_data *>(_info);
+ size_t len = size * nmemb;
+
+ bool pause = false;
+
+ RGWHTTPClient *client;
+
+ {
+ /* the lock is only held to look up the client, not while calling it */
+ Mutex::Locker l(req_data->lock);
+ if (!req_data->registered) {
+ return len;
+ }
+
+ client = req_data->client;
+ }
+
+ size_t& skip_bytes = client->receive_pause_skip;
+
+ /* the whole buffer falls inside the range to skip */
+ if (skip_bytes >= len) {
+ skip_bytes -= len;
+ return len;
+ }
+
+ int ret = client->receive_data((char *)ptr + skip_bytes, len - skip_bytes, &pause);
+ if (ret < 0) {
+ dout(0) << "WARNING: client->receive_data() returned ret=" << ret << dendl;
+ }
+
+ if (pause) {
+ dout(20) << "RGWHTTPClient::receive_http_data(): pause" << dendl;
+ skip_bytes = len;
+ Mutex::Locker l(req_data->lock);
+ req_data->read_paused = true;
+ return CURL_WRITEFUNC_PAUSE;
+ }
+
+ skip_bytes = 0;
+
+ return len;
+}
+
+/* libcurl read callback: asks the client for up to size * nmemb bytes of
+ * request body.
+ *
+ * Returns the number of bytes placed in `ptr`, 0 when the request is no
+ * longer registered, CURL_READFUNC_PAUSE when the client produced nothing
+ * and asked to pause, or CURL_READFUNC_ABORT when the client reported an
+ * error (a negative value must not be returned through size_t).
+ */
+size_t RGWHTTPClient::send_http_data(void * const ptr,
+                                     const size_t size,
+                                     const size_t nmemb,
+                                     void * const _info)
+{
+  rgw_http_req_data *req_data = static_cast<rgw_http_req_data *>(_info);
+
+  RGWHTTPClient *client;
+
+  {
+    /* the lock is only held to look up the client, not while calling it */
+    Mutex::Locker l(req_data->lock);
+
+    if (!req_data->registered) {
+      return 0;
+    }
+
+    client = req_data->client;
+  }
+
+  bool pause = false;
+
+  int ret = client->send_data(ptr, size * nmemb, &pause);
+  if (ret < 0) {
+    dout(0) << "WARNING: client->send_data() returned ret=" << ret << dendl;
+    /* abort the transfer explicitly instead of handing libcurl a negative
+     * number converted to a huge size_t */
+    return CURL_READFUNC_ABORT;
+  }
+
+  if (ret == 0 &&
+      pause) {
+    Mutex::Locker l(req_data->lock);
+    req_data->write_paused = true;
+    return CURL_READFUNC_PAUSE;
+  }
+
+  return ret;
+}
+
+/* The lock of the request data attached to this client (req_data must be
+ * set). */
+Mutex& RGWHTTPClient::get_req_lock()
+{
+  Mutex& request_lock = req_data->lock;
+  return request_lock;
+}
+
+/* Ask the manager to pause/resume sending, unless the request is already
+ * in the requested state. The request lock must be held by the caller. */
+void RGWHTTPClient::_set_write_paused(bool pause)
+{
+  ceph_assert(req_data->lock.is_locked());
+
+  RGWHTTPManager *mgr = req_data->mgr;
+  if (req_data->write_paused == pause) {
+    return;
+  }
+  mgr->set_request_state(this, pause ? SET_WRITE_PAUSED : SET_WRITE_RESUME);
+}
+
+/* Ask the manager to pause/resume receiving, unless the request is
+ * already in the requested state. The request lock must be held by the
+ * caller. */
+void RGWHTTPClient::_set_read_paused(bool pause)
+{
+  ceph_assert(req_data->lock.is_locked());
+
+  RGWHTTPManager *mgr = req_data->mgr;
+  if (req_data->read_paused == pause) {
+    return;
+  }
+  mgr->set_request_state(this, pause ? SET_READ_PAUSED : SET_READ_RESUME);
+}
+
+/* Convert (name, value) pairs into a curl header list.
+ *
+ * A leading "HTTP_" is dropped from each name, underscores become dashes
+ * and the name is run through camelcase_dash_http_attr(). Returns NULL
+ * when no header could be added; the caller owns the returned list.
+ */
+static curl_slist *headers_to_slist(param_vec_t& headers)
+{
+  curl_slist *h = NULL;
+
+  param_vec_t::iterator iter;
+  for (iter = headers.begin(); iter != headers.end(); ++iter) {
+    pair<string, string>& p = *iter;
+    string val = p.first;
+
+    if (strncmp(val.c_str(), "HTTP_", 5) == 0) {
+      val = val.substr(5);
+    }
+
+    /* we need to convert all underscores into dashes as some web servers forbid them
+     * in the http header field names
+     */
+    for (size_t i = 0; i < val.size(); i++) {
+      if (val[i] == '_') {
+        val[i] = '-';
+      }
+    }
+
+    val = camelcase_dash_http_attr(val);
+
+    // curl won't send headers with empty values unless it ends with a ; instead
+    if (p.second.empty()) {
+      val.append(1, ';');
+    } else {
+      val.append(": ");
+      val.append(p.second);
+    }
+
+    /* curl_slist_append() returns NULL on failure and leaves the existing
+     * list untouched; assigning that NULL to `h` would leak the list and
+     * drop every header added so far */
+    curl_slist *appended = curl_slist_append(h, val.c_str());
+    if (!appended) {
+      dout(0) << "ERROR: curl_slist_append() failed for header "
+              << p.first << dendl;
+      continue;
+    }
+    h = appended;
+  }
+
+  return h;
+}
+
+/* True for the HTTP methods that carry a request body (POST and PUT). */
+static bool is_upload_request(const string& method)
+{
+  if (method == "POST") {
+    return true;
+  }
+  return method == "PUT";
+}
+
+/*
+ * process a single simple one off request
+ */
+/* Run this request through RGWHTTP::process() and return its result. */
+int RGWHTTPClient::process(optional_yield y)
+{
+  const int r = RGWHTTP::process(this, y);
+  return r;
+}
+
+/* "<method> <url>", with placeholders for whichever part is empty. */
+string RGWHTTPClient::to_str()
+{
+  string out = method.empty() ? string("<no-method>") : method;
+  out += " ";
+  out += url.empty() ? string("<no-url>") : url;
+  return out;
+}
+
+/* Result recorded for the request, or -EINVAL when no request data has
+ * been attached. */
+int RGWHTTPClient::get_req_retcode()
+{
+  return req_data ? req_data->get_retcode() : -EINVAL;
+}
+
+/*
+ * init request, will be used later with RGWHTTPManager
+ */
+/* Attach `_req_data` to this client (taking a reference) and configure a
+ * curl easy handle for the request: method, url, headers, callbacks,
+ * low-speed limits, upload size and ssl verification.
+ *
+ * Returns 0 on success. Returns -ENOMEM when no curl handle could be
+ * obtained; in that case the reference is dropped again and the client is
+ * left without request data.
+ */
+int RGWHTTPClient::init_request(rgw_http_req_data *_req_data)
+{
+  ceph_assert(!req_data);
+  _req_data->get();
+  req_data = _req_data;
+
+  req_data->curl_handle = do_curl_easy_init();
+  if (!req_data->curl_handle) {
+    /* get_easy_handle() dereferences curl_handle, so bail out before it
+     * is used and undo the attachment above */
+    dout(0) << "ERROR: " << __func__ << "(): failed to obtain a curl handle" << dendl;
+    req_data->put();
+    req_data = nullptr;
+    return -ENOMEM;
+  }
+
+  CURL *easy_handle = req_data->get_easy_handle();
+
+  dout(20) << "sending request to " << url << dendl;
+
+  curl_slist *h = headers_to_slist(headers);
+
+  req_data->h = h;
+
+  curl_easy_setopt(easy_handle, CURLOPT_CUSTOMREQUEST, method.c_str());
+  curl_easy_setopt(easy_handle, CURLOPT_URL, url.c_str());
+  curl_easy_setopt(easy_handle, CURLOPT_NOPROGRESS, 1L);
+  curl_easy_setopt(easy_handle, CURLOPT_NOSIGNAL, 1L);
+  curl_easy_setopt(easy_handle, CURLOPT_HEADERFUNCTION, receive_http_header);
+  curl_easy_setopt(easy_handle, CURLOPT_WRITEHEADER, (void *)req_data);
+  curl_easy_setopt(easy_handle, CURLOPT_WRITEFUNCTION, receive_http_data);
+  curl_easy_setopt(easy_handle, CURLOPT_WRITEDATA, (void *)req_data);
+  curl_easy_setopt(easy_handle, CURLOPT_ERRORBUFFER, (void *)req_data->error_buf);
+  curl_easy_setopt(easy_handle, CURLOPT_LOW_SPEED_TIME, cct->_conf->rgw_curl_low_speed_time);
+  curl_easy_setopt(easy_handle, CURLOPT_LOW_SPEED_LIMIT, cct->_conf->rgw_curl_low_speed_limit);
+  if (h) {
+    curl_easy_setopt(easy_handle, CURLOPT_HTTPHEADER, (void *)h);
+  }
+  curl_easy_setopt(easy_handle, CURLOPT_READFUNCTION, send_http_data);
+  curl_easy_setopt(easy_handle, CURLOPT_READDATA, (void *)req_data);
+  if (send_data_hint || is_upload_request(method)) {
+    curl_easy_setopt(easy_handle, CURLOPT_UPLOAD, 1L);
+  }
+  if (has_send_len) {
+    /* pass the size with the type libcurl expects (curl_off_t) rather
+     * than as a pointer-typed vararg */
+    curl_easy_setopt(easy_handle, CURLOPT_INFILESIZE_LARGE,
+                     static_cast<curl_off_t>(send_len));
+  }
+  if (!verify_ssl) {
+    curl_easy_setopt(easy_handle, CURLOPT_SSL_VERIFYPEER, 0L);
+    curl_easy_setopt(easy_handle, CURLOPT_SSL_VERIFYHOST, 0L);
+    dout(20) << "ssl verification is set to off" << dendl;
+  }
+  curl_easy_setopt(easy_handle, CURLOPT_PRIVATE, (void *)req_data);
+
+  return 0;
+}
+
+/* Whether the attached request has finished (req_data must be set). */
+bool RGWHTTPClient::is_done()
+{
+  const bool finished = req_data->is_done();
+  return finished;
+}
+
+/*
+ * wait for async request to complete
+ */
+/* Wait for the attached request to finish and return its result
+ * (req_data must be set). */
+int RGWHTTPClient::wait(optional_yield y)
+{
+  const int r = req_data->wait(y);
+  return r;
+}
+
+/* Remove this client's request from its manager, if it has both. */
+void RGWHTTPClient::cancel()
+{
+  if (!req_data) {
+    return;
+  }
+
+  RGWHTTPManager *http_manager = req_data->mgr;
+  if (!http_manager) {
+    return;
+  }
+  http_manager->remove_request(this);
+}
+
+/* Cancel any request still known to the manager, then drop this client's
+ * reference on the request data. */
+RGWHTTPClient::~RGWHTTPClient()
+{
+  cancel();
+  if (!req_data) {
+    return;
+  }
+  req_data->put();
+}
+
+
+/* Parse one raw header line and, if its name is one of relevant_headers,
+ * record it in found_headers. Always returns 0. */
+int RGWHTTPHeadersCollector::receive_header(void * const ptr, const size_t len)
+{
+  const boost::string_ref line(static_cast<const char *>(ptr), len);
+
+  /* We're tokening the line that way due to backward compatibility. */
+  const size_t sep = line.find_first_of(" \t:");
+  if (sep == boost::string_ref::npos) {
+    /* Wrongly formatted header? Just skip it. */
+    return 0;
+  }
+
+  header_name_t name(line.substr(0, sep));
+  if (relevant_headers.count(name) == 0) {
+    /* Not interested in this particular header. */
+    return 0;
+  }
+
+  const auto rest = line.substr(sep + 1);
+
+  /* The value starts at the first non-space character after the separator
+   * and runs up to the line terminator. */
+  const size_t first = rest.find_first_not_of(' ');
+  const size_t last = rest.find_first_of("\r\n");
+
+  const bool has_value = (first != boost::string_ref::npos) &&
+                         (last != boost::string_ref::npos);
+  if (has_value) {
+    found_headers.emplace(name, header_value_t(
+        rest.substr(first, last - first)));
+  } else {
+    /* Empty value case. */
+    found_headers.emplace(name, header_value_t());
+  }
+
+  return 0;
+}
+
+/* Copy the next piece of post_data (at most `len` bytes) into `ptr` and
+ * advance post_data_index. Returns the number of bytes copied, 0 once
+ * everything has been sent. `pause` is left untouched. */
+int RGWHTTPTransceiver::send_data(void* ptr, size_t len, bool* pause)
+{
+  if (post_data_index >= post_data.length()) {
+    return 0;
+  }
+
+  int length_to_copy = min(post_data.length() - post_data_index, len);
+  memcpy(ptr, post_data.data() + post_data_index, length_to_copy);
+  post_data_index += length_to_copy;
+  return length_to_copy;
+}
+
+
+/* Drain pending wakeup bytes from `fd`. Returns 0 on success (including
+ * when nothing was readable, EAGAIN) or a negative errno value. */
+static int clear_signal(int fd)
+{
+  // since we're in non-blocking mode, we can try to read a lot more than
+  // one signal from signal_thread() to avoid later wakeups. non-blocking reads
+  // are also required to support the curl_multi_wait bug workaround
+  std::array<char, 256> scratch;
+  const ssize_t nread = ::read(fd, (void *)scratch.data(), scratch.size());
+  if (nread >= 0) {
+    return 0;
+  }
+
+  const int err = errno;
+  return (err == EAGAIN) ? 0 : -err; // clear EAGAIN
+}
+
+#if HAVE_CURL_MULTI_WAIT
+
+static std::once_flag detect_flag;
+static bool curl_multi_wait_bug_present = false;
+
+/* Probe whether curl_multi_wait() reports readiness of an extra fd: make
+ * read_fd readable through write_fd, poll it with a zero timeout and set
+ * curl_multi_wait_bug_present if revents stays empty. The written bytes
+ * are drained again before returning. Returns 0 or a negative error. */
+static int detect_curl_multi_wait_bug(CephContext *cct, CURLM *handle,
+                                      int write_fd, int read_fd)
+{
+  // write to write_fd so that read_fd becomes readable
+  uint32_t token = 0;
+  if (::write(write_fd, &token, sizeof(token)) < 0) {
+    const int err = -errno;
+    ldout(cct, 0) << "ERROR: " << __func__ << "(): write() returned " << err << dendl;
+    return err;
+  }
+
+  // pass read_fd in extra_fds for curl_multi_wait()
+  struct curl_waitfd wait_fd;
+  wait_fd.fd = read_fd;
+  wait_fd.events = CURL_WAIT_POLLIN;
+  wait_fd.revents = 0;
+
+  int num_fds;
+  const int rc = curl_multi_wait(handle, &wait_fd, 1, 0, &num_fds);
+  if (rc != CURLM_OK) {
+    ldout(cct, 0) << "ERROR: curl_multi_wait() returned " << rc << dendl;
+    return -EIO;
+  }
+
+  // curl_multi_wait should flag revents when extra_fd is readable. if it
+  // doesn't, the bug is present and we can't rely on revents
+  const bool revents_missing = (wait_fd.revents == 0);
+  if (revents_missing) {
+    curl_multi_wait_bug_present = true;
+    ldout(cct, 0) << "WARNING: detected a version of libcurl which contains a "
+        "bug in curl_multi_wait(). enabling a workaround that may degrade "
+        "performance slightly." << dendl;
+  }
+
+  return clear_signal(read_fd);
+}
+
+/* Decide whether the signal fd should be read after curl_multi_wait(). */
+static bool is_signaled(const curl_waitfd& wait_fd)
+{
+  // no fd to signal
+  if (wait_fd.fd < 0) {
+    return false;
+  }
+
+  // with the curl_multi_wait() bug we can't rely on revents, so we always
+  // report true if a wait_fd is given. this means we'll be trying a
+  // non-blocking read on this fd every time that curl_multi_wait() wakes up
+  return curl_multi_wait_bug_present || wait_fd.revents > 0;
+}
+
+// Blocks in curl_multi_wait() on curl's own descriptors plus signal_fd, for at
+// most rgw_curl_wait_timeout_ms, and drains signal_fd when it was signaled.
+//
+// Returns 0 on success, -EIO if curl_multi_wait() fails, or the negative
+// error returned by clear_signal().
+static int do_curl_wait(CephContext *cct, CURLM *handle, int signal_fd)
+{
+  struct curl_waitfd extra;
+  extra.fd = signal_fd;
+  extra.events = CURL_WAIT_POLLIN;
+  extra.revents = 0;
+
+  int num_fds;
+  const int mret = curl_multi_wait(handle, &extra, 1,
+                                   cct->_conf->rgw_curl_wait_timeout_ms, &num_fds);
+  if (mret != 0) {
+    ldout(cct, 0) << "ERROR: curl_multi_wait() returned " << mret << dendl;
+    return -EIO;
+  }
+
+  if (!is_signaled(extra)) {
+    return 0;
+  }
+
+  const int r = clear_signal(signal_fd);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: " << __func__ << "(): read() returned " << r << dendl;
+    return r;
+  }
+  return 0;
+}
+
+#else
+
+// Fallback wait used when libcurl lacks curl_multi_wait(): collects curl's
+// descriptors with curl_multi_fdset(), adds signal_fd to the read set and
+// blocks in select() for at most rgw_curl_wait_timeout_ms (RGW_CURL_TIMEOUT
+// when that option is 0). If signal_fd became readable it is drained with
+// clear_signal().
+//
+// Returns 0 on success or timeout, -EIO if curl_multi_fdset() fails, or a
+// negative errno from select() / clear_signal().
+static int do_curl_wait(CephContext *cct, CURLM *handle, int signal_fd)
+{
+  fd_set fdread;
+  fd_set fdwrite;
+  fd_set fdexcep;
+  int maxfd = -1;
+
+  FD_ZERO(&fdread);
+  FD_ZERO(&fdwrite);
+  FD_ZERO(&fdexcep);
+
+  /* get file descriptors from the transfers */
+  int ret = curl_multi_fdset(handle, &fdread, &fdwrite, &fdexcep, &maxfd);
+  if (ret) {
+    ldout(cct, 0) << "ERROR: curl_multi_fdset returned " << ret << dendl;
+    return -EIO;
+  }
+
+  // descriptor 0 is a valid fd; only negative values mean "no signal fd"
+  const bool have_signal_fd = (signal_fd >= 0);
+  if (have_signal_fd) {
+    FD_SET(signal_fd, &fdread);
+    // maxfd holds the highest descriptor; select() gets maxfd + 1 below
+    if (signal_fd > maxfd) {
+      maxfd = signal_fd;
+    }
+  }
+
+  /* forcing a strict timeout, as the returned fdsets might not reference all fds we wait on */
+  uint64_t to = cct->_conf->rgw_curl_wait_timeout_ms;
+#define RGW_CURL_TIMEOUT 1000
+  if (!to)
+    to = RGW_CURL_TIMEOUT;
+  struct timeval timeout;
+  timeout.tv_sec = to / 1000;
+  // the remainder is in milliseconds while tv_usec is in microseconds
+  timeout.tv_usec = (to % 1000) * 1000;
+
+  ret = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
+  if (ret < 0) {
+    ret = -errno;
+    ldout(cct, 0) << "ERROR: select returned " << ret << dendl;
+    return ret;
+  }
+
+  if (have_signal_fd && FD_ISSET(signal_fd, &fdread)) {
+    ret = clear_signal(signal_fd);
+    if (ret < 0) {
+      ldout(cct, 0) << "ERROR: " << __func__ << "(): read() returned " << ret << dendl;
+      return ret;
+    }
+  }
+
+  return 0;
+}
+
+#endif
+
+// Thread entry point: runs the owning manager's request loop until it
+// returns; the thread itself has no result to report.
+void *RGWHTTPManager::ReqsThread::entry()
+{
+  manager->reqs_thread_entry();
+  return nullptr;
+}
+
+/*
+ * RGWHTTPManager has two modes of operation: threaded and non-threaded.
+ *
+ * The constructor only creates the curl multi handle and marks both ends of
+ * the signalling pipe as unopened; the threaded mode begins with start().
+ */
+RGWHTTPManager::RGWHTTPManager(CephContext *_cct, RGWCompletionManager *_cm)
+  : cct(_cct),
+    completion_mgr(_cm),
+    is_started(false),
+    reqs_lock("RGWHTTPManager::reqs_lock"),
+    num_reqs(0),
+    max_threaded_req(0),
+    reqs_thread(NULL)
+{
+  thread_pipe[0] = -1;
+  thread_pipe[1] = -1;
+  multi_handle = static_cast<void *>(curl_multi_init());
+}
+
+// Stops the manager (a no-op if stop() already ran) and then releases the
+// curl multi handle, if one was created.
+RGWHTTPManager::~RGWHTTPManager()
+{
+  stop();
+  if (!multi_handle) {
+    return;
+  }
+  curl_multi_cleanup(static_cast<CURLM *>(multi_handle));
+}
+
+// Assigns the next sequential id to req_data and records it in `reqs`, under
+// the reqs_lock write lock.
+void RGWHTTPManager::register_request(rgw_http_req_data *req_data)
+{
+  RWLock::WLocker wl(reqs_lock);
+  const auto new_id = num_reqs++;
+  req_data->id = new_id;
+  req_data->registered = true;
+  reqs[new_id] = req_data;
+  ldout(cct, 20) << __func__ << " mgr=" << this << " req_data->id=" << req_data->id << ", curl_handle=" << req_data->curl_handle << dendl;
+}
+
+// Queues a registered request for unlinking by the requests thread. The queue
+// entry holds its own reference (get()), which the code that drains
+// unregistered_reqs is expected to drop.
+//
+// Returns false if the request was not registered (nothing queued).
+bool RGWHTTPManager::unregister_request(rgw_http_req_data *req_data)
+{
+  RWLock::WLocker wl(reqs_lock);
+  if (req_data->registered) {
+    req_data->get();
+    req_data->registered = false;
+    unregistered_reqs.push_back(req_data);
+    ldout(cct, 20) << __func__ << " mgr=" << this << " req_data->id=" << req_data->id << ", curl_handle=" << req_data->curl_handle << dendl;
+    return true;
+  }
+  return false;
+}
+
+// Locked entry point: takes reqs_lock for writing and delegates to
+// _complete_request().
+void RGWHTTPManager::complete_request(rgw_http_req_data *req_data)
+{
+ RWLock::WLocker rl(reqs_lock);
+ _complete_request(req_data);
+}
+
+// Removes req_data from `reqs`, detaches it from this manager, notifies the
+// completion manager (when one is set) and drops one reference. Does not take
+// reqs_lock itself; complete_request() is the locking wrapper.
+void RGWHTTPManager::_complete_request(rgw_http_req_data *req_data)
+{
+  // erase-by-key is a no-op when the id is not (or no longer) in the map
+  reqs.erase(req_data->id);
+
+  {
+    Mutex::Locker req_locker(req_data->lock);
+    req_data->mgr = nullptr;
+  }
+
+  if (completion_mgr) {
+    completion_mgr->complete(NULL, req_data->control_io_id, req_data->user_info);
+  }
+
+  // must stay last: req_data may be destroyed by this put()
+  req_data->put();
+}
+
+// Records the outcome on the request via req_data->finish(ret, http_status)
+// and then completes it; complete_request() acquires reqs_lock itself.
+void RGWHTTPManager::finish_request(rgw_http_req_data *req_data, int ret, long http_status)
+{
+ req_data->finish(ret, http_status);
+ complete_request(req_data);
+}
+
+// Variant of finish_request() that does not take reqs_lock: it records `ret`
+// with req_data->finish(ret) and calls _complete_request() directly.
+void RGWHTTPManager::_finish_request(rgw_http_req_data *req_data, int ret)
+{
+ req_data->finish(ret);
+ _complete_request(req_data);
+}
+
+// Applies a queued state change: forwards the stored bitmask to the request's
+// set_state().
+void RGWHTTPManager::_set_req_state(set_state& ss)
+{
+ ss.req->set_state(ss.bitmask);
+}
+/*
+ * hook request to the curl multi handle
+ *
+ * returns 0 on success, -EIO if curl_multi_add_handle() reports an error
+ */
+int RGWHTTPManager::link_request(rgw_http_req_data *req_data)
+{
+  ldout(cct, 20) << __func__ << " req_data=" << req_data << " req_data->id=" << req_data->id << ", curl_handle=" << req_data->curl_handle << dendl;
+
+  CURLM *const multi = static_cast<CURLM *>(multi_handle);
+  const CURLMcode mstatus = curl_multi_add_handle(multi, req_data->get_easy_handle());
+  if (mstatus == CURLM_OK) {
+    return 0;
+  }
+
+  dout(0) << "ERROR: failed on curl_multi_add_handle, status=" << mstatus << dendl;
+  return -EIO;
+}
+
+/*
+ * unhook request from the curl multi handle, and finish request if it wasn't finished yet as
+ * there will be no more processing on this request
+ *
+ * an unfinished request is finished with -ECANCELED; does not take reqs_lock
+ */
+void RGWHTTPManager::_unlink_request(rgw_http_req_data *req_data)
+{
+  if (req_data->curl_handle) {
+    curl_multi_remove_handle(static_cast<CURLM *>(multi_handle), req_data->get_easy_handle());
+  }
+
+  if (req_data->is_done()) {
+    return;
+  }
+  _finish_request(req_data, -ECANCELED);
+}
+
+// Locked wrapper around _unlink_request(): holds the reqs_lock write lock for
+// the duration of the call.
+void RGWHTTPManager::unlink_request(rgw_http_req_data *req_data)
+{
+ RWLock::WLocker wl(reqs_lock);
+ _unlink_request(req_data);
+}
+
+// Called by the requests thread on every wakeup. Under the reqs_lock write
+// lock it (1) applies queued pause/resume changes, (2) unlinks requests queued
+// by unregister_request() and drops the reference that queue held, and
+// (3) links every registered request that has not been handed to curl yet.
+// Requests that fail to link are finished with the link error.
+void RGWHTTPManager::manage_pending_requests()
+{
+  // cheap check under the read lock: nothing new registered and nothing queued
+  reqs_lock.get_read();
+  if (max_threaded_req == num_reqs &&
+      unregistered_reqs.empty() &&
+      reqs_change_state.empty()) {
+    reqs_lock.unlock();
+    return;
+  }
+  reqs_lock.unlock();
+
+  RWLock::WLocker wl(reqs_lock);
+
+  for (auto& ss : reqs_change_state) {
+    _set_req_state(ss);
+  }
+  reqs_change_state.clear();
+
+  for (auto& r : unregistered_reqs) {
+    _unlink_request(r);
+    r->put();
+  }
+  unregistered_reqs.clear();
+
+  // link_request() failures are collected and finished after the loop, since
+  // _finish_request() erases from `reqs` and would invalidate the iterator
+  list<std::pair<rgw_http_req_data *, int> > remove_reqs;
+
+  // Start at the first id >= max_threaded_req rather than at exactly that id:
+  // the request with that id may already be gone (cancelled before it was
+  // linked, or failed to link on an earlier pass), and an exact find() would
+  // then return end() and strand every request registered after it.
+  for (auto iter = reqs.lower_bound(max_threaded_req); iter != reqs.end(); ++iter) {
+    rgw_http_req_data *req_data = iter->second;
+    int r = link_request(req_data);
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: failed to link http request" << dendl;
+      remove_reqs.push_back(std::make_pair(iter->second, r));
+    }
+    // whether linked or scheduled for removal, this id needs no further
+    // linking attempt
+    max_threaded_req = iter->first + 1;
+  }
+
+  for (auto& failed : remove_reqs) {
+    _finish_request(failed.first, failed.second);
+  }
+}
+
+// Creates the per-request state for `client`, registers it, and either links
+// it to the curl multi handle directly (manager not started) or wakes the
+// requests thread so that it links it (manager started).
+//
+// Returns 0 on success or the negative error from init_request(),
+// link_request() or signal_thread().
+int RGWHTTPManager::add_request(RGWHTTPClient *client)
+{
+ rgw_http_req_data *req_data = new rgw_http_req_data;
+
+ int ret = client->init_request(req_data);
+ if (ret < 0) {
+ req_data->put();
+ req_data = NULL;
+ return ret;
+ }
+
+ req_data->mgr = this;
+ req_data->client = client;
+ req_data->control_io_id = client->get_io_id(RGWHTTPClient::HTTPCLIENT_IO_CONTROL);
+ req_data->user_info = client->get_io_user_info();
+
+ register_request(req_data);
+
+ if (!is_started) {
+ // non-threaded mode: hook the request up right away
+ ret = link_request(req_data);
+ if (ret < 0) {
+ // NOTE(review): register_request() above stored req_data in `reqs` and
+ // nothing on this path removes it again - confirm that a failed link
+ // cannot leave a stale pointer in the map.
+ req_data->put();
+ req_data = NULL;
+ }
+ return ret;
+ }
+ // threaded mode: the requests thread links it on its next wakeup
+ ret = signal_thread();
+ if (ret < 0) {
+ finish_request(req_data, ret);
+ }
+
+ return ret;
+}
+
+// Detaches the client's request from the manager. When the manager is not
+// started the request is unlinked synchronously; otherwise it is queued for
+// the requests thread and that thread is woken up.
+//
+// Returns 0, or the negative error from signal_thread().
+int RGWHTTPManager::remove_request(RGWHTTPClient *client)
+{
+  rgw_http_req_data *const req_data = client->get_req_data();
+
+  if (is_started) {
+    if (!unregister_request(req_data)) {
+      // was not registered, nothing to hand to the thread
+      return 0;
+    }
+    const int ret = signal_thread();
+    return (ret < 0) ? ret : 0;
+  }
+
+  unlink_request(req_data);
+  return 0;
+}
+
+// Requests a pause/resume change for the client's transfer. The new
+// read/write pause flags are stored on the request, turned into a CURLPAUSE_*
+// bitmask and queued for the requests thread, which is then woken up.
+// The caller must hold req_data->lock (asserted below).
+//
+// Returns 0 on success or when nothing changes, -EINVAL if the manager is not
+// started, -EIO for an unknown state, or the error from signal_thread().
+int RGWHTTPManager::set_request_state(RGWHTTPClient *client, RGWHTTPRequestSetState state)
+{
+ rgw_http_req_data *req_data = client->get_req_data();
+
+ ceph_assert(req_data->lock.is_locked());
+
+ /* can only do that if threaded */
+ if (!is_started) {
+ return -EINVAL;
+ }
+
+ bool suggested_wr_paused = req_data->write_paused;
+ bool suggested_rd_paused = req_data->read_paused;
+
+ switch (state) {
+ case SET_WRITE_PAUSED:
+ suggested_wr_paused = true;
+ break;
+ case SET_WRITE_RESUME:
+ suggested_wr_paused = false;
+ break;
+ case SET_READ_PAUSED:
+ suggested_rd_paused = true;
+ break;
+ case SET_READ_RESUME:
+ suggested_rd_paused = false;
+ break;
+ default:
+ /* shouldn't really be here */
+ return -EIO;
+ }
+ // nothing to do if the request is already in the requested state
+ if (suggested_wr_paused == req_data->write_paused &&
+ suggested_rd_paused == req_data->read_paused) {
+ return 0;
+ }
+
+ req_data->write_paused = suggested_wr_paused;
+ req_data->read_paused = suggested_rd_paused;
+
+ // build the complete pause mask from both flags, starting from "continue"
+ int bitmask = CURLPAUSE_CONT;
+
+ if (req_data->write_paused) {
+ bitmask |= CURLPAUSE_SEND;
+ }
+
+ if (req_data->read_paused) {
+ bitmask |= CURLPAUSE_RECV;
+ }
+
+ // NOTE(review): reqs_change_state is modified here without reqs_lock, while
+ // manage_pending_requests() reads and clears it under that lock - confirm
+ // what serializes the two.
+ reqs_change_state.push_back(set_state(req_data, bitmask));
+ int ret = signal_thread();
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+// Switches the manager to threaded mode: creates the signalling pipe, makes
+// its read end non-blocking, runs the one-time curl_multi_wait() bug probe
+// (when built with HAVE_CURL_MULTI_WAIT) and spawns the "http_manager" thread.
+//
+// Returns 0 on success or a negative errno if the pipe cannot be set up.
+int RGWHTTPManager::start()
+{
+ if (pipe_cloexec(thread_pipe) < 0) {
+ int e = errno;
+ ldout(cct, 0) << "ERROR: pipe(): " << cpp_strerror(e) << dendl;
+ return -e;
+ }
+
+ // enable non-blocking reads
+ if (::fcntl(thread_pipe[0], F_SETFL, O_NONBLOCK) < 0) {
+ int e = errno;
+ ldout(cct, 0) << "ERROR: fcntl(): " << cpp_strerror(e) << dendl;
+ // NOTE(review): thread_pipe[] keeps the closed descriptor numbers after
+ // this; is_started stays false, so stop() will not close them again.
+ TEMP_FAILURE_RETRY(::close(thread_pipe[0]));
+ TEMP_FAILURE_RETRY(::close(thread_pipe[1]));
+ return -e;
+ }
+
+#ifdef HAVE_CURL_MULTI_WAIT
+ // on first initialization, use this pipe to detect whether we're using a
+ // buggy version of libcurl
+ // (the probe's return value is not checked here)
+ std::call_once(detect_flag, detect_curl_multi_wait_bug, cct,
+ static_cast<CURLM*>(multi_handle),
+ thread_pipe[1], thread_pipe[0]);
+#endif
+
+ is_started = true;
+ reqs_thread = new ReqsThread(this);
+ reqs_thread->create("http_manager");
+ return 0;
+}
+
+// Shuts the manager down; only the first call does any work. In threaded mode
+// it raises going_down, wakes the requests thread, joins and deletes it, and
+// closes both ends of the signalling pipe.
+void RGWHTTPManager::stop()
+{
+  if (is_stopped) {
+    return;
+  }
+  is_stopped = true;
+
+  if (!is_started) {
+    // never started: no thread and no pipe to tear down
+    return;
+  }
+
+  going_down = true;
+  signal_thread();
+  reqs_thread->join();
+  delete reqs_thread;
+  TEMP_FAILURE_RETRY(::close(thread_pipe[1]));
+  TEMP_FAILURE_RETRY(::close(thread_pipe[0]));
+}
+
+// Wakes the requests thread by writing one 4-byte token to the write end of
+// the signalling pipe.
+//
+// Returns 0 on success or a negative errno if write() fails.
+int RGWHTTPManager::signal_thread()
+{
+  uint32_t token = 0;
+  if (write(thread_pipe[1], (void *)&token, sizeof(token)) < 0) {
+    const int ret = -errno;
+    ldout(cct, 0) << "ERROR: " << __func__ << ": write() returned ret=" << ret << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+// Main loop of the "http_manager" thread. Until going_down is set it waits
+// for curl activity or a signal, processes queued request changes, drives
+// curl_multi_perform() and finishes every transfer curl reports as done.
+// After the loop, every outstanding request is unlinked (unfinished ones are
+// finished with -ECANCELED by _unlink_request()) and the completion manager,
+// if any, is told to go down.
+//
+// NOTE(review): a do_curl_wait() failure returns immediately and skips that
+// final cleanup - confirm that waiters cannot be left hanging in that case.
+void *RGWHTTPManager::reqs_thread_entry()
+{
+  int still_running;
+  int mstatus;
+
+  ldout(cct, 20) << __func__ << ": start" << dendl;
+
+  while (!going_down) {
+    int ret = do_curl_wait(cct, (CURLM *)multi_handle, thread_pipe[0]);
+    if (ret < 0) {
+      dout(0) << "ERROR: do_curl_wait() returned: " << ret << dendl;
+      return NULL;
+    }
+
+    manage_pending_requests();
+
+    mstatus = curl_multi_perform((CURLM *)multi_handle, &still_running);
+    switch (mstatus) {
+      case CURLM_OK:
+      case CURLM_CALL_MULTI_PERFORM:
+        break;
+      default:
+        dout(10) << "curl_multi_perform returned: " << mstatus << dendl;
+        break;
+    }
+    int msgs_left;
+    CURLMsg *msg;
+    while ((msg = curl_multi_info_read((CURLM *)multi_handle, &msgs_left))) {
+      if (msg->msg == CURLMSG_DONE) {
+        int result = msg->data.result;
+        CURL *e = msg->easy_handle;
+        rgw_http_req_data *req_data;
+        curl_easy_getinfo(e, CURLINFO_PRIVATE, (void **)&req_data);
+        curl_multi_remove_handle((CURLM *)multi_handle, e);
+
+        long http_status;
+        curl_easy_getinfo(e, CURLINFO_RESPONSE_CODE, (void **)&http_status);
+
+        int status = rgw_http_error_to_errno(http_status);
+        if (result != CURLE_OK && status == 0) {
+          dout(0) << "ERROR: curl error: " << curl_easy_strerror((CURLcode)result) << ", maybe network unstable" << dendl;
+          status = -EAGAIN;
+        }
+        // remember the id first: finish_request() drops a reference, so
+        // req_data must not be touched afterwards
+        int id = req_data->id;
+        finish_request(req_data, status, http_status);
+        switch (result) {
+          case CURLE_OK:
+            break;
+          case CURLE_OPERATION_TIMEDOUT:
+            dout(0) << "WARNING: curl operation timed out, network average transfer speed less than "
+              << cct->_conf->rgw_curl_low_speed_limit << " Bytes per second during " << cct->_conf->rgw_curl_low_speed_time << " seconds." << dendl;
+            // fall through: a timeout is also reported by the generic messages
+          default:
+            dout(20) << "ERROR: msg->data.result=" << result << " req_data->id=" << id << " http_status=" << http_status << dendl;
+            dout(20) << "ERROR: curl error: " << curl_easy_strerror((CURLcode)result) << dendl;
+            break;
+        }
+      }
+    }
+  }
+
+
+  RWLock::WLocker rl(reqs_lock);
+  for (auto r : unregistered_reqs) {
+    _unlink_request(r);
+    // drop the reference taken by unregister_request(), exactly as
+    // manage_pending_requests() does; without it the request leaks
+    r->put();
+  }
+
+  unregistered_reqs.clear();
+
+  // iterate over a moved-out copy: _unlink_request() may end up erasing from
+  // `reqs` through _complete_request()
+  auto all_reqs = std::move(reqs);
+  for (auto iter : all_reqs) {
+    _unlink_request(iter.second);
+  }
+
+  reqs.clear();
+
+  if (completion_mgr) {
+    completion_mgr->go_down();
+  }
+
+  return nullptr;
+}
+
+// Initializes libcurl globally and creates and starts the process-wide
+// rgw_http_manager.
+void rgw_http_client_init(CephContext *cct)
+{
+ curl_global_init(CURL_GLOBAL_ALL);
+ rgw_http_manager = new RGWHTTPManager(cct);
+ // NOTE(review): the result of start() is ignored; on failure the manager
+ // stays in non-threaded mode.
+ rgw_http_manager->start();
+}
+
+// Counterpart of rgw_http_client_init(): stops and destroys the process-wide
+// manager, then releases libcurl's global state. The rgw_http_manager pointer
+// is not reset afterwards.
+void rgw_http_client_cleanup()
+{
+ rgw_http_manager->stop();
+ delete rgw_http_manager;
+ curl_global_cleanup();
+}
+
+
+// Submits `req` to the process-wide manager without waiting for it.
+// A null request is accepted and treated as success.
+// Returns 0, or the negative error from add_request().
+int RGWHTTP::send(RGWHTTPClient *req) {
+  if (req == nullptr) {
+    return 0;
+  }
+  const int r = rgw_http_manager->add_request(req);
+  return (r < 0) ? r : 0;
+}
+
+// Submits `req` and waits for it via req->wait(y).
+// A null request is accepted and treated as success.
+// Returns the negative error from send(), otherwise the result of wait().
+int RGWHTTP::process(RGWHTTPClient *req, optional_yield y) {
+  if (req == nullptr) {
+    return 0;
+  }
+  const int r = send(req);
+  return (r < 0) ? r : req->wait(y);
+}
+
diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h
new file mode 100644
index 00000000..eabe8a85
--- /dev/null
+++ b/src/rgw/rgw_http_client.h
@@ -0,0 +1,370 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_HTTP_CLIENT_H
+#define CEPH_RGW_HTTP_CLIENT_H
+
+#include "common/async/yield_context.h"
+#include "common/RWLock.h"
+#include "common/Cond.h"
+#include "rgw_common.h"
+#include "rgw_string.h"
+
+#include <atomic>
+
+using param_pair_t = pair<string, string>;
+using param_vec_t = vector<param_pair_t>;
+
+void rgw_http_client_init(CephContext *cct);
+void rgw_http_client_cleanup();
+
+struct rgw_http_req_data;
+class RGWHTTPManager;
+
+// Hands out increasing 64-bit ids from an atomic counter; the first id
+// returned is 1.
+class RGWIOIDProvider
+{
+  std::atomic<int64_t> max{0};
+
+public:
+  RGWIOIDProvider() {}
+
+  // atomically advances the counter and returns the new value
+  int64_t get_next() {
+    return max.fetch_add(1) + 1;
+  }
+};
+
+// Identifies an I/O source: `id` is the provider id and `channels` an int
+// (presumably a bit mask of the RGWHTTPClient::HTTPCLIENT_IO_* values - TODO
+// confirm). Ordered by id first, then by channels.
+struct rgw_io_id {
+  int64_t id{0};
+  int channels{0};
+
+  rgw_io_id() {}
+  rgw_io_id(int64_t _id, int _channels) : id(_id), channels(_channels) {}
+
+  // True when both values carry the same id and at least one of them has a
+  // channel set. Const so that it can be called on const objects, like
+  // operator< below.
+  // NOTE(review): the channel test uses `|`, so two values with disjoint
+  // channel masks still "intersect"; `&` may have been intended - confirm
+  // against the callers before changing it.
+  bool intersects(const rgw_io_id& rhs) const {
+    return (id == rhs.id && ((channels | rhs.channels) != 0));
+  }
+
+  // strict weak ordering: by id, ties broken by channels
+  bool operator<(const rgw_io_id& rhs) const {
+    if (id < rhs.id) {
+      return true;
+    }
+    return (id == rhs.id &&
+            channels < rhs.channels);
+  }
+};
+
+// Base class for objects that take part in I/O completion tracking. It holds
+// a provider id (-1 until assign_io() is called; assign_io() is defined
+// outside this header) and requires subclasses to store an opaque user_info
+// pointer.
+class RGWIOProvider
+{
+ int64_t id{-1};
+
+public:
+ RGWIOProvider() {}
+ virtual ~RGWIOProvider() = default;
+
+ void assign_io(RGWIOIDProvider& io_id_provider, int io_type = -1);
+ // builds an rgw_io_id from this provider's id and the given channel value
+ rgw_io_id get_io_id(int io_type) {
+ return rgw_io_id{id, io_type};
+ }
+
+ virtual void set_io_user_info(void *_user_info) = 0;
+ virtual void *get_io_user_info() = 0;
+};
+
+// Base class for a single HTTP request executed through libcurl.
+// Subclasses override receive_header()/receive_data()/send_data() to consume
+// or produce the payload; the static *_http_* functions are the callbacks
+// registered with libcurl. RGWHTTPManager is a friend and keeps its
+// per-request state in `req_data`.
+class RGWHTTPClient : public RGWIOProvider
+{
+ friend class RGWHTTPManager;
+
+ bufferlist send_bl;
+ bufferlist::iterator send_iter;
+ bool has_send_len;
+ long http_status;
+ bool send_data_hint{false};
+ size_t receive_pause_skip{0}; /* how many bytes to skip next time receive_data is called
+ due to being paused */
+
+ // opaque pointer stored by set_io_user_info()
+ void *user_info{nullptr};
+
+ rgw_http_req_data *req_data;
+
+ bool verify_ssl; // SSL verification flag; initialized from the rgw_verify_ssl config option, overridable with set_verify_ssl()
+
+ std::atomic<unsigned> stopped { 0 };
+
+
+protected:
+ CephContext *cct;
+
+ string method;
+ string url;
+
+ // only meaningful after set_send_length() (see has_send_len)
+ size_t send_len{0};
+
+ // request headers as (name, value) pairs, in the order they were appended
+ param_vec_t headers;
+
+ RGWHTTPManager *get_manager();
+
+ int init_request(rgw_http_req_data *req_data);
+
+ // Default data hooks: accept everything and do nothing.
+ virtual int receive_header(void *ptr, size_t len) {
+ return 0;
+ }
+ virtual int receive_data(void *ptr, size_t len, bool *pause) {
+ return 0;
+ }
+
+ virtual int send_data(void *ptr, size_t len, bool *pause=nullptr) {
+ return 0;
+ }
+
+ /* Callbacks for libcurl. */
+ static size_t receive_http_header(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *_info);
+
+ static size_t receive_http_data(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *_info);
+
+ static size_t send_http_data(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *_info);
+
+ Mutex& get_req_lock();
+
+ /* needs to be called under req_lock() */
+ void _set_write_paused(bool pause);
+ void _set_read_paused(bool pause);
+public:
+ static const long HTTP_STATUS_NOSTATUS = 0;
+ static const long HTTP_STATUS_UNAUTHORIZED = 401;
+ static const long HTTP_STATUS_NOTFOUND = 404;
+
+ // bit values for the io_type argument of get_io_id()
+ static constexpr int HTTPCLIENT_IO_READ = 0x1;
+ static constexpr int HTTPCLIENT_IO_WRITE = 0x2;
+ static constexpr int HTTPCLIENT_IO_CONTROL = 0x4;
+
+ virtual ~RGWHTTPClient();
+ explicit RGWHTTPClient(CephContext *cct,
+ const string& _method,
+ const string& _url)
+ : has_send_len(false),
+ http_status(HTTP_STATUS_NOSTATUS),
+ req_data(nullptr),
+ verify_ssl(cct->_conf->rgw_verify_ssl),
+ cct(cct),
+ method(_method),
+ url(_url) {
+ }
+
+ // appends a header; no de-duplication is done
+ void append_header(const string& name, const string& val) {
+ headers.push_back(pair<string, string>(name, val));
+ }
+
+ // records the body length and marks it as known
+ void set_send_length(size_t len) {
+ send_len = len;
+ has_send_len = true;
+ }
+
+ void set_send_data_hint(bool hint) {
+ send_data_hint = hint;
+ }
+
+ long get_http_status() const {
+ return http_status;
+ }
+
+ void set_http_status(long _http_status) {
+ http_status = _http_status;
+ }
+
+ void set_verify_ssl(bool flag) {
+ verify_ssl = flag;
+ }
+
+ int process(optional_yield y=null_yield);
+
+ int wait(optional_yield y=null_yield);
+ void cancel();
+ bool is_done();
+
+ rgw_http_req_data *get_req_data() { return req_data; }
+
+ string to_str();
+
+ int get_req_retcode();
+
+ void set_url(const string& _url) {
+ url = _url;
+ }
+
+ void set_method(const string& _method) {
+ method = _method;
+ }
+
+ void set_io_user_info(void *_user_info) override {
+ user_info = _user_info;
+ }
+
+ void *get_io_user_info() override {
+ return user_info;
+ }
+};
+
+
+// HTTP client that keeps response headers in `found_headers`. Both the set of
+// relevant header names and the result map compare names with ltstr_nocase.
+// receive_header() is implemented outside this header.
+class RGWHTTPHeadersCollector : public RGWHTTPClient {
+public:
+ typedef std::string header_name_t;
+ typedef std::string header_value_t;
+ typedef std::set<header_name_t, ltstr_nocase> header_spec_t;
+
+ RGWHTTPHeadersCollector(CephContext * const cct,
+ const string& method,
+ const string& url,
+ const header_spec_t &relevant_headers)
+ : RGWHTTPClient(cct, method, url),
+ relevant_headers(relevant_headers) {
+ }
+
+ // returns a copy of the collected headers
+ std::map<header_name_t, header_value_t, ltstr_nocase> get_headers() const {
+ return found_headers;
+ }
+
+ /* Throws std::out_of_range */
+ const header_value_t& get_header_value(const header_name_t& name) const {
+ return found_headers.at(name);
+ }
+
+protected:
+ int receive_header(void *ptr, size_t len) override;
+
+private:
+ const std::set<header_name_t, ltstr_nocase> relevant_headers;
+ std::map<header_name_t, header_value_t, ltstr_nocase> found_headers;
+};
+
+
+// Headers collector that also appends the response body to a caller-provided
+// bufferlist and carries a request body set with set_post_data(); send_data()
+// is implemented outside this header. `read_bl` is dereferenced without a
+// null check in receive_data().
+class RGWHTTPTransceiver : public RGWHTTPHeadersCollector {
+ bufferlist * const read_bl;
+ std::string post_data;
+ size_t post_data_index;
+
+public:
+ RGWHTTPTransceiver(CephContext * const cct,
+ const string& method,
+ const string& url,
+ bufferlist * const read_bl,
+ const header_spec_t intercept_headers = {})
+ : RGWHTTPHeadersCollector(cct, method, url, intercept_headers),
+ read_bl(read_bl),
+ post_data_index(0) {
+ }
+
+ // same as above, but overrides the SSL verification flag of the base class
+ RGWHTTPTransceiver(CephContext * const cct,
+ const string& method,
+ const string& url,
+ bufferlist * const read_bl,
+ const bool verify_ssl,
+ const header_spec_t intercept_headers = {})
+ : RGWHTTPHeadersCollector(cct, method, url, intercept_headers),
+ read_bl(read_bl),
+ post_data_index(0) {
+ set_verify_ssl(verify_ssl);
+ }
+
+ void set_post_data(const std::string& _post_data) {
+ this->post_data = _post_data;
+ }
+
+protected:
+ int send_data(void* ptr, size_t len, bool *pause=nullptr) override;
+
+ // appends the received chunk to read_bl; never requests a pause
+ int receive_data(void *ptr, size_t len, bool *pause) override {
+ read_bl->append((char *)ptr, len);
+ return 0;
+ }
+};
+
+typedef RGWHTTPTransceiver RGWPostHTTPData;
+
+
+class RGWCompletionManager;
+
+// State changes accepted by RGWHTTPManager::set_request_state(): pause or
+// resume the write (send) or read (receive) side of a transfer. SET_NOP is
+// not handled by set_request_state() and ends in its default (-EIO) branch.
+enum RGWHTTPRequestSetState {
+ SET_NOP = 0,
+ SET_WRITE_PAUSED = 1,
+ SET_WRITE_RESUME = 2,
+ SET_READ_PAUSED = 3,
+ SET_READ_RESUME = 4,
+};
+
+// Drives HTTP requests through a curl multi handle. Without start() requests
+// are linked to the multi handle directly by add_request(); after start() a
+// dedicated thread links, unlinks and completes them, and callers wake it
+// through a pipe (signal_thread()).
+class RGWHTTPManager {
+ // a pending pause/resume change: target request plus CURLPAUSE_* bitmask
+ struct set_state {
+ rgw_http_req_data *req;
+ int bitmask;
+
+ set_state(rgw_http_req_data *_req, int _bitmask) : req(_req), bitmask(_bitmask) {}
+ };
+ CephContext *cct;
+ RGWCompletionManager *completion_mgr;
+ // CURLM* stored as void*
+ void *multi_handle;
+ bool is_started;
+ // set by stop(); polled by the requests thread loop
+ std::atomic<unsigned> going_down { 0 };
+ // makes stop() a no-op after its first call
+ std::atomic<unsigned> is_stopped { 0 };
+
+ RWLock reqs_lock;
+ // registered requests, keyed by request id
+ map<uint64_t, rgw_http_req_data *> reqs;
+ // requests queued by unregister_request() for unlinking by the thread
+ list<rgw_http_req_data *> unregistered_reqs;
+ // state changes queued by set_request_state()
+ list<set_state> reqs_change_state;
+ // NOTE(review): not referenced by the code visible in rgw_http_client.cc
+ map<uint64_t, rgw_http_req_data *> complete_reqs;
+ // id that the next registered request will receive
+ int64_t num_reqs;
+ // id from which manage_pending_requests() resumes linking
+ int64_t max_threaded_req;
+ // [0] read end polled by the thread, [1] write end used by signal_thread()
+ int thread_pipe[2];
+
+ void register_request(rgw_http_req_data *req_data);
+ void complete_request(rgw_http_req_data *req_data);
+ void _complete_request(rgw_http_req_data *req_data);
+ bool unregister_request(rgw_http_req_data *req_data);
+ void _unlink_request(rgw_http_req_data *req_data);
+ void unlink_request(rgw_http_req_data *req_data);
+ void finish_request(rgw_http_req_data *req_data, int r, long http_status = -1);
+ void _finish_request(rgw_http_req_data *req_data, int r);
+ void _set_req_state(set_state& ss);
+ int link_request(rgw_http_req_data *req_data);
+
+ void manage_pending_requests();
+
+ // thread wrapper whose entry() runs manager->reqs_thread_entry()
+ class ReqsThread : public Thread {
+ RGWHTTPManager *manager;
+
+ public:
+ explicit ReqsThread(RGWHTTPManager *_m) : manager(_m) {}
+ void *entry() override;
+ };
+
+ ReqsThread *reqs_thread;
+
+ void *reqs_thread_entry();
+
+ int signal_thread();
+
+public:
+ RGWHTTPManager(CephContext *_cct, RGWCompletionManager *completion_mgr = NULL);
+ ~RGWHTTPManager();
+
+ int start();
+ void stop();
+
+ int add_request(RGWHTTPClient *client);
+ int remove_request(RGWHTTPClient *client);
+ int set_request_state(RGWHTTPClient *client, RGWHTTPRequestSetState state);
+};
+
+// Static helpers that run a client through the process-wide HTTP manager:
+// send() only submits the request, process() submits it and waits.
+class RGWHTTP
+{
+public:
+ static int send(RGWHTTPClient *req);
+ static int process(RGWHTTPClient *req, optional_yield y=null_yield);
+};
+#endif
diff --git a/src/rgw/rgw_http_client_curl.cc b/src/rgw/rgw_http_client_curl.cc
new file mode 100644
index 00000000..34681348
--- /dev/null
+++ b/src/rgw/rgw_http_client_curl.cc
@@ -0,0 +1,122 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_http_client_curl.h"
+#include <mutex>
+#include <vector>
+#include <curl/curl.h>
+
+#include "rgw_common.h"
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+#ifdef WITH_CURL_OPENSSL
+#include <openssl/crypto.h>
+#endif
+
+#if defined(WITH_CURL_OPENSSL) && OPENSSL_API_COMPAT < 0x10100000L
+namespace openssl {
+
+// Fixed-size table of mutexes backing the OpenSSL locking callback.
+// An out-of-range lock id is logged and otherwise ignored.
+class RGWSSLSetup
+{
+  std::vector<std::mutex> locks;
+
+public:
+  explicit RGWSSLSetup(int n) : locks(n) {}
+
+  void set_lock(int id) {
+    try {
+      std::mutex& m = locks.at(id);
+      m.lock();
+    } catch (const std::out_of_range&) {
+      dout(0) << __func__ << " failed to set locks" << dendl;
+    }
+  }
+
+  void clear_lock(int id) {
+    try {
+      std::mutex& m = locks.at(id);
+      m.unlock();
+    } catch (const std::out_of_range&) {
+      dout(0) << __func__ << " failed to unlock" << dendl;
+    }
+  }
+};
+
+
+// Locking callback handed to OpenSSL: locks or unlocks mutex `id` depending on
+// whether CRYPTO_LOCK is set in `mode`. The table is created on first use,
+// sized by CRYPTO_num_locks().
+void rgw_ssl_locking_callback(int mode, int id, const char *file, int line)
+{
+  static RGWSSLSetup locks(CRYPTO_num_locks());
+  const bool acquire = (mode & CRYPTO_LOCK) != 0;
+  if (acquire) {
+    locks.set_lock(id);
+  } else {
+    locks.clear_lock(id);
+  }
+}
+
+// Thread id callback handed to OpenSSL.
+unsigned long rgw_ssl_thread_id_callback() {
+  return (unsigned long)pthread_self();
+}
+
+// Registers the thread-id and locking callbacks with OpenSSL.
+void init_ssl() {
+  CRYPTO_set_id_callback((unsigned long (*) ()) rgw_ssl_thread_id_callback);
+  CRYPTO_set_locking_callback(rgw_ssl_locking_callback);
+}
+
+} /* namespace openssl */
+#endif // WITH_CURL_OPENSSL
+
+
+namespace rgw {
+namespace curl {
+
+// Warns at startup when the build lacks curl_multi_wait() support.
+static void check_curl()
+{
+#ifndef HAVE_CURL_MULTI_WAIT
+ derr << "WARNING: libcurl doesn't support curl_multi_wait()" << dendl;
+ derr << "WARNING: cross zone / region transfer performance may be affected" << dendl;
+#endif
+}
+
+#if defined(WITH_CURL_OPENSSL) && OPENSSL_API_COMPAT < 0x10100000L
+// Installs the OpenSSL locking callbacks defined in namespace ::openssl.
+void init_ssl() {
+ ::openssl::init_ssl();
+}
+
+// Returns true if a "civetweb" or "beast" frontend in `m` has a non-empty
+// ssl_certificate setting; in that case CURL_GLOBAL_SSL is cleared from
+// curl_global_flags. Returns false when `m` is empty or no such frontend
+// exists.
+bool fe_inits_ssl(boost::optional <const fe_map_t&> m, long& curl_global_flags){
+ if (m) {
+ for (const auto& kv: *m){
+ if (kv.first == "civetweb" || kv.first == "beast"){
+ std::string cert;
+ kv.second->get_val("ssl_certificate","", &cert);
+ if (!cert.empty()){
+ /* TODO this flag is no op for curl > 7.57 */
+ curl_global_flags &= ~CURL_GLOBAL_SSL;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+#endif // WITH_CURL_OPENSSL
+
+// guards the single curl_global_init() call made by setup_curl()
+std::once_flag curl_init_flag;
+
+// Process-wide curl setup: runs curl_global_init() once via curl_init_flag
+// and then sets up the saved curl handles. With the legacy-OpenSSL build
+// condition, the SSL locking callbacks are installed unless fe_inits_ssl()
+// returns true.
+void setup_curl(boost::optional<const fe_map_t&> m) {
+ check_curl();
+
+ long curl_global_flags = CURL_GLOBAL_ALL;
+
+ #if defined(WITH_CURL_OPENSSL) && OPENSSL_API_COMPAT < 0x10100000L
+ if (!fe_inits_ssl(m, curl_global_flags))
+ init_ssl();
+ #endif
+
+ std::call_once(curl_init_flag, curl_global_init, curl_global_flags);
+ rgw_setup_saved_curl_handles();
+}
+
+// Releases the saved curl handles and then libcurl's global state.
+void cleanup_curl() {
+ rgw_release_all_curl_handles();
+ curl_global_cleanup();
+}
+
+} /* namespace curl */
+} /* namespace rgw */
diff --git a/src/rgw/rgw_http_client_curl.h b/src/rgw/rgw_http_client_curl.h
new file mode 100644
index 00000000..d9080f44
--- /dev/null
+++ b/src/rgw/rgw_http_client_curl.h
@@ -0,0 +1,32 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 SUSE Linux GmBH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_HTTP_CLIENT_CURL_H
+#define RGW_HTTP_CLIENT_CURL_H
+
+#include <map>
+#include <boost/optional.hpp>
+#include "rgw_frontend.h"
+
+namespace rgw {
+namespace curl {
+// frontend configurations keyed by a string (several entries may share a key)
+using fe_map_t = std::multimap <std::string, RGWFrontendConfig *>;
+
+// global libcurl setup / teardown; `m` optionally passes the frontend configs
+void setup_curl(boost::optional<const fe_map_t&> m);
+void cleanup_curl();
+}
+}
+
+#endif
diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h
new file mode 100644
index 00000000..22113448
--- /dev/null
+++ b/src/rgw/rgw_http_errors.h
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_HTTP_ERRORS_H_
+#define RGW_HTTP_ERRORS_H_
+
+#include "rgw_common.h"
+
+// Error table type: int key mapped to an (int, C string) pair. The meaning of
+// the fields is not visible in this header.
+typedef const std::map<int,const std::pair<int, const char*>> rgw_http_errors;
+
+// one table per API flavor (S3, Swift, STS, IAM); defined elsewhere
+extern rgw_http_errors rgw_http_s3_errors;
+
+extern rgw_http_errors rgw_http_swift_errors;
+
+extern rgw_http_errors rgw_http_sts_errors;
+
+extern rgw_http_errors rgw_http_iam_errors;
+
+// Maps an HTTP status code to 0 (any 2xx) or a negative error code; statuses
+// without a dedicated mapping become -EIO.
+static inline int rgw_http_error_to_errno(int http_err)
+{
+  const bool is_success = (http_err >= 200 && http_err <= 299);
+  if (is_success) {
+    return 0;
+  }
+
+  int err;
+  switch (http_err) {
+    case 304: err = -ERR_NOT_MODIFIED; break;
+    case 400: err = -EINVAL;           break;
+    case 401: err = -EPERM;            break;
+    case 403: err = -EACCES;           break;
+    case 404: err = -ENOENT;           break;
+    case 409: err = -ENOTEMPTY;        break;
+    case 503: err = -EBUSY;            break;
+    default:  err = -EIO;              break;
+  }
+  return err;
+}
+
+
+#endif
diff --git a/src/rgw/rgw_iam_policy.cc b/src/rgw/rgw_iam_policy.cc
new file mode 100644
index 00000000..53573993
--- /dev/null
+++ b/src/rgw/rgw_iam_policy.cc
@@ -0,0 +1,1432 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#include <cstring>
+#include <iostream>
+#include <regex>
+#include <sstream>
+#include <stack>
+#include <utility>
+
+#include <experimental/iterator>
+
+#include "rapidjson/reader.h"
+
+#include "rgw_auth.h"
+#include <arpa/inet.h>
+#include "rgw_iam_policy.h"
+
+namespace {
+// File-local log subsystem id, set to the rgw subsystem.
+// NOTE(review): presumably picked up by the ldout() logging macros used
+// in this file -- confirm against the dout macro definitions.
+constexpr int dout_subsys = ceph_subsys_rgw;
+}
+
+using std::bitset;
+using std::find;
+using std::int64_t;
+using std::move;
+using std::pair;
+using std::size_t;
+using std::string;
+using std::stringstream;
+using std::ostream;
+using std::uint16_t;
+using std::uint64_t;
+using std::unordered_map;
+
+using boost::container::flat_set;
+using std::regex;
+using std::regex_constants::ECMAScript;
+using std::regex_constants::optimize;
+using std::regex_match;
+using std::smatch;
+
+using rapidjson::BaseReaderHandler;
+using rapidjson::UTF8;
+using rapidjson::SizeType;
+using rapidjson::Reader;
+using rapidjson::kParseCommentsFlag;
+using rapidjson::kParseNumbersAsStringsFlag;
+using rapidjson::StringStream;
+using rapidjson::ParseResult;
+
+using rgw::auth::Principal;
+
+namespace rgw {
+namespace IAM {
+#include "rgw_iam_policy_keywords.frag.cc"
+
+// One row of the action table: the textual action name as written in a
+// policy document and the index of the bit that represents it in an
+// action bitset.
+struct actpair {
+ const char* name;
+ const uint64_t bit;
+};
+
+
+
+// Table of every action name the policy parser recognises, paired with
+// its bit index. ParseState::do_string() scans it linearly and matches
+// each name against the (possibly wildcarded) string from the policy, so
+// the order of the rows carries no meaning.
+static const actpair actpairs[] =
+{{ "s3:AbortMultipartUpload", s3AbortMultipartUpload },
+ { "s3:CreateBucket", s3CreateBucket },
+ { "s3:DeleteBucketPolicy", s3DeleteBucketPolicy },
+ { "s3:DeleteBucket", s3DeleteBucket },
+ { "s3:DeleteBucketWebsite", s3DeleteBucketWebsite },
+ { "s3:DeleteObject", s3DeleteObject },
+ { "s3:DeleteObjectVersion", s3DeleteObjectVersion },
+ { "s3:DeleteObjectTagging", s3DeleteObjectTagging },
+ { "s3:DeleteObjectVersionTagging", s3DeleteObjectVersionTagging },
+ { "s3:DeleteReplicationConfiguration", s3DeleteReplicationConfiguration },
+ { "s3:GetAccelerateConfiguration", s3GetAccelerateConfiguration },
+ { "s3:GetBucketAcl", s3GetBucketAcl },
+ { "s3:GetBucketCORS", s3GetBucketCORS },
+ { "s3:GetBucketLocation", s3GetBucketLocation },
+ { "s3:GetBucketLogging", s3GetBucketLogging },
+ { "s3:GetBucketNotification", s3GetBucketNotification },
+ { "s3:GetBucketPolicy", s3GetBucketPolicy },
+ { "s3:GetBucketRequestPayment", s3GetBucketRequestPayment },
+ { "s3:GetBucketTagging", s3GetBucketTagging },
+ { "s3:GetBucketVersioning", s3GetBucketVersioning },
+ { "s3:GetBucketWebsite", s3GetBucketWebsite },
+ { "s3:GetLifecycleConfiguration", s3GetLifecycleConfiguration },
+ { "s3:GetBucketObjectLockConfiguration", s3GetBucketObjectLockConfiguration },
+ { "s3:GetObjectAcl", s3GetObjectAcl },
+ { "s3:GetObject", s3GetObject },
+ { "s3:GetObjectTorrent", s3GetObjectTorrent },
+ { "s3:GetObjectVersionAcl", s3GetObjectVersionAcl },
+ { "s3:GetObjectVersion", s3GetObjectVersion },
+ { "s3:GetObjectVersionTorrent", s3GetObjectVersionTorrent },
+ { "s3:GetObjectTagging", s3GetObjectTagging },
+ { "s3:GetObjectVersionTagging", s3GetObjectVersionTagging},
+ { "s3:GetObjectRetention", s3GetObjectRetention},
+ { "s3:GetObjectLegalHold", s3GetObjectLegalHold},
+ { "s3:GetReplicationConfiguration", s3GetReplicationConfiguration },
+ { "s3:ListAllMyBuckets", s3ListAllMyBuckets },
+ { "s3:ListBucketMultipartUploads", s3ListBucketMultipartUploads },
+ { "s3:ListBucket", s3ListBucket },
+ { "s3:ListBucketVersions", s3ListBucketVersions },
+ { "s3:ListMultipartUploadParts", s3ListMultipartUploadParts },
+ { "s3:PutAccelerateConfiguration", s3PutAccelerateConfiguration },
+ { "s3:PutBucketAcl", s3PutBucketAcl },
+ { "s3:PutBucketCORS", s3PutBucketCORS },
+ { "s3:PutBucketLogging", s3PutBucketLogging },
+ { "s3:PutBucketNotification", s3PutBucketNotification },
+ { "s3:PutBucketPolicy", s3PutBucketPolicy },
+ { "s3:PutBucketRequestPayment", s3PutBucketRequestPayment },
+ { "s3:PutBucketTagging", s3PutBucketTagging },
+ { "s3:PutBucketVersioning", s3PutBucketVersioning },
+ { "s3:PutBucketWebsite", s3PutBucketWebsite },
+ { "s3:PutLifecycleConfiguration", s3PutLifecycleConfiguration },
+ { "s3:PutBucketObjectLockConfiguration", s3PutBucketObjectLockConfiguration },
+ { "s3:PutObjectAcl", s3PutObjectAcl },
+ { "s3:PutObject", s3PutObject },
+ { "s3:PutObjectVersionAcl", s3PutObjectVersionAcl },
+ { "s3:PutObjectTagging", s3PutObjectTagging },
+ { "s3:PutObjectVersionTagging", s3PutObjectVersionTagging },
+ { "s3:PutObjectRetention", s3PutObjectRetention },
+ { "s3:PutObjectLegalHold", s3PutObjectLegalHold },
+ { "s3:BypassGovernanceRetention", s3BypassGovernanceRetention },
+ { "s3:PutReplicationConfiguration", s3PutReplicationConfiguration },
+ { "s3:RestoreObject", s3RestoreObject },
+ { "iam:PutUserPolicy", iamPutUserPolicy },
+ { "iam:GetUserPolicy", iamGetUserPolicy },
+ { "iam:DeleteUserPolicy", iamDeleteUserPolicy },
+ { "iam:ListUserPolicies", iamListUserPolicies },
+ { "iam:CreateRole", iamCreateRole},
+ { "iam:DeleteRole", iamDeleteRole},
+ { "iam:GetRole", iamGetRole},
+ { "iam:ModifyRole", iamModifyRole},
+ { "iam:ListRoles", iamListRoles},
+ { "iam:PutRolePolicy", iamPutRolePolicy},
+ { "iam:GetRolePolicy", iamGetRolePolicy},
+ { "iam:ListRolePolicies", iamListRolePolicies},
+ { "iam:DeleteRolePolicy", iamDeleteRolePolicy},
+ { "sts:AssumeRole", stsAssumeRole},
+ { "sts:AssumeRoleWithWebIdentity", stsAssumeRoleWithWebIdentity},
+ { "sts:GetSessionToken", stsGetSessionToken},
+};
+
+struct PolicyParser;
+
+// Synthetic keyword for the root frame: PolicyParser::StartObject()
+// pushes it when the parse stack is still empty.
+const Keyword top[1]{"<Top>", TokenKind::pseudo, TokenID::Top, 0, false,
+ false};
+// Synthetic keyword for a condition key: ParseState::key() pushes it when
+// a key seen under a condition operator is not a known token.
+// NOTE(review): the trailing `true, false` presumably mean "arrayable,
+// not objectable" -- confirm against the Keyword declaration.
+const Keyword cond_key[1]{"<Condition Key>", TokenKind::cond_key,
+ TokenID::CondKey, 0, true, false};
+
+// One frame of the policy parser's stack. It records which keyword is
+// currently being filled in (`w`), the parser that owns the stack (`pp`),
+// and whether we are inside that keyword's array or object form.
+struct ParseState {
+  PolicyParser* pp;
+  const Keyword* w;
+
+  bool arraying = false;
+  bool objecting = false;
+  bool cond_ifexists = false;
+
+  void reset();
+
+  ParseState(PolicyParser* parser, const Keyword* keyword)
+    : pp(parser), w(keyword) {}
+
+  bool obj_start();
+
+  bool obj_end();
+
+  // An array may only be opened once, and only for keywords that allow it.
+  bool array_start() {
+    if (!w->arrayable || arraying) {
+      return false;
+    }
+    arraying = true;
+    return true;
+  }
+
+  bool array_end();
+
+  bool key(const char* s, size_t l);
+  bool do_string(CephContext* cct, const char* s, size_t l);
+  bool number(const char* str, size_t l);
+};
+
+// SAX-style handler handed to the rapidjson Reader. It derives from
+// BaseReaderHandler parameterised on itself (the Curiously Recurring
+// Template Pattern) and forwards every JSON event to the frame on top of
+// the parse stack `s`.
+//
+// `seen` records which keywords have been encountered so far (one bit per
+// keyword, see dex()). `v` mirrors the subset of those bits that belong to
+// a single statement, so ParseState::reset() can clear them in one go.
+struct PolicyParser : public BaseReaderHandler<UTF8<>, PolicyParser> {
+  keyword_hash tokens;
+  std::vector<ParseState> s;
+  CephContext* cct;
+  const string& tenant;
+  Policy& policy;
+  uint32_t v = 0;
+
+  uint32_t seen = 0;
+
+  // Map a keyword to its bit in `seen`/`v`. Aborts for any keyword that
+  // has no bit assigned.
+  uint32_t dex(TokenID in) const {
+    switch (in) {
+    case TokenID::Version:
+      return 0x1;
+    case TokenID::Id:
+      return 0x2;
+    case TokenID::Statement:
+      return 0x4;
+    case TokenID::Sid:
+      return 0x8;
+    case TokenID::Effect:
+      return 0x10;
+    case TokenID::Principal:
+      return 0x20;
+    case TokenID::NotPrincipal:
+      return 0x40;
+    case TokenID::Action:
+      return 0x80;
+    case TokenID::NotAction:
+      return 0x100;
+    case TokenID::Resource:
+      return 0x200;
+    case TokenID::NotResource:
+      return 0x400;
+    case TokenID::Condition:
+      return 0x800;
+    case TokenID::AWS:
+      return 0x1000;
+    case TokenID::Federated:
+      return 0x2000;
+    case TokenID::Service:
+      return 0x4000;
+    case TokenID::CanonicalUser:
+      return 0x8000;
+    default:
+      ceph_abort();
+    }
+  }
+
+  // Union of the bits that are tracked in `v` as well as in `seen`.
+  // Kept in one place so set() and reset() cannot disagree about it.
+  uint32_t statement_mask() const {
+    return dex(TokenID::Sid) | dex(TokenID::Effect) |
+           dex(TokenID::Principal) | dex(TokenID::NotPrincipal) |
+           dex(TokenID::Action) | dex(TokenID::NotAction) |
+           dex(TokenID::Resource) | dex(TokenID::NotResource) |
+           dex(TokenID::Condition) | dex(TokenID::AWS) |
+           dex(TokenID::Federated) | dex(TokenID::Service) |
+           dex(TokenID::CanonicalUser);
+  }
+
+  // Has this keyword already been seen?
+  bool test(TokenID in) {
+    return seen & dex(in);
+  }
+
+  // Mark a keyword as seen.
+  void set(TokenID in) {
+    const uint32_t bit = dex(in);
+    seen |= bit;
+    if (bit & statement_mask()) {
+      v |= bit;
+    }
+  }
+  void set(std::initializer_list<TokenID> l) {
+    for (auto in : l) {
+      set(in);
+    }
+  }
+
+  // Forget that a keyword has been seen.
+  void reset(TokenID in) {
+    const uint32_t bit = dex(in);
+    seen &= ~bit;
+    if (bit & statement_mask()) {
+      v &= ~bit;
+    }
+  }
+  void reset(std::initializer_list<TokenID> l) {
+    for (auto in : l) {
+      reset(in);
+    }
+  }
+
+  // Clear every bit of `v` from `seen` and zero `v` itself. The parameter
+  // deliberately shadows the member: callers pass the member by reference.
+  void reset(uint32_t& v) {
+    seen &= ~v;
+    v = 0;
+  }
+
+  PolicyParser(CephContext* cct, const string& tenant, Policy& policy)
+    : cct(cct), tenant(tenant), policy(policy) {}
+  PolicyParser(const PolicyParser& policy) = delete;
+
+  // The very first '{' creates the root frame; later ones are delegated.
+  bool StartObject() {
+    if (s.empty()) {
+      s.push_back({this, top});
+      s.back().objecting = true;
+      return true;
+    }
+
+    return s.back().obj_start();
+  }
+  bool EndObject(SizeType memberCount) {
+    if (s.empty()) {
+      return false;
+    }
+    return s.back().obj_end();
+  }
+  bool Key(const char* str, SizeType length, bool copy) {
+    if (s.empty()) {
+      return false;
+    }
+    return s.back().key(str, length);
+  }
+
+  bool String(const char* str, SizeType length, bool copy) {
+    if (s.empty()) {
+      return false;
+    }
+    return s.back().do_string(cct, str, length);
+  }
+  bool RawNumber(const char* str, SizeType length, bool copy) {
+    if (s.empty()) {
+      return false;
+    }
+
+    return s.back().number(str, length);
+  }
+  bool StartArray() {
+    if (s.empty()) {
+      return false;
+    }
+
+    return s.back().array_start();
+  }
+  bool EndArray(SizeType) {
+    if (s.empty()) {
+      return false;
+    }
+
+    return s.back().array_end();
+  }
+
+  // Every JSON event not handled above is rejected.
+  bool Default() {
+    return false;
+  }
+};
+
+
+// I really despise this misfeature of C++.
+//
+// Close the object belonging to this frame. Outside an array the frame is
+// finished and is popped; inside an array the frame stays for the next
+// element and only the per-statement bookkeeping is reset.
+bool ParseState::obj_end() {
+  if (!objecting) {
+    return false;
+  }
+
+  objecting = false;
+  if (arraying) {
+    reset();
+  } else {
+    // Destroys *this; nothing below may touch members.
+    pp->s.pop_back();
+  }
+  return true;
+}
+
+// Handle an object key seen while this frame is on top of the stack.
+//
+// Returns true when the key is acceptable here, in which case a new frame
+// for it has been pushed. Returns false to make the parse fail.
+//
+// Every successful path pushes onto pp->s, which is the vector that holds
+// *this. A push may reallocate and leave `this` dangling, so everything
+// needed from the frame is copied into locals first and no member is read
+// after a push.
+bool ParseState::key(const char* s, size_t l) {
+  PolicyParser* const parser = pp;
+  const Keyword* const cur = w;   // points at keyword storage, not the stack
+  const bool frame_ifexists = cond_ifexists;
+
+  // Directly under "Condition" the operator may carry an "IfExists"
+  // suffix; strip it before the keyword lookup and remember it.
+  auto token_len = l;
+  bool ifexists = false;
+  if (cur->id == TokenID::Condition && cur->kind == TokenKind::statement) {
+    static constexpr char IfExists[] = "IfExists";
+    if (boost::algorithm::ends_with(boost::string_view{s, l}, IfExists)) {
+      ifexists = true;
+      token_len -= sizeof(IfExists)-1;
+    }
+  }
+  auto k = parser->tokens.lookup(s, token_len);
+
+  if (!k) {
+    // Unknown keys are only legal as condition keys, i.e. directly under
+    // a condition operator.
+    if (cur->kind != TokenKind::cond_op) {
+      return false;
+    }
+    const auto id = cur->id;
+    auto& t = parser->policy.statements.back();
+    parser->s.emplace_back(parser, cond_key);
+    t.conditions.emplace_back(id, s, l, frame_ifexists);
+    return true;
+  }
+
+  // Does the keyword belong directly under the keyword of this frame?
+  const bool belongs_here =
+    // Top
+    ((cur->id == TokenID::Top) && (k->kind == TokenKind::top)) ||
+    // Statement
+    ((cur->id == TokenID::Statement) && (k->kind == TokenKind::statement)) ||
+    // Principal
+    ((cur->id == TokenID::Principal || cur->id == TokenID::NotPrincipal) &&
+     (k->kind == TokenKind::princ_type));
+
+  // Such a keyword may appear only once; record it and push a frame.
+  if (belongs_here && !parser->test(k->id)) {
+    parser->set(k->id);
+    parser->s.emplace_back(parser, k);
+    return true;
+  }
+
+  // Condition operators are not subject to the "only once" rule.
+  if ((cur->id == TokenID::Condition) && (k->kind == TokenKind::cond_op)) {
+    parser->s.emplace_back(parser, k);
+    parser->s.back().cond_ifexists = ifexists;
+    return true;
+  }
+  return false;
+}
+
+// I should just rewrite a few helper functions to use iterators,
+// which will make all of this ever so much nicer.
+// Turn one principal string from a policy into a Principal.
+//
+// `t` is the principal type keyword the string appeared under. Returns
+// boost::none, after logging, when the string cannot be interpreted;
+// CanonicalUser principals are currently always discarded this way.
+static boost::optional<Principal> parse_principal(CephContext* cct, TokenID t,
+                                                  string&& s) {
+  // Wildcard!
+  if ((t == TokenID::AWS) && (s == "*")) {
+    return Principal::wildcard();
+  }
+
+  // AWS and Federated ARNs
+  if (t == TokenID::AWS || t == TokenID::Federated) {
+    auto arn = ARN::parse(s);
+    if (!arn) {
+      // Not an ARN. A bare word without ':' or '/' is taken as a tenant.
+      // Since tenants are simply prefixes, there's no really good way to
+      // see if one exists or not, so hand it back and let the caller try
+      // to match against it.
+      const bool bare_word = std::none_of(s.begin(), s.end(),
+                                          [](const char& ch) {
+                                            return (ch == ':') || (ch == '/');
+                                          });
+      if (bare_word) {
+        return Principal::tenant(std::move(s));
+      }
+    } else {
+      if (arn->resource == "root") {
+        return Principal::tenant(std::move(arn->account));
+      }
+
+      // Split the resource into "<kind>/<name>".
+      static const char rx_str[] = "([^/]*)/(.*)";
+      static const regex rx(rx_str, sizeof(rx_str) - 1,
+                            std::regex_constants::ECMAScript |
+                            std::regex_constants::optimize);
+      smatch parts;
+      if (regex_match(arn->resource, parts, rx) && parts.size() == 3) {
+        if (parts[1] == "user") {
+          return Principal::user(std::move(arn->account),
+                                 parts[2]);
+        }
+
+        if (parts[1] == "role") {
+          return Principal::role(std::move(arn->account),
+                                 parts[2]);
+        }
+
+        if (parts[1] == "oidc-provider") {
+          return Principal::oidc_provider(std::move(parts[2]));
+        }
+      }
+    }
+  }
+
+  ldout(cct, 0) << "Supplied principal is discarded: " << s << dendl;
+  return boost::none;
+}
+
+// Handle a JSON string value for the keyword of this frame.
+//
+// The string is interpreted according to `w` (version, id, sid, effect,
+// principal, action, resource, condition value, ...) and stored in the
+// policy or in its most recent statement. Returns false to make the parse
+// fail: when the keyword does not accept a string, or when an action
+// string matches no known action. Unless this frame is collecting an
+// array, the frame is popped before returning.
+bool ParseState::do_string(CephContext* cct, const char* s, size_t l) {
+ // Some values (version, effect) are themselves keywords.
+ auto k = pp->tokens.lookup(s, l);
+ Policy& p = pp->policy;
+ bool is_action = false;
+ bool is_validaction = false;
+ // Most recent statement, or nullptr while none has been started yet.
+ Statement* t = p.statements.empty() ? nullptr : &(p.statements.back());
+
+ // Top level!
+ if ((w->id == TokenID::Version) && k &&
+ k->kind == TokenKind::version_key) {
+ p.version = static_cast<Version>(k->specific);
+ } else if (w->id == TokenID::Id) {
+ p.id = string(s, l);
+
+ // Statement
+
+ } else if (w->id == TokenID::Sid) {
+ t->sid.emplace(s, l);
+ } else if ((w->id == TokenID::Effect) && k &&
+ k->kind == TokenKind::effect_key) {
+ t->effect = static_cast<Effect>(k->specific);
+ // Note: only the first character is inspected for the wildcard forms.
+ } else if (w->id == TokenID::Principal && s && *s == '*') {
+ t->princ.emplace(Principal::wildcard());
+ } else if (w->id == TokenID::NotPrincipal && s && *s == '*') {
+ t->noprinc.emplace(Principal::wildcard());
+ } else if ((w->id == TokenID::Action) ||
+ (w->id == TokenID::NotAction)) {
+ is_action = true;
+ // A leading '*' selects every action.
+ if (*s == '*') {
+ is_validaction = true;
+ (w->id == TokenID::Action ?
+ t->action = allValue : t->notaction = allValue);
+ } else {
+ // Otherwise set the bit of every table entry the pattern matches.
+ // (This `p` shadows the Policy reference above.)
+ for (auto& p : actpairs) {
+ if (match_policy({s, l}, p.name, MATCH_POLICY_ACTION)) {
+ is_validaction = true;
+ (w->id == TokenID::Action ? t->action[p.bit] = 1 : t->notaction[p.bit] = 1);
+ }
+ // Once every action of a service is set, also set that service's
+ // "all" bit.
+ if ((t->action & s3AllValue) == s3AllValue) {
+ t->action[s3All] = 1;
+ }
+ if ((t->notaction & s3AllValue) == s3AllValue) {
+ t->notaction[s3All] = 1;
+ }
+ if ((t->action & iamAllValue) == iamAllValue) {
+ t->action[iamAll] = 1;
+ }
+ if ((t->notaction & iamAllValue) == iamAllValue) {
+ t->notaction[iamAll] = 1;
+ }
+ if ((t->action & stsAllValue) == stsAllValue) {
+ t->action[stsAll] = 1;
+ }
+ if ((t->notaction & stsAllValue) == stsAllValue) {
+ t->notaction[stsAll] = 1;
+ }
+ }
+ }
+ } else if (w->id == TokenID::Resource || w->id == TokenID::NotResource) {
+ // NOTE(review): the `true` argument presumably enables wildcards in
+ // the ARN -- confirm against ARN::parse.
+ auto a = ARN::parse({s, l}, true);
+ // You can't specify resources for someone ELSE'S account.
+ if (a && (a->account.empty() || a->account == pp->tenant ||
+ a->account == "*")) {
+ // An empty or wildcard account is pinned to the policy's tenant.
+ if (a->account.empty() || a->account == "*")
+ a->account = pp->tenant;
+ (w->id == TokenID::Resource ? t->resource : t->notresource)
+ .emplace(std::move(*a));
+ }
+ else
+ // A rejected resource is only logged; parsing continues.
+ ldout(cct, 0) << "Supplied resource is discarded: " << string(s, l)
+ << dendl;
+ } else if (w->kind == TokenKind::cond_key) {
+ // A value for the condition most recently added by key().
+ auto& t = pp->policy.statements.back();
+ t.conditions.back().vals.emplace_back(s, l);
+
+ // Principals
+
+ } else if (w->kind == TokenKind::princ_type) {
+ // The parent frame tells us whether this is Principal or NotPrincipal.
+ if (pp->s.size() <= 1) {
+ return false;
+ }
+ auto& pri = pp->s[pp->s.size() - 2].w->id == TokenID::Principal ?
+ t->princ : t->noprinc;
+
+
+ // Principals that cannot be parsed are dropped, not treated as errors.
+ if (auto o = parse_principal(pp->cct, w->id, string(s, l))) {
+ pri.emplace(std::move(*o));
+ }
+
+ // Failure
+
+ } else {
+ return false;
+ }
+
+ // A scalar value completes this frame. pop_back() destroys *this, so
+ // only locals may be used from here on.
+ if (!arraying) {
+ pp->s.pop_back();
+ }
+
+ // An action string that matched nothing makes the whole policy invalid.
+ if (is_action && !is_validaction){
+ return false;
+ }
+
+ return true;
+}
+
+// Handle a JSON number, delivered as its raw text. Numbers are accepted
+// only as condition values; anywhere else they fail the parse.
+bool ParseState::number(const char* s, size_t l) {
+  if (w->kind != TokenKind::cond_key) {
+    return false;
+  }
+
+  auto& stmt = pp->policy.statements.back();
+  stmt.conditions.back().vals.emplace_back(s, l);
+
+  // A scalar (non-array) value completes this frame.
+  if (!arraying) {
+    pp->s.pop_back();
+  }
+  return true;
+}
+
+// Clear the parser's per-statement "seen" bits (those tracked in
+// PolicyParser::v).
+void ParseState::reset() {
+  PolicyParser& parser = *pp;
+  parser.reset(parser.v);
+}
+
+// Open an object for this frame's keyword. Fails when the keyword does
+// not take an object or one is already open. Opening an object under
+// "Statement" starts a new, empty statement in the policy.
+bool ParseState::obj_start() {
+  if (!w->objectable || objecting) {
+    return false;
+  }
+
+  objecting = true;
+  if (w->id == TokenID::Statement) {
+    pp->policy.statements.emplace_back();
+  }
+  return true;
+}
+
+
+// Close the array of this frame and pop the frame. Fails when no array is
+// open or an object inside it has not been closed yet.
+bool ParseState::array_end() {
+  const bool closable = arraying && !objecting;
+  if (closable) {
+    pp->s.pop_back();
+  }
+  return closable;
+}
+
+// Print a masked address as "<address>/<prefix>". IPv6 addresses are
+// written as eight colon-separated hex groups, IPv4 addresses as four
+// dot-separated decimal octets, most significant part first.
+ostream& operator <<(ostream& m, const MaskedIP& ip) {
+  // Assemble `width` consecutive address bits, starting at bit `base`,
+  // into an integer.
+  const auto bits_at = [&ip](int base, int width) {
+    unsigned int value = 0;
+    for (int bit = width - 1; bit >= 0; --bit) {
+      value |= (ip.addr[base + bit] << bit);
+    }
+    return value;
+  };
+
+  if (ip.v6) {
+    for (int group = 7; group >= 0; --group) {
+      m << hex << bits_at(group * 16, 16);
+      if (group != 0) {
+        m << ":";
+      }
+    }
+  } else {
+    for (int octet = 3; octet >= 0; --octet) {
+      m << bits_at(octet * 8, 8);
+      if (octet != 0) {
+        m << ".";
+      }
+    }
+  }
+
+  // The prefix length is always decimal.
+  m << "/" << dec << ip.prefix;
+  return m;
+}
+
+// Evaluate this condition against the request environment.
+//
+// The condition's key is looked up in `env`. For the Null operator the
+// result is simply whether the key is absent. For every other operator a
+// missing key yields `ifexists`; otherwise the value found is compared
+// against `vals` using the helper that matches the operator. Operators
+// without an implementation here (e.g. the Arn* family) evaluate to false.
+bool Condition::eval(const Environment& env) const {
+ auto i = env.find(key);
+ // NOTE(review): `vals` is not consulted for Null, so a policy value of
+ // "false" is treated the same as "true" -- confirm this is intended.
+ if (op == TokenID::Null) {
+ return i == env.end() ? true : false;
+ }
+
+ if (i == env.end()) {
+ return ifexists;
+ }
+ const auto& s = i->second;
+
+ // orrible() and shortible() are defined outside this block.
+ // NOTE(review): presumably they return true when the comparison holds
+ // for at least one entry of `vals`, shortible() first converting both
+ // sides with the given as_* function -- confirm against the header.
+ switch (op) {
+ // String!
+ case TokenID::StringEquals:
+ return orrible(std::equal_to<std::string>(), s, vals);
+
+ case TokenID::StringNotEquals:
+ return orrible(std::not_fn(std::equal_to<std::string>()),
+ s, vals);
+
+ case TokenID::StringEqualsIgnoreCase:
+ return orrible(ci_equal_to(), s, vals);
+
+ case TokenID::StringNotEqualsIgnoreCase:
+ return orrible(std::not_fn(ci_equal_to()), s, vals);
+
+ case TokenID::StringLike:
+ return orrible(string_like(), s, vals);
+
+ case TokenID::StringNotLike:
+ return orrible(std::not_fn(string_like()), s, vals);
+
+ // Numeric
+ case TokenID::NumericEquals:
+ return shortible(std::equal_to<double>(), as_number, s, vals);
+
+ case TokenID::NumericNotEquals:
+ return shortible(std::not_fn(std::equal_to<double>()),
+ as_number, s, vals);
+
+
+ case TokenID::NumericLessThan:
+ return shortible(std::less<double>(), as_number, s, vals);
+
+
+ case TokenID::NumericLessThanEquals:
+ return shortible(std::less_equal<double>(), as_number, s, vals);
+
+ case TokenID::NumericGreaterThan:
+ return shortible(std::greater<double>(), as_number, s, vals);
+
+ case TokenID::NumericGreaterThanEquals:
+ return shortible(std::greater_equal<double>(), as_number, s, vals);
+
+ // Date!
+ case TokenID::DateEquals:
+ return shortible(std::equal_to<ceph::real_time>(), as_date, s, vals);
+
+ case TokenID::DateNotEquals:
+ return shortible(std::not_fn(std::equal_to<ceph::real_time>()),
+ as_date, s, vals);
+
+ case TokenID::DateLessThan:
+ return shortible(std::less<ceph::real_time>(), as_date, s, vals);
+
+
+ case TokenID::DateLessThanEquals:
+ return shortible(std::less_equal<ceph::real_time>(), as_date, s, vals);
+
+ case TokenID::DateGreaterThan:
+ return shortible(std::greater<ceph::real_time>(), as_date, s, vals);
+
+ case TokenID::DateGreaterThanEquals:
+ return shortible(std::greater_equal<ceph::real_time>(), as_date, s,
+ vals);
+
+ // Bool!
+ case TokenID::Bool:
+ return shortible(std::equal_to<bool>(), as_bool, s, vals);
+
+ // Binary!
+ case TokenID::BinaryEquals:
+ return shortible(std::equal_to<ceph::bufferlist>(), as_binary, s,
+ vals);
+
+ // IP Address!
+ case TokenID::IpAddress:
+ return shortible(std::equal_to<MaskedIP>(), as_network, s, vals);
+
+ // True only when the request address parses and equals none of the
+ // policy values; policy values that fail to parse are skipped.
+ case TokenID::NotIpAddress:
+ {
+ auto xc = as_network(s);
+ if (!xc) {
+ return false;
+ }
+
+ for (const string& d : vals) {
+ auto xd = as_network(d);
+ if (!xd) {
+ continue;
+ }
+
+ if (xc == xd) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+#if 0
+ // Amazon Resource Names! (Does S3 need this?)
+ TokenID::ArnEquals, TokenID::ArnNotEquals, TokenID::ArnLike,
+ TokenID::ArnNotLike,
+#endif
+
+ default:
+ return false;
+ }
+}
+
+// Parse "address" or "address/prefix" into a MaskedIP.
+//
+// A string containing ':' is treated as IPv6, anything else as IPv4.
+// Without an explicit prefix the full address length is used (128 or 32).
+// Returns boost::none when the string is empty, the prefix is not a plain
+// decimal number within range for the address family, or the address part
+// is rejected by inet_pton().
+boost::optional<MaskedIP> Condition::as_network(const string& s) {
+  MaskedIP m;
+  if (s.empty()) {
+    return boost::none;
+  }
+
+  m.v6 = (s.find(':') != string::npos);
+
+  const auto slash = s.find('/');
+  if (slash == string::npos) {
+    m.prefix = m.v6 ? 128 : 32;
+  } else {
+    const char* const digits = s.c_str() + slash + 1;
+    // strtoul() would accept an empty string (yielding 0) as well as
+    // leading whitespace and signs; insist on a leading digit instead.
+    if (*digits < '0' || *digits > '9') {
+      return boost::none;
+    }
+    char* end = nullptr;
+    const unsigned long prefix = strtoul(digits, &end, 10);
+    const unsigned long max_prefix = m.v6 ? 128 : 32;
+    // Range-check before narrowing so oversized values cannot wrap into
+    // the valid range.
+    if (*end != 0 || prefix > max_prefix) {
+      return boost::none;
+    }
+    m.prefix = static_cast<decltype(m.prefix)>(prefix);
+  }
+
+  // inet_pton() wants the bare address; cut the prefix off if present.
+  string t;
+  auto p = &s;
+
+  if (slash != string::npos) {
+    t.assign(s, 0, slash);
+    p = &t;
+  }
+
+  if (m.v6) {
+    struct in6_addr a;
+    if (inet_pton(AF_INET6, p->c_str(), static_cast<void*>(&a)) != 1) {
+      return boost::none;
+    }
+
+    // s6_addr is in network order: byte 15 is the least significant and
+    // lands in address bits 0..7, byte 0 in bits 120..127.
+    for (size_t i = 0; i < 16; ++i) {
+      m.addr |= Address(a.s6_addr[15 - i]) << (8 * i);
+    }
+  } else {
+    struct in_addr a;
+    if (inet_pton(AF_INET, p->c_str(), static_cast<void*>(&a)) != 1) {
+      return boost::none;
+    }
+
+    m.addr = ntohl(a.s_addr);
+  }
+
+  return m;
+}
+
+namespace {
+// Name of a condition operator as it is written in a policy document.
+// Tokens that are not condition operators yield
+// "InvalidConditionOperator".
+const char* condop_string(const TokenID t) {
+  switch (t) {
+  // String!
+  case TokenID::StringEquals:
+    return "StringEquals";
+
+  case TokenID::StringNotEquals:
+    return "StringNotEquals";
+
+  case TokenID::StringEqualsIgnoreCase:
+    return "StringEqualsIgnoreCase";
+
+  case TokenID::StringNotEqualsIgnoreCase:
+    return "StringNotEqualsIgnoreCase";
+
+  case TokenID::StringLike:
+    return "StringLike";
+
+  case TokenID::StringNotLike:
+    return "StringNotLike";
+
+  // Numeric!
+  case TokenID::NumericEquals:
+    return "NumericEquals";
+
+  case TokenID::NumericNotEquals:
+    return "NumericNotEquals";
+
+  case TokenID::NumericLessThan:
+    return "NumericLessThan";
+
+  case TokenID::NumericLessThanEquals:
+    return "NumericLessThanEquals";
+
+  case TokenID::NumericGreaterThan:
+    return "NumericGreaterThan";
+
+  case TokenID::NumericGreaterThanEquals:
+    return "NumericGreaterThanEquals";
+
+  // Date!
+  case TokenID::DateEquals:
+    return "DateEquals";
+
+  case TokenID::DateNotEquals:
+    return "DateNotEquals";
+
+  case TokenID::DateLessThan:
+    return "DateLessThan";
+
+  case TokenID::DateLessThanEquals:
+    return "DateLessThanEquals";
+
+  case TokenID::DateGreaterThan:
+    return "DateGreaterThan";
+
+  case TokenID::DateGreaterThanEquals:
+    return "DateGreaterThanEquals";
+
+  // Bool!
+  case TokenID::Bool:
+    return "Bool";
+
+  // Binary!
+  case TokenID::BinaryEquals:
+    return "BinaryEquals";
+
+  // IP Address!
+  case TokenID::IpAddress:
+    return "IpAddress";
+
+  case TokenID::NotIpAddress:
+    return "NotIpAddress";
+
+  // Amazon Resource Names!
+  case TokenID::ArnEquals:
+    return "ArnEquals";
+
+  case TokenID::ArnNotEquals:
+    return "ArnNotEquals";
+
+  case TokenID::ArnLike:
+    return "ArnLike";
+
+  case TokenID::ArnNotLike:
+    return "ArnNotLike";
+
+  case TokenID::Null:
+    return "Null";
+
+  default:
+    return "InvalidConditionOperator";
+  }
+}
+
+// Write the range as "[ a, b, c ]", or as "[]" when it is empty.
+template<typename Iterator>
+ostream& print_array(ostream& m, Iterator begin, Iterator end) {
+  if (begin == end) {
+    return m << "[]";
+  }
+
+  m << "[ ";
+  auto joined = std::experimental::make_ostream_joiner(m, ", ");
+  std::copy(begin, end, joined);
+  return m << " ]";
+}
+
+// Write the range as "{ a, b, c }". An empty range still prints both
+// braces, each with its padding space.
+template<typename Iterator>
+ostream& print_dict(ostream& m, Iterator begin, Iterator end) {
+  m << "{ ";
+  auto joined = std::experimental::make_ostream_joiner(m, ", ");
+  std::copy(begin, end, joined);
+  return m << " }";
+}
+
+}
+
+// Print a condition as "<Operator>[IfExists]: { <key><values> }", where
+// the values are rendered by print_array().
+ostream& operator <<(ostream& m, const Condition& c) {
+  const char* const suffix = c.ifexists ? "IfExists" : "";
+  m << condop_string(c.op) << suffix << ": { " << c.key;
+  print_array(m, c.vals.cbegin(), c.vals.cend());
+  m << " }";
+  return m;
+}
+
+// Evaluate this statement for one request.
+//
+// Returns the statement's effect when principal, resource, action and all
+// conditions apply to the request, and Effect::Pass ("no opinion") as
+// soon as any of them does not.
+Effect Statement::eval(const Environment& e,
+                       boost::optional<const rgw::auth::Identity&> ida,
+                       uint64_t act, const ARN& res) const {
+  const auto covers_res = [&res](const ARN& pattern) {
+    return pattern.match(res);
+  };
+
+  // Principal
+  if (eval_principal(e, ida) == Effect::Deny) {
+    return Effect::Pass;
+  }
+
+  // Resource takes precedence; NotResource is consulted only when no
+  // Resource was given.
+  if (!resource.empty()) {
+    if (std::none_of(resource.begin(), resource.end(), covers_res)) {
+      return Effect::Pass;
+    }
+  } else if (!notresource.empty()) {
+    if (std::any_of(notresource.begin(), notresource.end(), covers_res)) {
+      return Effect::Pass;
+    }
+  }
+
+  // Action: must be listed under Action and not under NotAction.
+  const bool action_selected = (action[act] == 1) && !(notaction[act] == 1);
+  if (!action_selected) {
+    return Effect::Pass;
+  }
+
+  // Conditions: every one of them has to hold.
+  for (const Condition& c : conditions) {
+    if (!c.eval(e)) {
+      return Effect::Pass;
+    }
+  }
+  return effect;
+}
+
+// Decide whether the statement's Principal / NotPrincipal admit the given
+// identity. Without an identity the answer is always Allow. With one, a
+// statement naming no principals at all yields Deny.
+Effect Statement::eval_principal(const Environment& e,
+                                 boost::optional<const rgw::auth::Identity&> ida) const {
+  if (!ida) {
+    return Effect::Allow;
+  }
+
+  if (princ.empty() && noprinc.empty()) {
+    return Effect::Deny;
+  }
+  if (!princ.empty() && !ida->is_identity(princ)) {
+    return Effect::Deny;
+  }
+  if (!noprinc.empty() && ida->is_identity(noprinc)) {
+    return Effect::Deny;
+  }
+  return Effect::Allow;
+}
+
+// Allow when every condition of the statement holds in the environment
+// (trivially so when there are none), Deny otherwise.
+Effect Statement::eval_conditions(const Environment& e) const {
+  for (const Condition& c : conditions) {
+    if (!c.eval(e)) {
+      return Effect::Deny;
+    }
+  }
+  return Effect::Allow;
+}
+
+namespace {
+// Textual name of an action bit index, e.g. "s3:GetObject".
+//
+// The name is looked up in `actpairs`, the same table the parser uses to
+// translate names into bits, so the two directions cannot drift apart.
+// Indices with no entry in the table (including the per-service "all"
+// bits) yield "s3Invalid".
+const char* action_bit_string(uint64_t action) {
+  for (const auto& entry : actpairs) {
+    if (entry.bit == action) {
+      return entry.name;
+    }
+  }
+  return "s3Invalid";
+}
+
+// Print the names of all set action bits as "[ a, b ]". When no bit is
+// set the result is "[ ]".
+ostream& print_actions(ostream& m, const Action_t a) {
+  bool printed_any = false;
+  m << "[ ";
+  for (auto i = 0U; i < allCount; ++i) {
+    if (!(a[i] == 1)) {
+      continue;
+    }
+    if (printed_any) {
+      m << ", ";
+    }
+    printed_any = true;
+    m << action_bit_string(i);
+  }
+  // The opening bracket already carries a space, so only add the padding
+  // before the closing one when something was printed.
+  m << (printed_any ? " ]" : "]");
+  return m;
+}
+}
+
+// Print a statement as a brace-enclosed, comma-separated list of its
+// fields. Sid, Principal and NotPrincipal precede the always-present
+// Effect; every optional field after Effect is introduced by a separator.
+ostream& operator <<(ostream& m, const Statement& s) {
+  m << "{ ";
+
+  // Fields in front of Effect carry a trailing separator.
+  if (s.sid) {
+    m << "Sid: " << *s.sid << ", ";
+  }
+  if (!s.princ.empty()) {
+    m << "Principal: ";
+    print_dict(m, s.princ.cbegin(), s.princ.cend());
+    m << ", ";
+  }
+  if (!s.noprinc.empty()) {
+    m << "NotPrincipal: ";
+    print_dict(m, s.noprinc.cbegin(), s.noprinc.cend());
+    m << ", ";
+  }
+
+  const char* const effect_name = (s.effect == Effect::Allow) ? "Allow" : "Deny";
+  m << "Effect: " << effect_name;
+
+  // Fields behind Effect carry a leading separator.
+  if (s.action.any()) {
+    m << ", ";
+    m << "Action: ";
+    print_actions(m, s.action);
+  }
+
+  if (s.notaction.any()) {
+    m << ", ";
+    m << "NotAction: ";
+    print_actions(m, s.notaction);
+  }
+
+  if (!s.resource.empty()) {
+    m << ", ";
+    m << "Resource: ";
+    print_array(m, s.resource.cbegin(), s.resource.cend());
+  }
+
+  if (!s.notresource.empty()) {
+    m << ", ";
+    m << "NotResource: ";
+    print_array(m, s.notresource.cbegin(), s.notresource.cend());
+  }
+
+  if (!s.conditions.empty()) {
+    m << ", ";
+    m << "Condition: ";
+    print_dict(m, s.conditions.cbegin(), s.conditions.cend());
+  }
+
+  return m << " }";
+}
+
+// Build a policy by parsing its JSON text.
+//
+// The text is kept in `text`. Parsing accepts comments and hands numbers
+// to the handler as strings. Throws PolicyParseException when the reader
+// reports an error, which includes the handler rejecting the document.
+Policy::Policy(CephContext* cct, const string& tenant,
+               const bufferlist& _text)
+  : text(_text.to_str()) {
+  constexpr unsigned parse_flags =
+    kParseNumbersAsStringsFlag | kParseCommentsFlag;
+
+  StringStream input(text.data());
+  PolicyParser handler(cct, tenant, *this);
+  Reader reader;
+  ParseResult outcome = reader.Parse<parse_flags>(input, handler);
+  if (!outcome) {
+    throw PolicyParseException(std::move(outcome));
+  }
+}
+
+// Evaluate the whole policy for one request. A single denying statement
+// wins immediately; otherwise the result is Allow if at least one
+// statement allowed, and Pass if none had an opinion.
+Effect Policy::eval(const Environment& e,
+                    boost::optional<const rgw::auth::Identity&> ida,
+                    std::uint64_t action, const ARN& resource) const {
+  bool any_allow = false;
+  for (const Statement& stmt : statements) {
+    const Effect verdict = stmt.eval(e, ida, action, resource);
+    if (verdict == Effect::Deny) {
+      return Effect::Deny;
+    }
+    if (verdict == Effect::Allow) {
+      any_allow = true;
+    }
+  }
+  return any_allow ? Effect::Allow : Effect::Pass;
+}
+
+// Check the identity against the principals of every statement. Any
+// statement that denies wins immediately; a policy without statements
+// yields Deny.
+Effect Policy::eval_principal(const Environment& e,
+                              boost::optional<const rgw::auth::Identity&> ida) const {
+  bool any_allow = false;
+  for (const Statement& stmt : statements) {
+    const Effect verdict = stmt.eval_principal(e, ida);
+    if (verdict == Effect::Deny) {
+      return Effect::Deny;
+    }
+    if (verdict == Effect::Allow) {
+      any_allow = true;
+    }
+  }
+  return any_allow ? Effect::Allow : Effect::Deny;
+}
+
+// Check the conditions of every statement against the environment. Any
+// statement whose conditions fail wins immediately; a policy without
+// statements yields Deny.
+Effect Policy::eval_conditions(const Environment& e) const {
+  bool any_allow = false;
+  for (const Statement& stmt : statements) {
+    const Effect verdict = stmt.eval_conditions(e);
+    if (verdict == Effect::Deny) {
+      return Effect::Deny;
+    }
+    if (verdict == Effect::Allow) {
+      any_allow = true;
+    }
+  }
+  return any_allow ? Effect::Allow : Effect::Deny;
+}
+
+// Print a policy as "{ Version: ..., Id: ..., Statements: [ ... ] }".
+// Id and Statements are optional; each is introduced by a separator, so
+// no dangling ", " is left in front of the closing brace.
+ostream& operator <<(ostream& m, const Policy& p) {
+  m << "{ Version: "
+    << (p.version == Version::v2008_10_17 ? "2008-10-17" : "2012-10-17");
+
+  if (p.id) {
+    m << ", ";
+    m << "Id: " << *p.id;
+  }
+
+  if (!p.statements.empty()) {
+    m << ", ";
+    m << "Statements: ";
+    print_array(m, p.statements.cbegin(), p.statements.cend());
+  }
+  return m << " }";
+}
+
+}
+}
diff --git a/src/rgw/rgw_iam_policy.h b/src/rgw/rgw_iam_policy.h
new file mode 100644
index 00000000..8f7875ca
--- /dev/null
+++ b/src/rgw/rgw_iam_policy.h
@@ -0,0 +1,480 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_IAM_POLICY_H
+#define CEPH_RGW_IAM_POLICY_H
+
+#include <bitset>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+#include <boost/optional.hpp>
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/utility/string_ref.hpp>
+#include <boost/variant.hpp>
+
+#include "common/ceph_time.h"
+#include "common/iso_8601.h"
+
+#include "rapidjson/error/error.h"
+#include "rapidjson/error/en.h"
+
+#include "rgw_acl.h"
+#include "rgw_basic_types.h"
+#include "rgw_iam_policy_keywords.h"
+#include "rgw_string.h"
+#include "rgw_arn.h"
+
+class RGWRados;
+namespace rgw {
+namespace auth {
+class Identity;
+}
+}
+struct rgw_obj;
+struct rgw_bucket;
+
+namespace rgw {
+namespace IAM {
+
+// Bit index of every action a policy statement can name. Each constant is
+// a position inside an Action_t bitset (see below). The indices are
+// contiguous per service and each service range is followed by its "All"
+// wildcard bit (s3All, iamAll, stsAll).
+static constexpr std::uint64_t s3GetObject = 0;
+static constexpr std::uint64_t s3GetObjectVersion = 1;
+static constexpr std::uint64_t s3PutObject = 2;
+static constexpr std::uint64_t s3GetObjectAcl = 3;
+static constexpr std::uint64_t s3GetObjectVersionAcl = 4;
+static constexpr std::uint64_t s3PutObjectAcl = 5;
+static constexpr std::uint64_t s3PutObjectVersionAcl = 6;
+static constexpr std::uint64_t s3DeleteObject = 7;
+static constexpr std::uint64_t s3DeleteObjectVersion = 8;
+static constexpr std::uint64_t s3ListMultipartUploadParts = 9;
+static constexpr std::uint64_t s3AbortMultipartUpload = 10;
+static constexpr std::uint64_t s3GetObjectTorrent = 11;
+static constexpr std::uint64_t s3GetObjectVersionTorrent = 12;
+static constexpr std::uint64_t s3RestoreObject = 13;
+static constexpr std::uint64_t s3CreateBucket = 14;
+static constexpr std::uint64_t s3DeleteBucket = 15;
+static constexpr std::uint64_t s3ListBucket = 16;
+static constexpr std::uint64_t s3ListBucketVersions = 17;
+static constexpr std::uint64_t s3ListAllMyBuckets = 18;
+static constexpr std::uint64_t s3ListBucketMultipartUploads = 19;
+static constexpr std::uint64_t s3GetAccelerateConfiguration = 20;
+static constexpr std::uint64_t s3PutAccelerateConfiguration = 21;
+static constexpr std::uint64_t s3GetBucketAcl = 22;
+static constexpr std::uint64_t s3PutBucketAcl = 23;
+static constexpr std::uint64_t s3GetBucketCORS = 24;
+static constexpr std::uint64_t s3PutBucketCORS = 25;
+static constexpr std::uint64_t s3GetBucketVersioning = 26;
+static constexpr std::uint64_t s3PutBucketVersioning = 27;
+static constexpr std::uint64_t s3GetBucketRequestPayment = 28;
+static constexpr std::uint64_t s3PutBucketRequestPayment = 29;
+static constexpr std::uint64_t s3GetBucketLocation = 30;
+static constexpr std::uint64_t s3GetBucketPolicy = 31;
+static constexpr std::uint64_t s3DeleteBucketPolicy = 32;
+static constexpr std::uint64_t s3PutBucketPolicy = 33;
+static constexpr std::uint64_t s3GetBucketNotification = 34;
+static constexpr std::uint64_t s3PutBucketNotification = 35;
+static constexpr std::uint64_t s3GetBucketLogging = 36;
+static constexpr std::uint64_t s3PutBucketLogging = 37;
+static constexpr std::uint64_t s3GetBucketTagging = 38;
+static constexpr std::uint64_t s3PutBucketTagging = 39;
+static constexpr std::uint64_t s3GetBucketWebsite = 40;
+static constexpr std::uint64_t s3PutBucketWebsite = 41;
+static constexpr std::uint64_t s3DeleteBucketWebsite = 42;
+static constexpr std::uint64_t s3GetLifecycleConfiguration = 43;
+static constexpr std::uint64_t s3PutLifecycleConfiguration = 44;
+static constexpr std::uint64_t s3PutReplicationConfiguration = 45;
+static constexpr std::uint64_t s3GetReplicationConfiguration = 46;
+static constexpr std::uint64_t s3DeleteReplicationConfiguration = 47;
+static constexpr std::uint64_t s3GetObjectTagging = 48;
+static constexpr std::uint64_t s3PutObjectTagging = 49;
+static constexpr std::uint64_t s3DeleteObjectTagging = 50;
+static constexpr std::uint64_t s3GetObjectVersionTagging = 51;
+static constexpr std::uint64_t s3PutObjectVersionTagging = 52;
+static constexpr std::uint64_t s3DeleteObjectVersionTagging = 53;
+static constexpr std::uint64_t s3PutBucketObjectLockConfiguration = 54;
+static constexpr std::uint64_t s3GetBucketObjectLockConfiguration = 55;
+static constexpr std::uint64_t s3PutObjectRetention = 56;
+static constexpr std::uint64_t s3GetObjectRetention = 57;
+static constexpr std::uint64_t s3PutObjectLegalHold = 58;
+static constexpr std::uint64_t s3GetObjectLegalHold = 59;
+static constexpr std::uint64_t s3BypassGovernanceRetention = 60;
+static constexpr std::uint64_t s3All = 61;
+
+static constexpr std::uint64_t iamPutUserPolicy = 62;
+static constexpr std::uint64_t iamGetUserPolicy = 63;
+static constexpr std::uint64_t iamDeleteUserPolicy = 64;
+static constexpr std::uint64_t iamListUserPolicies = 65;
+static constexpr std::uint64_t iamCreateRole = 66;
+static constexpr std::uint64_t iamDeleteRole = 67;
+static constexpr std::uint64_t iamModifyRole = 68;
+static constexpr std::uint64_t iamGetRole = 69;
+static constexpr std::uint64_t iamListRoles = 70;
+static constexpr std::uint64_t iamPutRolePolicy = 71;
+static constexpr std::uint64_t iamGetRolePolicy = 72;
+static constexpr std::uint64_t iamListRolePolicies = 73;
+static constexpr std::uint64_t iamDeleteRolePolicy = 74;
+static constexpr std::uint64_t iamAll = 75;
+static constexpr std::uint64_t stsAssumeRole = 76;
+static constexpr std::uint64_t stsAssumeRoleWithWebIdentity = 77;
+static constexpr std::uint64_t stsGetSessionToken = 78;
+static constexpr std::uint64_t stsAll = 79;
+
+// Number of concrete S3 actions (excludes the s3All wildcard bit).
+static constexpr std::uint64_t s3Count = s3BypassGovernanceRetention + 1;
+// Total number of bits, i.e. the width of Action_t.
+static constexpr std::uint64_t allCount = stsAll + 1;
+
+using Action_t = std::bitset<allCount>;
+using NotAction_t = Action_t;
+
+// Return a mask with every bit in the half-open index range
+// [first, last) set and all other bits clear.
+inline Action_t action_range_mask(std::uint64_t first, std::uint64_t last) {
+  Action_t mask;
+  for (auto bit = first; bit < last; ++bit) {
+    mask.set(bit);
+  }
+  return mask;
+}
+
+static const Action_t None(0);
+// The per-service masks cover the concrete actions of that service only;
+// the service's own "All" wildcard bit is deliberately left clear. They
+// are derived from the index constants above so that adding an action
+// (and bumping the indices) keeps the masks correct automatically.
+static const Action_t s3AllValue = action_range_mask(s3GetObject, s3All);
+static const Action_t iamAllValue = action_range_mask(s3All + 1, iamAll);
+static const Action_t stsAllValue = action_range_mask(iamAll + 1, stsAll);
+// Every bit set, wildcard bits included.
+static const Action_t allValue = action_range_mask(0, allCount);
+
+// Please update the table in doc/radosgw/s3/authentication.rst if you
+// modify this function.
+//
+// Translate an S3 action index into the RGW_PERM_* ACL permission it
+// corresponds to. s3All maps to RGW_PERM_FULL_CONTROL; any index not
+// listed below (including all iam* and sts* actions) yields
+// RGW_PERM_INVALID.
+//
+// This is a plain `inline` function: an unnamed namespace in a header
+// would give every translation unit its own private copy.
+inline int op_to_perm(std::uint64_t op) {
+  switch (op) {
+  // Reading object data/metadata and listing.
+  case s3GetObject:
+  case s3GetObjectTorrent:
+  case s3GetObjectVersion:
+  case s3GetObjectVersionTorrent:
+  case s3GetObjectTagging:
+  case s3GetObjectVersionTagging:
+  case s3GetObjectRetention:
+  case s3GetObjectLegalHold:
+  case s3ListAllMyBuckets:
+  case s3ListBucket:
+  case s3ListBucketMultipartUploads:
+  case s3ListBucketVersions:
+  case s3ListMultipartUploadParts:
+    return RGW_PERM_READ;
+
+  // Creating, modifying or deleting buckets and objects.
+  case s3AbortMultipartUpload:
+  case s3CreateBucket:
+  case s3DeleteBucket:
+  case s3DeleteObject:
+  case s3DeleteObjectVersion:
+  case s3PutObject:
+  case s3PutObjectTagging:
+  case s3PutObjectVersionTagging:
+  case s3DeleteObjectTagging:
+  case s3DeleteObjectVersionTagging:
+  case s3RestoreObject:
+  case s3PutObjectRetention:
+  case s3PutObjectLegalHold:
+  case s3BypassGovernanceRetention:
+    return RGW_PERM_WRITE;
+
+  // Reading ACLs and bucket-level configuration.
+  case s3GetAccelerateConfiguration:
+  case s3GetBucketAcl:
+  case s3GetBucketCORS:
+  case s3GetBucketLocation:
+  case s3GetBucketLogging:
+  case s3GetBucketNotification:
+  case s3GetBucketPolicy:
+  case s3GetBucketRequestPayment:
+  case s3GetBucketTagging:
+  case s3GetBucketVersioning:
+  case s3GetBucketWebsite:
+  case s3GetLifecycleConfiguration:
+  case s3GetObjectAcl:
+  case s3GetObjectVersionAcl:
+  case s3GetReplicationConfiguration:
+  case s3GetBucketObjectLockConfiguration:
+    return RGW_PERM_READ_ACP;
+
+  // Writing ACLs and bucket-level configuration.
+  case s3DeleteBucketPolicy:
+  case s3DeleteBucketWebsite:
+  case s3DeleteReplicationConfiguration:
+  case s3PutAccelerateConfiguration:
+  case s3PutBucketAcl:
+  case s3PutBucketCORS:
+  case s3PutBucketLogging:
+  case s3PutBucketNotification:
+  case s3PutBucketPolicy:
+  case s3PutBucketRequestPayment:
+  case s3PutBucketTagging:
+  case s3PutBucketVersioning:
+  case s3PutBucketWebsite:
+  case s3PutLifecycleConfiguration:
+  case s3PutObjectAcl:
+  case s3PutObjectVersionAcl:
+  case s3PutReplicationConfiguration:
+  case s3PutBucketObjectLockConfiguration:
+    return RGW_PERM_WRITE_ACP;
+
+  case s3All:
+    return RGW_PERM_FULL_CONTROL;
+  }
+  return RGW_PERM_INVALID;
+}
+
+using Environment = boost::container::flat_map<std::string, std::string>;
+
+using Address = std::bitset<128>;
+// An IP address together with a network prefix length, stored in a
+// 128-bit Address bitset regardless of address family.
+struct MaskedIP {
+ // true for an IPv6 address (128 bits wide in comparisons), false for
+ // IPv4 (32 bits wide); see operator== below.
+ bool v6;
+ // The address bits.
+ Address addr;
+ // Since we're mapping IPv6 to IPv4 addresses, we may want to
+ // consider making the prefix always be in terms of a v6 address
+ // and just use the v6 bit to rewrite it as a v4 prefix for
+ // output.
+ // Prefix length in bits; operator== subtracts it from the family
+ // width to find how many low-order (host) bits to ignore.
+ unsigned int prefix;
+};
+
+std::ostream& operator <<(std::ostream& m, const MaskedIP& ip);
+
+// Two masked addresses compare equal when they agree on every bit above
+// the larger of their two host-bit counts, i.e. they are compared at the
+// shorter (less specific) of the two prefixes. Asserts that neither
+// prefix exceeds the width of its address family.
+inline bool operator ==(const MaskedIP& l, const MaskedIP& r) {
+  const int l_host_bits = (l.v6 ? 128 : 32) - static_cast<int>(l.prefix);
+  const int r_host_bits = (r.v6 ? 128 : 32) - static_cast<int>(r.prefix);
+  auto shift = (l_host_bits < r_host_bits) ? r_host_bits : l_host_bits;
+  ceph_assert(shift >= 0);
+  return (l.addr >> shift) == (r.addr >> shift);
+}
+
+// One condition of a policy statement: an operator (`op`), the key it
+// applies to, and the list of values the key is compared against. The
+// static helpers convert the (always string-typed) values on demand and
+// provide the matching primitives used when evaluating a condition.
+struct Condition {
+  // Condition operator. Not initialised by the default constructor.
+  TokenID op;
+  // Originally I was going to use a perfect hash table, but Marcus
+  // says keys are to be added at run-time not compile time.
+
+  // In future development, use symbol internment.
+  std::string key;
+  bool ifexists = false;
+  // Much to my annoyance there is no actual way to do this in a
+  // typed way that is compatible with AWS. I know this because I've
+  // seen examples where the same value is used as a string in one
+  // context and a date in another.
+  std::vector<std::string> vals;
+
+  Condition() = default;
+  // `s`/`len` give the key text; it is copied into `key`.
+  Condition(TokenID op, const char* s, std::size_t len, bool ifexists)
+    : op(op), key(s, len), ifexists(ifexists) {}
+
+  bool eval(const Environment& e) const;
+
+  // Parse `s` as a double. Returns boost::none unless the whole string
+  // is consumed by std::stod.
+  static boost::optional<double> as_number(const std::string& s) {
+    std::size_t p = 0;
+
+    try {
+      double d = std::stod(s, &p);
+      if (p < s.length()) {
+        return boost::none;
+      }
+
+      return d;
+    } catch (const std::logic_error& e) {
+      // std::stod reports failures via std::invalid_argument and
+      // std::out_of_range, both of which derive from std::logic_error.
+      return boost::none;
+    }
+  }
+
+  // Parse `s` as a point in time. A string that is entirely numeric is
+  // taken as (possibly fractional) seconds since the epoch; anything
+  // else is handed to from_iso_8601(). Returns boost::none when the
+  // value cannot be represented.
+  static boost::optional<ceph::real_time> as_date(const std::string& s) {
+    std::size_t p = 0;
+
+    try {
+      double d = std::stod(s, &p);
+      if (p == s.length()) {
+        // Converting a NaN, an infinity, a negative value or a value
+        // beyond the target range to an integer is undefined behaviour,
+        // so reject those up front. The upper bound is the largest
+        // second count whose nanosecond equivalent still fits in
+        // std::chrono::nanoseconds.
+        constexpr auto max_secs =
+          std::chrono::duration_cast<std::chrono::seconds>(
+            std::chrono::nanoseconds::max()).count();
+        if (!std::isfinite(d) || d < 0 ||
+            d >= static_cast<double>(max_secs)) {
+          return boost::none;
+        }
+
+        const auto secs = static_cast<uint64_t>(d);
+        const auto nsecs = static_cast<uint64_t>((d - secs) * 1000000000);
+        return ceph::real_time(std::chrono::seconds(secs) +
+                               std::chrono::nanoseconds(nsecs));
+      }
+
+      return from_iso_8601(boost::string_ref(s), false);
+    } catch (const std::logic_error& e) {
+      return boost::none;
+    }
+  }
+
+  // Interpret `s` as a boolean: empty or (case-insensitive) "false" is
+  // false; a fully numeric string is false when it is zero or NaN; every
+  // other string is true. This never returns boost::none.
+  static boost::optional<bool> as_bool(const std::string& s) {
+    std::size_t p = 0;
+
+    if (s.empty() || boost::iequals(s, "false")) {
+      return false;
+    }
+
+    try {
+      double d = std::stod(s, &p);
+      if (p == s.length()) {
+        return !((d == +0.0) || (d == -0.0) || std::isnan(d));
+      }
+    } catch (const std::logic_error& e) {
+      // Fallthrough
+    }
+
+    return true;
+  }
+
+  // Decode `s` as base64. Returns boost::none on malformed input.
+  static boost::optional<ceph::bufferlist> as_binary(const std::string& s) {
+    // In a just world
+    ceph::bufferlist base64;
+    // I could populate a bufferlist
+    base64.push_back(buffer::create_static(
+                       s.length(),
+                       const_cast<char*>(s.data()))); // Yuck
+    // From a base64 encoded std::string.
+    ceph::bufferlist bin;
+
+    try {
+      bin.decode_base64(base64);
+    } catch (const ceph::buffer::malformed_input& e) {
+      return boost::none;
+    }
+    return bin;
+  }
+
+  static boost::optional<MaskedIP> as_network(const std::string& s);
+
+
+  // Case-insensitive string equality.
+  struct ci_equal_to {
+    bool operator ()(const std::string& s1,
+                     const std::string& s2) const {
+      return boost::iequals(s1, s2);
+    }
+  };
+
+  // Wildcard match of `input` against `pattern` (note the argument
+  // order is swapped when calling match_wildcards).
+  struct string_like {
+    bool operator ()(const std::string& input,
+                     const std::string& pattern) const {
+      return match_wildcards(pattern, input, 0);
+    }
+  };
+
+  // Case-insensitive test that `s1` starts with `s2`.
+  struct ci_starts_with {
+    bool operator()(const std::string& s1,
+                    const std::string& s2) const {
+      return boost::istarts_with(s1, s2);
+    }
+  };
+
+  // Return true if f(c, d) holds for any d in `v`.
+  // `f` is invoked as an lvalue: it is called once per element, so it
+  // must not be forwarded (and potentially moved from) inside the loop.
+  template<typename F>
+  static bool orrible(F&& f, const std::string& c,
+                      const std::vector<std::string>& v) {
+    for (const auto& d : v) {
+      if (f(c, d)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Typed variant of orrible(): `x` converts a string to an optional
+  // value. Returns false if `c` does not convert; elements of `v` that
+  // do not convert are skipped. Otherwise returns true if f(*x(c), *x(d))
+  // holds for any convertible d in `v`.
+  template<typename F, typename X>
+  static bool shortible(F&& f, X& x, const std::string& c,
+                        const std::vector<std::string>& v) {
+    auto xc = x(c);
+    if (!xc) {
+      return false;
+    }
+
+    for (const auto& d : v) {
+      auto xd = x(d);
+      if (!xd) {
+        continue;
+      }
+
+      if (f(*xc, *xd)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Apply predicate `p` to (this condition's key, `_key`).
+  template <typename F>
+  bool has_key_p(const std::string& _key, F p) const {
+    return p(key, _key);
+  }
+};
+
+std::ostream& operator <<(std::ostream& m, const Condition& c);
+
+// One statement of a policy document: who it applies to (princ /
+// noprinc), which actions and resources it covers (and their negated
+// counterparts), the conditions attached to it, and the effect it
+// produces.
+struct Statement {
+  boost::optional<std::string> sid = boost::none;
+
+  boost::container::flat_set<rgw::auth::Principal> princ;
+  boost::container::flat_set<rgw::auth::Principal> noprinc;
+
+  // Every statement MUST provide an effect. I just initialize it to
+  // deny as defensive programming.
+  Effect effect = Effect::Deny;
+
+  // Bitsets indexed by the action constants (s3GetObject, ...).
+  Action_t action = 0;
+  NotAction_t notaction = 0;
+
+  boost::container::flat_set<ARN> resource;
+  boost::container::flat_set<ARN> notresource;
+
+  std::vector<Condition> conditions;
+
+  Effect eval(const Environment& e,
+              boost::optional<const rgw::auth::Identity&> ida,
+              std::uint64_t action, const ARN& resource) const;
+
+  Effect eval_principal(const Environment& e,
+                        boost::optional<const rgw::auth::Identity&> ida) const;
+
+  Effect eval_conditions(const Environment& e) const;
+};
+
+// Fully qualified so this header does not depend on a `using` for
+// `ostream` being visible at the point of inclusion.
+std::ostream& operator <<(std::ostream& m, const Statement& s);
+
+// Exception carrying the rapidjson parse result of a failed parse.
+// what() returns rapidjson's English message for the stored error code.
+struct PolicyParseException : public std::exception {
+  rapidjson::ParseResult pr;
+
+  explicit PolicyParseException(rapidjson::ParseResult&& result)
+    : pr(result) { }
+  const char* what() const noexcept override {
+    const auto code = pr.Code();
+    return rapidjson::GetParseError_En(code);
+  }
+};
+
+// A parsed policy document: the original text, its version, optional Id
+// and the list of statements, plus the evaluation entry points.
+// Standard-library and ceph names are fully qualified so the header does
+// not rely on using-declarations made elsewhere.
+struct Policy {
+  std::string text;
+  Version version = Version::v2008_10_17;
+  boost::optional<std::string> id = boost::none;
+
+  std::vector<Statement> statements;
+
+  // Parses `text`; throws PolicyParseException on a parse failure.
+  Policy(CephContext* cct, const std::string& tenant,
+         const ceph::bufferlist& text);
+
+  Effect eval(const Environment& e,
+              boost::optional<const rgw::auth::Identity&> ida,
+              std::uint64_t action, const ARN& resource) const;
+
+  Effect eval_principal(const Environment& e,
+                        boost::optional<const rgw::auth::Identity&> ida) const;
+
+  Effect eval_conditions(const Environment& e) const;
+
+  // Return true if any condition of any statement has a key for which
+  // p(condition key, conditional) holds.
+  template <typename F>
+  bool has_conditional(const std::string& conditional, F p) const {
+    for (const auto& s : statements) {
+      if (std::any_of(s.conditions.begin(), s.conditions.end(),
+                      [&](const Condition& c) {
+                        return c.has_key_p(conditional, p);
+                      })) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // True if some condition key equals `c`, ignoring case.
+  bool has_conditional(const std::string& c) const {
+    return has_conditional(c, Condition::ci_equal_to());
+  }
+
+  // True if some condition key starts with `c`, ignoring case.
+  bool has_partial_conditional(const std::string& c) const {
+    return has_conditional(c, Condition::ci_starts_with());
+  }
+};
+
+std::ostream& operator <<(std::ostream& m, const Policy& p);
+}
+}
+
+#endif
diff --git a/src/rgw/rgw_iam_policy_keywords.gperf b/src/rgw/rgw_iam_policy_keywords.gperf
new file mode 100644
index 00000000..4f6f22a9
--- /dev/null
+++ b/src/rgw/rgw_iam_policy_keywords.gperf
@@ -0,0 +1,130 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+%language=C++
+%compare-strncmp
+%define class-name keyword_hash
+%define lookup-function-name lookup
+%struct-type
+struct Keyword {
+ /* Keyword text: the first column of each row in the table below. */
+ const char* name;
+ /* Category of the keyword (top-level, statement-level, ...). */
+ TokenKind kind;
+ /* Identifier of this particular keyword. */
+ TokenID id;
+ /* Kind-specific payload: the rows below store a Type, Version or
+    Effect enumerator cast to uint64_t here, or 0 when unused. */
+ uint64_t specific;
+ /* NOTE(review): presumably whether the keyword's value may be given
+    as a JSON array / JSON object -- confirm against the parser. */
+ bool arrayable;
+ bool objectable;
+};
+%%
+# Top-level
+#
+Version, TokenKind::top, TokenID::Version, 0, false, false
+Id, TokenKind::top, TokenID::Id, 0, false, false
+Statement, TokenKind::top, TokenID::Statement, 0, true, true
+#
+# Statement level
+#
+Sid, TokenKind::statement, TokenID::Sid, 0, false, false
+Effect, TokenKind::statement, TokenID::Effect, 0, false, false
+Principal, TokenKind::statement, TokenID::Principal, 0, false, true
+NotPrincipal, TokenKind::statement, TokenID::NotPrincipal, 0, true, true
+Action, TokenKind::statement, TokenID::Action, 0, true, false
+NotAction, TokenKind::statement, TokenID::NotAction, 0, true, false
+Resource, TokenKind::statement, TokenID::Resource, 0, true, false
+NotResource, TokenKind::statement, TokenID::NotResource, 0, true, false
+Condition, TokenKind::statement, TokenID::Condition, 0, true, true
+#
+# Condition operators
+#
+# String
+# Columns: name, kind, id, specific (value type), arrayable, objectable
+StringEquals, TokenKind::cond_op, TokenID::StringEquals, (uint64_t) Type::string, true, true
+StringNotEquals, TokenKind::cond_op, TokenID::StringNotEquals, (uint64_t) Type::string, true, true
+StringEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringEqualsIgnoreCase, (uint64_t) Type::string, true, true
+StringNotEqualsIgnoreCase, TokenKind::cond_op, TokenID::StringNotEqualsIgnoreCase, (uint64_t) Type::string, true, true
+StringLike, TokenKind::cond_op, TokenID::StringLike, (uint64_t) Type::string, true, true
+StringNotLike, TokenKind::cond_op, TokenID::StringNotLike, (uint64_t) Type::string, true, true
+# Numeric
+NumericEquals, TokenKind::cond_op, TokenID::NumericEquals, (uint64_t) Type::number, true, true
+NumericNotEquals, TokenKind::cond_op, TokenID::NumericNotEquals, (uint64_t) Type::number, true, true
+NumericLessThan, TokenKind::cond_op, TokenID::NumericLessThan, (uint64_t) Type::number, true, true
+NumericLessThanEquals, TokenKind::cond_op, TokenID::NumericLessThanEquals, (uint64_t) Type::number, true, true
+NumericGreaterThan, TokenKind::cond_op, TokenID::NumericGreaterThan, (uint64_t) Type::number, true, true
+NumericGreaterThanEquals, TokenKind::cond_op, TokenID::NumericGreaterThanEquals, (uint64_t) Type::number, true, true
+# Date
+DateEquals, TokenKind::cond_op, TokenID::DateEquals, (uint64_t) Type::date, true, true
+DateNotEquals, TokenKind::cond_op, TokenID::DateNotEquals, (uint64_t) Type::date, true, true
+DateLessThan, TokenKind::cond_op, TokenID::DateLessThan, (uint64_t) Type::date, true, true
+DateLessThanEquals, TokenKind::cond_op, TokenID::DateLessThanEquals, (uint64_t) Type::date, true, true
+DateGreaterThan, TokenKind::cond_op, TokenID::DateGreaterThan, (uint64_t) Type::date, true, true
+DateGreaterThanEquals, TokenKind::cond_op, TokenID::DateGreaterThanEquals, (uint64_t) Type::date, true, true
+# Bool
+Bool, TokenKind::cond_op, TokenID::Bool, (uint64_t) Type::boolean, true, true
+# Binary
+BinaryEquals, TokenKind::cond_op, TokenID::BinaryEquals, (uint64_t) Type::binary, true, true
+# IP Address
+IpAddress, TokenKind::cond_op, TokenID::IpAddress, (uint64_t) Type::ipaddr, true, true
+NotIpAddress, TokenKind::cond_op, TokenID::NotIpAddress, (uint64_t) Type::ipaddr, true, true
+# Amazon Resource Names
+ArnEquals, TokenKind::cond_op, TokenID::ArnEquals, (uint64_t) Type::arn, true, true
+ArnNotEquals, TokenKind::cond_op, TokenID::ArnNotEquals, (uint64_t) Type::arn, true, true
+ArnLike, TokenKind::cond_op, TokenID::ArnLike, (uint64_t) Type::arn, true, true
+ArnNotLike, TokenKind::cond_op, TokenID::ArnNotLike, (uint64_t) Type::arn, true, true
+# Null
+Null, TokenKind::cond_op, TokenID::Null, (uint64_t) Type::null, true, true
+#
+# Condition keys
+#
+# AWS
+#aws:CurrentTime, TokenKind::cond_key, TokenID::awsCurrentTime, (uint64_t) Type::date, true, false
+#aws:EpochTime, TokenKind::cond_key, TokenID::awsEpochTime, (uint64_t) Type::date, true, false
+#aws:TokenIssueTime, TokenKind::cond_key, TokenID::awsTokenIssueTime, (uint64_t) Type::date, true, false
+#aws:MultiFactorAuthPresent, TokenKind::cond_key, TokenID::awsMultiFactorAuthPresent, (uint64_t) Type::boolean, true, false
+#aws:MultiFactorAuthAge, TokenKind::cond_key, TokenID::awsMultiFactorAuthAge, (uint64_t) Type::number, true, false
+#aws:PrincipalType, TokenKind::cond_key, TokenID::awsPrincipalType, (uint64_t) Type::string, true, false
+#aws:Referer, TokenKind::cond_key, TokenID::awsReferer, (uint64_t) Type::string, true, false
+#aws:SecureTransport, TokenKind::cond_key, TokenID::awsSecureTransport, (uint64_t) Type::boolean, true, false
+#aws:SourceArn, TokenKind::cond_key, TokenID::awsSourceArn, (uint64_t) Type::arn, true, false
+#aws:SourceIp, TokenKind::cond_key, TokenID::awsSourceIp, (uint64_t) Type::ipaddr, true, false
+#aws:SourceVpc, TokenKind::cond_key, TokenID::awsSourceVpc, (uint64_t) Type::string, true, false
+#aws:SourceVpce, TokenKind::cond_key, TokenID::awsSourceVpce, (uint64_t) Type::string, true, false
+#aws:UserAgent, TokenKind::cond_key, TokenID::awsUserAgent, (uint64_t) Type::string, true, false
+#aws:userid, TokenKind::cond_key, TokenID::awsuserid, (uint64_t) Type::string, true, false
+#aws:username, TokenKind::cond_key, TokenID::awsusername, (uint64_t) Type::string, true, false
+# S3
+#s3:x-amz-acl, TokenKind::cond_key, TokenID::s3x_amz_acl, (uint64_t) Type::string, true, false
+#s3:x-amz-grant-read, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-write, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-read-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-write-acp, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-grant-full-control, TokenKind::cond_key, TokenID::s3x_amz_grant_permission, (uint64_t) Type::boolean, true, false
+#s3:x-amz-copy-source, TokenKind::cond_key, TokenID::s3x_amz_copy_source, (uint64_t) Type::string, true, false
+#s3:x-amz-server-side-encryption, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption, (uint64_t) Type::boolean, true, false
+#s3:x-amz-server-side-encryption-aws-kms-key-id, TokenKind::cond_key, TokenID::s3x_amz_server_side_encryption_aws_kms_key_id, (uint64_t) Type::arn, true, false
+#s3:x-amz-metadata-directive, TokenKind::cond_key, TokenID::s3x_amz_metadata_directive, (uint64_t) Type::string, true, false
+#s3:x-amz-storage-class, TokenKind::cond_key, TokenID::s3x_amz_storage_class, (uint64_t) Type::string, true, false
+#s3:VersionId, TokenKind::cond_key, TokenID::s3VersionId, (uint64_t) Type::string, true, false
+#s3:LocationConstraint, TokenKind::cond_key, TokenID::s3LocationConstraint, (uint64_t) Type::string, true, false
+#s3:prefix, TokenKind::cond_key, TokenID::s3prefix, (uint64_t) Type::string, true, false
+#s3:delimiter, TokenKind::cond_key, TokenID::s3delimiter, (uint64_t) Type::string, true, false
+#s3:max-keys, TokenKind::cond_key, TokenID::s3max_keys, (uint64_t) Type::number, true, false
+#s3:signatureversion, TokenKind::cond_key, TokenID::s3signatureversion, (uint64_t) Type::string, true, false
+#s3:authType, TokenKind::cond_key, TokenID::s3authType, (uint64_t) Type::string, true, false
+#s3:signatureAge, TokenKind::cond_key, TokenID::s3signatureAge, (uint64_t) Type::number, true, false
+#s3:x-amz-content-sha256, TokenKind::cond_key, TokenID::s3x_amz_content_sha256, (uint64_t) Type::string, true, false
+# STS
+#sts:authentication, TokenKind::cond_key, TokenID::stsauthentication, (uint64_t) Type::boolean, true, false
+#
+# Version Keywords
+#
+2008-10-17, TokenKind::version_key, TokenID::v2008_10_17, (uint64_t) Version::v2008_10_17, false, false
+2012-10-17, TokenKind::version_key, TokenID::v2012_10_17, (uint64_t) Version::v2012_10_17, false, false
+#
+# Effect Keywords
+#
+Allow, TokenKind::effect_key, TokenID::Allow, (uint64_t) Effect::Allow, false, false
+Deny, TokenKind::effect_key, TokenID::Deny, (uint64_t) Effect::Deny, false, false
+#
+# Principal types
+#
+AWS, TokenKind::princ_type, TokenID::AWS, 0, true, false
+Federated, TokenKind::princ_type, TokenID::Federated, 0, true, false
+Service, TokenKind::princ_type, TokenID::Service, 0, true, false
+CanonicalUser, TokenKind::princ_type, TokenID::CanonicalUser, 0, true, false
diff --git a/src/rgw/rgw_iam_policy_keywords.h b/src/rgw/rgw_iam_policy_keywords.h
new file mode 100644
index 00000000..a0cd34b6
--- /dev/null
+++ b/src/rgw/rgw_iam_policy_keywords.h
@@ -0,0 +1,139 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_POLICY_S3V2_KEYWORDS_H
+#define CEPH_RGW_POLICY_S3V2_KEYWORDS_H
+
+namespace rgw {
+namespace IAM {
+
+// Category of a policy keyword, as assigned per row in
+// rgw_iam_policy_keywords.gperf: top-level keys, statement-level keys,
+// condition operators, condition keys, version strings, effect strings
+// and principal types. `pseudo` is not used by any row of that table.
+enum class TokenKind {
+ pseudo, top, statement, cond_op, cond_key, version_key, effect_key,
+ princ_type
+};
+
+// Identifier of each individual policy keyword. The enumerators are
+// grouped in the same order as the sections of the gperf keyword table.
+enum class TokenID {
+ /// Pseudo-token
+ Top,
+
+ /// Top-level tokens
+ Version, Id, Statement,
+
+ /// Statement level tokens
+ Sid, Effect, Principal, NotPrincipal, Action, NotAction,
+ Resource, NotResource, Condition,
+
+ /// Condition Operators!
+ /// Any of these, except Null, can have an IfExists variant.
+
+ // String!
+ StringEquals, StringNotEquals, StringEqualsIgnoreCase,
+ StringNotEqualsIgnoreCase, StringLike, StringNotLike,
+
+ // Numeric!
+ NumericEquals, NumericNotEquals, NumericLessThan, NumericLessThanEquals,
+ NumericGreaterThan, NumericGreaterThanEquals,
+
+ // Date!
+ DateEquals, DateNotEquals, DateLessThan, DateLessThanEquals,
+ DateGreaterThan, DateGreaterThanEquals,
+
+ // Bool!
+ Bool,
+
+ // Binary!
+ BinaryEquals,
+
+ // IP Address!
+ IpAddress, NotIpAddress,
+
+ // Amazon Resource Names! (Does S3 need this?)
+ ArnEquals, ArnNotEquals, ArnLike, ArnNotLike,
+
+ // Null!
+ Null,
+
+ // The per-key enumerators below are compiled out; a single CondKey
+ // enumerator stands in for every condition key instead.
+#if 0 // Keys are done at runtime now
+
+ /// Condition Keys!
+ awsCurrentTime,
+ awsEpochTime,
+ awsTokenIssueTime,
+ awsMultiFactorAuthPresent,
+ awsMultiFactorAuthAge,
+ awsPrincipalType,
+ awsReferer,
+ awsSecureTransport,
+ awsSourceArn,
+ awsSourceIp,
+ awsSourceVpc,
+ awsSourceVpce,
+ awsUserAgent,
+ awsuserid,
+ awsusername,
+ s3x_amz_acl,
+ s3x_amz_grant_permission,
+ s3x_amz_copy_source,
+ s3x_amz_server_side_encryption,
+ s3x_amz_server_side_encryption_aws_kms_key_id,
+ s3x_amz_metadata_directive,
+ s3x_amz_storage_class,
+ s3VersionId,
+ s3LocationConstraint,
+ s3prefix,
+ s3delimiter,
+ s3max_keys,
+ s3signatureversion,
+ s3authType,
+ s3signatureAge,
+ s3x_amz_content_sha256,
+#else
+ CondKey,
+#endif
+
+ ///
+ /// Versions!
+ ///
+ v2008_10_17,
+ v2012_10_17,
+
+ ///
+ /// Effects!
+ ///
+ Allow,
+ Deny,
+
+ /// Principal Types!
+ AWS,
+ Federated,
+ Service,
+ CanonicalUser
+};
+
+
+// Policy language version, named after the two version strings in the
+// keyword table ("2008-10-17" and "2012-10-17").
+enum class Version {
+ v2008_10_17,
+ v2012_10_17
+};
+
+
+// Outcome of evaluating a statement or policy. Allow and Deny are also
+// the two effect keywords a statement can carry; Pass is not a keyword
+// in the table.
+enum class Effect {
+ Allow,
+ Deny,
+ Pass
+};
+
+// Value type a condition operator works on; stored (cast to uint64_t)
+// in the `specific` column of the condition-operator rows of the
+// keyword table.
+enum class Type {
+ string,
+ number,
+ date,
+ boolean,
+ binary,
+ ipaddr,
+ arn,
+ null
+};
+}
+}
+
+#endif // CEPH_RGW_POLICY_S3V2_KEYWORDS_H
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
new file mode 100644
index 00000000..5804d7c7
--- /dev/null
+++ b/src/rgw/rgw_json_enc.cc
@@ -0,0 +1,1777 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_log.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_cache.h"
+#include "rgw_bucket.h"
+#include "rgw_keystone.h"
+#include "rgw_basic_types.h"
+#include "rgw_op.h"
+#include "rgw_data_sync.h"
+#include "rgw_sync.h"
+#include "rgw_orphan.h"
+
+#include "common/ceph_json.h"
+#include "common/Formatter.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+void encode_json(const char *name, const obj_version& v, Formatter *f) // Writes v as an object section called `name`.
+{
+ f->open_object_section(name);
+ f->dump_string("tag", v.tag);
+ f->dump_unsigned("ver", v.ver);
+ f->close_section(); // closes the section opened above
+}
+
+void decode_json_obj(obj_version& v, JSONObj *obj) // Reads the "tag" and "ver" keys (as written by the encoder above) into v.
+{
+ JSONDecoder::decode_json("tag", v.tag, obj);
+ JSONDecoder::decode_json("ver", v.ver, obj);
+}
+
+void encode_json(const char *name, const RGWUserCaps& val, Formatter *f) // Free-function adapter around RGWUserCaps::dump.
+{
+ val.dump(f, name); // the caps object takes the key name itself
+}
+
+
+void encode_json(const char *name, const rgw_pool& pool, Formatter *f) // Emits the pool as one string value under `name`.
+{
+ f->dump_string(name, pool.to_str());
+}
+
+/// Decode a pool from its string representation: the JSON value is read as
+/// a string and an rgw_pool is constructed from it.
+void decode_json_obj(rgw_pool& pool, JSONObj *obj)
+{
+  string pool_str;
+  decode_json_obj(pool_str, obj);
+  pool = rgw_pool(pool_str);
+}
+
+void encode_json(const char *name, const rgw_placement_rule& r, Formatter *f) // Emits the rule in its to_str() form under `name`.
+{
+ encode_json(name, r.to_str(), f); // delegates to the encode_json overload chosen for to_str()'s return type
+}
+
+/// Decode a placement rule from its string representation: the JSON value
+/// is read as a string and handed to rgw_placement_rule::from_str().
+void decode_json_obj(rgw_placement_rule& v, JSONObj *obj)
+{
+  string rule_str;
+  decode_json_obj(rule_str, obj);
+  v.from_str(rule_str);
+}
+
+void RGWOLHInfo::dump(Formatter *f) const // Emits the single member `target` under the key "target".
+{
+ encode_json("target", target, f);
+}
+
+/// Emit the pending entry's timestamp under "time", converted to utime_t
+/// so that the utime_t JSON encoding is used.
+void RGWOLHPendingInfo::dump(Formatter *f) const
+{
+  const utime_t stamp(time);
+  encode_json("time", stamp, f);
+}
+
+void RGWObjManifestPart::dump(Formatter *f) const // Emits the part's location as a nested object, then its offset and size.
+{
+ f->open_object_section("loc");
+ loc.dump(f);
+ f->close_section();
+ f->dump_unsigned("loc_ofs", loc_ofs);
+ f->dump_unsigned("size", size);
+}
+
+void RGWObjManifestRule::dump(Formatter *f) const // Emits the five rule members, each under a key named after the member.
+{
+ encode_json("start_part_num", start_part_num, f);
+ encode_json("start_ofs", start_ofs, f);
+ encode_json("part_size", part_size, f);
+ encode_json("stripe_max_size", stripe_max_size, f);
+ encode_json("override_prefix", override_prefix, f);
+}
+
+void rgw_bucket_placement::dump(Formatter *f) const // Emits the bucket and its placement rule.
+{
+ encode_json("bucket", bucket, f);
+ encode_json("placement_rule", placement_rule, f);
+}
+
+void rgw_obj_select::dump(Formatter *f) const // Emits all four members regardless of is_raw.
+{
+ f->dump_string("placement_rule", placement_rule.to_str()); // rule in string form
+ f->dump_object("obj", obj);
+ f->dump_object("raw_obj", raw_obj);
+ f->dump_bool("is_raw", is_raw);
+}
+
+void RGWObjManifest::obj_iterator::dump(Formatter *f) const // Emits the iterator's position fields plus the current location object.
+{
+ f->dump_unsigned("part_ofs", part_ofs);
+ f->dump_unsigned("stripe_ofs", stripe_ofs);
+ f->dump_unsigned("ofs", ofs);
+ f->dump_unsigned("stripe_size", stripe_size);
+ f->dump_int("cur_part_id", cur_part_id); // the two cur_* counters are written as signed ints
+ f->dump_int("cur_stripe", cur_stripe);
+ f->dump_string("cur_override_prefix", cur_override_prefix);
+ f->dump_object("location", location);
+}
+
+/// Dump the manifest: first the explicit object map as an "objs" array
+/// (each map entry contributes an "ofs" value followed by a "part"
+/// object), then the scalar fields, the rules, the tail placement and
+/// finally the begin/end iterators.
+void RGWObjManifest::dump(Formatter *f) const
+{
+  f->open_array_section("objs");
+  for (const auto& entry : objs) {
+    f->dump_unsigned("ofs", entry.first);
+    f->open_object_section("part");
+    entry.second.dump(f);
+    f->close_section();
+  }
+  f->close_section();
+
+  f->dump_unsigned("obj_size", obj_size);
+  // Global-namespace encode_json is named explicitly, as in the original.
+  ::encode_json("explicit_objs", explicit_objs, f);
+  ::encode_json("head_size", head_size, f);
+  ::encode_json("max_head_size", max_head_size, f);
+  ::encode_json("prefix", prefix, f);
+  ::encode_json("rules", rules, f);
+  ::encode_json("tail_instance", tail_instance, f);
+  ::encode_json("tail_placement", tail_placement, f);
+
+  f->dump_object("begin_iter", begin_iter);
+  f->dump_object("end_iter", end_iter);
+}
+
+void rgw_log_entry::dump(Formatter *f) const // Emits one log record as a flat list of keys.
+{
+ f->dump_string("object_owner", object_owner.to_str()); // owners are written in to_str() form
+ f->dump_string("bucket_owner", bucket_owner.to_str());
+ f->dump_string("bucket", bucket);
+ f->dump_stream("time") << time; // formatted by the member's operator<<
+ f->dump_string("remote_addr", remote_addr);
+ f->dump_string("user", user);
+ stringstream s; // obj is rendered via operator<< and then dumped as a plain string
+ s << obj;
+ f->dump_string("obj", s.str());
+ f->dump_string("op", op);
+ f->dump_string("uri", uri);
+ f->dump_string("http_status", http_status); // dumped as a string, not a number
+ f->dump_string("error_code", error_code);
+ f->dump_unsigned("bytes_sent", bytes_sent);
+ f->dump_unsigned("bytes_received", bytes_received);
+ f->dump_unsigned("obj_size", obj_size);
+ f->dump_stream("total_time") << total_time;
+ f->dump_string("user_agent", user_agent);
+ f->dump_string("referrer", referrer);
+ f->dump_string("bucket_id", bucket_id);
+}
+
+void ACLPermission::dump(Formatter *f) const // Emits the raw permission bits as the integer "flags".
+{
+ f->dump_int("flags", flags);
+}
+
+void ACLGranteeType::dump(Formatter *f) const // Emits the grantee type as the unsigned number "type".
+{
+ f->dump_unsigned("type", type);
+}
+
+void ACLGrant::dump(Formatter *f) const // Emits one grant; type and permission are nested objects, the rest are scalars.
+{
+ f->open_object_section("type");
+ type.dump(f);
+ f->close_section();
+
+ f->dump_string("id", id.to_str()); // grantee id in to_str() form
+ f->dump_string("email", email);
+
+ f->open_object_section("permission");
+ permission.dump(f);
+ f->close_section();
+
+ f->dump_string("name", name);
+ f->dump_int("group", (int)group); // group is written as its integer value
+ f->dump_string("url_spec", url_spec);
+}
+
+/// Dump the ACL's three tables, each as an array of "entry" objects:
+///  - "acl_user_map":  user  -> acl bits
+///  - "acl_group_map": group -> acl bits
+///  - "grant_map":     id    -> nested "grant" object
+void RGWAccessControlList::dump(Formatter *f) const
+{
+  f->open_array_section("acl_user_map");
+  for (const auto& user_acl : acl_user_map) {
+    f->open_object_section("entry");
+    f->dump_string("user", user_acl.first);
+    f->dump_int("acl", user_acl.second);
+    f->close_section();
+  }
+  f->close_section();
+
+  f->open_array_section("acl_group_map");
+  for (const auto& group_acl : acl_group_map) {
+    f->open_object_section("entry");
+    f->dump_unsigned("group", group_acl.first);
+    f->dump_int("acl", group_acl.second);
+    f->close_section();
+  }
+  f->close_section();
+
+  f->open_array_section("grant_map");
+  for (const auto& grant_entry : grant_map) {
+    f->open_object_section("entry");
+    f->dump_string("id", grant_entry.first);
+    f->open_object_section("grant");
+    grant_entry.second.dump(f);
+    f->close_section();  // "grant"
+    f->close_section();  // "entry"
+  }
+  f->close_section();
+}
+
+void ACLOwner::dump(Formatter *f) const // Emits the owner's id (to_str() form) and display name.
+{
+ encode_json("id", id.to_str(), f);
+ encode_json("display_name", display_name, f);
+}
+
+/// Inverse of ACLOwner::dump(): the "id" key is read as a string and
+/// parsed with from_str(); "display_name" is read directly.
+void ACLOwner::decode_json(JSONObj *obj) {
+  string owner_id;
+  JSONDecoder::decode_json("id", owner_id, obj);
+  id.from_str(owner_id);
+  JSONDecoder::decode_json("display_name", display_name, obj);
+}
+
+void RGWAccessControlPolicy::dump(Formatter *f) const // Emits the ACL followed by the owner.
+{
+ encode_json("acl", acl, f);
+ encode_json("owner", owner, f);
+}
+
+void ObjectMetaInfo::dump(Formatter *f) const // Emits size and modification time.
+{
+ encode_json("size", size, f);
+ encode_json("mtime", utime_t(mtime), f); // converted to utime_t before encoding
+}
+
+void ObjectCacheInfo::dump(Formatter *f) const // Emits the cached entry: status, flags, data, both xattr maps, then meta.
+{
+ encode_json("status", status, f);
+ encode_json("flags", flags, f);
+ encode_json("data", data, f);
+ encode_json_map("xattrs", "name", "value", "length", xattrs, f); // label arguments are interpreted by encode_json_map; see that helper
+ encode_json_map("rm_xattrs", "name", "value", "length", rm_xattrs, f);
+ encode_json("meta", meta, f);
+
+}
+
+void RGWCacheNotifyInfo::dump(Formatter *f) const // Emits the notification's five members under same-named keys.
+{
+ encode_json("op", op, f);
+ encode_json("obj", obj, f);
+ encode_json("obj_info", obj_info, f);
+ encode_json("ofs", ofs, f);
+ encode_json("ns", ns, f);
+}
+
+void RGWAccessKey::dump(Formatter *f) const // Emits id, key and subuser; the secret key is written as stored.
+{
+ encode_json("access_key", id, f);
+ encode_json("secret_key", key, f);
+ encode_json("subuser", subuser, f);
+}
+
+void RGWAccessKey::dump_plain(Formatter *f) const // Same as dump(Formatter*) but without the "subuser" key.
+{
+ encode_json("access_key", id, f);
+ encode_json("secret_key", key, f);
+}
+
+void encode_json_plain(const char *name, const RGWAccessKey& val, Formatter *f) // Wraps RGWAccessKey::dump_plain in an object section called `name`.
+{
+ f->open_object_section(name);
+ val.dump_plain(f);
+ f->close_section();
+}
+
+/// Dump this key as it appears inside a user record.
+///
+/// @param f     output formatter
+/// @param user  owning user's id; ":<subuser>" is appended when this key
+///              belongs to a subuser
+/// @param swift when true the "access_key" field is left out
+void RGWAccessKey::dump(Formatter *f, const string& user, bool swift) const
+{
+  string full_name(user);
+  if (!subuser.empty()) {
+    full_name += ":";
+    full_name += subuser;
+  }
+  encode_json("user", full_name, f);
+
+  if (!swift) {
+    encode_json("access_key", id, f);
+  }
+  encode_json("secret_key", key, f);
+}
+
+/// Decode an access key from JSON.
+///
+/// "access_key" and "secret_key" are decoded with the trailing `true`
+/// argument (presumably "mandatory" -- confirm in JSONDecoder). When no
+/// "subuser" key can be decoded, the subuser is instead taken from the
+/// part of the "user" key after the first ':', if there is one.
+void RGWAccessKey::decode_json(JSONObj *obj) {
+  JSONDecoder::decode_json("access_key", id, obj, true);
+  JSONDecoder::decode_json("secret_key", key, obj, true);
+  if (!JSONDecoder::decode_json("subuser", subuser, obj)) {
+    string user;
+    JSONDecoder::decode_json("user", user, obj);
+    // Compare against npos instead of narrowing the size_type result to
+    // int and testing its sign.
+    const string::size_type pos = user.find(':');
+    if (pos != string::npos) {
+      subuser = user.substr(pos + 1);
+    }
+  }
+}
+
+/// Decode an access key, optionally in the swift-key layout.
+///
+/// @param obj   JSON object to read from
+/// @param swift false: identical to decode_json(obj).
+///              true: when no "subuser" key can be decoded, `id` is read
+///              from "user" and the subuser is the part of it after the
+///              first ':' (if any); "secret_key" is then read.
+void RGWAccessKey::decode_json(JSONObj *obj, bool swift) {
+  if (!swift) {
+    decode_json(obj);
+    return;
+  }
+
+  if (!JSONDecoder::decode_json("subuser", subuser, obj)) {
+    JSONDecoder::decode_json("user", id, obj, true);
+    // Compare against npos instead of narrowing the size_type result to
+    // int and testing its sign.
+    const string::size_type pos = id.find(':');
+    if (pos != string::npos) {
+      subuser = id.substr(pos + 1);
+    }
+  }
+  JSONDecoder::decode_json("secret_key", key, obj, true);
+}
+
+struct rgw_flags_desc { // One row of a mask-to-name table; the tables below end with a {0, NULL} row.
+ uint32_t mask; // bit pattern to test for
+ const char *str; // text printed when all bits of `mask` are set
+};
+
+static struct rgw_flags_desc rgw_perms[] = { // Permission names; mask_to_str scans rows in order, so combined masks precede their parts.
+ { RGW_PERM_FULL_CONTROL, "full-control" },
+ { RGW_PERM_READ | RGW_PERM_WRITE, "read-write" },
+ { RGW_PERM_READ, "read" },
+ { RGW_PERM_WRITE, "write" },
+ { RGW_PERM_READ_ACP, "read-acp" },
+ { RGW_PERM_WRITE_ACP, "write-acp" },
+ { 0, NULL } // terminator: a zero mask stops the scan
+};
+
+/// Render a bit mask as a ", "-separated list of names.
+///
+/// @param mask_list table of {mask, name} rows ending with a zero mask;
+///                  rows are tried in order and a row matches only when
+///                  all of its bits are set in what is left of `mask`
+/// @param mask      bits to describe; 0 yields "<none>"
+/// @param buf       output buffer, always NUL-terminated on return when
+///                  len > 0
+/// @param len       size of `buf` in bytes; nothing is written if <= 0
+///
+/// Bits that match no row are silently dropped from the output. If the
+/// text does not fit, it is truncated and the function returns.
+static void mask_to_str(rgw_flags_desc *mask_list, uint32_t mask, char *buf, int len)
+{
+  if (len <= 0)
+    return;
+  // Guarantee a valid string even if no row matches (previously buf was
+  // left untouched and callers read uninitialized memory).
+  buf[0] = '\0';
+
+  if (!mask) {
+    snprintf(buf, len, "<none>");
+    return;
+  }
+
+  const char *sep = "";
+  int pos = 0;
+  while (mask) {
+    uint32_t orig_mask = mask;
+    for (int i = 0; mask_list[i].mask; i++) {
+      struct rgw_flags_desc *desc = &mask_list[i];
+      if ((mask & desc->mask) == desc->mask) {
+        const int room = len - pos;
+        const int n = snprintf(buf + pos, room, "%s%s", sep, desc->str);
+        // snprintf returns the length it *wanted* to write; n >= room
+        // means truncation. Stop rather than advance pos past the buffer
+        // and pass a negative size to the next call.
+        if (n < 0 || n >= room)
+          return;
+        pos += n;
+        sep = ", ";
+        mask &= ~desc->mask;
+        if (!mask)
+          return;
+      }
+    }
+    if (mask == orig_mask) // no change
+      break;
+  }
+}
+
+/// Format a permission mask into `buf` (at most `len` bytes) using the
+/// rgw_perms name table.
+static void perm_to_str(uint32_t mask, char *buf, int len)
+{
+  mask_to_str(rgw_perms, mask, buf, len);
+}
+
+static struct rgw_flags_desc op_type_flags[] = { // Names for the operation-type bits, used by op_type_to_str.
+ { RGW_OP_TYPE_READ, "read" },
+ { RGW_OP_TYPE_WRITE, "write" },
+ { RGW_OP_TYPE_DELETE, "delete" },
+ { 0, NULL } // terminator: a zero mask stops the scan
+};
+
+/// Format an operation-type mask into `buf` (at most `len` bytes) using
+/// the op_type_flags name table. Has external linkage.
+extern void op_type_to_str(uint32_t mask, char *buf, int len)
+{
+  mask_to_str(op_type_flags, mask, buf, len);
+}
+
+/// Dump the subuser with its bare name as "id" and its permission mask
+/// rendered as text under "permissions".
+void RGWSubUser::dump(Formatter *f) const
+{
+  encode_json("id", name, f);
+
+  char perm_text[256];
+  perm_to_str(perm_mask, perm_text, sizeof(perm_text));
+  encode_json("permissions", (const char *)perm_text, f);
+}
+
+/// Dump the subuser qualified by its parent: "id" is "<user>:<name>",
+/// "permissions" is the permission mask rendered as text.
+void RGWSubUser::dump(Formatter *f, const string& user) const
+{
+  string qualified_id(user);
+  qualified_id += ":";
+  qualified_id += name;
+  encode_json("id", qualified_id, f);
+
+  char perm_text[256];
+  perm_to_str(perm_mask, perm_text, sizeof(perm_text));
+  encode_json("permissions", (const char *)perm_text, f);
+}
+
+/// Map a permission name to its mask.
+///
+/// Recognizes exactly "read", "write", "read-write" and "full-control";
+/// any other input (including the "-acp" names and comma-separated lists
+/// that perm_to_str can produce) yields 0.
+static uint32_t str_to_perm(const string& s)
+{
+  if (s == "read") {
+    return RGW_PERM_READ;
+  }
+  if (s == "write") {
+    return RGW_PERM_WRITE;
+  }
+  if (s == "read-write") {
+    return RGW_PERM_READ | RGW_PERM_WRITE;
+  }
+  if (s == "full-control") {
+    return RGW_PERM_FULL_CONTROL;
+  }
+  return 0;
+}
+
+/// Decode a subuser from JSON.
+///
+/// `name` is set to the part of "id" after the first ':'; when "id"
+/// contains no ':' the name is left unchanged. "permissions" is mapped
+/// through str_to_perm(), so unrecognized text produces a mask of 0.
+void RGWSubUser::decode_json(JSONObj *obj)
+{
+  string uid;
+  JSONDecoder::decode_json("id", uid, obj);
+  // Compare against npos instead of narrowing the size_type result to
+  // int and testing its sign.
+  const string::size_type pos = uid.find(':');
+  if (pos != string::npos)
+    name = uid.substr(pos + 1);
+  string perm_str;
+  JSONDecoder::decode_json("permissions", perm_str, obj);
+  perm_mask = str_to_perm(perm_str);
+}
+
+/// encode_json_map callback: dumps a subuser qualified by the id of the
+/// RGWUserInfo passed through `parent`. `name` is not used.
+static void user_info_dump_subuser(const char *name, const RGWSubUser& subuser, Formatter *f, void *parent)
+{
+  auto *owner = static_cast<RGWUserInfo *>(parent);
+  subuser.dump(f, owner->user_id.to_str());
+}
+
+/// encode_json_map callback: dumps an access key in the non-swift layout,
+/// qualified by the id of the RGWUserInfo passed through `parent`.
+/// `name` is not used.
+static void user_info_dump_key(const char *name, const RGWAccessKey& key, Formatter *f, void *parent)
+{
+  auto *owner = static_cast<RGWUserInfo *>(parent);
+  key.dump(f, owner->user_id.to_str(), false);
+}
+
+/// encode_json_map callback: dumps an access key in the swift layout,
+/// qualified by the id of the RGWUserInfo passed through `parent`.
+/// `name` is not used.
+static void user_info_dump_swift_key(const char *name, const RGWAccessKey& key, Formatter *f, void *parent)
+{
+  auto *owner = static_cast<RGWUserInfo *>(parent);
+  key.dump(f, owner->user_id.to_str(), true);
+}
+
+/// Dump the complete user record.
+///
+/// Subusers and both key maps go through encode_json_map with callbacks
+/// that receive `this`, so their entries are qualified with the user id.
+/// "system" and "admin" appear only when set. The identity type is
+/// written as text; values other than rgw/keystone/ldap become "none".
+void RGWUserInfo::dump(Formatter *f) const
+{
+  encode_json("user_id", user_id.to_str(), f);
+  encode_json("display_name", display_name, f);
+  encode_json("email", user_email, f);
+  encode_json("suspended", (int)suspended, f);
+  encode_json("max_buckets", (int)max_buckets, f);
+
+  encode_json_map("subusers", NULL, "subuser", NULL, user_info_dump_subuser,(void *)this, subusers, f);
+  encode_json_map("keys", NULL, "key", NULL, user_info_dump_key,(void *)this, access_keys, f);
+  encode_json_map("swift_keys", NULL, "key", NULL, user_info_dump_swift_key,(void *)this, swift_keys, f);
+
+  encode_json("caps", caps, f);
+
+  char op_mask_text[256];
+  op_type_to_str(op_mask, op_mask_text, sizeof(op_mask_text));
+  encode_json("op_mask", (const char *)op_mask_text, f);
+
+  /* flags are only shown for users that actually have them */
+  if (system) {
+    encode_json("system", (bool)system, f);
+  }
+  if (admin) {
+    encode_json("admin", (bool)admin, f);
+  }
+
+  encode_json("default_placement", default_placement.name, f);
+  encode_json("default_storage_class", default_placement.storage_class, f);
+  encode_json("placement_tags", placement_tags, f);
+  encode_json("bucket_quota", bucket_quota, f);
+  encode_json("user_quota", user_quota, f);
+  encode_json("temp_url_keys", temp_url_keys, f);
+
+  /* "none" covers both TYPE_NONE and any unknown value */
+  string user_source_type = "none";
+  switch ((RGWIdentityType)type) {
+  case TYPE_RGW:
+    user_source_type = "rgw";
+    break;
+  case TYPE_KEYSTONE:
+    user_source_type = "keystone";
+    break;
+  case TYPE_LDAP:
+    user_source_type = "ldap";
+    break;
+  default:
+    break;
+  }
+  encode_json("type", user_source_type, f);
+  encode_json("mfa_ids", mfa_ids, f);
+}
+
+
+/// Per-element decoder for the "keys" array: decodes one access key in
+/// the non-swift layout and stores it in `m` under its id, replacing any
+/// existing entry with that id.
+static void decode_access_keys(map<string, RGWAccessKey>& m, JSONObj *o)
+{
+  RGWAccessKey decoded;
+  decoded.decode_json(o);
+  m[decoded.id] = decoded;
+}
+
+/// Per-element decoder for the "swift_keys" array: decodes one key in the
+/// swift layout and stores it in `m` under its id, replacing any existing
+/// entry with that id.
+static void decode_swift_keys(map<string, RGWAccessKey>& m, JSONObj *o)
+{
+  RGWAccessKey decoded;
+  decoded.decode_json(o, true);
+  m[decoded.id] = decoded;
+}
+
+/// Per-element decoder for the "subusers" array: decodes one subuser and
+/// stores it in `m` under its name, replacing any existing entry with
+/// that name.
+static void decode_subusers(map<string, RGWSubUser>& m, JSONObj *o)
+{
+  RGWSubUser decoded;
+  decoded.decode_json(o);
+  m[decoded.name] = decoded;
+}
+
+void RGWUserInfo::decode_json(JSONObj *obj) // Inverse of RGWUserInfo::dump(): fills this record from the keys dump() writes.
+{
+ string uid;
+
+ JSONDecoder::decode_json("user_id", uid, obj, true); // trailing `true`: presumably marks the key mandatory -- confirm in JSONDecoder
+ user_id.from_str(uid);
+
+ JSONDecoder::decode_json("display_name", display_name, obj);
+ JSONDecoder::decode_json("email", user_email, obj);
+ bool susp = false; // decoded as bool, then narrowed to the __u8 member
+ JSONDecoder::decode_json("suspended", susp, obj);
+ suspended = (__u8)susp;
+ JSONDecoder::decode_json("max_buckets", max_buckets, obj);
+
+ JSONDecoder::decode_json("keys", access_keys, decode_access_keys, obj); // the three maps use the per-element callbacks defined above
+ JSONDecoder::decode_json("swift_keys", swift_keys, decode_swift_keys, obj);
+ JSONDecoder::decode_json("subusers", subusers, decode_subusers, obj);
+
+ JSONDecoder::decode_json("caps", caps, obj);
+
+ string mask_str; // op_mask travels as text and is parsed back into bits
+ JSONDecoder::decode_json("op_mask", mask_str, obj);
+ rgw_parse_op_type_list(mask_str, &op_mask);
+
+ bool sys = false; // dump() omits "system"/"admin" when unset, hence the false defaults
+ JSONDecoder::decode_json("system", sys, obj);
+ system = (__u8)sys;
+ bool ad = false;
+ JSONDecoder::decode_json("admin", ad, obj);
+ admin = (__u8)ad;
+ JSONDecoder::decode_json("default_placement", default_placement.name, obj);
+ JSONDecoder::decode_json("default_storage_class", default_placement.storage_class, obj);
+ JSONDecoder::decode_json("placement_tags", placement_tags, obj);
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+ JSONDecoder::decode_json("user_quota", user_quota, obj);
+ JSONDecoder::decode_json("temp_url_keys", temp_url_keys, obj);
+
+ string user_source_type;
+ JSONDecoder::decode_json("type", user_source_type, obj);
+ if (user_source_type == "rgw") { // any other string leaves `type` as it was: there is no final else
+ type = TYPE_RGW;
+ } else if (user_source_type == "keystone") {
+ type = TYPE_KEYSTONE;
+ } else if (user_source_type == "ldap") {
+ type = TYPE_LDAP;
+ } else if (user_source_type == "none") {
+ type = TYPE_NONE;
+ }
+ JSONDecoder::decode_json("mfa_ids", mfa_ids, obj);
+}
+
+void RGWQuotaInfo::dump(Formatter *f) const // Emits the quota settings; the size limit is written both raw and in rounded KB.
+{
+ f->dump_bool("enabled", enabled);
+ f->dump_bool("check_on_raw", check_on_raw);
+
+ f->dump_int("max_size", max_size);
+ f->dump_int("max_size_kb", rgw_rounded_kb(max_size)); // derived from max_size; decode_json only reads it when "max_size" is absent
+ f->dump_int("max_objects", max_objects);
+}
+
+/// Decode quota settings. "max_size" is preferred; when it cannot be
+/// decoded the older "max_size_kb" key is read instead and scaled by 1024.
+void RGWQuotaInfo::decode_json(JSONObj *obj)
+{
+  const bool have_max_size = JSONDecoder::decode_json("max_size", max_size, obj);
+  if (!have_max_size) {
+    /* We're parsing an older version of the struct. */
+    int64_t legacy_kb = 0;
+
+    JSONDecoder::decode_json("max_size_kb", legacy_kb, obj);
+    max_size = legacy_kb * 1024;
+  }
+  JSONDecoder::decode_json("max_objects", max_objects, obj);
+
+  JSONDecoder::decode_json("check_on_raw", check_on_raw, obj);
+  JSONDecoder::decode_json("enabled", enabled, obj);
+}
+
+void rgw_data_placement_target::dump(Formatter *f) const // Emits the three pools of an explicit placement.
+{
+ encode_json("data_pool", data_pool, f);
+ encode_json("data_extra_pool", data_extra_pool, f);
+ encode_json("index_pool", index_pool, f);
+}
+
+void rgw_data_placement_target::decode_json(JSONObj *obj) { // Inverse of dump(): reads the same three pool keys.
+ JSONDecoder::decode_json("data_pool", data_pool, obj);
+ JSONDecoder::decode_json("data_extra_pool", data_extra_pool, obj);
+ JSONDecoder::decode_json("index_pool", index_pool, obj);
+}
+
+void rgw_bucket::dump(Formatter *f) const // Emits the bucket's identity fields and its explicit placement.
+{
+ encode_json("name", name, f);
+ encode_json("marker", marker, f);
+ encode_json("bucket_id", bucket_id, f);
+ encode_json("tenant", tenant, f);
+ encode_json("explicit_placement", explicit_placement, f);
+}
+
+void rgw_bucket::decode_json(JSONObj *obj) { // Inverse of dump(), with a fallback for an older flat layout.
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("marker", marker, obj);
+ JSONDecoder::decode_json("bucket_id", bucket_id, obj);
+ JSONDecoder::decode_json("tenant", tenant, obj);
+ JSONDecoder::decode_json("explicit_placement", explicit_placement, obj);
+ if (explicit_placement.data_pool.empty()) { // nothing came from "explicit_placement": try the top-level keys
+ /* decoding old format */
+ JSONDecoder::decode_json("pool", explicit_placement.data_pool, obj); // old format names the data pool "pool"
+ JSONDecoder::decode_json("data_extra_pool", explicit_placement.data_extra_pool, obj);
+ JSONDecoder::decode_json("index_pool", explicit_placement.index_pool, obj);
+ }
+}
+
+void RGWBucketEntryPoint::dump(Formatter *f) const // Emits the entry point; old_bucket_info is included only when has_bucket_info is set.
+{
+ encode_json("bucket", bucket, f);
+ encode_json("owner", owner, f);
+ utime_t ut(creation_time); // converted to utime_t before encoding
+ encode_json("creation_time", ut, f);
+ encode_json("linked", linked, f);
+ encode_json("has_bucket_info", has_bucket_info, f);
+ if (has_bucket_info) {
+ encode_json("old_bucket_info", old_bucket_info, f);
+ }
+}
+
+void RGWBucketEntryPoint::decode_json(JSONObj *obj) { // Inverse of dump().
+ JSONDecoder::decode_json("bucket", bucket, obj);
+ JSONDecoder::decode_json("owner", owner, obj);
+ utime_t ut; // decoded as utime_t, then converted back with to_real_time()
+ JSONDecoder::decode_json("creation_time", ut, obj);
+ creation_time = ut.to_real_time();
+ JSONDecoder::decode_json("linked", linked, obj);
+ JSONDecoder::decode_json("has_bucket_info", has_bucket_info, obj);
+ if (has_bucket_info) { // old_bucket_info is only read when the flag just decoded is set
+ JSONDecoder::decode_json("old_bucket_info", old_bucket_info, obj);
+ }
+}
+
+void RGWStorageStats::dump(Formatter *f) const // Emits each size in bytes and again via rgw_rounded_kb(), plus the object count.
+{
+ encode_json("size", size, f);
+ encode_json("size_actual", size_rounded, f); // key is "size_actual" although the member is size_rounded
+ encode_json("size_utilized", size_utilized, f);
+ encode_json("size_kb", rgw_rounded_kb(size), f);
+ encode_json("size_kb_actual", rgw_rounded_kb(size_rounded), f);
+ encode_json("size_kb_utilized", rgw_rounded_kb(size_utilized), f);
+ encode_json("num_objects", num_objects, f);
+}
+
+void RGWRedirectInfo::dump(Formatter *f) const // Emits protocol, hostname and the redirect status code.
+{
+ encode_json("protocol", protocol, f);
+ encode_json("hostname", hostname, f);
+ encode_json("http_redirect_code", (int)http_redirect_code, f); // widened to int for encoding
+}
+
+void RGWRedirectInfo::decode_json(JSONObj *obj) { // Inverse of dump().
+ JSONDecoder::decode_json("protocol", protocol, obj);
+ JSONDecoder::decode_json("hostname", hostname, obj);
+ int code; // NOTE(review): uninitialized; relies on decode_json assigning it even when the key is absent -- confirm
+ JSONDecoder::decode_json("http_redirect_code", code, obj);
+ http_redirect_code = code;
+}
+
+void RGWBWRedirectInfo::dump(Formatter *f) const // Emits the nested redirect plus the two key-replacement strings.
+{
+ encode_json("redirect", redirect, f);
+ encode_json("replace_key_prefix_with", replace_key_prefix_with, f);
+ encode_json("replace_key_with", replace_key_with, f);
+}
+
+void RGWBWRedirectInfo::decode_json(JSONObj *obj) { // Inverse of dump(): reads the same three keys.
+ JSONDecoder::decode_json("redirect", redirect, obj);
+ JSONDecoder::decode_json("replace_key_prefix_with", replace_key_prefix_with, obj);
+ JSONDecoder::decode_json("replace_key_with", replace_key_with, obj);
+}
+
+void RGWBWRoutingRuleCondition::dump(Formatter *f) const // Emits the key-prefix and HTTP-error-code parts of a routing condition.
+{
+ encode_json("key_prefix_equals", key_prefix_equals, f);
+ encode_json("http_error_code_returned_equals", (int)http_error_code_returned_equals, f); // widened to int for encoding
+}
+
+void RGWBWRoutingRuleCondition::decode_json(JSONObj *obj) { // Inverse of dump().
+ JSONDecoder::decode_json("key_prefix_equals", key_prefix_equals, obj);
+ int code; // NOTE(review): uninitialized; relies on decode_json assigning it even when the key is absent -- confirm
+ JSONDecoder::decode_json("http_error_code_returned_equals", code, obj);
+ http_error_code_returned_equals = code;
+}
+
+void RGWBWRoutingRule::dump(Formatter *f) const // Emits the rule's condition and its redirect info.
+{
+ encode_json("condition", condition, f);
+ encode_json("redirect_info", redirect_info, f);
+}
+
+void RGWBWRoutingRule::decode_json(JSONObj *obj) { // Inverse of dump(): reads "condition" and "redirect_info".
+ JSONDecoder::decode_json("condition", condition, obj);
+ JSONDecoder::decode_json("redirect_info", redirect_info, obj);
+}
+
+void RGWBWRoutingRules::dump(Formatter *f) const // Emits the rule container under "rules".
+{
+ encode_json("rules", rules, f);
+}
+
+void RGWBWRoutingRules::decode_json(JSONObj *obj) { // Inverse of dump(): reads the "rules" key.
+ JSONDecoder::decode_json("rules", rules, obj);
+}
+
+void RGWBucketWebsiteConf::dump(Formatter *f) const // Emits either the redirect-all target or the document/routing settings, never both.
+{
+ if (!redirect_all.hostname.empty()) { // a redirect-all hostname suppresses every other field
+ encode_json("redirect_all", redirect_all, f);
+ } else {
+ encode_json("index_doc_suffix", index_doc_suffix, f);
+ encode_json("error_doc", error_doc, f);
+ encode_json("routing_rules", routing_rules, f);
+ }
+}
+
+void RGWBucketWebsiteConf::decode_json(JSONObj *obj) { // Reads all four keys unconditionally, unlike dump() which writes only one branch.
+ JSONDecoder::decode_json("redirect_all", redirect_all, obj);
+ JSONDecoder::decode_json("index_doc_suffix", index_doc_suffix, obj);
+ JSONDecoder::decode_json("error_doc", error_doc, obj);
+ JSONDecoder::decode_json("routing_rules", routing_rules, obj);
+}
+
+void RGWBucketInfo::dump(Formatter *f) const // Emits the bucket instance record; website_conf appears only when has_website is set.
+{
+ encode_json("bucket", bucket, f);
+ utime_t ut(creation_time); // converted to utime_t before encoding
+ encode_json("creation_time", ut, f);
+ encode_json("owner", owner.to_str(), f); // owner is written in to_str() form
+ encode_json("flags", flags, f);
+ encode_json("zonegroup", zonegroup, f);
+ encode_json("placement_rule", placement_rule, f);
+ encode_json("has_instance_obj", has_instance_obj, f);
+ encode_json("quota", quota, f);
+ encode_json("num_shards", num_shards, f);
+ encode_json("bi_shard_hash_type", (uint32_t)bucket_index_shard_hash_type, f); // enum-like members are written as integers
+ encode_json("requester_pays", requester_pays, f);
+ encode_json("has_website", has_website, f);
+ if (has_website) {
+ encode_json("website_conf", website_conf, f);
+ }
+ encode_json("swift_versioning", swift_versioning, f);
+ encode_json("swift_ver_location", swift_ver_location, f);
+ encode_json("index_type", (uint32_t)index_type, f);
+ encode_json("mdsearch_config", mdsearch_config, f);
+ encode_json("reshard_status", (int)reshard_status, f);
+ encode_json("new_bucket_instance_id", new_bucket_instance_id, f);
+}
+
+void RGWBucketInfo::decode_json(JSONObj *obj) { // Inverse of dump(). NOTE(review): "new_bucket_instance_id" is dumped but not read here -- confirm that is intended.
+ JSONDecoder::decode_json("bucket", bucket, obj);
+ utime_t ut; // decoded as utime_t, then converted back with to_real_time()
+ JSONDecoder::decode_json("creation_time", ut, obj);
+ creation_time = ut.to_real_time();
+ JSONDecoder::decode_json("owner", owner, obj);
+ JSONDecoder::decode_json("flags", flags, obj);
+ JSONDecoder::decode_json("zonegroup", zonegroup, obj);
+ /* backward compatibility with region */
+ if (zonegroup.empty()) { // fall back to the older "region" key
+ JSONDecoder::decode_json("region", zonegroup, obj);
+ }
+ string pr; // placement rule is read as a string and parsed with from_str()
+ JSONDecoder::decode_json("placement_rule", pr, obj);
+ placement_rule.from_str(pr);
+ JSONDecoder::decode_json("has_instance_obj", has_instance_obj, obj);
+ JSONDecoder::decode_json("quota", quota, obj);
+ JSONDecoder::decode_json("num_shards", num_shards, obj);
+ uint32_t hash_type; // NOTE(review): this, `it` and `rs` below are uninitialized; they rely on decode_json assigning them even when the key is absent -- confirm
+ JSONDecoder::decode_json("bi_shard_hash_type", hash_type, obj);
+ bucket_index_shard_hash_type = (uint8_t)hash_type;
+ JSONDecoder::decode_json("requester_pays", requester_pays, obj);
+ JSONDecoder::decode_json("has_website", has_website, obj);
+ if (has_website) { // website_conf is only read when the flag just decoded is set
+ JSONDecoder::decode_json("website_conf", website_conf, obj);
+ }
+ JSONDecoder::decode_json("swift_versioning", swift_versioning, obj);
+ JSONDecoder::decode_json("swift_ver_location", swift_ver_location, obj);
+ uint32_t it;
+ JSONDecoder::decode_json("index_type", it, obj);
+ index_type = (RGWBucketIndexType)it;
+ JSONDecoder::decode_json("mdsearch_config", mdsearch_config, obj);
+ int rs;
+ JSONDecoder::decode_json("reshard_status", rs, obj);
+ reshard_status = (cls_rgw_reshard_status)rs;
+}
+
+void rgw_obj_key::dump(Formatter *f) const // Emits the key's name, instance and namespace.
+{
+ encode_json("name", name, f);
+ encode_json("instance", instance, f);
+ encode_json("ns", ns, f);
+}
+
+void rgw_obj_key::decode_json(JSONObj *obj) // Inverse of dump(): reads the same three keys.
+{
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("instance", instance, obj);
+ JSONDecoder::decode_json("ns", ns, obj);
+}
+
+void RGWBucketEnt::dump(Formatter *f) const // Emits a bucket listing entry.
+{
+ encode_json("bucket", bucket, f);
+ encode_json("size", size, f);
+ encode_json("size_rounded", size_rounded, f);
+ utime_t ut(creation_time);
+ encode_json("mtime", ut, f); /* mtime / creation time discrepancy needed for backward compatibility */
+ encode_json("count", count, f);
+ encode_json("placement_rule", placement_rule.to_str(), f); // rule in string form
+}
+
+void RGWUploadPartInfo::dump(Formatter *f) const // Emits one upload part: number, size, etag and modification time.
+{
+ encode_json("num", num, f);
+ encode_json("size", size, f);
+ encode_json("etag", etag, f);
+ utime_t ut(modified); // converted to utime_t before encoding
+ encode_json("modified", ut, f);
+}
+
+void rgw_raw_obj::dump(Formatter *f) const // Emits the raw object's pool, oid and locator.
+{
+ encode_json("pool", pool, f);
+ encode_json("oid", oid, f);
+ encode_json("loc", loc, f);
+}
+
+void rgw_raw_obj::decode_json(JSONObj *obj) { // Inverse of dump(): reads the same three keys.
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("oid", oid, obj);
+ JSONDecoder::decode_json("loc", loc, obj);
+}
+
+void rgw_obj::dump(Formatter *f) const // Emits the owning bucket and the object key.
+{
+ encode_json("bucket", bucket, f);
+ encode_json("key", key, f);
+}
+
+void RGWDefaultSystemMetaObjInfo::dump(Formatter *f) const { // Emits the single member default_id.
+ encode_json("default_id", default_id, f);
+}
+
+void RGWDefaultSystemMetaObjInfo::decode_json(JSONObj *obj) { // Inverse of dump(): reads "default_id".
+ JSONDecoder::decode_json("default_id", default_id, obj);
+}
+
+void RGWNameToId::dump(Formatter *f) const { // Emits the single member obj_id.
+ encode_json("obj_id", obj_id, f);
+}
+
+void RGWNameToId::decode_json(JSONObj *obj) { // Inverse of dump(): reads "obj_id".
+ JSONDecoder::decode_json("obj_id", obj_id, obj);
+}
+
+void RGWSystemMetaObj::dump(Formatter *f) const // Emits the id and name; RGWZoneParams::dump calls this first.
+{
+ encode_json("id", id , f);
+ encode_json("name", name , f);
+}
+
+void RGWSystemMetaObj::decode_json(JSONObj *obj) // Inverse of dump(): reads "id" and "name".
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("name", name, obj);
+}
+
+void RGWPeriodLatestEpochInfo::dump(Formatter *f) const { // Emits the member `epoch` under the key "latest_epoch".
+ encode_json("latest_epoch", epoch, f);
+}
+
+void RGWPeriodLatestEpochInfo::decode_json(JSONObj *obj) { // Inverse of dump(): reads "latest_epoch" into `epoch`.
+ JSONDecoder::decode_json("latest_epoch", epoch, obj);
+}
+
+void RGWPeriod::dump(Formatter *f) const // Emits the period: identity, sync status, period map, masters, config and realm fields.
+{
+ encode_json("id", id, f);
+ encode_json("epoch", epoch , f);
+ encode_json("predecessor_uuid", predecessor_uuid, f);
+ encode_json("sync_status", sync_status, f);
+ encode_json("period_map", period_map, f);
+ encode_json("master_zonegroup", master_zonegroup, f);
+ encode_json("master_zone", master_zone, f);
+ encode_json("period_config", period_config, f);
+ encode_json("realm_id", realm_id, f);
+ encode_json("realm_name", realm_name, f);
+ encode_json("realm_epoch", realm_epoch, f);
+}
+
+void RGWPeriod::decode_json(JSONObj *obj) // Inverse of dump(): reads the same eleven keys in the same order.
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("epoch", epoch, obj);
+ JSONDecoder::decode_json("predecessor_uuid", predecessor_uuid, obj);
+ JSONDecoder::decode_json("sync_status", sync_status, obj);
+ JSONDecoder::decode_json("period_map", period_map, obj);
+ JSONDecoder::decode_json("master_zonegroup", master_zonegroup, obj);
+ JSONDecoder::decode_json("master_zone", master_zone, obj);
+ JSONDecoder::decode_json("period_config", period_config, obj);
+ JSONDecoder::decode_json("realm_id", realm_id, obj);
+ JSONDecoder::decode_json("realm_name", realm_name, obj);
+ JSONDecoder::decode_json("realm_epoch", realm_epoch, obj);
+}
+
+void RGWZoneParams::dump(Formatter *f) const // Emits the zone parameters: base id/name, the pools, the system key and placement data.
+{
+ RGWSystemMetaObj::dump(f); // base class writes "id" and "name" first
+ encode_json("domain_root", domain_root, f);
+ encode_json("control_pool", control_pool, f);
+ encode_json("gc_pool", gc_pool, f);
+ encode_json("lc_pool", lc_pool, f);
+ encode_json("log_pool", log_pool, f);
+ encode_json("intent_log_pool", intent_log_pool, f);
+ encode_json("usage_log_pool", usage_log_pool, f);
+ encode_json("reshard_pool", reshard_pool, f);
+ encode_json("user_keys_pool", user_keys_pool, f);
+ encode_json("user_email_pool", user_email_pool, f);
+ encode_json("user_swift_pool", user_swift_pool, f);
+ encode_json("user_uid_pool", user_uid_pool, f);
+ encode_json("otp_pool", otp_pool, f);
+ encode_json_plain("system_key", system_key, f); // plain form: access_key and secret_key only, no subuser
+ encode_json("placement_pools", placement_pools, f);
+ encode_json("metadata_heap", metadata_heap, f);
+ encode_json("tier_config", tier_config, f);
+ encode_json("realm_id", realm_id, f);
+}
+
+void RGWZoneStorageClass::dump(Formatter *f) const // Emits only the members that test true, i.e. the optional fields that are set.
+{
+ if (data_pool) {
+ encode_json("data_pool", data_pool.get(), f); // get() unwraps the contained value
+ }
+ if (compression_type) {
+ encode_json("compression_type", compression_type.get(), f);
+ }
+}
+
+void RGWZoneStorageClass::decode_json(JSONObj *obj) // Inverse of dump(): reads "data_pool" and "compression_type" into the optional members.
+{
+ JSONDecoder::decode_json("data_pool", data_pool, obj);
+ JSONDecoder::decode_json("compression_type", compression_type, obj);
+}
+
+/// Dump every storage class as a field named after the class, in the
+/// iteration order of the underlying container.
+void RGWZoneStorageClasses::dump(Formatter *f) const
+{
+  for (auto& entry : m) {
+    const char *class_name = entry.first.c_str();
+    encode_json(class_name, entry.second, f);
+  }
+}
+
+void RGWZoneStorageClasses::decode_json(JSONObj *obj) // Rebuilds the class container from an object whose field names are storage-class names.
+{
+ JSONFormattable f;
+ decode_json_obj(f, obj); // generic decode, used only to enumerate the field names below
+
+ for (auto& field : f.object()) {
+ JSONObj *field_obj = obj->find_obj(field.first);
+ assert(field_obj); // NOTE(review): plain assert; if it is compiled out, a null field_obj reaches the decode below
+
+ decode_json_obj(m[field.first], field_obj); // existing entries in m are overwritten, others are kept (m is not cleared first)
+ }
+ standard_class = &m[RGW_STORAGE_CLASS_STANDARD]; // assuming m is a std::map, operator[] creates the standard entry when the input had none
+}
+
+void RGWZonePlacementInfo::dump(Formatter *f) const // Emits the placement's pools, storage classes and index type.
+{
+ encode_json("index_pool", index_pool, f);
+ encode_json("storage_classes", storage_classes, f);
+ encode_json("data_extra_pool", data_extra_pool, f);
+ encode_json("index_type", (uint32_t)index_type, f); // written as an integer
+
+ /* no real need for backward compatibility of compression_type and data_pool in here,
+ * rather not clutter the output */
+}
+
+void RGWZonePlacementInfo::decode_json(JSONObj *obj) // Inverse of dump(), plus support for the older top-level "compression"/"data_pool" keys.
+{
+ JSONDecoder::decode_json("index_pool", index_pool, obj);
+ JSONDecoder::decode_json("storage_classes", storage_classes, obj);
+ JSONDecoder::decode_json("data_extra_pool", data_extra_pool, obj);
+ uint32_t it; // NOTE(review): uninitialized; relies on decode_json assigning it even when the key is absent -- confirm
+ JSONDecoder::decode_json("index_type", it, obj);
+ index_type = (RGWBucketIndexType)it;
+
+ /* backward compatibility, these are now defined in storage_classes */
+ string standard_compression_type;
+ string *pcompression = nullptr; // stays null unless the legacy key was decoded
+ if (JSONDecoder::decode_json("compression", standard_compression_type, obj)) {
+ pcompression = &standard_compression_type;
+ }
+ rgw_pool standard_data_pool;
+ rgw_pool *ppool = nullptr; // stays null unless the legacy key was decoded
+ if (JSONDecoder::decode_json("data_pool", standard_data_pool, obj)) {
+ ppool = &standard_data_pool;
+ }
+ if (ppool || pcompression) { // apply whichever legacy values were found to the standard storage class
+ storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, ppool, pcompression);
+ }
+}
+
+void RGWZoneParams::decode_json(JSONObj *obj) // Inverse of dump(): base id/name first, then the same keys dump() writes.
+{
+ RGWSystemMetaObj::decode_json(obj); // base class reads "id" and "name"
+ JSONDecoder::decode_json("domain_root", domain_root, obj);
+ JSONDecoder::decode_json("control_pool", control_pool, obj);
+ JSONDecoder::decode_json("gc_pool", gc_pool, obj);
+ JSONDecoder::decode_json("lc_pool", lc_pool, obj);
+ JSONDecoder::decode_json("log_pool", log_pool, obj);
+ JSONDecoder::decode_json("intent_log_pool", intent_log_pool, obj);
+ JSONDecoder::decode_json("reshard_pool", reshard_pool, obj);
+ JSONDecoder::decode_json("usage_log_pool", usage_log_pool, obj);
+ JSONDecoder::decode_json("user_keys_pool", user_keys_pool, obj);
+ JSONDecoder::decode_json("user_email_pool", user_email_pool, obj);
+ JSONDecoder::decode_json("user_swift_pool", user_swift_pool, obj);
+ JSONDecoder::decode_json("user_uid_pool", user_uid_pool, obj);
+ JSONDecoder::decode_json("otp_pool", otp_pool, obj);
+ JSONDecoder::decode_json("system_key", system_key, obj);
+ JSONDecoder::decode_json("placement_pools", placement_pools, obj);
+ JSONDecoder::decode_json("metadata_heap", metadata_heap, obj);
+ JSONDecoder::decode_json("tier_config", tier_config, obj);
+ JSONDecoder::decode_json("realm_id", realm_id, obj);
+
+}
+
+// Write this zone's members to the formatter, one JSON field per member,
+// in the order listed below.
+void RGWZone::dump(Formatter *f) const
+{
+ encode_json("id", id, f);
+ encode_json("name", name, f);
+ encode_json("endpoints", endpoints, f);
+ encode_json("log_meta", log_meta, f);
+ encode_json("log_data", log_data, f);
+ encode_json("bucket_index_max_shards", bucket_index_max_shards, f);
+ encode_json("read_only", read_only, f);
+ encode_json("tier_type", tier_type, f);
+ encode_json("sync_from_all", sync_from_all, f);
+ encode_json("sync_from", sync_from, f);
+ encode_json("redirect_zone", redirect_zone, f);
+}
+
+// Fill this zone from a JSON object. The keys mirror those written by
+// RGWZone::dump().
+void RGWZone::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("name", name, obj);
+ if (id.empty()) { // no id decoded: fall back to the zone name
+ id = name;
+ }
+ JSONDecoder::decode_json("endpoints", endpoints, obj);
+ JSONDecoder::decode_json("log_meta", log_meta, obj);
+ JSONDecoder::decode_json("log_data", log_data, obj);
+ JSONDecoder::decode_json("bucket_index_max_shards", bucket_index_max_shards, obj);
+ JSONDecoder::decode_json("read_only", read_only, obj);
+ JSONDecoder::decode_json("tier_type", tier_type, obj);
+ JSONDecoder::decode_json("sync_from_all", sync_from_all, true, obj); // presumably `true` is the default used when the key is absent - confirm against JSONDecoder
+ JSONDecoder::decode_json("sync_from", sync_from, obj);
+ JSONDecoder::decode_json("redirect_zone", redirect_zone, obj);
+}
+
+// Write the placement target's name, tags and storage classes to the formatter.
+void RGWZoneGroupPlacementTarget::dump(Formatter *f) const
+{
+ encode_json("name", name, f);
+ encode_json("tags", tags, f);
+ encode_json("storage_classes", storage_classes, f);
+}
+
+// Fill the placement target from a JSON object. If no storage classes end up
+// decoded, RGW_STORAGE_CLASS_STANDARD is inserted so the set is never empty.
+void RGWZoneGroupPlacementTarget::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("tags", tags, obj);
+ JSONDecoder::decode_json("storage_classes", storage_classes, obj);
+ if (storage_classes.empty()) {
+ storage_classes.insert(RGW_STORAGE_CLASS_STANDARD);
+ }
+}
+
+// Write the zonegroup to the formatter: the RGWSystemMetaObj base fields
+// first, then the zonegroup's own members. The zones and placement_targets
+// maps go through encode_json_map rather than encode_json.
+void RGWZoneGroup::dump(Formatter *f) const
+{
+ RGWSystemMetaObj::dump(f);
+ encode_json("api_name", api_name, f);
+ encode_json("is_master", is_master, f);
+ encode_json("endpoints", endpoints, f);
+ encode_json("hostnames", hostnames, f);
+ encode_json("hostnames_s3website", hostnames_s3website, f);
+ encode_json("master_zone", master_zone, f);
+ encode_json_map("zones", zones, f); /* more friendly representation */
+ encode_json_map("placement_targets", placement_targets, f); /* more friendly representation */
+ encode_json("default_placement", default_placement, f);
+ encode_json("realm_id", realm_id, f);
+}
+
+// Per-entry handler for the "zones" key: decodes a single zone from `o` and
+// stores it in `zones` under the zone's id.
+static void decode_zones(map<string, RGWZone>& zones, JSONObj *o)
+{
+  RGWZone zone;
+  zone.decode_json(o);
+  zones[zone.id] = zone;
+}
+
+// Per-entry handler for the "placement_targets" key: decodes a single target
+// from `o` and stores it in `targets` under the target's name.
+static void decode_placement_targets(map<string, RGWZoneGroupPlacementTarget>& targets, JSONObj *o)
+{
+  RGWZoneGroupPlacementTarget target;
+  target.decode_json(o);
+  targets[target.name] = target;
+}
+
+
+// Fill the zonegroup from a JSON object.
+// The base RGWSystemMetaObj fields are decoded first. If that leaves `id`
+// empty the input is treated as the old format: "old format" is logged via
+// derr and the name is used as the id.
+// NOTE(review): dump() writes "default_placement" from the whole
+// default_placement member, while this function reads that key into
+// default_placement.name only and takes the storage class from a separate
+// "default_storage_class" key - confirm that the two formats round-trip.
+void RGWZoneGroup::decode_json(JSONObj *obj)
+{
+ RGWSystemMetaObj::decode_json(obj);
+ if (id.empty()) {
+ derr << "old format " << dendl;
+ JSONDecoder::decode_json("name", name, obj);
+ id = name;
+ }
+ JSONDecoder::decode_json("api_name", api_name, obj);
+ JSONDecoder::decode_json("is_master", is_master, obj);
+ JSONDecoder::decode_json("endpoints", endpoints, obj);
+ JSONDecoder::decode_json("hostnames", hostnames, obj);
+ JSONDecoder::decode_json("hostnames_s3website", hostnames_s3website, obj);
+ JSONDecoder::decode_json("master_zone", master_zone, obj);
+ JSONDecoder::decode_json("zones", zones, decode_zones, obj); // entries handled by decode_zones (keyed by zone id)
+ JSONDecoder::decode_json("placement_targets", placement_targets, decode_placement_targets, obj); // entries keyed by target name
+ JSONDecoder::decode_json("default_placement", default_placement.name, obj);
+ JSONDecoder::decode_json("default_storage_class", default_placement.storage_class, obj);
+ JSONDecoder::decode_json("realm_id", realm_id, obj);
+}
+
+
+// Write the period map's id, zonegroups (via encode_json_map) and
+// short_zone_ids to the formatter.
+void RGWPeriodMap::dump(Formatter *f) const
+{
+ encode_json("id", id, f);
+ encode_json_map("zonegroups", zonegroups, f);
+ encode_json("short_zone_ids", short_zone_ids, f);
+}
+
+// Per-entry handler for the "zonegroups" key: decodes a single zonegroup from
+// `o` and stores it in `zonegroups` under the value returned by get_id().
+static void decode_zonegroups(map<string, RGWZoneGroup>& zonegroups, JSONObj *o)
+{
+  RGWZoneGroup group;
+  group.decode_json(o);
+  zonegroups[group.get_id()] = group;
+}
+
+// Fill the period map from a JSON object, accepting the older "region"
+// key names as a fallback.
+// NOTE(review): master_zonegroup is only ever read from the legacy
+// "master_region" key here (and dump() does not write it) - confirm this
+// is intended.
+void RGWPeriodMap::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("zonegroups", zonegroups, decode_zonegroups, obj);
+ /* backward compatibility with region */
+ if (zonegroups.empty()) {
+ JSONDecoder::decode_json("regions", zonegroups, obj);
+ }
+ /* backward compatibility with region */
+ if (master_zonegroup.empty()) {
+ JSONDecoder::decode_json("master_region", master_zonegroup, obj);
+ }
+ JSONDecoder::decode_json("short_zone_ids", short_zone_ids, obj);
+}
+
+
+// Write the period's bucket and user quota settings to the formatter.
+void RGWPeriodConfig::dump(Formatter *f) const
+{
+ encode_json("bucket_quota", bucket_quota, f);
+ encode_json("user_quota", user_quota, f);
+}
+
+// Read the bucket and user quota settings from a JSON object.
+void RGWPeriodConfig::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+ JSONDecoder::decode_json("user_quota", user_quota, obj);
+}
+
+// Write the region map (regions, master region and both quota settings)
+// to the formatter.
+void RGWRegionMap::dump(Formatter *f) const
+{
+ encode_json("regions", regions, f);
+ encode_json("master_region", master_region, f);
+ encode_json("bucket_quota", bucket_quota, f);
+ encode_json("user_quota", user_quota, f);
+}
+
+// Read the region map from a JSON object; the keys mirror RGWRegionMap::dump().
+void RGWRegionMap::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("regions", regions, obj);
+ JSONDecoder::decode_json("master_region", master_region, obj);
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+ JSONDecoder::decode_json("user_quota", user_quota, obj);
+}
+
+// Write the zonegroup map (zonegroups, master zonegroup and both quota
+// settings) to the formatter.
+void RGWZoneGroupMap::dump(Formatter *f) const
+{
+ encode_json("zonegroups", zonegroups, f);
+ encode_json("master_zonegroup", master_zonegroup, f);
+ encode_json("bucket_quota", bucket_quota, f);
+ encode_json("user_quota", user_quota, f);
+}
+
+// Read the zonegroup map from a JSON object. For both the zonegroups and the
+// master zonegroup, the current key is tried first and the older "region"
+// key is used only if the value is still empty afterwards.
+void RGWZoneGroupMap::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("zonegroups", zonegroups, obj);
+ /* backward compatibility with region */
+ if (zonegroups.empty()) {
+ JSONDecoder::decode_json("regions", zonegroups, obj);
+ }
+ JSONDecoder::decode_json("master_zonegroup", master_zonegroup, obj);
+ /* backward compatibility with region */
+ if (master_zonegroup.empty()) {
+ JSONDecoder::decode_json("master_region", master_zonegroup, obj);
+ }
+
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+ JSONDecoder::decode_json("user_quota", user_quota, obj);
+}
+
+// Write the metadata log marker and its last-update time to the formatter.
+// last_update is converted to utime_t for encoding.
+void RGWMetadataLogInfo::dump(Formatter *f) const
+{
+  encode_json("marker", marker, f);
+  encode_json("last_update", utime_t(last_update), f);
+}
+
+// Read the metadata log marker and last-update time from a JSON object.
+// The time is decoded as utime_t and converted with to_real_time().
+void RGWMetadataLogInfo::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("marker", marker, obj);
+
+  utime_t decoded_update;
+  JSONDecoder::decode_json("last_update", decoded_update, obj);
+  last_update = decoded_update.to_real_time();
+}
+
+// Write the data-changes log marker and its last-update time to the
+// formatter. last_update is converted to utime_t for encoding.
+void RGWDataChangesLogInfo::dump(Formatter *f) const
+{
+  encode_json("marker", marker, f);
+  encode_json("last_update", utime_t(last_update), f);
+}
+
+// Read the data-changes log marker and last-update time from a JSON object.
+// The time is decoded as utime_t and converted with to_real_time().
+void RGWDataChangesLogInfo::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("marker", marker, obj);
+
+  utime_t decoded_update;
+  JSONDecoder::decode_json("last_update", decoded_update, obj);
+  last_update = decoded_update.to_real_time();
+}
+
+
+// Write the realm to the formatter: the RGWSystemMetaObj base fields,
+// followed by current_period and epoch.
+void RGWRealm::dump(Formatter *f) const
+{
+ RGWSystemMetaObj::dump(f);
+ encode_json("current_period", current_period, f);
+ encode_json("epoch", epoch, f);
+}
+
+
+// Read the realm from a JSON object: the RGWSystemMetaObj base fields,
+// followed by current_period and epoch.
+void RGWRealm::decode_json(JSONObj *obj)
+{
+ RGWSystemMetaObj::decode_json(obj);
+ JSONDecoder::decode_json("current_period", current_period, obj);
+ JSONDecoder::decode_json("epoch", epoch, obj);
+}
+
+// Decode a Keystone token object: its id, tenant and expiration time.
+// The "expires" value is read as text and parsed as an ISO8601 date. If it
+// cannot be parsed, `expires` is reset to 0 and JSONDecoder::err is thrown.
+// (The trailing `true` passed to decode_json presumably marks the key as
+// mandatory - confirm against JSONDecoder.)
+void rgw::keystone::TokenEnvelope::Token::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("id", id, obj, true);
+  JSONDecoder::decode_json("tenant", tenant_v2, obj, true);
+
+  string expiry_text;
+  JSONDecoder::decode_json("expires", expiry_text, obj, true);
+
+  struct tm expiry_tm;
+  if (!parse_iso8601(expiry_text.c_str(), &expiry_tm)) {
+    expires = 0;
+    throw JSONDecoder::err("Failed to parse ISO8601 expiration date from Keystone response.");
+  }
+  expires = internal_timegm(&expiry_tm);
+}
+
+// Decode a Keystone role: "id" and "name". Only "name" is decoded with the
+// extra `true` argument (presumably: mandatory - confirm against JSONDecoder).
+void rgw::keystone::TokenEnvelope::Role::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("name", name, obj, true);
+}
+
+// Decode a Keystone domain: "id" and "name".
+void rgw::keystone::TokenEnvelope::Domain::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj, true);
+ JSONDecoder::decode_json("name", name, obj, true);
+}
+
+// Decode a Keystone project: "id", "name" and the nested "domain" object.
+void rgw::keystone::TokenEnvelope::Project::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj, true);
+ JSONDecoder::decode_json("name", name, obj, true);
+ JSONDecoder::decode_json("domain", domain, obj);
+}
+
+// Decode a Keystone user: "id", "name", the nested "domain" object and the
+// "roles" list, which is stored in roles_v2.
+void rgw::keystone::TokenEnvelope::User::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj, true);
+ JSONDecoder::decode_json("name", name, obj, true);
+ JSONDecoder::decode_json("domain", domain, obj);
+ JSONDecoder::decode_json("roles", roles_v2, obj);
+}
+
+// Decode the v3 token layout from `root_obj`: "user", "expires_at", "roles"
+// and "project".
+// The "expires_at" value is read as text and parsed as an ISO8601 date into
+// token.expires. If it cannot be parsed, token.expires is reset to 0 and
+// JSONDecoder::err is thrown.
+void rgw::keystone::TokenEnvelope::decode_v3(JSONObj* const root_obj)
+{
+  std::string expires_iso8601;
+
+  JSONDecoder::decode_json("user", user, root_obj, true);
+  JSONDecoder::decode_json("expires_at", expires_iso8601, root_obj, true);
+  JSONDecoder::decode_json("roles", roles, root_obj, true);
+  JSONDecoder::decode_json("project", project, root_obj, true);
+
+  struct tm t;
+  if (parse_iso8601(expires_iso8601.c_str(), &t)) {
+    token.expires = internal_timegm(&t);
+  } else {
+    token.expires = 0;
+    // Adjacent string literals are concatenated verbatim, so the separating
+    // space has to be part of one of them; without it the message read
+    // "...expiration datefrom Keystone response.".
+    throw JSONDecoder::err("Failed to parse ISO8601 expiration date "
+                           "from Keystone response.");
+  }
+}
+
+// Decode the v2 token layout from `root_obj`: "user" and "token". The roles
+// and project members are then filled from the v2-specific fields that those
+// two decoders populate (user.roles_v2 and token.tenant_v2).
+void rgw::keystone::TokenEnvelope::decode_v2(JSONObj* const root_obj)
+{
+ JSONDecoder::decode_json("user", user, root_obj, true);
+ JSONDecoder::decode_json("token", token, root_obj, true);
+
+ roles = user.roles_v2;
+ project = token.tenant_v2;
+}
+
+// Read one SLO entry from a JSON object: "path", "etag" and "size_bytes".
+// (The stray ';' that followed the function body was an empty declaration
+// and has been dropped.)
+void rgw_slo_entry::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("path", path, obj);
+  JSONDecoder::decode_json("etag", etag, obj);
+  JSONDecoder::decode_json("size_bytes", size_bytes, obj);
+}
+
+// Read the metadata sync info from a JSON object. The "status" string is
+// mapped back to the state value using the same names dump() writes.
+// NOTE(review): any other status string (including the "unknown" that dump()
+// can emit) leaves `state` untouched - confirm this is intended.
+void rgw_meta_sync_info::decode_json(JSONObj *obj)
+{
+ string s;
+ JSONDecoder::decode_json("status", s, obj);
+ if (s == "init") {
+ state = StateInit;
+ } else if (s == "building-full-sync-maps") {
+ state = StateBuildingFullSyncMaps;
+ } else if (s == "sync") {
+ state = StateSync;
+ }
+ JSONDecoder::decode_json("num_shards", num_shards, obj);
+ JSONDecoder::decode_json("period", period, obj);
+ JSONDecoder::decode_json("realm_epoch", realm_epoch, obj);
+}
+
+// Write the metadata sync info to the formatter. The state is emitted under
+// "status" as a readable name; a value outside the three known states is
+// written as "unknown".
+void rgw_meta_sync_info::dump(Formatter *f) const
+{
+  string status_name;
+  switch (static_cast<SyncState>(state)) {
+  case StateInit:
+    status_name = "init";
+    break;
+  case StateBuildingFullSyncMaps:
+    status_name = "building-full-sync-maps";
+    break;
+  case StateSync:
+    status_name = "sync";
+    break;
+  default:
+    status_name = "unknown";
+    break;
+  }
+
+  encode_json("status", status_name, f);
+  encode_json("num_shards", num_shards, f);
+  encode_json("period", period, f);
+  encode_json("realm_epoch", realm_epoch, f);
+}
+
+// Read a metadata sync marker from a JSON object. "state" is decoded through
+// an int and "timestamp" through a utime_t before being assigned to the
+// members.
+void rgw_meta_sync_marker::decode_json(JSONObj *obj)
+{
+ int s;
+ JSONDecoder::decode_json("state", s, obj);
+ state = s;
+ JSONDecoder::decode_json("marker", marker, obj);
+ JSONDecoder::decode_json("next_step_marker", next_step_marker, obj);
+ JSONDecoder::decode_json("total_entries", total_entries, obj);
+ JSONDecoder::decode_json("pos", pos, obj);
+ utime_t ut;
+ JSONDecoder::decode_json("timestamp", ut, obj);
+ timestamp = ut.to_real_time();
+ JSONDecoder::decode_json("realm_epoch", realm_epoch, obj);
+}
+
+// Write a metadata sync marker to the formatter. The state is written as an
+// int and the timestamp as a utime_t.
+void rgw_meta_sync_marker::dump(Formatter *f) const
+{
+ encode_json("state", (int)state, f);
+ encode_json("marker", marker, f);
+ encode_json("next_step_marker", next_step_marker, f);
+ encode_json("total_entries", total_entries, f);
+ encode_json("pos", pos, f);
+ encode_json("timestamp", utime_t(timestamp), f);
+ encode_json("realm_epoch", realm_epoch, f);
+}
+
+// Read the metadata sync status: "info" into sync_info and "markers" into
+// sync_markers.
+void rgw_meta_sync_status::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("info", sync_info, obj);
+ JSONDecoder::decode_json("markers", sync_markers, obj);
+}
+
+// Write the metadata sync status: sync_info as "info" and sync_markers as
+// "markers".
+void rgw_meta_sync_status::dump(Formatter *f) const {
+ encode_json("info", sync_info, f);
+ encode_json("markers", sync_markers, f);
+}
+
+// Write a sync error record (source zone, error code and message) to the
+// formatter.
+void rgw_sync_error_info::dump(Formatter *f) const {
+ encode_json("source_zone", source_zone, f);
+ encode_json("error_code", error_code, f);
+ encode_json("message", message, f);
+}
+
+// Read the full-sync marker ("position" and "count") from a JSON object.
+void rgw_bucket_shard_full_sync_marker::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("position", position, obj);
+ JSONDecoder::decode_json("count", count, obj);
+}
+
+// Write the full-sync marker ("position" and "count") to the formatter.
+void rgw_bucket_shard_full_sync_marker::dump(Formatter *f) const
+{
+ encode_json("position", position, f);
+ encode_json("count", count, f);
+}
+
+// Read the incremental-sync marker ("position") from a JSON object.
+void rgw_bucket_shard_inc_sync_marker::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("position", position, obj);
+}
+
+// Write the incremental-sync marker ("position") to the formatter.
+void rgw_bucket_shard_inc_sync_marker::dump(Formatter *f) const
+{
+ encode_json("position", position, f);
+}
+
+// Read the bucket shard sync info from a JSON object. "status" is mapped
+// back to the state value; any string other than "full-sync" or
+// "incremental-sync" results in StateInit.
+void rgw_bucket_shard_sync_info::decode_json(JSONObj *obj)
+{
+ std::string s;
+ JSONDecoder::decode_json("status", s, obj);
+ if (s == "full-sync") {
+ state = StateFullSync;
+ } else if (s == "incremental-sync") {
+ state = StateIncrementalSync;
+ } else {
+ state = StateInit;
+ }
+ JSONDecoder::decode_json("full_marker", full_marker, obj);
+ JSONDecoder::decode_json("inc_marker", inc_marker, obj);
+}
+
+// Write the bucket shard sync info to the formatter. The state is emitted
+// under "status" as a readable name; a value outside the three known states
+// is written as "unknown".
+void rgw_bucket_shard_sync_info::dump(Formatter *f) const
+{
+  const char *status_name = "unknown";
+  switch (static_cast<SyncState>(state)) {
+  case StateInit:
+    status_name = "init";
+    break;
+  case StateFullSync:
+    status_name = "full-sync";
+    break;
+  case StateIncrementalSync:
+    status_name = "incremental-sync";
+    break;
+  default:
+    // keep "unknown"
+    break;
+  }
+
+  encode_json("status", status_name, f);
+  encode_json("full_marker", full_marker, f);
+  encode_json("inc_marker", inc_marker, f);
+}
+
+/* Copy the characters referenced by a boost::string_ref into a std::string.
+ *
+ * This helper is defined outside of the std namespace, so it shouldn't
+ * conflict with the std::to_string overload that string_ref has provided
+ * since Boost 1.54. It is meant to be removed once the bundled Boost is
+ * merged, which would let us forget that CentOS 7 ships Boost 1.53. */
+static inline std::string to_string(const boost::string_ref& s)
+{
+  return std::string(s.begin(), s.end());
+}
+
+// Write the v2 admin token request to the formatter. The values come from
+// `conf`; sections nest as
+//   token_request > auth > passwordCredentials { username, password }
+// with tenantName placed inside auth, after passwordCredentials.
+void rgw::keystone::AdminTokenRequestVer2::dump(Formatter* const f) const
+{
+ f->open_object_section("token_request");
+ f->open_object_section("auth");
+ f->open_object_section("passwordCredentials");
+ encode_json("username", ::to_string(conf.get_admin_user()), f);
+ encode_json("password", conf.get_admin_password(), f);
+ f->close_section(); // passwordCredentials
+ encode_json("tenantName", ::to_string(conf.get_admin_tenant()), f);
+ f->close_section(); // auth
+ f->close_section(); // token_request
+}
+
+// Write the v3 admin token request to the formatter. The values come from
+// `conf`; sections nest as
+//   token_request > auth > identity { methods: ["password"],
+//                                     password > user { domain{name}, name, password } }
+//   token_request > auth > scope > project { name, domain{name} }
+// The project name is the admin project if one is configured, otherwise the
+// admin tenant.
+void rgw::keystone::AdminTokenRequestVer3::dump(Formatter* const f) const
+{
+ f->open_object_section("token_request");
+ f->open_object_section("auth");
+ f->open_object_section("identity");
+ f->open_array_section("methods");
+ f->dump_string("", "password");
+ f->close_section(); // methods
+ f->open_object_section("password");
+ f->open_object_section("user");
+ f->open_object_section("domain");
+ encode_json("name", ::to_string(conf.get_admin_domain()), f);
+ f->close_section(); // domain
+ encode_json("name", ::to_string(conf.get_admin_user()), f);
+ encode_json("password", conf.get_admin_password(), f);
+ f->close_section(); // user
+ f->close_section(); // password
+ f->close_section(); // identity
+ f->open_object_section("scope");
+ f->open_object_section("project");
+ if (! conf.get_admin_project().empty()) {
+ encode_json("name", ::to_string(conf.get_admin_project()), f);
+ } else {
+ encode_json("name", ::to_string(conf.get_admin_tenant()), f);
+ }
+ f->open_object_section("domain");
+ encode_json("name", ::to_string(conf.get_admin_domain()), f);
+ f->close_section(); // domain
+ f->close_section(); // project
+ f->close_section(); // scope
+ f->close_section(); // auth
+ f->close_section(); // token_request
+}
+
+
+// Write the v2 Barbican token request to the formatter, using the
+// rgw_keystone_barbican_* settings read from cct->_conf. Sections nest as
+//   token_request > auth > passwordCredentials { username, password }
+// with tenantName placed inside auth, after passwordCredentials.
+void rgw::keystone::BarbicanTokenRequestVer2::dump(Formatter* const f) const
+{
+ f->open_object_section("token_request");
+ f->open_object_section("auth");
+ f->open_object_section("passwordCredentials");
+ encode_json("username", cct->_conf->rgw_keystone_barbican_user, f);
+ encode_json("password", cct->_conf->rgw_keystone_barbican_password, f);
+ f->close_section(); // passwordCredentials
+ encode_json("tenantName", cct->_conf->rgw_keystone_barbican_tenant, f);
+ f->close_section(); // auth
+ f->close_section(); // token_request
+}
+
+// Write the v3 Barbican token request to the formatter, using the
+// rgw_keystone_barbican_* settings read from cct->_conf. Sections nest as
+//   token_request > auth > identity { methods: ["password"],
+//                                     password > user { domain{name}, name, password } }
+//   token_request > auth > scope > project { name, domain{name} }
+// The project name is rgw_keystone_barbican_project if it is non-empty,
+// otherwise rgw_keystone_barbican_tenant.
+void rgw::keystone::BarbicanTokenRequestVer3::dump(Formatter* const f) const
+{
+ f->open_object_section("token_request");
+ f->open_object_section("auth");
+ f->open_object_section("identity");
+ f->open_array_section("methods");
+ f->dump_string("", "password");
+ f->close_section(); // methods
+ f->open_object_section("password");
+ f->open_object_section("user");
+ f->open_object_section("domain");
+ encode_json("name", cct->_conf->rgw_keystone_barbican_domain, f);
+ f->close_section(); // domain
+ encode_json("name", cct->_conf->rgw_keystone_barbican_user, f);
+ encode_json("password", cct->_conf->rgw_keystone_barbican_password, f);
+ f->close_section(); // user
+ f->close_section(); // password
+ f->close_section(); // identity
+ f->open_object_section("scope");
+ f->open_object_section("project");
+ if (!cct->_conf->rgw_keystone_barbican_project.empty()) {
+ encode_json("name", cct->_conf->rgw_keystone_barbican_project, f);
+ } else {
+ encode_json("name", cct->_conf->rgw_keystone_barbican_tenant, f);
+ }
+ f->open_object_section("domain");
+ encode_json("name", cct->_conf->rgw_keystone_barbican_domain, f);
+ f->close_section(); // domain
+ f->close_section(); // project
+ f->close_section(); // scope
+ f->close_section(); // auth
+ f->close_section(); // token_request
+}
+
+// Write the orphan search stage as an "orphan_search_stage" object holding
+// the stage name, the shard and the marker. A stage value outside the known
+// set is written as "unknown".
+void RGWOrphanSearchStage::dump(Formatter *f) const
+{
+  f->open_object_section("orphan_search_stage");
+
+  // map the stage value to the name used in the output
+  const string stage_name = [this]() -> const char * {
+    switch (stage) {
+    case ORPHAN_SEARCH_STAGE_INIT:
+      return "init";
+    case ORPHAN_SEARCH_STAGE_LSPOOL:
+      return "lspool";
+    case ORPHAN_SEARCH_STAGE_LSBUCKETS:
+      return "lsbuckets";
+    case ORPHAN_SEARCH_STAGE_ITERATE_BI:
+      return "iterate_bucket_index";
+    case ORPHAN_SEARCH_STAGE_COMPARE:
+      return "comparing";
+    default:
+      return "unknown";
+    }
+  }();
+
+  f->dump_string("search_stage", stage_name);
+  f->dump_int("shard", shard);
+  f->dump_string("marker", marker);
+  f->close_section();
+}
+
+// Write the orphan search job description as an "orphan_search_info" object:
+// job name, pool, number of shards and start time.
+void RGWOrphanSearchInfo::dump(Formatter *f) const
+{
+ f->open_object_section("orphan_search_info");
+ f->dump_string("job_name", job_name);
+ encode_json("pool", pool, f);
+ f->dump_int("num_shards", num_shards);
+ encode_json("start_time", start_time, f);
+ f->close_section();
+}
+
+// Write the orphan search state as an "orphan_search_state" object that
+// contains the "info" and "stage" members.
+void RGWOrphanSearchState::dump(Formatter *f) const
+{
+ f->open_object_section("orphan_search_state");
+ encode_json("info", info, f);
+ encode_json("stage", stage, f);
+ f->close_section();
+}
+
+// Write every tag in tag_map to the formatter. Each tag becomes its own
+// "tag_map" object section with a "key" and a "value" string.
+void RGWObjTags::dump(Formatter *f) const
+{
+  for (const auto& [tag_key, tag_value] : tag_map) {
+    f->open_object_section("tag_map");
+    f->dump_string("key", tag_key);
+    f->dump_string("value", tag_value);
+    f->close_section();
+  }
+}
+
+// Write a lifecycle operation to the formatter: the status and
+// delete-marker flags, the three expiration values, the optional expiration
+// date and object tags (each only when set), then the "transitions" and
+// "noncur_transitions" sections with one entry per storage class.
+void lc_op::dump(Formatter *f) const
+{
+ f->dump_bool("status", status);
+ f->dump_bool("dm_expiration", dm_expiration);
+
+ f->dump_int("expiration", expiration);
+ f->dump_int("noncur_expiration", noncur_expiration);
+ f->dump_int("mp_expiration", mp_expiration);
+ if (expiration_date) {
+ utime_t ut(*expiration_date);
+ f->dump_stream("expiration_date") << ut;
+ }
+ if (obj_tags) {
+ f->dump_object("obj_tags", *obj_tags);
+ }
+ f->open_object_section("transitions");
+ for(auto& [storage_class, transition] : transitions) {
+ f->dump_object(storage_class.c_str(), transition); // entry is named after its storage class
+ }
+ f->close_section();
+
+ f->open_object_section("noncur_transitions");
+ for (auto& [storage_class, transition] : noncur_transitions) {
+ f->dump_object(storage_class.c_str(), transition);
+ }
+ f->close_section();
+}
+
+// Write the lifecycle filter (prefix and object tags) to the formatter.
+void LCFilter::dump(Formatter *f) const
+{
+ f->dump_string("prefix", prefix);
+ f->dump_object("obj_tags", obj_tags);
+}
+
+// Write the lifecycle expiration to the formatter; both "days" and "date"
+// are written as strings.
+void LCExpiration::dump(Formatter *f) const
+{
+ f->dump_string("days", days);
+ f->dump_string("date", date);
+}
+
+// Write a lifecycle rule to the formatter: id, prefix and status, the three
+// expiration objects, the filter, the "transitions" and "noncur_transitions"
+// sections (one entry per storage class) and finally the dm_expiration flag.
+void LCRule::dump(Formatter *f) const
+{
+ f->dump_string("id", id);
+ f->dump_string("prefix", prefix);
+ f->dump_string("status", status);
+ f->dump_object("expiration", expiration);
+ f->dump_object("noncur_expiration", noncur_expiration);
+ f->dump_object("mp_expiration", mp_expiration);
+ f->dump_object("filter", filter);
+ f->open_object_section("transitions");
+ for (auto& [storage_class, transition] : transitions) {
+ f->dump_object(storage_class.c_str(), transition); // entry is named after its storage class
+ }
+ f->close_section();
+
+ f->open_object_section("noncur_transitions");
+ for (auto& [storage_class, transition] : noncur_transitions) {
+ f->dump_object(storage_class.c_str(), transition);
+ }
+ f->close_section();
+ f->dump_bool("dm_expiration", dm_expiration);
+}
+
+// Write the lifecycle configuration to the formatter:
+//  - "prefix_map": an object section with one entry per prefix, named after
+//    the prefix itself;
+//  - "rule_map": an array with one "entry" object per rule, each holding the
+//    rule's "id" and a nested "rule" object.
+void RGWLifecycleConfiguration::dump(Formatter *f) const
+{
+ f->open_object_section("prefix_map");
+ for (auto& prefix : prefix_map) {
+ f->dump_object(prefix.first.c_str(), prefix.second);
+ }
+ f->close_section();
+
+ f->open_array_section("rule_map");
+ for (auto& rule : rule_map) {
+ f->open_object_section("entry");
+ f->dump_string("id", rule.first);
+ f->open_object_section("rule");
+ rule.second.dump(f);
+ f->close_section(); // rule
+ f->close_section(); // entry
+ }
+ f->close_section();
+}
+
+// Write one compression block (old_ofs, new_ofs and len, all as unsigned
+// values) to the formatter.
+void compression_block::dump(Formatter *f) const
+{
+ f->dump_unsigned("old_ofs", old_ofs);
+ f->dump_unsigned("new_ofs", new_ofs);
+ f->dump_unsigned("len", len);
+}
+
+// Write the compression info (compression type, original size and the list
+// of blocks) to the formatter.
+void RGWCompressionInfo::dump(Formatter *f) const
+{
+ f->dump_string("compression_type", compression_type);
+ f->dump_unsigned("orig_size", orig_size);
+ ::encode_json("blocks", blocks, f);
+}
diff --git a/src/rgw/rgw_jsonparser.cc b/src/rgw/rgw_jsonparser.cc
new file mode 100644
index 00000000..df520140
--- /dev/null
+++ b/src/rgw/rgw_jsonparser.cc
@@ -0,0 +1,132 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+
+#include "rgw_common.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+// Print the data of every child of `obj` to stdout, one "data=..." line per
+// child.
+void dump_array(JSONObj *obj)
+{
+  for (JSONObjIter child = obj->find_first(); !child.end(); ++child) {
+    JSONObj *element = *child;
+    cout << "data=" << element->get_data() << std::endl;
+  }
+}
+
+// Sample key record (user, access key, secret key) that can be decoded from
+// JSON. Within this file it is only used as the element type of
+// UserInfo::keys.
+struct Key {
+ string user;
+ string access_key;
+ string secret_key;
+
+ // Read the three fields from the JSON keys of the same names.
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("user", user, obj);
+ JSONDecoder::decode_json("access_key", access_key, obj);
+ JSONDecoder::decode_json("secret_key", secret_key, obj);
+ }
+};
+
+// Sample user record that can be decoded from JSON.
+// NOTE(review): main() in this file decodes into RGWUserInfo and never
+// references this struct - it looks like leftover sample code; confirm.
+struct UserInfo {
+ string uid;
+ string display_name;
+ int max_buckets; // no initializer; only set by decode_json()
+ list<Key> keys;
+
+ // Read the fields from JSON; note that uid comes from the "user_id" key.
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("user_id", uid, obj);
+ JSONDecoder::decode_json("display_name", display_name, obj);
+ JSONDecoder::decode_json("max_buckets", max_buckets, obj);
+ JSONDecoder::decode_json("keys", keys, obj);
+ }
+};
+
+
+// Small command-line driver for the JSON parser:
+//  1. reads stdin in 1024-byte chunks and hands each chunk to the parser;
+//  2. prints is_object/is_array/name/data for every top-level element;
+//  3. if a "conditions" element exists, prints the same for its children
+//     (array children are additionally listed through dump_array);
+//  4. decodes the parsed input into an RGWUserInfo and prints it back as
+//     JSON under a "user_info" section.
+// Exits with -1 on a read error and with 1 if the RGWUserInfo decode throws.
+// NOTE(review): a parse error is only reported on stderr; processing then
+// continues with whatever the parser holds - confirm this is intended.
+// NOTE(review): `bl` only ever receives the final chunk and is never read
+// afterwards - it appears to be unused.
+int main(int argc, char **argv) {
+ JSONParser parser;
+
+ char buf[1024];
+ bufferlist bl;
+
+ for (;;) {
+ int done;
+ int len;
+
+ len = fread(buf, 1, sizeof(buf), stdin);
+ if (ferror(stdin)) {
+ cerr << "read error" << std::endl;
+ exit(-1);
+ }
+ done = feof(stdin); // remember EOF so the last chunk is still parsed below
+
+ bool ret = parser.parse(buf, len);
+ if (!ret)
+ cerr << "parse error" << std::endl;
+
+ if (done) {
+ bl.append(buf, len);
+ break;
+ }
+ }
+
+ JSONObjIter iter = parser.find_first();
+
+ for (; !iter.end(); ++iter) {
+ JSONObj *obj = *iter;
+ cout << "is_object=" << obj->is_object() << std::endl;
+ cout << "is_array=" << obj->is_array() << std::endl;
+ cout << "name=" << obj->get_name() << std::endl;
+ cout << "data=" << obj->get_data() << std::endl;
+ }
+
+ iter = parser.find_first("conditions");
+ if (!iter.end()) {
+ JSONObj *obj = *iter;
+
+ JSONObjIter iter2 = obj->find_first();
+ for (; !iter2.end(); ++iter2) {
+ JSONObj *child = *iter2;
+ cout << "is_object=" << child->is_object() << std::endl;
+ cout << "is_array=" << child->is_array() << std::endl;
+ if (child->is_array()) {
+ dump_array(child);
+ }
+ cout << "name=" << child->get_name() <<std::endl;
+ cout << "data=" << child->get_data() <<std::endl;
+ }
+ }
+
+ RGWUserInfo ui;
+
+ try {
+ ui.decode_json(&parser);
+ } catch (JSONDecoder::err& e) {
+ cout << "failed to decode JSON input: " << e.message << std::endl;
+ exit(1);
+ }
+
+ JSONFormatter formatter(true);
+
+ formatter.open_object_section("user_info");
+ ui.dump(&formatter);
+ formatter.close_section();
+
+ formatter.flush(std::cout);
+
+ std::cout << std::endl;
+}
+
diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc
new file mode 100644
index 00000000..dfaefdfb
--- /dev/null
+++ b/src/rgw/rgw_kafka.cc
@@ -0,0 +1,719 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "rgw_kafka.h"
+#include "rgw_url.h"
+#include <librdkafka/rdkafka.h>
+#include "include/ceph_assert.h"
+#include <sstream>
+#include <cstring>
+#include <unordered_map>
+#include <string>
+#include <vector>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <boost/lockfree/queue.hpp>
+#include "common/dout.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// TODO investigation, not necessarily issues:
+// (1) in case of single threaded writer context use spsc_queue
+// (2) check performance of emptying queue to local list, and go over the list and publish
+// (3) use std::shared_mutex (c++17) or equivalent for the connections lock
+
+// comparison operator between a topic pointer and a topic name
+// true when the name held by the kafka topic handle is identical to "name"
+bool operator==(const rd_kafka_topic_t* rkt, const std::string& name) {
+  const char* const topic_name = rd_kafka_topic_name(rkt);
+  return name.compare(topic_name) == 0;
+}
+
+namespace rgw::kafka {
+
+// status codes for publishing
+// these are RGW specific values. any other value handed to status_to_string()
+// is interpreted there as a librdkafka error code (rd_kafka_resp_err_t)
+// TODO: use the actual error code (when conn exists) instead of STATUS_CONNECTION_CLOSED when replying to client
+// connection pointer is null, or the connection is not ok
+static const int STATUS_CONNECTION_CLOSED = -0x1002;
+// the message could not be pushed into the manager's message queue
+static const int STATUS_QUEUE_FULL = -0x1003;
+// passed to the reply callback when the connection already holds
+// "max_inflight" pending callbacks
+static const int STATUS_MAX_INFLIGHT = -0x1004;
+// the manager was stopped, or was never initialized
+static const int STATUS_MANAGER_STOPPED = -0x1005;
+// status code for connection opening
+// set when rd_kafka_conf_new() did not return a configuration object
+static const int STATUS_CONF_ALLOC_FAILED = -0x2001;
+
+// success
+static const int STATUS_OK = 0x0;
+
+// struct for holding the callback and its tag in the callback list
+// the tag is the delivery tag that was attached to the message when it was
+// produced, and is used to find the callback when the ack/nack arrives
+struct reply_callback_with_tag_t {
+  uint64_t tag;
+  reply_callback_t cb;
+
+  reply_callback_with_tag_t(uint64_t _tag, reply_callback_t _cb) : tag(_tag), cb(_cb) {}
+
+  // comparison against a tag value, so that std::find() could search the list by tag
+  // the operator is const since it does not modify the entry
+  bool operator==(uint64_t rhs) const {
+    return tag == rhs;
+  }
+};
+
+typedef std::vector<reply_callback_with_tag_t> CallbackList;
+
+// struct for holding the connection state object as well as list of topics
+// it is used inside an intrusive ref counted pointer (boost::intrusive_ptr)
+// since references to deleted objects may still exist in the calling code
+struct connection_t {
+  // the kafka producer handle. null as long as the connection is not established
+  rd_kafka_t* producer = nullptr;
+  // configuration object, owned by the connection only until a producer is created from it
+  rd_kafka_conf_t* temp_conf = nullptr;
+  // topic handles that were created on this connection
+  std::vector<rd_kafka_topic_t*> topics;
+  // set by Manager::disconnect() and read by the manager's thread, hence atomic
+  std::atomic<bool> marked_for_deletion{false};
+  // tag that will be attached to the next message that has a callback
+  uint64_t delivery_tag = 1;
+  // last status of the connection: STATUS_* value or a librdkafka error code
+  int status = STATUS_OK;
+  mutable std::atomic<int> ref_count = 0;
+  CephContext* const cct;
+  // callbacks of messages that were not acked/nacked yet
+  CallbackList callbacks;
+  const std::string broker;
+  const bool use_ssl;
+  const bool verify_ssl; // TODO currently ignored, not supported in librdkafka v0.11.6
+  const boost::optional<std::string> ca_location;
+  const std::string user;
+  const std::string password;
+
+  // cleanup of all internal connection resource
+  // the object can still remain, and internal connection
+  // resources created again on successful reconnection
+  // every released handle is reset, so that calling destroy() more than once
+  // (e.g. explicitly, and then from the dtor) does not release it again
+  void destroy(int s) {
+    status = s;
+    // destroy temporary conf (if connection was never established)
+    if (temp_conf) {
+      rd_kafka_conf_destroy(temp_conf);
+      temp_conf = nullptr;
+    }
+    if (!producer) {
+      // no producer, nothing else to destroy
+      return;
+    }
+    // wait for all remaining acks/nacks
+    rd_kafka_flush(producer, 5*1000 /* wait for max 5 seconds */);
+    // destroy all topics
+    for (auto topic : topics) {
+      rd_kafka_topic_destroy(topic);
+    }
+    topics.clear();
+    // destroy producer
+    rd_kafka_destroy(producer);
+    producer = nullptr;
+    // fire all remaining callbacks (if not fired by rd_kafka_flush)
+    for (auto& cb_tag : callbacks) {
+      cb_tag.cb(status);
+      ldout(cct, 20) << "Kafka destroy: invoking callback with tag=" << cb_tag.tag << dendl;
+    }
+    callbacks.clear();
+    delivery_tag = 1;
+  }
+
+  // the connection is usable if it has a producer and was not marked for deletion
+  bool is_ok() const {
+    return (producer != nullptr && !marked_for_deletion);
+  }
+
+  // ctor for setting immutable values
+  connection_t(CephContext* _cct, const std::string& _broker, bool _use_ssl, bool _verify_ssl,
+      const boost::optional<const std::string&>& _ca_location,
+      const std::string& _user, const std::string& _password) :
+    cct(_cct), broker(_broker), use_ssl(_use_ssl), verify_ssl(_verify_ssl), ca_location(_ca_location), user(_user), password(_password) {}
+
+  // dtor also destroys the internals
+  ~connection_t() {
+    destroy(STATUS_CONNECTION_CLOSED);
+  }
+
+  friend void intrusive_ptr_add_ref(const connection_t* p);
+  friend void intrusive_ptr_release(const connection_t* p);
+};
+
+// build a multi-line description of the connection:
+// the broker, whether SSL is used, and the CA location (when one was set)
+std::string to_string(const connection_ptr_t& conn) {
+  std::string str("\nBroker: ");
+  str.append(conn->broker);
+  if (conn->use_ssl) {
+    str.append("\nUse SSL");
+  }
+  if (conn->ca_location) {
+    str.append("\nCA Location: ");
+    str.append(*conn->ca_location);
+  }
+  return str;
+}
+// these are required interfaces so that connection_t could be used inside boost::intrusive_ptr
+// add one reference to the connection
+void intrusive_ptr_add_ref(const connection_t* p) {
+  p->ref_count.fetch_add(1);
+}
+// drop one reference to the connection, and delete it when the last one is gone
+void intrusive_ptr_release(const connection_t* p) {
+  // fetch_sub returns the value held before the decrement
+  const auto previous = p->ref_count.fetch_sub(1);
+  if (previous == 1) {
+    delete p;
+  }
+}
+
+// convert int status to string - including RGW specific values
+// values that are not RGW specific are translated by librdkafka
+std::string status_to_string(int s) {
+  if (s == STATUS_OK) {
+    return "STATUS_OK";
+  }
+  if (s == STATUS_CONNECTION_CLOSED) {
+    return "RGW_KAFKA_STATUS_CONNECTION_CLOSED";
+  }
+  if (s == STATUS_QUEUE_FULL) {
+    return "RGW_KAFKA_STATUS_QUEUE_FULL";
+  }
+  if (s == STATUS_MAX_INFLIGHT) {
+    return "RGW_KAFKA_STATUS_MAX_INFLIGHT";
+  }
+  if (s == STATUS_MANAGER_STOPPED) {
+    return "RGW_KAFKA_STATUS_MANAGER_STOPPED";
+  }
+  if (s == STATUS_CONF_ALLOC_FAILED) {
+    return "RGW_KAFKA_STATUS_CONF_ALLOC_FAILED";
+  }
+  const auto kafka_err = static_cast<rd_kafka_resp_err_t>(s);
+  return std::string(rd_kafka_err2str(kafka_err));
+}
+
+// delivery report callback that is registered on the producer's configuration
+// "opaque" is the connection that the producer belongs to
+// the private pointer of the message (if set) is the heap allocated delivery
+// tag, that is used for finding the reply callback. the tag is freed here
+void message_callback(rd_kafka_t* rk, const rd_kafka_message_t* rkmessage, void* opaque) {
+  ceph_assert(opaque);
+
+  const auto conn = reinterpret_cast<connection_t*>(opaque);
+  const auto result = rkmessage->err;
+  const auto tag = reinterpret_cast<uint64_t*>(rkmessage->_private);
+
+  if (tag == nullptr) {
+    // message was published without a callback
+    ldout(conn->cct, 20) << "Kafka run: n/ack received, (no callback) with result=" << result << dendl;
+    return;
+  }
+
+  auto& pending = conn->callbacks;
+  const auto tag_it = std::find(pending.begin(), pending.end(), *tag);
+  if (tag_it == pending.end()) {
+    // TODO add counter for acks with no callback
+    ldout(conn->cct, 10) << "Kafka run: unsolicited n/ack received with tag=" <<
+      *tag << dendl;
+  } else {
+    ldout(conn->cct, 20) << "Kafka run: n/ack received, invoking callback with tag=" <<
+      *tag << " and result=" << rd_kafka_err2str(result) << dendl;
+    tag_it->cb(result);
+    pending.erase(tag_it);
+  }
+  delete tag;
+  // rkmessage is destroyed automatically by librdkafka
+}
+
+// utility function to create a connection, when the connection object already exists
+// used for the initial connection, as well as for connection retries
+// on success: the producer is created, and the status is STATUS_OK
+// on failure: no producer is created, and the status is either
+// STATUS_CONF_ALLOC_FAILED or the value of rd_kafka_last_error()
+// in both cases the connection that was passed in is returned
+connection_ptr_t& create_connection(connection_ptr_t& conn) {
+  // pointer must be valid and not marked for deletion
+  ceph_assert(conn && !conn->marked_for_deletion);
+
+  // reset all status codes
+  conn->status = STATUS_OK;
+  char errstr[512] = {0};
+
+  // a configuration that was left from a previous failed attempt is still
+  // owned by the connection. release it before allocating a new one
+  if (conn->temp_conf) {
+    rd_kafka_conf_destroy(conn->temp_conf);
+    conn->temp_conf = nullptr;
+  }
+
+  conn->temp_conf = rd_kafka_conf_new();
+  if (!conn->temp_conf) {
+    conn->status = STATUS_CONF_ALLOC_FAILED;
+    return conn;
+  }
+
+  // set a single configuration property. on failure errstr holds the reason
+  const auto conf_set = [&conn, &errstr](const char* name, const char* value) {
+    return rd_kafka_conf_set(conn->temp_conf, name, value, errstr, sizeof(errstr)) == RD_KAFKA_CONF_OK;
+  };
+
+  // common handling of a failure to set a configuration property
+  const auto conf_error = [&conn, &errstr]() -> connection_ptr_t& {
+    conn->status = rd_kafka_last_error();
+    ldout(conn->cct, 1) << "Kafka connect: configuration failed: " << errstr << dendl;
+    return conn;
+  };
+
+  // get list of brokers based on the bootstrap broker
+  if (!conf_set("bootstrap.servers", conn->broker.c_str())) return conf_error();
+
+  if (conn->use_ssl) {
+    if (!conn->user.empty()) {
+      // use SSL+SASL
+      if (!conf_set("security.protocol", "SASL_SSL") ||
+          !conf_set("sasl.mechanism", "PLAIN") ||
+          !conf_set("sasl.username", conn->user.c_str()) ||
+          !conf_set("sasl.password", conn->password.c_str())) return conf_error();
+      ldout(conn->cct, 20) << "Kafka connect: successfully configured SSL+SASL security" << dendl;
+    } else {
+      // use only SSL
+      if (!conf_set("security.protocol", "SSL")) return conf_error();
+      ldout(conn->cct, 20) << "Kafka connect: successfully configured SSL security" << dendl;
+    }
+    if (conn->ca_location) {
+      if (!conf_set("ssl.ca.location", conn->ca_location->c_str())) return conf_error();
+      ldout(conn->cct, 20) << "Kafka connect: successfully configured CA location" << dendl;
+    } else {
+      ldout(conn->cct, 20) << "Kafka connect: using default CA location" << dendl;
+    }
+    // Note: when librdkafka.1.0 is available the following line could be uncommented instead of the callback setting call
+    // if (rd_kafka_conf_set(conn->temp_conf, "enable.ssl.certificate.verification", "0", errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) goto conf_error;
+
+    ldout(conn->cct, 20) << "Kafka connect: successfully configured security" << dendl;
+  }
+
+  // set the global callback for delivery success/fail
+  rd_kafka_conf_set_dr_msg_cb(conn->temp_conf, message_callback);
+
+  // set the global opaque pointer to be the connection itself
+  rd_kafka_conf_set_opaque(conn->temp_conf, conn.get());
+
+  // create the producer
+  conn->producer = rd_kafka_new(RD_KAFKA_PRODUCER, conn->temp_conf, errstr, sizeof(errstr));
+  if (!conn->producer) {
+    conn->status = rd_kafka_last_error();
+    ldout(conn->cct, 1) << "Kafka connect: failed to create producer: " << errstr << dendl;
+    return conn;
+  }
+  ldout(conn->cct, 20) << "Kafka connect: successfully created new producer" << dendl;
+
+  // conf ownership passed to producer
+  conn->temp_conf = nullptr;
+  return conn;
+}
+
+// utility function to create a new connection
+// the connection object is always allocated and returned,
+// whether or not connecting to the broker succeeded
+connection_ptr_t create_new_connection(const std::string& broker, CephContext* cct,
+    bool use_ssl,
+    bool verify_ssl,
+    boost::optional<const std::string&> ca_location,
+    const std::string& user,
+    const std::string& password) {
+  // create connection state
+  auto* const state = new connection_t(cct, broker, use_ssl, verify_ssl, ca_location, user, password);
+  connection_ptr_t conn(state);
+  // try to connect
+  create_connection(conn);
+  return conn;
+}
+
+/// a single publish request, as held in the message queue:
+/// the connection to publish on, the topic name, the payload,
+/// and the reply callback (may be empty)
+struct message_wrapper_t {
+  connection_ptr_t conn;
+  std::string topic;
+  std::string message;
+  reply_callback_t cb;
+
+  message_wrapper_t(connection_ptr_t& connection,
+      const std::string& topic_name,
+      const std::string& payload,
+      reply_callback_t callback) :
+    conn(connection),
+    topic(topic_name),
+    message(payload),
+    cb(callback) {}
+};
+
+// the connections, keyed by the broker string
+typedef std::unordered_map<std::string, connection_ptr_t> ConnectionList;
+// fixed size lock-free queue holding raw pointers to the queued messages
+typedef boost::lockfree::queue<message_wrapper_t*, boost::lockfree::fixed_sized<true>> MessageQueue;
+
+// macros used inside a loop where an iterator is either incremented or erased
+// both macros expand to several statements that end with "continue", so they
+// must only be used inside a braced block within the loop
+
+// advance the iterator and move to the next iteration
+#define INCREMENT_AND_CONTINUE(IT) \
+ ++IT; \
+ continue;
+
+// erase the current element, and move to the next iteration with the iterator
+// that erase() returned. expects a "connection_count" variable to be in scope
+#define ERASE_AND_CONTINUE(IT,CONTAINER) \
+ IT=CONTAINER.erase(IT); \
+ --connection_count; \
+ continue;
+
+// the manager owns the list of connections, the queue of messages waiting to
+// be published, and the thread that publishes them and polls for acks
+class Manager {
+public:
+  const size_t max_connections;
+  const size_t max_inflight;
+  const size_t max_queue;
+private:
+  std::atomic<size_t> connection_count;
+  // set by stop() and by the dtor, and read by the runner thread, hence atomic
+  std::atomic<bool> stopped;
+  int read_timeout_ms;
+  ConnectionList connections;
+  MessageQueue messages;
+  std::atomic<size_t> queued;
+  std::atomic<size_t> dequeued;
+  CephContext* const cct;
+  mutable std::mutex connections_lock;
+  std::thread runner;
+
+  // publish a single message that was taken from the queue
+  // takes ownership of the message, which is freed when the function returns
+  // TODO use rd_kafka_produce_batch for better performance
+  void publish_internal(message_wrapper_t* message) {
+    const std::unique_ptr<message_wrapper_t> msg_owner(message);
+    auto& conn = message->conn;
+
+    if (!conn->is_ok()) {
+      // connection had an issue while message was in the queue
+      // TODO add error stats
+      ldout(conn->cct, 1) << "Kafka publish: connection had an issue while message was in the queue. error: " << status_to_string(conn->status) << dendl;
+      if (message->cb) {
+        message->cb(conn->status);
+      }
+      return;
+    }
+
+    // create a new topic unless it was already created
+    auto topic_it = std::find(conn->topics.begin(), conn->topics.end(), message->topic);
+    rd_kafka_topic_t* topic = nullptr;
+    if (topic_it == conn->topics.end()) {
+      topic = rd_kafka_topic_new(conn->producer, message->topic.c_str(), nullptr);
+      if (!topic) {
+        const auto err = rd_kafka_last_error();
+        ldout(conn->cct, 1) << "Kafka publish: failed to create topic: " << message->topic << " error: " << status_to_string(err) << dendl;
+        if (message->cb) {
+          message->cb(err);
+        }
+        conn->destroy(err);
+        return;
+      }
+      // TODO use the topics list as an LRU cache
+      conn->topics.push_back(topic);
+      ldout(conn->cct, 20) << "Kafka publish: successfully created topic: " << message->topic << dendl;
+    } else {
+      topic = *topic_it;
+      ldout(conn->cct, 20) << "Kafka publish: reused existing topic: " << message->topic << dendl;
+    }
+
+    const auto tag = (message->cb == nullptr ? nullptr : new uint64_t(conn->delivery_tag++));
+    const auto rc = rd_kafka_produce(
+        topic,
+        // TODO: non builtin partitioning
+        RD_KAFKA_PARTITION_UA,
+        // make a copy of the payload
+        // so it is safe to pass the pointer from the string
+        RD_KAFKA_MSG_F_COPY,
+        message->message.data(),
+        message->message.length(),
+        // optional key and its length
+        nullptr,
+        0,
+        // opaque data: tag, used in the global callback
+        // in order to invoke the real callback
+        // null if no callback exists
+        tag);
+    if (rc == -1) {
+      const auto err = rd_kafka_last_error();
+      ldout(conn->cct, 10) << "Kafka publish: failed to produce: " << rd_kafka_err2str(err) << dendl;
+      // TODO: dont error on full queue, and don't destroy connection, retry instead
+      // immediately invoke callback on error if needed
+      if (message->cb) {
+        message->cb(err);
+      }
+      conn->destroy(err);
+      delete tag;
+      // the message was not handed to librdkafka and its callback was already
+      // invoked. the tag was freed, so it must not be used or registered
+      return;
+    }
+
+    if (tag) {
+      auto const q_len = conn->callbacks.size();
+      if (q_len < max_inflight) {
+        ldout(conn->cct, 20) << "Kafka publish (with callback, tag=" << *tag << "): OK. Queue has: " << q_len << " callbacks" << dendl;
+        conn->callbacks.emplace_back(*tag, message->cb);
+      } else {
+        // immediately invoke callback with error - this is not a connection error
+        ldout(conn->cct, 1) << "Kafka publish (with callback): failed with error: callback queue full" << dendl;
+        message->cb(STATUS_MAX_INFLIGHT);
+        // tag will be deleted when the global callback is invoked
+      }
+    } else {
+      ldout(conn->cct, 20) << "Kafka publish (no callback): OK" << dendl;
+    }
+  }
+
+  // the managers thread:
+  // (1) empty the queue of messages to be published
+  // (2) loop over all connections and read acks
+  // (3) manages deleted connections
+  // (4) TODO reconnect on connection errors
+  // (5) TODO cleanup timedout callbacks
+  void run() {
+    while (!stopped) {
+
+      // publish all messages in the queue
+      auto reply_count = 0U;
+      const auto send_count = messages.consume_all(std::bind(&Manager::publish_internal, this, std::placeholders::_1));
+      dequeued += send_count;
+      ConnectionList::iterator conn_it;
+      ConnectionList::const_iterator end_it;
+      {
+        // thread safe access to the connection list
+        // once the iterators are fetched they are guaranteed to remain valid
+        std::lock_guard lock(connections_lock);
+        conn_it = connections.begin();
+        end_it = connections.end();
+      }
+      // loop over all connections to read acks
+      for (;conn_it != end_it;) {
+
+        auto& conn = conn_it->second;
+        // delete the connection if marked for deletion
+        if (conn->marked_for_deletion) {
+          ldout(conn->cct, 10) << "Kafka run: connection is deleted" << dendl;
+          conn->destroy(STATUS_CONNECTION_CLOSED);
+          std::lock_guard lock(connections_lock);
+          // erase is safe - does not invalidate any other iterator
+          // lock so no insertion happens at the same time
+          ERASE_AND_CONTINUE(conn_it, connections);
+        }
+
+        // try to reconnect the connection if it has an error
+        if (!conn->is_ok()) {
+          ldout(conn->cct, 10) << "Kafka run: connection status is: " << status_to_string(conn->status) << dendl;
+          const auto& broker = conn_it->first;
+          ldout(conn->cct, 20) << "Kafka run: retry connection" << dendl;
+          if (create_connection(conn)->is_ok() == false) {
+            ldout(conn->cct, 10) << "Kafka run: connection (" << broker << ") retry failed" << dendl;
+            // TODO: add error counter for failed retries
+            // TODO: add exponential backoff for retries
+          } else {
+            ldout(conn->cct, 10) << "Kafka run: connection (" << broker << ") retry successfull" << dendl;
+          }
+          INCREMENT_AND_CONTINUE(conn_it);
+        }
+
+        reply_count += rd_kafka_poll(conn->producer, read_timeout_ms);
+
+        // just increment the iterator
+        ++conn_it;
+      }
+      // if no messages were received or published
+      // across all connection, sleep for 100ms
+      if (send_count == 0 && reply_count == 0) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      }
+    }
+  }
+
+  // used in the dtor for message cleanup
+  static void delete_message(const message_wrapper_t* message) {
+    delete message;
+  }
+
+public:
+  // create the manager and start its thread
+  Manager(size_t _max_connections,
+      size_t _max_inflight,
+      size_t _max_queue,
+      int _read_timeout_ms,
+      CephContext* _cct) :
+    max_connections(_max_connections),
+    max_inflight(_max_inflight),
+    max_queue(_max_queue),
+    connection_count(0),
+    stopped(false),
+    read_timeout_ms(_read_timeout_ms),
+    connections(_max_connections),
+    messages(max_queue),
+    queued(0),
+    dequeued(0),
+    cct(_cct),
+    runner(&Manager::run, this) {
+      // The hashmap has "max connections" as the initial number of buckets,
+      // and allows for 10 collisions per bucket before rehash.
+      // This is to prevent rehashing so that iterators are not invalidated
+      // when a new connection is added.
+      connections.max_load_factor(10.0);
+      // give the runner thread a name for easier debugging
+      const auto rc = ceph_pthread_setname(runner.native_handle(), "kafka_manager");
+      ceph_assert(rc==0);
+  }
+
+  // non copyable
+  Manager(const Manager&) = delete;
+  const Manager& operator=(const Manager&) = delete;
+
+  // stop the main thread
+  void stop() {
+    stopped = true;
+  }
+
+  // disconnect from a broker
+  // the connection is only marked here, the cleanup is done by the manager's thread
+  // returns false if the connection is null or the manager is stopped
+  bool disconnect(connection_ptr_t& conn) {
+    if (!conn || stopped) {
+      return false;
+    }
+    conn->marked_for_deletion = true;
+    return true;
+  }
+
+  // connect to a broker, or reuse an existing connection if already connected
+  // returns null if: the manager is stopped, the URL could not be parsed,
+  // user/password were given without SSL, the existing connection is marked
+  // for deletion, or the maximum number of connections was reached
+  connection_ptr_t connect(const std::string& url,
+      bool use_ssl,
+      bool verify_ssl,
+      boost::optional<const std::string&> ca_location) {
+    if (stopped) {
+      // TODO: increment counter
+      ldout(cct, 1) << "Kafka connect: manager is stopped" << dendl;
+      return nullptr;
+    }
+
+    std::string broker;
+    std::string user;
+    std::string password;
+    if (!parse_url_authority(url, broker, user, password)) {
+      // TODO: increment counter
+      ldout(cct, 1) << "Kafka connect: URL parsing failed" << dendl;
+      return nullptr;
+    }
+
+    // this should be validated by the regex in parse_url()
+    ceph_assert(user.empty() == password.empty());
+
+    if (!user.empty() && !use_ssl) {
+      ldout(cct, 1) << "Kafka connect: user/password are only allowed over secure connection" << dendl;
+      return nullptr;
+    }
+
+    std::lock_guard lock(connections_lock);
+    const auto it = connections.find(broker);
+    // note that ssl vs. non-ssl connection to the same host are two separate connections
+    if (it != connections.end()) {
+      if (it->second->marked_for_deletion) {
+        // TODO: increment counter
+        ldout(cct, 1) << "Kafka connect: endpoint marked for deletion" << dendl;
+        return nullptr;
+      }
+      // connection found - return even if non-ok
+      ldout(cct, 20) << "Kafka connect: connection found" << dendl;
+      return it->second;
+    }
+
+    // connection not found, creating a new one
+    if (connection_count >= max_connections) {
+      // TODO: increment counter
+      ldout(cct, 1) << "Kafka connect: max connections exceeded" << dendl;
+      return nullptr;
+    }
+    const auto conn = create_new_connection(broker, cct, use_ssl, verify_ssl, ca_location, user, password);
+    // create_new_connection must always return a connection object
+    // even if error occurred during creation.
+    // in such a case the creation will be retried in the main thread
+    ceph_assert(conn);
+    ++connection_count;
+    ldout(cct, 10) << "Kafka connect: new connection is created. Total connections: " << connection_count << dendl;
+    return connections.emplace(broker, conn).first->second;
+  }
+
+  // queue a message for publishing, without a reply callback
+  // see publish_with_confirm() for the return values
+  // TODO publish with confirm is needed in "none" case as well, cb should be invoked publish is ok (no ack)
+  int publish(connection_ptr_t& conn,
+      const std::string& topic,
+      const std::string& message) {
+    return publish_with_confirm(conn, topic, message, nullptr);
+  }
+
+  // queue a message for publishing. "cb" (if not empty) is invoked later
+  // with the result of the publishing
+  // returns: STATUS_OK if the message was queued,
+  // STATUS_MANAGER_STOPPED if the manager is stopped,
+  // STATUS_CONNECTION_CLOSED if the connection is null or not ok,
+  // STATUS_QUEUE_FULL if the message could not be pushed to the queue
+  int publish_with_confirm(connection_ptr_t& conn,
+      const std::string& topic,
+      const std::string& message,
+      reply_callback_t cb) {
+    if (stopped) {
+      return STATUS_MANAGER_STOPPED;
+    }
+    if (!conn || !conn->is_ok()) {
+      return STATUS_CONNECTION_CLOSED;
+    }
+    std::unique_ptr<message_wrapper_t> wrapper(new message_wrapper_t(conn, topic, message, cb));
+    if (messages.push(wrapper.get())) {
+      // the queue holds the message now: it is freed by publish_internal() or by the dtor
+      (void)wrapper.release();
+      ++queued;
+      return STATUS_OK;
+    }
+    // the message was not queued, and is freed when "wrapper" goes out of scope
+    return STATUS_QUEUE_FULL;
+  }
+
+  // dtor wait for thread to stop
+  // then connection are cleaned-up
+  ~Manager() {
+    stopped = true;
+    runner.join();
+    messages.consume_all(delete_message);
+  }
+
+  // get the number of connections
+  size_t get_connection_count() const {
+    return connection_count;
+  }
+
+  // get the number of in-flight messages
+  // this is the total number of pending callbacks over all connections
+  size_t get_inflight() const {
+    size_t sum = 0;
+    std::lock_guard lock(connections_lock);
+    for (const auto& conn_pair : connections) {
+      sum += conn_pair.second->callbacks.size();
+    }
+    return sum;
+  }
+
+  // running counter of the queued messages
+  size_t get_queued() const {
+    return queued;
+  }
+
+  // running counter of the dequeued messages
+  size_t get_dequeued() const {
+    return dequeued;
+  }
+};
+
+// singleton manager
+// note that the manager itself is not a singleton, and multiple instances may co-exist
+// the pointer is null until init() is called, and after shutdown()
+// TODO make the pointer atomic in allocation and deallocation to avoid race conditions
+static Manager* s_manager = nullptr;
+
+// values used by init() for creating the manager
+// the first three are also reported by the get_max_*() functions when no manager exists
+static const size_t MAX_CONNECTIONS_DEFAULT = 256;
+static const size_t MAX_INFLIGHT_DEFAULT = 8192;
+static const size_t MAX_QUEUE_DEFAULT = 8192;
+// passed as the timeout (in milliseconds) to rd_kafka_poll()
+static const int READ_TIMEOUT_MS_DEFAULT = 500;
+
+// create the singleton manager with the default limits
+// returns false (and does nothing) if a manager already exists
+bool init(CephContext* cct) {
+  const bool already_initialized = (s_manager != nullptr);
+  if (!already_initialized) {
+    // TODO: take conf from CephContext
+    s_manager = new Manager(MAX_CONNECTIONS_DEFAULT, MAX_INFLIGHT_DEFAULT, MAX_QUEUE_DEFAULT, READ_TIMEOUT_MS_DEFAULT, cct);
+  }
+  return !already_initialized;
+}
+
+// delete the singleton manager (if one exists) and reset the pointer
+void shutdown() {
+  Manager* const manager = s_manager;
+  // the pointer is reset only after the manager was deleted
+  delete manager;
+  s_manager = nullptr;
+}
+
+// connect through the singleton manager. returns null if no manager exists
+connection_ptr_t connect(const std::string& url, bool use_ssl, bool verify_ssl,
+    boost::optional<const std::string&> ca_location) {
+  if (s_manager) {
+    return s_manager->connect(url, use_ssl, verify_ssl, ca_location);
+  }
+  return nullptr;
+}
+
+// publish through the singleton manager
+// returns STATUS_MANAGER_STOPPED if no manager exists
+int publish(connection_ptr_t& conn,
+    const std::string& topic,
+    const std::string& message) {
+  return s_manager ? s_manager->publish(conn, topic, message) : STATUS_MANAGER_STOPPED;
+}
+
+// publish with a reply callback through the singleton manager
+// returns STATUS_MANAGER_STOPPED if no manager exists
+int publish_with_confirm(connection_ptr_t& conn,
+    const std::string& topic,
+    const std::string& message,
+    reply_callback_t cb) {
+  return s_manager ? s_manager->publish_with_confirm(conn, topic, message, cb) : STATUS_MANAGER_STOPPED;
+}
+
+// number of connections held by the manager, zero if no manager exists
+size_t get_connection_count() {
+  return s_manager ? s_manager->get_connection_count() : 0;
+}
+
+// number of in-flight messages, zero if no manager exists
+size_t get_inflight() {
+  return s_manager ? s_manager->get_inflight() : 0;
+}
+
+// running counter of queued messages, zero if no manager exists
+size_t get_queued() {
+  return s_manager ? s_manager->get_queued() : 0;
+}
+
+// running counter of dequeued messages, zero if no manager exists
+size_t get_dequeued() {
+  return s_manager ? s_manager->get_dequeued() : 0;
+}
+
+// maximum number of connections: taken from the manager,
+// or the default value if no manager exists
+size_t get_max_connections() {
+  return s_manager ? s_manager->max_connections : MAX_CONNECTIONS_DEFAULT;
+}
+
+// maximum number of in-flight messages: taken from the manager,
+// or the default value if no manager exists
+size_t get_max_inflight() {
+  return s_manager ? s_manager->max_inflight : MAX_INFLIGHT_DEFAULT;
+}
+
+// maximum size of the message queue: taken from the manager,
+// or the default value if no manager exists
+size_t get_max_queue() {
+  return s_manager ? s_manager->max_queue : MAX_QUEUE_DEFAULT;
+}
+
+// disconnect through the singleton manager. returns false if no manager exists
+bool disconnect(connection_ptr_t& conn) {
+  return s_manager && s_manager->disconnect(conn);
+}
+
+} // namespace kafka
+
diff --git a/src/rgw/rgw_kafka.h b/src/rgw/rgw_kafka.h
new file mode 100644
index 00000000..cccdd65b
--- /dev/null
+++ b/src/rgw/rgw_kafka.h
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include <string>
+#include <functional>
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+#include <boost/optional.hpp>
+
+class CephContext;
+
+namespace rgw::kafka {
+// forward declaration of connection object
+struct connection_t;
+
+// reference counted handle to a connection
+// a null handle means that no connection is available
+typedef boost::intrusive_ptr<connection_t> connection_ptr_t;
+
+// required interfaces needed so that connection_t could be used inside boost::intrusive_ptr
+void intrusive_ptr_add_ref(const connection_t* p);
+void intrusive_ptr_release(const connection_t* p);
+
+// the reply callback is expected to get an integer parameter
+// indicating the result, and not to return anything
+// the result could be converted to a string using status_to_string()
+typedef std::function<void(int)> reply_callback_t;
+
+// initialize the kafka manager
+// returns false if the manager was already initialized
+bool init(CephContext* cct);
+
+// shutdown the kafka manager
+void shutdown();
+
+// connect to a kafka endpoint
+// an existing connection to the same broker is reused
+// returns a null pointer if the manager is not initialized or is stopped,
+// if the URL could not be parsed, if the URL holds user/password while
+// use_ssl is false, if the existing connection is marked for deletion,
+// or if the maximum number of connections was reached
+connection_ptr_t connect(const std::string& url, bool use_ssl, bool verify_ssl, boost::optional<const std::string&> ca_location);
+
+// publish a message over a connection that was already created
+// the message is only queued here, zero is returned if it was queued.
+// any other value could be converted to a string using status_to_string()
+int publish(connection_ptr_t& conn,
+ const std::string& topic,
+ const std::string& message);
+
+// publish a message over a connection that was already created
+// and pass a callback that will be invoked (async) when broker confirms
+// receiving the message
+// the return value is the same as in "publish"
+int publish_with_confirm(connection_ptr_t& conn,
+ const std::string& topic,
+ const std::string& message,
+ reply_callback_t cb);
+
+// convert the integer status returned from the "publish" function to a string
+std::string status_to_string(int s);
+
+// number of connections
+size_t get_connection_count();
+
+// return the number of messages that were sent
+// to broker, but were not yet acked/nacked/timedout
+size_t get_inflight();
+
+// running counter of successfully queued messages
+size_t get_queued();
+
+// running counter of dequeued messages
+size_t get_dequeued();
+
+// number of maximum allowed connections
+size_t get_max_connections();
+
+// number of maximum allowed inflight messages
+size_t get_max_inflight();
+
+// maximum number of messages in the queue
+size_t get_max_queue();
+
+// disconnect from a kafka broker
+// returns false if the connection is null, or if the manager
+// is not initialized or is stopped
+bool disconnect(connection_ptr_t& conn);
+
+// display connection as string
+// the connection must not be null
+std::string to_string(const connection_ptr_t& conn);
+
+}
+
diff --git a/src/rgw/rgw_keystone.cc b/src/rgw/rgw_keystone.cc
new file mode 100644
index 00000000..956ac1bc
--- /dev/null
+++ b/src/rgw/rgw_keystone.cc
@@ -0,0 +1,713 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <fnmatch.h>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string.hpp>
+#include <fstream>
+
+#include "common/errno.h"
+#include "common/ceph_json.h"
+#include "include/types.h"
+#include "include/str_list.h"
+
+#include "rgw_common.h"
+#include "rgw_keystone.h"
+#include "common/ceph_crypto_cms.h"
+#include "common/armor.h"
+#include "common/Cond.h"
+#include "rgw_perf_counters.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Extract the payload enclosed between the "-----BEGIN CMS-----" and
+ * "-----END CMS-----" markers of src and append it to dst with every
+ * '\n' removed.
+ *
+ * Returns 0 on success. Returns -EINVAL when the begin marker is missing
+ * or when no end marker follows it. */
+int rgw_open_cms_envelope(CephContext * const cct,
+                          const std::string& src,
+                          std::string& dst) /* out */
+{
+#define BEGIN_CMS "-----BEGIN CMS-----"
+#define END_CMS "-----END CMS-----"
+
+  const std::string::size_type begin_marker = src.find(BEGIN_CMS);
+  if (begin_marker == std::string::npos) {
+    ldout(cct, 0) << "failed to find " << BEGIN_CMS << " in response" << dendl;
+    return -EINVAL;
+  }
+  /* The payload starts right after the begin marker. */
+  const std::string::size_type start = begin_marker + sizeof(BEGIN_CMS) - 1;
+
+  /* Only an end marker located after the begin marker closes the envelope.
+   * Searching from offset 0 would accept a stray earlier marker and yield a
+   * negative payload length. */
+  const std::string::size_type end = src.find(END_CMS, start);
+  if (end == std::string::npos) {
+    ldout(cct, 0) << "failed to find " << END_CMS << " in response" << dendl;
+    return -EINVAL;
+  }
+
+  /* Copy the payload chunk by chunk, skipping the line breaks. */
+  std::string::size_type pos = start;
+  while (pos < end) {
+    const std::string::size_type next = src.find('\n', pos);
+    if (next == std::string::npos || next >= end) {
+      dst.append(src, pos, end - pos);
+      break;
+    }
+    dst.append(src, pos, next - pos);
+    pos = next + 1;
+  }
+
+  return 0;
+}
+
+/* Decode a base64-armored CMS blob.
+ *
+ * Every '-' of signed_b64 is mapped to '/' before the text is handed to
+ * ceph_unarmor(); the unarmored bytes are then passed to ceph_decode_cms(),
+ * which writes its result to bl.
+ *
+ * Returns 0 on success, otherwise the negative value reported by the step
+ * that failed. */
+int rgw_decode_b64_cms(CephContext * const cct,
+                       const string& signed_b64,
+                       bufferlist& bl)
+{
+  bufferptr signed_ber(signed_b64.size() * 2);
+  char *dest = signed_ber.c_str();
+
+  /* Work on a heap-allocated copy. The input length is controlled by the
+   * token being decoded, so a variable-length stack array (which is also
+   * not standard C++) could overflow the stack. */
+  std::string b64(signed_b64);
+  for (char& c : b64) {
+    if (c == '-') {
+      c = '/';
+    }
+  }
+
+  const char * const src = b64.data();
+  int ret = ceph_unarmor(dest, dest + signed_ber.length(), src,
+                         src + b64.size());
+  if (ret < 0) {
+    ldout(cct, 0) << "ceph_unarmor() failed, ret=" << ret << dendl;
+    return ret;
+  }
+
+  /* NOTE(review): the whole buffer is appended, not just the ret bytes
+   * produced by ceph_unarmor() - kept as in the original; confirm that the
+   * CMS decoder is fine with the trailing bytes. */
+  bufferlist signed_ber_bl;
+  signed_ber_bl.append(signed_ber);
+
+  ret = ceph_decode_cms(cct, signed_ber_bl, bl);
+  if (ret < 0) {
+    ldout(cct, 0) << "ceph_decode_cms returned " << ret << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+#define PKI_ANS1_PREFIX "MII"
+
+/* Tell whether token begins with PKI_ANS1_PREFIX. */
+bool rgw_is_pki_token(const string& token)
+{
+  /* sizeof() counts the terminating NUL of the literal, hence the -1. */
+  constexpr size_t prefix_len = sizeof(PKI_ANS1_PREFIX) - 1;
+  const int cmp = token.compare(0, prefix_len, PKI_ANS1_PREFIX);
+  return cmp == 0;
+}
+
+/* Compute the identifier of a token.
+ *
+ * A token that does not look like a PKI token is its own identifier; for a
+ * PKI token the identifier is the hex representation of its MD5 digest. */
+void rgw_get_token_id(const string& token, string& token_id)
+{
+  if (rgw_is_pki_token(token)) {
+    unsigned char digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+
+    MD5 md5;
+    md5.Update((const unsigned char *)token.c_str(), token.size());
+    md5.Final(digest);
+
+    /* Two hex characters per digest byte plus the terminating NUL. */
+    char hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+    buf_to_hex(digest, CEPH_CRYPTO_MD5_DIGESTSIZE, hex);
+    token_id = hex;
+  } else {
+    token_id = token;
+  }
+}
+
+/* Decode token into bl when it looks like a PKI token.
+ *
+ * Returns true only if the token has the PKI prefix and
+ * rgw_decode_b64_cms() succeeded on it. */
+bool rgw_decode_pki_token(CephContext * const cct,
+                          const string& token,
+                          bufferlist& bl)
+{
+  /* The decoder is only invoked for tokens carrying the PKI prefix. */
+  const bool decoded = rgw_is_pki_token(token) &&
+                       rgw_decode_b64_cms(cct, token, bl) >= 0;
+  if (decoded) {
+    ldout(cct, 20) << "successfully decoded pki token" << dendl;
+  }
+
+  return decoded;
+}
+
+
+namespace rgw {
+namespace keystone {
+
+/* Map the rgw_keystone_api_version setting onto ApiVersion. Any value
+ * other than 2 or 3 is logged as an error and treated as v2. */
+ApiVersion CephCtxConfig::get_api_version() const noexcept
+{
+  if (g_ceph_context->_conf->rgw_keystone_api_version == 3) {
+    return ApiVersion::VER_3;
+  }
+  if (g_ceph_context->_conf->rgw_keystone_api_version == 2) {
+    return ApiVersion::VER_2;
+  }
+
+  dout(0) << "ERROR: wrong Keystone API version: "
+          << g_ceph_context->_conf->rgw_keystone_api_version
+          << "; falling back to v2" << dendl;
+  return ApiVersion::VER_2;
+}
+
+/* Return the configured Keystone URL, with a trailing '/' appended when a
+ * non-empty URL lacks one. The setting is read once, on the first call, and
+ * the result is reused by every later call. */
+std::string CephCtxConfig::get_endpoint_url() const noexcept
+{
+  static const std::string endpoint = [] {
+    std::string url = g_ceph_context->_conf->rgw_keystone_url;
+    if (!url.empty() && !boost::algorithm::ends_with(url, "/")) {
+      url += '/';
+    }
+    return url;
+  }();
+
+  return endpoint;
+}
+
+/* secrets */
+const std::string CephCtxConfig::empty{""}; /* returned by get_admin_token()/get_admin_password() when nothing is configured */
+
+/* Read the whole file at file_path and return its content with leading and
+ * trailing whitespace trimmed.
+ *
+ * An empty string is returned when the file cannot be opened, is empty or
+ * contains only whitespace. */
+static inline std::string read_secret(const std::string& file_path)
+{
+  using namespace std;
+
+  constexpr int16_t size{1024};
+  char buf[size];
+  string s;
+
+  s.reserve(size);
+  ifstream ifs(file_path, ios::in | ios::binary);
+  if (ifs) {
+    auto sbuf = ifs.rdbuf();
+    while (true) {
+      const auto len = sbuf->sgetn(buf, size);
+      if (len <= 0)
+        break;
+      s.append(buf, len);
+    }
+    boost::algorithm::trim(s);
+    /* back() must not be called on an empty string (undefined behaviour),
+     * which is what an empty or whitespace-only file leaves behind here. */
+    if (!s.empty() && s.back() == '\n')
+      s.pop_back();
+  }
+  return s;
+}
+
+/* Return the admin token: the content of the file named by
+ * rgw_keystone_admin_token_path when that setting is non-empty, otherwise
+ * the rgw_keystone_admin_token setting (possibly empty). */
+std::string CephCtxConfig::get_admin_token() const noexcept
+{
+  const auto& token_path = g_ceph_context->_conf->rgw_keystone_admin_token_path;
+  if (!token_path.empty()) {
+    return read_secret(token_path);
+  }
+
+  const auto& inline_token = g_ceph_context->_conf->rgw_keystone_admin_token;
+  if (inline_token.empty()) {
+    return empty;
+  }
+  return inline_token;
+}
+
+/* Return the admin password: the content of the file named by
+ * rgw_keystone_admin_password_path when that setting is non-empty, otherwise
+ * the rgw_keystone_admin_password setting (possibly empty). */
+std::string CephCtxConfig::get_admin_password() const noexcept {
+  const auto& password_path =
+    g_ceph_context->_conf->rgw_keystone_admin_password_path;
+  if (!password_path.empty()) {
+    return read_secret(password_path);
+  }
+
+  const auto& inline_password =
+    g_ceph_context->_conf->rgw_keystone_admin_password;
+  if (inline_password.empty()) {
+    return empty;
+  }
+  return inline_password;
+}
+
+/* Provide an admin token in token.
+ *
+ * Sources are tried in this order: the statically configured admin token,
+ * the admin entry of token_cache, and finally a request to Keystone whose
+ * result is stored in the cache. Returns 0 on success or the error reported
+ * by issue_admin_token_request(). */
+int Service::get_admin_token(CephContext* const cct,
+                             TokenCache& token_cache,
+                             const Config& config,
+                             std::string& token)
+{
+  /* The deprecated "admin token" feature (a shared secret taken from the
+   * keystone.conf file) takes precedence when it is configured. */
+  const std::string configured_token = config.get_admin_token();
+  if (!configured_token.empty()) {
+    token = configured_token;
+    return 0;
+  }
+
+  TokenEnvelope t;
+
+  /* A cached admin token spares a round trip to Keystone. */
+  if (token_cache.find_admin(t)) {
+    ldout(cct, 20) << "found cached admin token" << dendl;
+    token = t.token.id;
+    return 0;
+  }
+
+  /* Nothing usable locally: ask Keystone. */
+  const auto ret = issue_admin_token_request(cct, config, t);
+  if (ret != 0) {
+    return ret;
+  }
+
+  token_cache.add_admin(t);
+  token = t.token.id;
+  return ret;
+}
+
+/* POST an admin token request to Keystone and parse the reply into t.
+ *
+ * Returns -EINVAL when no endpoint URL is configured or the reply cannot be
+ * parsed, -ENOTSUP for an unknown API version, -EACCES when Keystone answers
+ * with HTTP "unauthorized", or the negative value reported by the HTTP
+ * request itself. */
+int Service::issue_admin_token_request(CephContext* const cct,
+                                       const Config& config,
+                                       TokenEnvelope& t)
+{
+  std::string token_url = config.get_endpoint_url();
+  if (token_url.empty()) {
+    return -EINVAL;
+  }
+
+  bufferlist token_bl;
+  RGWGetKeystoneAdminToken token_req(cct, "POST", "", &token_bl);
+  token_req.append_header("Content-Type", "application/json");
+  JSONFormatter jf;
+
+  /* The API version selects both the request serializer and the path that
+   * is appended to the endpoint URL. */
+  const auto keystone_version = config.get_api_version();
+  const char* api_path = nullptr;
+  switch (keystone_version) {
+  case ApiVersion::VER_2: {
+    const AdminTokenRequestVer2 v2_request(config);
+    v2_request.dump(&jf);
+    api_path = "v2.0/tokens";
+    break;
+  }
+  case ApiVersion::VER_3: {
+    const AdminTokenRequestVer3 v3_request(config);
+    v3_request.dump(&jf);
+    api_path = "v3/auth/tokens";
+    break;
+  }
+  default:
+    return -ENOTSUP;
+  }
+
+  std::stringstream body;
+  jf.flush(body);
+  const std::string payload = body.str();
+  token_req.set_post_data(payload);
+  token_req.set_send_length(payload.length());
+  token_url.append(api_path);
+
+  token_req.set_url(token_url);
+
+  const int ret = token_req.process();
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* Detect rejection earlier than during the token parsing step. */
+  const auto http_status = token_req.get_http_status();
+  if (http_status == RGWGetKeystoneAdminToken::HTTP_STATUS_UNAUTHORIZED) {
+    return -EACCES;
+  }
+
+  const int parse_ret = t.parse(cct, token_req.get_subject_token(), token_bl,
+                                keystone_version);
+  return parse_ret != 0 ? -EINVAL : 0;
+}
+
+/* Provide in token a Keystone token obtained with the Barbican request
+ * serializers.
+ *
+ * The barbican entry of the token cache is used when present; otherwise a
+ * token request is POSTed to Keystone and the parsed reply is cached.
+ *
+ * Returns 0 on success, -EINVAL when no endpoint URL is configured or the
+ * reply cannot be parsed, -ENOTSUP for an unknown API version, -EACCES when
+ * Keystone answers with HTTP "unauthorized", or the negative value reported
+ * by the HTTP request itself. */
+int Service::get_keystone_barbican_token(CephContext * const cct,
+                                         std::string& token)
+{
+  using keystone_config_t = rgw::keystone::CephCtxConfig;
+  using keystone_cache_t = rgw::keystone::TokenCache;
+
+  auto& config = keystone_config_t::get_instance();
+  auto& token_cache = keystone_cache_t::get_instance<keystone_config_t>();
+
+  std::string token_url = config.get_endpoint_url();
+  if (token_url.empty()) {
+    return -EINVAL;
+  }
+
+  rgw::keystone::TokenEnvelope t;
+
+  /* Try cache first. */
+  if (token_cache.find_barbican(t)) {
+    ldout(cct, 20) << "found cached barbican token" << dendl;
+    token = t.token.id;
+    return 0;
+  }
+
+  bufferlist token_bl;
+  RGWKeystoneHTTPTransceiver token_req(cct, "POST", "", &token_bl);
+  token_req.append_header("Content-Type", "application/json");
+  JSONFormatter jf;
+
+  const auto keystone_version = config.get_api_version();
+  if (keystone_version == ApiVersion::VER_2) {
+    rgw::keystone::BarbicanTokenRequestVer2 req_serializer(cct);
+    req_serializer.dump(&jf);
+
+    std::stringstream ss;
+    jf.flush(ss);
+    token_req.set_post_data(ss.str());
+    token_req.set_send_length(ss.str().length());
+    token_url.append("v2.0/tokens");
+
+  } else if (keystone_version == ApiVersion::VER_3) {
+    BarbicanTokenRequestVer3 req_serializer(cct);
+    req_serializer.dump(&jf);
+
+    std::stringstream ss;
+    jf.flush(ss);
+    token_req.set_post_data(ss.str());
+    token_req.set_send_length(ss.str().length());
+    token_url.append("v3/auth/tokens");
+  } else {
+    return -ENOTSUP;
+  }
+
+  token_req.set_url(token_url);
+
+  ldout(cct, 20) << "Requesting secret from barbican url=" << token_url << dendl;
+  const int ret = token_req.process();
+  if (ret < 0) {
+    /* The reply buffer is not NUL-terminated (and may be empty), so it must
+     * not be streamed through c_str(); log a length-bounded copy instead. */
+    ldout(cct, 20) << "Barbican process error:" << token_bl.to_str() << dendl;
+    return ret;
+  }
+
+  /* Detect rejection earlier than during the token parsing step. */
+  if (token_req.get_http_status() ==
+      RGWKeystoneHTTPTransceiver::HTTP_STATUS_UNAUTHORIZED) {
+    return -EACCES;
+  }
+
+  if (t.parse(cct, token_req.get_subject_token(), token_bl,
+              keystone_version) != 0) {
+    return -EINVAL;
+  }
+
+  token_cache.add_barbican(t);
+  token = t.token.id;
+  return 0;
+}
+
+
+/* Tell whether any role name of this token matches r, where r is used as
+ * the fnmatch() pattern. */
+bool TokenEnvelope::has_role(const std::string& r) const
+{
+  const char* const pattern = r.c_str();
+  for (const auto& role : roles) {
+    if (fnmatch(pattern, role.name.c_str(), 0) == 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Fill this envelope from the JSON document held in bl.
+ *
+ * The layout matching the requested API version is tried first ("access"
+ * for v2, "token" for v3) and the other layout serves as a fallback.
+ * Whenever the v3 layout is decoded, token.id is taken from token_str.
+ *
+ * Returns 0 on success, -EINVAL on malformed JSON, on a document with
+ * neither section or on a decoding error, -ENOTSUP on an unknown version. */
+int TokenEnvelope::parse(CephContext* const cct,
+                         const std::string& token_str,
+                         ceph::bufferlist& bl,
+                         const ApiVersion version)
+{
+  JSONParser parser;
+  const bool well_formed = parser.parse(bl.c_str(), bl.length());
+  if (!well_formed) {
+    ldout(cct, 0) << "Keystone token parse error: malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  JSONObjIter token_iter = parser.find_first("token");
+  JSONObjIter access_iter = parser.find_first("access");
+
+  try {
+    switch (version) {
+    case rgw::keystone::ApiVersion::VER_2:
+      if (! access_iter.end()) {
+        decode_v2(*access_iter);
+        break;
+      }
+      if (token_iter.end()) {
+        return -EINVAL;
+      }
+      /* The document doesn't follow Identity API v2, so fall back to v3.
+       * This is a workaround for the s3_token middleware, which speaks v2
+       * despite the promise to go with v3. */
+      decode_v3(*token_iter);
+
+      /* Identity v3 conveys the token information not as a part of the
+       * JSON but in the X-Subject-Token HTTP header handed in by the
+       * caller. */
+      token.id = token_str;
+      break;
+
+    case rgw::keystone::ApiVersion::VER_3:
+      if (! token_iter.end()) {
+        decode_v3(*token_iter);
+        /* v3 succeeded. token.id has to come from external input since it
+         * is no longer part of the JSON response; it has been moved to the
+         * X-Subject-Token HTTP header instead. */
+        token.id = token_str;
+        break;
+      }
+      if (access_iter.end()) {
+        return -EINVAL;
+      }
+      /* The token cannot be parsed according to v3, try v2. */
+      decode_v2(*access_iter);
+      break;
+
+    default:
+      return -ENOTSUP;
+    }
+  } catch (const JSONDecoder::err& err) {
+    ldout(cct, 0) << "Keystone token parse error: " << err.message << dendl;
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+/* Locking front end of find_locked(): look token_id up while holding the
+ * cache lock. */
+bool TokenCache::find(const std::string& token_id,
+                      rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+  const bool found = find_locked(token_id, token);
+  return found;
+}
+
+/* Look token_id up in the cache; the caller must hold the cache lock.
+ *
+ * On a hit the envelope is copied into token, the entry is moved to the
+ * front of the LRU list and true is returned. An unknown id or an expired
+ * entry (which is dropped from the cache) yields false and is accounted as
+ * a cache miss. */
+bool TokenCache::find_locked(const std::string& token_id,
+                             rgw::keystone::TokenEnvelope& token)
+{
+  ceph_assert(lock.is_locked_by_me());
+  map<string, token_entry>::iterator iter = tokens.find(token_id);
+  if (iter == tokens.end()) {
+    if (perfcounter) perfcounter->inc(l_rgw_keystone_token_cache_miss);
+    return false;
+  }
+
+  token_entry& entry = iter->second;
+  /* Unlink from the LRU list; the entry is re-inserted at the front below
+   * unless it turns out to be expired. */
+  tokens_lru.erase(entry.lru_iter);
+
+  if (entry.token.expired()) {
+    tokens.erase(iter);
+    /* Nothing is handed back to the caller, so this is a miss, not a hit. */
+    if (perfcounter) perfcounter->inc(l_rgw_keystone_token_cache_miss);
+    return false;
+  }
+  token = entry.token;
+
+  tokens_lru.push_front(token_id);
+  entry.lru_iter = tokens_lru.begin();
+
+  if (perfcounter) perfcounter->inc(l_rgw_keystone_token_cache_hit);
+
+  return true;
+}
+
+/* Look up the entry stored under the remembered admin token id. */
+bool TokenCache::find_admin(rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+  const bool found = find_locked(admin_token_id, token);
+  return found;
+}
+
+/* Look up the entry stored under the remembered barbican token id. */
+bool TokenCache::find_barbican(rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+  const bool found = find_locked(barbican_token_id, token);
+  return found;
+}
+
+/* Locking front end of add_locked(): store token under token_id while
+ * holding the cache lock. */
+void TokenCache::add(const std::string& token_id,
+                     const rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+
+  add_locked(token_id, token);
+}
+
+/* Insert or replace the entry for token_id and make it the most recently
+ * used one; the caller must hold the cache lock. Entries are then evicted
+ * from the back of the LRU list until at most max of them remain. */
+void TokenCache::add_locked(const std::string& token_id,
+                            const rgw::keystone::TokenEnvelope& token)
+{
+  ceph_assert(lock.is_locked_by_me());
+
+  /* An already cached id gives up its current position in the LRU list. */
+  auto existing = tokens.find(token_id);
+  if (existing != tokens.end()) {
+    tokens_lru.erase(existing->second.lru_iter);
+  }
+
+  tokens_lru.push_front(token_id);
+  token_entry& entry = tokens[token_id];
+  entry.token = token;
+  entry.lru_iter = tokens_lru.begin();
+
+  while (tokens_lru.size() > max) {
+    auto victim = tokens.find(tokens_lru.back());
+    ceph_assert(victim != tokens.end());
+    tokens.erase(victim);
+    tokens_lru.pop_back();
+  }
+}
+
+/* Cache token as the admin token: its id (see rgw_get_token_id()) is
+ * remembered in admin_token_id and used as the cache key. */
+void TokenCache::add_admin(const rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+
+  const std::string& raw_id = token.token.id;
+  rgw_get_token_id(raw_id, admin_token_id);
+  add_locked(admin_token_id, token);
+}
+
+/* Cache token as the barbican token: its id (see rgw_get_token_id()) is
+ * remembered in barbican_token_id and used as the cache key. */
+void TokenCache::add_barbican(const rgw::keystone::TokenEnvelope& token)
+{
+  Mutex::Locker guard(lock);
+
+  const std::string& raw_id = token.token.id;
+  rgw_get_token_id(raw_id, barbican_token_id);
+  add_locked(barbican_token_id, token);
+}
+
+/* Drop the entry for token_id, if there is one, from both the map and the
+ * LRU list. */
+void TokenCache::invalidate(const std::string& token_id)
+{
+  Mutex::Locker guard(lock);
+
+  auto found = tokens.find(token_id);
+  if (found != tokens.end()) {
+    ldout(cct, 20) << "invalidating revoked token id=" << token_id << dendl;
+    tokens_lru.erase(found->second.lru_iter);
+    tokens.erase(found);
+  }
+}
+
+/* Fetch the list of revoked tokens from Keystone and invalidate each listed
+ * id in the cache.
+ *
+ * The reply is expected to carry a "signed" section holding a CMS envelope;
+ * the envelope is opened and decoded, and the "revoked" array of the
+ * resulting JSON is walked. Array elements lacking an "id" are logged and
+ * skipped.
+ *
+ * Returns 0 on success, -EINVAL when no admin token or endpoint URL is
+ * available or when a reply/section is malformed or missing, otherwise the
+ * negative value reported by the failing step. */
+int TokenCache::RevokeThread::check_revoked()
+{
+  std::string url;
+  std::string token;
+
+  bufferlist bl;
+  RGWGetRevokedTokens req(cct, "GET", "", &bl);
+
+  if (rgw::keystone::Service::get_admin_token(cct, *cache, config, token) < 0) {
+    return -EINVAL;
+  }
+
+  url = config.get_endpoint_url();
+  if (url.empty()) {
+    return -EINVAL;
+  }
+
+  req.append_header("X-Auth-Token", token);
+
+  const auto keystone_version = config.get_api_version();
+  if (keystone_version == rgw::keystone::ApiVersion::VER_2) {
+    url.append("v2.0/tokens/revoked");
+  } else if (keystone_version == rgw::keystone::ApiVersion::VER_3) {
+    url.append("v3/auth/tokens/OS-PKI/revoked");
+  }
+
+  req.set_url(url);
+
+  req.set_send_length(0);
+  int ret = req.process();
+  if (ret < 0) {
+    return ret;
+  }
+
+  bl.append((char)0); // NUL terminate for debug output
+
+  ldout(cct, 10) << "request returned " << bl.c_str() << dendl;
+
+  JSONParser parser;
+
+  if (!parser.parse(bl.c_str(), bl.length())) {
+    ldout(cct, 0) << "malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  JSONObjIter iter = parser.find_first("signed");
+  if (iter.end()) {
+    ldout(cct, 0) << "revoked tokens response is missing signed section" << dendl;
+    return -EINVAL;
+  }
+
+  JSONObj *signed_obj = *iter;
+  const std::string signed_str = signed_obj->get_data();
+
+  ldout(cct, 10) << "signed=" << signed_str << dendl;
+
+  std::string signed_b64;
+  ret = rgw_open_cms_envelope(cct, signed_str, signed_b64);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ldout(cct, 10) << "content=" << signed_b64 << dendl;
+
+  bufferlist json;
+  ret = rgw_decode_b64_cms(cct, signed_b64, json);
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* Unlike bl above, json is not NUL-terminated, so it must not be streamed
+   * through c_str(); log a length-bounded copy instead. */
+  ldout(cct, 10) << "ceph_decode_cms: decoded: " << json.to_str() << dendl;
+
+  JSONParser list_parser;
+  if (!list_parser.parse(json.c_str(), json.length())) {
+    ldout(cct, 0) << "malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  JSONObjIter revoked_iter = list_parser.find_first("revoked");
+  if (revoked_iter.end()) {
+    ldout(cct, 0) << "no revoked section in json" << dendl;
+    return -EINVAL;
+  }
+
+  JSONObj *revoked_obj = *revoked_iter;
+
+  JSONObjIter tokens_iter = revoked_obj->find_first();
+  for (; !tokens_iter.end(); ++tokens_iter) {
+    JSONObj *o = *tokens_iter;
+
+    JSONObj *token = o->find_obj("id");
+    if (!token) {
+      ldout(cct, 0) << "bad token in array, missing id" << dendl;
+      continue;
+    }
+
+    const std::string token_id = token->get_data();
+    cache->invalidate(token_id);
+  }
+
+  return 0;
+}
+
+/* Report whether the shutdown flag has been raised. */
+bool TokenCache::going_down() const
+{
+  const bool shutting_down = down_flag;
+  return shutting_down;
+}
+
+/* Thread body: run check_revoked(), then sleep for
+ * rgw_keystone_revocation_interval seconds (or until stop() signals the
+ * condition), and repeat until the owning cache reports going_down(). */
+void* TokenCache::RevokeThread::entry()
+{
+  do {
+    ldout(cct, 2) << "keystone revoke thread: start" << dendl;
+    int r = check_revoked();
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: keystone revocation processing returned error r="
+                    << r << dendl;
+    }
+
+    {
+      /* The shutdown flag has to be examined while holding the lock that
+       * stop() takes before signalling. Checking it before locking leaves a
+       * window in which the signal is sent while nobody waits, and the
+       * thread would then sleep for a whole interval before exiting. */
+      Mutex::Locker l(lock);
+      if (cache->going_down()) {
+        break;
+      }
+
+      cond.WaitInterval(lock,
+                        utime_t(cct->_conf->rgw_keystone_revocation_interval, 0));
+    }
+  } while (!cache->going_down());
+
+  return nullptr;
+}
+
+/* Wake the thread up from its interval wait in entry(). */
+void TokenCache::RevokeThread::stop()
+{
+  Mutex::Locker guard(lock);
+
+  cond.Signal();
+}
+
+}; /* namespace keystone */
+}; /* namespace rgw */
diff --git a/src/rgw/rgw_keystone.h b/src/rgw/rgw_keystone.h
new file mode 100644
index 00000000..55ad2f94
--- /dev/null
+++ b/src/rgw/rgw_keystone.h
@@ -0,0 +1,373 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_KEYSTONE_H
+#define CEPH_RGW_KEYSTONE_H
+
+#include <type_traits>
+
+#include <boost/optional.hpp>
+#include <boost/utility/string_ref.hpp>
+
+#include "rgw_common.h"
+#include "rgw_http_client.h"
+#include "common/Cond.h"
+#include "global/global_init.h"
+
+#include <atomic>
+
+int rgw_open_cms_envelope(CephContext *cct,
+ const std::string& src,
+ std::string& dst); /* out */
+int rgw_decode_b64_cms(CephContext *cct,
+ const string& signed_b64,
+ bufferlist& bl);
+bool rgw_is_pki_token(const string& token);
+void rgw_get_token_id(const string& token, string& token_id);
+/* Convenience overload returning the token id by value. */
+static inline std::string rgw_get_token_id(const string& token)
+{
+  std::string id;
+  rgw_get_token_id(token, id);
+  return id;
+}
+bool rgw_decode_pki_token(CephContext *cct,
+ const string& token,
+ bufferlist& bl);
+
+namespace rgw {
+namespace keystone {
+
+enum class ApiVersion { /* Keystone API versions this module can talk to */
+ VER_2, /* requests use the "v2.0/..." paths */
+ VER_3 /* requests use the "v3/..." paths */
+};
+
+
+class Config { /* abstract source of the Keystone endpoint, API version and admin credentials */
+protected:
+ Config() = default; /* protected: only subclasses can be instantiated */
+ virtual ~Config() = default;
+
+public:
+ virtual std::string get_endpoint_url() const noexcept = 0; /* base URL; callers append the API path and treat an empty value as an error */
+ virtual ApiVersion get_api_version() const noexcept = 0;
+
+ virtual std::string get_admin_token() const noexcept = 0; /* empty when no static admin token is configured */
+ virtual boost::string_ref get_admin_user() const noexcept = 0;
+ virtual std::string get_admin_password() const noexcept = 0;
+ virtual boost::string_ref get_admin_tenant() const noexcept = 0;
+ virtual boost::string_ref get_admin_project() const noexcept = 0;
+ virtual boost::string_ref get_admin_domain() const noexcept = 0;
+};
+
+class CephCtxConfig : public Config { /* Config implementation reading the rgw_keystone_* settings of g_ceph_context */
+protected:
+ CephCtxConfig() = default; /* instances are obtained through get_instance() */
+ virtual ~CephCtxConfig() = default;
+
+ const static std::string empty;
+
+public:
+ static CephCtxConfig& get_instance() { /* returns the single function-local static instance */
+ static CephCtxConfig instance;
+ return instance;
+ }
+
+ std::string get_endpoint_url() const noexcept override;
+ ApiVersion get_api_version() const noexcept override;
+
+ std::string get_admin_token() const noexcept override;
+
+ boost::string_ref get_admin_user() const noexcept override {
+ return g_ceph_context->_conf->rgw_keystone_admin_user; /* string_ref: a view of the setting, not a copy */
+ }
+
+ std::string get_admin_password() const noexcept override;
+
+ boost::string_ref get_admin_tenant() const noexcept override {
+ return g_ceph_context->_conf->rgw_keystone_admin_tenant;
+ }
+
+ boost::string_ref get_admin_project() const noexcept override {
+ return g_ceph_context->_conf->rgw_keystone_admin_project;
+ }
+
+ boost::string_ref get_admin_domain() const noexcept override {
+ return g_ceph_context->_conf->rgw_keystone_admin_domain;
+ }
+};
+
+
+class TokenEnvelope;
+class TokenCache;
+
+/* Static helpers for obtaining tokens from Keystone, together with the
+ * HTTP client type they use. */
+class Service {
+public:
+  /* HTTP transceiver that honours rgw_keystone_verify_ssl and asks for the
+   * "X-Subject-Token" response header to be kept. */
+  class RGWKeystoneHTTPTransceiver : public RGWHTTPTransceiver {
+  public:
+    RGWKeystoneHTTPTransceiver(CephContext * const cct,
+                               const string& method,
+                               const string& url,
+                               bufferlist * const token_body_bl)
+      : RGWHTTPTransceiver(cct, method, url, token_body_bl,
+                           cct->_conf->rgw_keystone_verify_ssl,
+                           { "X-Subject-Token" }) {
+    }
+
+    /* Value of the "X-Subject-Token" header, or an empty value when the
+     * header lookup throws std::out_of_range. */
+    const header_value_t& get_subject_token() const {
+      try {
+        return get_header_value("X-Subject-Token");
+      } catch (const std::out_of_range&) {
+        static header_value_t empty_val;
+        return empty_val;
+      }
+    }
+  };
+
+  /* Purpose-specific names for the same transceiver type. */
+  using RGWValidateKeystoneToken = RGWKeystoneHTTPTransceiver;
+  using RGWGetKeystoneAdminToken = RGWKeystoneHTTPTransceiver;
+  using RGWGetRevokedTokens = RGWKeystoneHTTPTransceiver;
+
+  static int get_admin_token(CephContext* const cct,
+                             TokenCache& token_cache,
+                             const Config& config,
+                             std::string& token);
+  static int issue_admin_token_request(CephContext* const cct,
+                                       const Config& config,
+                                       TokenEnvelope& token);
+  static int get_keystone_barbican_token(CephContext * const cct,
+                                         std::string& token);
+};
+
+
+/* Parsed form of a Keystone token reply: the token itself plus the project,
+ * user and roles it was issued for. */
+class TokenEnvelope {
+public:
+  class Domain {
+  public:
+    string id;
+    string name;
+    void decode_json(JSONObj *obj);
+  };
+
+  class Project {
+  public:
+    Domain domain;
+    string id;
+    string name;
+    void decode_json(JSONObj *obj);
+  };
+
+  class Token {
+  public:
+    Token() : expires(0) { }
+    string id;
+    time_t expires;
+    Project tenant_v2;
+    void decode_json(JSONObj *obj);
+  };
+
+  class Role {
+  public:
+    string id;
+    string name;
+    void decode_json(JSONObj *obj);
+  };
+
+  class User {
+  public:
+    string id;
+    string name;
+    Domain domain;
+    list<Role> roles_v2;
+    void decode_json(JSONObj *obj);
+  };
+
+  Token token;
+  Project project;
+  User user;
+  list<Role> roles;
+
+  void decode_v3(JSONObj* obj);
+  void decode_v2(JSONObj* obj);
+
+  /* We really need the default ctor because of the internals of TokenCache. */
+  TokenEnvelope() = default;
+
+  time_t get_expires() const { return token.expires; }
+  const std::string& get_domain_id() const { return project.domain.id; }
+  const std::string& get_domain_name() const { return project.domain.name; }
+  const std::string& get_project_id() const { return project.id; }
+  const std::string& get_project_name() const { return project.name; }
+  const std::string& get_user_id() const { return user.id; }
+  const std::string& get_user_name() const { return user.name; }
+  bool has_role(const string& r) const;
+
+  /* True once the current time (in seconds) has reached the expiry time. */
+  bool expired() const {
+    const uint64_t expires_at = static_cast<uint64_t>(get_expires());
+    const uint64_t now = ceph_clock_now().sec();
+    return now >= expires_at;
+  }
+
+  int parse(CephContext* cct,
+            const std::string& token_str,
+            ceph::buffer::list& bl /* in */,
+            ApiVersion version);
+};
+
+
+/* LRU cache of token envelopes keyed by token id, bounded by
+ * rgw_keystone_token_cache_size entries. It also owns an optional thread
+ * that periodically fetches the list of revoked tokens. */
+class TokenCache {
+  struct token_entry {
+    TokenEnvelope token;
+    list<string>::iterator lru_iter;   /* position of the id in tokens_lru */
+  };
+
+  /* Raised by the destructor; polled by the revocation thread. */
+  std::atomic<bool> down_flag{false};
+
+  class RevokeThread : public Thread {
+    friend class TokenCache;
+    using RGWGetRevokedTokens = RGWPostHTTPData;
+
+    CephContext* const cct;
+    TokenCache* const cache;
+    const rgw::keystone::Config& config;
+
+    Mutex lock;
+    Cond cond;
+
+    RevokeThread(CephContext* const cct,
+                 TokenCache* const cache,
+                 const rgw::keystone::Config& config)
+      : cct(cct),
+        cache(cache),
+        config(config),
+        lock("rgw::keystone::TokenCache::RevokeThread") {
+    }
+
+    void *entry() override;
+    void stop();
+    int check_revoked();
+  } revocator;
+
+  const boost::intrusive_ptr<CephContext> cct;
+
+  std::string admin_token_id;
+  std::string barbican_token_id;
+  std::map<std::string, token_entry> tokens;
+  std::list<std::string> tokens_lru;
+
+  Mutex lock;
+
+  const size_t max;
+
+  explicit TokenCache(const rgw::keystone::Config& config)
+    : revocator(g_ceph_context, this, config),
+      cct(g_ceph_context),
+      lock("rgw::keystone::TokenCache"),
+      max(cct->_conf->rgw_keystone_token_cache_size) {
+    /* revocation logic needs to be smarter, but meanwhile,
+     * make it optional.
+     * see http://tracker.ceph.com/issues/9493
+     *     http://tracker.ceph.com/issues/19499
+     */
+    if (cct->_conf->rgw_keystone_revocation_interval > 0
+        && cct->_conf->rgw_keystone_token_cache_size ) {
+      /* The thread name has been kept for backward compliance. */
+      revocator.create("rgw_swift_k_rev");
+    }
+  }
+
+  ~TokenCache() {
+    down_flag = true;
+
+    // Only stop and join if revocator thread is started.
+    if (revocator.is_started()) {
+      revocator.stop();
+      revocator.join();
+    }
+  }
+
+public:
+  TokenCache(const TokenCache&) = delete;
+  void operator=(const TokenCache&) = delete;
+
+  /* One cache instance exists per ConfigT. */
+  template<class ConfigT>
+  static TokenCache& get_instance() {
+    static_assert(std::is_base_of<rgw::keystone::Config, ConfigT>::value,
+                  "ConfigT must be a subclass of rgw::keystone::Config");
+
+    /* In C++11 this is thread safe. */
+    static TokenCache instance(ConfigT::get_instance());
+    return instance;
+  }
+
+  bool find(const std::string& token_id, TokenEnvelope& token);
+
+  /* Same lookup, returning the envelope by value or boost::none. */
+  boost::optional<TokenEnvelope> find(const std::string& token_id) {
+    TokenEnvelope token_envlp;
+    if (!find(token_id, token_envlp)) {
+      return boost::none;
+    }
+    return token_envlp;
+  }
+
+  bool find_admin(TokenEnvelope& token);
+  bool find_barbican(TokenEnvelope& token);
+  void add(const std::string& token_id, const TokenEnvelope& token);
+  void add_admin(const TokenEnvelope& token);
+  void add_barbican(const TokenEnvelope& token);
+  void invalidate(const std::string& token_id);
+  bool going_down() const;
+
+private:
+  /* Both helpers expect the caller to hold lock. */
+  void add_locked(const std::string& token_id, const TokenEnvelope& token);
+  bool find_locked(const std::string& token_id, TokenEnvelope& token);
+};
+
+
+class AdminTokenRequest { /* interface of the token request serializers below */
+public:
+ virtual ~AdminTokenRequest() = default;
+ virtual void dump(Formatter* f) const = 0; /* write the request into the given formatter */
+};
+
+class AdminTokenRequestVer2 : public AdminTokenRequest { /* admin token request serializer for ApiVersion::VER_2 */
+ const Config& conf; /* held by reference: the Config must outlive this object */
+
+public:
+ explicit AdminTokenRequestVer2(const Config& conf)
+ : conf(conf) {
+ }
+ void dump(Formatter *f) const override;
+};
+
+class AdminTokenRequestVer3 : public AdminTokenRequest { /* admin token request serializer for ApiVersion::VER_3 */
+ const Config& conf; /* held by reference: the Config must outlive this object */
+
+public:
+ explicit AdminTokenRequestVer3(const Config& conf)
+ : conf(conf) {
+ }
+ void dump(Formatter *f) const override;
+};
+
+class BarbicanTokenRequestVer2 : public AdminTokenRequest { /* barbican token request serializer for ApiVersion::VER_2 */
+ CephContext *cct; /* presumably the source of the barbican credentials dumped by dump() - TODO confirm */
+
+public:
+ explicit BarbicanTokenRequestVer2(CephContext * const _cct)
+ : cct(_cct) {
+ }
+ void dump(Formatter *f) const override;
+};
+
+class BarbicanTokenRequestVer3 : public AdminTokenRequest { /* barbican token request serializer for ApiVersion::VER_3 */
+ CephContext *cct; /* presumably the source of the barbican credentials dumped by dump() - TODO confirm */
+
+public:
+ explicit BarbicanTokenRequestVer3(CephContext * const _cct)
+ : cct(_cct) {
+ }
+ void dump(Formatter *f) const override;
+};
+
+
+}; /* namespace keystone */
+}; /* namespace rgw */
+
+#endif
diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc
new file mode 100644
index 00000000..eeb14be1
--- /dev/null
+++ b/src/rgw/rgw_lc.cc
@@ -0,0 +1,1678 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+#include <iostream>
+#include <map>
+
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+
+#include "common/Formatter.h"
+#include <common/errno.h>
+#include "include/random.h"
+#include "cls/rgw/cls_rgw_client.h"
+#include "cls/lock/cls_lock_client.h"
+#include "rgw_common.h"
+#include "rgw_bucket.h"
+#include "rgw_lc.h"
+#include "rgw_zone.h"
+#include "rgw_string.h"
+
+// this seems safe to use, at least for now--arguably, we should
+// prefer header-only fmt, in general
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include "seastar/fmt/include/fmt/format.h"
+
+#include "services/svc_sys_obj.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Human-readable names for lifecycle entry states.  Presumably indexed
+ * by the lc_uninitial / lc_failed / lc_complete status values used
+ * further down -- confirm the order against the enum definition. */
+const char* LC_STATUS[] = {
+ "UNINITIAL",
+ "PROCESSING",
+ "FAILED",
+ "COMPLETE"
+};
+
+using namespace librados;
+
+/* A rule is valid when its id fits in MAX_ID_LEN, it carries at least
+ * one action, every action it carries is itself valid, and current
+ * transitions do not mix day-based and date-based settings (taking the
+ * current expiration into account as well). */
+bool LCRule::valid() const
+{
+  if (id.length() > MAX_ID_LEN) {
+    return false;
+  }
+
+  const bool has_any_action =
+    !expiration.empty() || !noncur_expiration.empty() ||
+    !mp_expiration.empty() || dm_expiration ||
+    !transitions.empty() || !noncur_transitions.empty();
+  if (!has_any_action) {
+    return false;
+  }
+
+  if (!(expiration.valid() && noncur_expiration.valid() && mp_expiration.valid())) {
+    return false;
+  }
+
+  if (!transitions.empty()) {
+    bool days_seen = expiration.has_days();
+    bool date_seen = expiration.has_date();
+    for (const auto& entry : transitions) {
+      const auto& transition = entry.second;
+      if (!transition.valid()) {
+        return false;
+      }
+      days_seen = days_seen || transition.has_days();
+      date_seen = date_seen || transition.has_date();
+      /* days and dates may not be combined within one rule */
+      if (days_seen && date_seen) {
+        return false;
+      }
+    }
+  }
+
+  for (const auto& entry : noncur_transitions) {
+    if (!entry.second.valid()) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/* Configure this rule as an enabled, prefix-scoped rule whose only
+ * action is expiring current objects after num_days days. */
+void LCRule::init_simple_days_rule(std::string_view _id, std::string_view _prefix, int num_days)
+{
+  id = _id;
+  prefix = _prefix;
+  /* the expiration stores its day count in decimal text form */
+  expiration.set_days(std::to_string(num_days));
+  set_enabled(true);
+}
+
+/* Record the rule in rule_map under its id.  No validation and no
+ * duplicate check is done here; see check_and_add_rule() for that. */
+void RGWLifecycleConfiguration::add_rule(const LCRule& rule)
+{
+  rule_map.emplace(rule.get_id(), rule);
+}
+
+/* Translate an LCRule into the lc_op form used at processing time and
+ * index it in prefix_map.  The key is the filter prefix when the rule's
+ * filter has one, otherwise the rule's own prefix.  Always returns true.
+ *
+ * Storage classes of both current and noncurrent transitions are stored
+ * in canonical form: LCOpAction_Transition::check() compares the
+ * canonical storage class of the object against the stored one, so a
+ * non-canonical value here would make every object look as if it still
+ * needed to be transitioned. */
+bool RGWLifecycleConfiguration::_add_rule(const LCRule& rule)
+{
+  lc_op op(rule.get_id());
+  op.status = rule.is_enabled();
+  if (rule.get_expiration().has_days()) {
+    op.expiration = rule.get_expiration().get_days();
+  }
+  if (rule.get_expiration().has_date()) {
+    op.expiration_date = ceph::from_iso_8601(rule.get_expiration().get_date());
+  }
+  if (rule.get_noncur_expiration().has_days()) {
+    op.noncur_expiration = rule.get_noncur_expiration().get_days();
+  }
+  if (rule.get_mp_expiration().has_days()) {
+    op.mp_expiration = rule.get_mp_expiration().get_days();
+  }
+  op.dm_expiration = rule.get_dm_expiration();
+
+  /* current-version transitions: either days or a date */
+  for (const auto &elem : rule.get_transitions()) {
+    transition_action action;
+    if (elem.second.has_days()) {
+      action.days = elem.second.get_days();
+    } else {
+      action.date = ceph::from_iso_8601(elem.second.get_date());
+    }
+    action.storage_class = rgw_placement_rule::get_canonical_storage_class(elem.first);
+    op.transitions.emplace(elem.first, std::move(action));
+  }
+
+  /* noncurrent-version transitions */
+  for (const auto &elem : rule.get_noncur_transitions()) {
+    transition_action action;
+    action.days = elem.second.get_days();
+    action.date = ceph::from_iso_8601(elem.second.get_date());
+    /* canonicalize, consistently with the current-version loop above */
+    action.storage_class = rgw_placement_rule::get_canonical_storage_class(elem.first);
+    op.noncur_transitions.emplace(elem.first, std::move(action));
+  }
+
+  std::string prefix;
+  if (rule.get_filter().has_prefix()){
+    prefix = rule.get_filter().get_prefix();
+  } else {
+    prefix = rule.get_prefix();
+  }
+
+  if (rule.get_filter().has_tags()){
+    op.obj_tags = rule.get_filter().get_tags();
+  }
+  prefix_map.emplace(std::move(prefix), std::move(op));
+  return true;
+}
+
+/* Validate the rule, reject duplicate ids, then register it in both
+ * rule_map and (through _add_rule) prefix_map.
+ * Returns 0 on success, -EINVAL for an invalid rule or a duplicate id,
+ * -ERR_INVALID_REQUEST if _add_rule() rejects the rule. */
+int RGWLifecycleConfiguration::check_and_add_rule(const LCRule& rule)
+{
+  if (!rule.valid()) {
+    return -EINVAL;
+  }
+
+  const auto& rule_id = rule.get_id();
+  if (rule_map.count(rule_id) > 0) {
+    /* rule ids must be unique */
+    return -EINVAL;
+  }
+  rule_map.emplace(rule_id, rule);
+
+  return _add_rule(rule) ? 0 : -ERR_INVALID_REQUEST;
+}
+
+/* Report whether two ops define an action of the same kind: both expire
+ * current objects, both expire noncurrent objects, both expire
+ * multipart uploads, or both transition (current or noncurrent) to a
+ * common storage class.
+ *
+ * Each kind is checked independently.  The checks used to be chained
+ * with else-if, so two ops that both had current transitions (without a
+ * common storage class) were never compared on their noncurrent
+ * transitions. */
+bool RGWLifecycleConfiguration::has_same_action(const lc_op& first, const lc_op& second) {
+  const auto expires_current = [](const lc_op& op) {
+    return op.expiration > 0 || op.expiration_date != boost::none;
+  };
+
+  if (expires_current(first) && expires_current(second)) {
+    return true;
+  }
+  if (first.noncur_expiration > 0 && second.noncur_expiration > 0) {
+    return true;
+  }
+  if (first.mp_expiration > 0 && second.mp_expiration > 0) {
+    return true;
+  }
+  for (const auto& elem : first.transitions) {
+    if (second.transitions.find(elem.first) != second.transitions.end()) {
+      return true;
+    }
+  }
+  for (const auto& elem : first.noncur_transitions) {
+    if (second.noncur_transitions.find(elem.first) != second.noncur_transitions.end()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Formerly, this method checked for duplicate rules using an invalid
+ * method (prefix uniqueness).  That check has been removed, so the
+ * configuration as a whole is now always reported as valid; per-rule
+ * validation is done by LCRule::valid(). */
+bool RGWLifecycleConfiguration::valid()
+{
+ return true;
+}
+
+/* Worker thread body: run a lifecycle pass whenever should_work() says
+ * so, then sleep on the condition variable until the next scheduled
+ * start.  The loop ends as soon as the owning RGWLC reports that it is
+ * going down, which is checked both before and after the wait. */
+void *RGWLC::LCWorker::entry() {
+  for (;;) {
+    utime_t start = ceph_clock_now();
+    if (should_work(start)) {
+      ldpp_dout(dpp, 2) << "life cycle: start" << dendl;
+      const int r = lc->process();
+      if (r < 0) {
+        ldpp_dout(dpp, 0) << "ERROR: do life cycle process() returned error r=" << r << dendl;
+      }
+      ldpp_dout(dpp, 2) << "life cycle: stop" << dendl;
+    }
+    if (lc->going_down()) {
+      break;
+    }
+
+    utime_t end = ceph_clock_now();
+    const int secs = schedule_next_start_time(start, end);
+    utime_t next;
+    next.set_from_double(end + secs);
+
+    ldpp_dout(dpp, 5) << "schedule life cycle next start time: " << rgw_to_asctime(next) << dendl;
+
+    lock.Lock();
+    cond.WaitInterval(lock, utime_t(secs, 0));
+    lock.Unlock();
+
+    if (lc->going_down()) {
+      break;
+    }
+  }
+
+  return nullptr;
+}
+
+/* Bind this RGWLC to a context and store, build the names of the lc
+ * index objects ("<lc_oid_prefix>.<i>", at most HASH_PRIME of them) and
+ * generate the random cookie used for locking them. */
+void RGWLC::initialize(CephContext *_cct, RGWRados *_store) {
+  cct = _cct;
+  store = _store;
+
+  max_objs = cct->_conf->rgw_lc_max_objs;
+  if (max_objs > HASH_PRIME) {
+    max_objs = HASH_PRIME;
+  }
+
+  obj_names = new string[max_objs];
+  for (int i = 0; i < max_objs; i++) {
+    obj_names[i] = lc_oid_prefix;
+    obj_names[i] += "." + std::to_string(i);
+  }
+
+#define COOKIE_LEN 16
+  char cookie_buf[COOKIE_LEN + 1];
+  gen_rand_alphanumeric(cct, cookie_buf, sizeof(cookie_buf) - 1);
+  cookie = cookie_buf;
+}
+
+/* Release the index object names allocated by initialize().  The
+ * pointer is reset afterwards so that a repeated finalize() is a no-op
+ * instead of a double free. */
+void RGWLC::finalize()
+{
+  delete[] obj_names;
+  obj_names = nullptr;
+}
+
+/* Tell whether a lifecycle pass that started at start_date still counts
+ * as "today's" run.  With rgw_lc_debug_interval > 0 a "day" lasts that
+ * many seconds counted from start_date; otherwise the run counts while
+ * less than 24 hours have passed since local midnight of start_date. */
+bool RGWLC::if_already_run_today(time_t& start_date)
+{
+  utime_t now = ceph_clock_now();
+
+  if (cct->_conf->rgw_lc_debug_interval > 0) {
+    return now - start_date < cct->_conf->rgw_lc_debug_interval;
+  }
+
+  /* truncate start_date to the beginning of its (local) day */
+  struct tm day_start;
+  localtime_r(&start_date, &day_start);
+  day_start.tm_hour = 0;
+  day_start.tm_min = 0;
+  day_start.tm_sec = 0;
+  const time_t begin_of_day = mktime(&day_start);
+
+  return now - begin_of_day < 24*60*60;
+}
+
+/* Reset every entry of lc index object `index` to lc_uninitial, paging
+ * through the entries MAX_LC_LIST_ENTRIES at a time.
+ * Returns 0 on success or the first negative error from listing or
+ * updating the entries. */
+int RGWLC::bucket_lc_prepare(int index)
+{
+  map<string, int > entries;
+
+  string marker;
+
+#define MAX_LC_LIST_ENTRIES 100
+  do {
+    /* Start every page from an empty map so that loop termination does
+     * not depend on cls_rgw_lc_list() clearing its output argument
+     * (NOTE(review): whether it does is not visible from here). */
+    entries.clear();
+    int ret = cls_rgw_lc_list(store->lc_pool_ctx, obj_names[index], marker, MAX_LC_LIST_ENTRIES, entries);
+    if (ret < 0)
+      return ret;
+
+    for (const auto& listed : entries) {
+      pair<string, int > entry(listed.first, lc_uninitial);
+      ret = cls_rgw_lc_set_entry(store->lc_pool_ctx, obj_names[index], entry);
+      if (ret < 0) {
+        ldpp_dout(this, 0) << "RGWLC::bucket_lc_prepare() failed to set entry on "
+            << obj_names[index] << dendl;
+        return ret;
+      }
+    }
+
+    if (!entries.empty()) {
+      /* map keys are const, so this is a copy; std::move would only
+       * have hidden that */
+      marker = entries.rbegin()->first;
+    }
+  } while (!entries.empty());
+
+  return 0;
+}
+
+/* Decide whether an object last modified at mtime is at least `days`
+ * days old.  Normally the age is measured from the current time rounded
+ * to the day; when rgw_lc_debug_interval > 0 each interval of that many
+ * seconds counts as one day and the unrounded current time is used.
+ * If expire_time is non-null it receives mtime plus the threshold. */
+static bool obj_has_expired(CephContext *cct, ceph::real_time mtime, int days, ceph::real_time *expire_time = nullptr)
+{
+  const bool debug_mode = cct->_conf->rgw_lc_debug_interval > 0;
+
+  /* age threshold, in seconds */
+  const double cmp = debug_mode
+    ? double(days)*cct->_conf->rgw_lc_debug_interval
+    : double(days)*24*60*60;
+
+  utime_t base_time = ceph_clock_now();
+  if (!debug_mode) {
+    base_time = base_time.round_to_day();
+  }
+
+  const double timediff = base_time - ceph::real_clock::to_time_t(mtime);
+
+  if (expire_time) {
+    *expire_time = mtime + make_timespan(cmp);
+  }
+  ldout(cct, 20) << __func__ << "(): mtime=" << mtime << " days=" << days << " base_time=" << base_time << " timediff=" << timediff << " cmp=" << cmp << dendl;
+
+  return (timediff >= cmp);
+}
+
+/* Return true when object-lock settings do not forbid removing obj.
+ * Buckets without object lock always pass, and so does an object that
+ * no longer exists (-ENOENT).  Any other read error, an undecodable
+ * attribute, a retain-until date in the future or an enabled legal hold
+ * makes the check fail. */
+static bool pass_object_lock_check(RGWRados *store, RGWBucketInfo& bucket_info, rgw_obj& obj, RGWObjectCtx& ctx)
+{
+  if (!bucket_info.obj_lock_enabled()) {
+    return true;
+  }
+
+  RGWRados::Object op_target(store, bucket_info, ctx, obj);
+  RGWRados::Object::Read read_op(&op_target);
+  map<string, bufferlist> attrs;
+  read_op.params.attrs = &attrs;
+
+  const int ret = read_op.prepare();
+  if (ret < 0) {
+    return ret == -ENOENT;
+  }
+
+  const auto retention_it = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+  if (retention_it != attrs.end()) {
+    RGWObjectRetention retention;
+    try {
+      decode(retention, retention_it->second);
+    } catch (buffer::error& err) {
+      ldout(store->ctx(), 0) << "ERROR: failed to decode RGWObjectRetention" << dendl;
+      return false;
+    }
+    if (ceph::real_clock::to_time_t(retention.get_retain_until_date()) > ceph_clock_now()) {
+      return false;
+    }
+  }
+
+  const auto hold_it = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD);
+  if (hold_it != attrs.end()) {
+    RGWObjectLegalHold obj_legal_hold;
+    try {
+      decode(obj_legal_hold, hold_it->second);
+    } catch (buffer::error& err) {
+      ldout(store->ctx(), 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl;
+      return false;
+    }
+    if (obj_legal_hold.is_enabled()) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/* Abort incomplete multipart uploads that are older than the
+ * mp_expiration of an enabled rule.  For every such rule the multipart
+ * namespace is listed under the rule's prefix, 1000 entries at a time,
+ * sleeping rgw_lc_thread_delay milliseconds between pages.
+ * Returns 0 on success, when the listing reports -ENOENT, or when
+ * shutdown is detected; returns the listing error otherwise.  Failures
+ * to abort an individual upload are only logged. */
+int RGWLC::handle_multipart_expiration(
+ RGWRados::Bucket *target, const multimap<string, lc_op>& prefix_map)
+{
+ MultipartMetaFilter mp_filter;
+ vector<rgw_bucket_dir_entry> objs;
+ RGWMPObj mp_obj;
+ bool is_truncated;
+ int ret;
+ RGWBucketInfo& bucket_info = target->get_bucket_info();
+ RGWRados::Bucket::List list_op(target);
+ auto delay_ms = cct->_conf.get_val<int64_t>("rgw_lc_thread_delay");
+ list_op.params.list_versions = false;
+ /* lifecycle processing does not depend on total order, so can
+ * take advantage of unordered listing optimizations--such as
+ * operating on one shard at a time */
+ list_op.params.allow_unordered = true;
+ list_op.params.ns = RGW_OBJ_NS_MULTIPART;
+ list_op.params.filter = &mp_filter;
+ for (auto prefix_iter = prefix_map.begin(); prefix_iter != prefix_map.end(); ++prefix_iter) {
+ /* skip disabled rules and rules without a multipart expiration */
+ if (!prefix_iter->second.status || prefix_iter->second.mp_expiration <= 0) {
+ continue;
+ }
+ list_op.params.prefix = prefix_iter->first;
+ do {
+ objs.clear();
+ /* NOTE(review): list_op is shared by all prefixes and every page
+ * starts from list_op.get_next_marker(), so the marker left behind
+ * by the previous prefix appears to carry over into the first page
+ * of the next one -- confirm that this is intended. */
+ list_op.params.marker = list_op.get_next_marker();
+ ret = list_op.list_objects(1000, &objs, NULL, &is_truncated);
+ if (ret < 0) {
+ if (ret == (-ENOENT))
+ return 0;
+ ldpp_dout(this, 0) << "ERROR: store->list_objects():" <<dendl;
+ return ret;
+ }
+
+ for (auto obj_iter = objs.begin(); obj_iter != objs.end(); ++obj_iter) {
+ if (obj_has_expired(cct, obj_iter->meta.mtime, prefix_iter->second.mp_expiration)) {
+ rgw_obj_key key(obj_iter->key);
+ /* entries whose name does not parse as a multipart meta object are ignored */
+ if (!mp_obj.from_meta(key.name)) {
+ continue;
+ }
+ RGWObjectCtx rctx(store);
+ ret = abort_multipart_upload(store, cct, &rctx, bucket_info, mp_obj);
+ /* an upload that is already gone is logged at a lower severity */
+ if (ret < 0 && ret != -ERR_NO_SUCH_UPLOAD) {
+ ldpp_dout(this, 0) << "ERROR: abort_multipart_upload failed, ret=" << ret << ", meta:" << obj_iter->key << dendl;
+ } else if (ret == -ERR_NO_SUCH_UPLOAD) {
+ ldpp_dout(this, 5) << "ERROR: abort_multipart_upload failed, ret=" << ret << ", meta:" << obj_iter->key << dendl;
+ }
+ if (going_down())
+ return 0;
+ }
+ } /* for objs */
+ std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms));
+ } while(is_truncated);
+ }
+ return 0;
+}
+
+/* Read the RGW_ATTR_TAGS attribute of obj into tags_bl.
+ * Returns whatever RGWRados::Object::Read::get_attr() returns. */
+static int read_obj_tags(RGWRados *store, RGWBucketInfo& bucket_info, rgw_obj& obj, RGWObjectCtx& ctx, bufferlist& tags_bl)
+{
+ RGWRados::Object op_target(store, bucket_info, ctx, obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ return read_op.get_attr(RGW_ATTR_TAGS, tags_bl);
+}
+
+/* An op is worth an object listing when it is enabled and defines at
+ * least one per-object action.  mp_expiration is not considered here;
+ * multipart uploads are handled by handle_multipart_expiration(). */
+static bool is_valid_op(const lc_op& op)
+{
+  if (!op.status) {
+    return false;
+  }
+  return op.expiration > 0
+    || op.expiration_date != boost::none
+    || op.noncur_expiration > 0
+    || op.dm_expiration
+    || !op.transitions.empty()
+    || !op.noncur_transitions.empty();
+}
+
+/* True when every tag of the rule is present on the object with the
+ * same value.  A rule without tags never matches here. */
+static inline bool has_all_tags(const lc_op& rule_action,
+ const RGWObjTags& object_tags)
+{
+  if (!rule_action.obj_tags) {
+    return false;
+  }
+  if (object_tags.count() < rule_action.obj_tags->count()) {
+    return false;
+  }
+
+  const auto& rule_tags = rule_action.obj_tags->get_tags();
+  size_t matched = 0;
+  for (const auto& tag : object_tags.get_tags()) {
+    const auto found = rule_tags.find(tag.first);
+    if (found != rule_tags.end() && found->second == tag.second) {
+      ++matched;
+    }
+  }
+  /* all tags in the rule appear in obj tags */
+  return matched == rule_action.obj_tags->count();
+}
+
+/* Cursor over the objects of one bucket, used by lifecycle processing.
+ * Entries are fetched 1000 at a time with an unordered listing (and
+ * with versions when the bucket is versioned).  Usage:
+ *
+ *   ol.set_prefix(p); ol.init();
+ *   for (; ol.get_obj(&o); ol.next()) { ... }
+ *
+ * After each refill, and at the end of the listing, the lister sleeps
+ * for rgw_lc_thread_delay milliseconds. */
+class LCObjsLister {
+  RGWRados *store;
+  RGWBucketInfo& bucket_info;
+  RGWRados::Bucket target;
+  RGWRados::Bucket::List list_op;
+  bool is_truncated{false};
+  rgw_obj_key next_marker;
+  string prefix;
+  vector<rgw_bucket_dir_entry> objs;
+  vector<rgw_bucket_dir_entry>::iterator obj_iter;
+  rgw_bucket_dir_entry pre_obj;
+  int64_t delay_ms;
+
+public:
+  LCObjsLister(RGWRados *_store, RGWBucketInfo& _bucket_info) :
+    store(_store), bucket_info(_bucket_info),
+    target(store, bucket_info), list_op(&target) {
+    list_op.params.list_versions = bucket_info.versioned();
+    list_op.params.allow_unordered = true;
+    delay_ms = store->ctx()->_conf.get_val<int64_t>("rgw_lc_thread_delay");
+  }
+
+  /* Restrict the listing to keys starting with p. */
+  void set_prefix(const string& p) {
+    prefix = p;
+    list_op.params.prefix = prefix;
+  }
+
+  /* Fetch the first batch; returns the listing result code. */
+  int init() {
+    return fetch();
+  }
+
+  /* Fetch the next batch into objs and rewind the cursor.  On error the
+   * cursor is left untouched and must not be used. */
+  int fetch() {
+    int ret = list_op.list_objects(1000, &objs, NULL, &is_truncated);
+    if (ret < 0) {
+      return ret;
+    }
+
+    obj_iter = objs.begin();
+
+    return 0;
+  }
+
+  void delay() {
+    std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms));
+  }
+
+  /* Copy the entry under the cursor into *obj.
+   * Returns false when the listing is exhausted or a refill failed; in
+   * that case *obj is left unchanged. */
+  bool get_obj(rgw_bucket_dir_entry *obj) {
+    if (obj_iter == objs.end()) {
+      delay();
+      return false;
+    }
+    if (is_truncated && (obj_iter + 1)==objs.end()) {
+      /* Refill while standing on the last entry of a truncated batch so
+       * that next_has_same_name() can always look one entry ahead.
+       * NOTE(review): the marker is the current (last) entry and the
+       * new batch replaces it before it was handed out; if the listing
+       * marker is exclusive this entry is never returned -- confirm
+       * against list_objects() semantics. */
+      list_op.params.marker = obj_iter->key;
+
+      int ret = fetch();
+      if (ret < 0) {
+        ldout(store->ctx(), 0) << "ERROR: list_op returned ret=" << ret << dendl;
+        /* this used to be "return ret", which converts a negative error
+         * code to true and made the caller keep iterating on a stale
+         * entry and an unusable cursor */
+        return false;
+      }
+      delay();
+      if (obj_iter == objs.end()) {
+        /* the refill came back empty: nothing to dereference */
+        return false;
+      }
+    }
+    *obj = *obj_iter;
+    return true;
+  }
+
+  /* The entry the cursor was on before the last next(). */
+  rgw_bucket_dir_entry get_prev_obj() {
+    return pre_obj;
+  }
+
+  /* Remember the current entry as "previous" and advance the cursor. */
+  void next() {
+    pre_obj = *obj_iter;
+    ++obj_iter;
+  }
+
+  /* True when the entry following the cursor has the same object name.
+   * Must only be called while the cursor is on a valid entry. */
+  bool next_has_same_name()
+  {
+    if ((obj_iter + 1) == objs.end()) {
+      /* this should have been called after get_obj() was called, so this should
+       * only happen if is_truncated is false */
+      return false;
+    }
+    return (obj_iter->key.name.compare((obj_iter + 1)->key.name) == 0);
+  }
+};
+
+
+/* Per-rule environment handed to LCOpRule: the op being applied, the
+ * store, the owning RGWLC, the bucket and the object lister.  Every
+ * member is a reference or a raw pointer, so all referenced objects
+ * must outlive the op_env. */
+struct op_env {
+ lc_op& op;
+ RGWRados *store;
+ RGWLC *lc;
+ RGWBucketInfo& bucket_info;
+ LCObjsLister& ol;
+
+ op_env(lc_op& _op, RGWRados *_store, RGWLC *_lc, RGWBucketInfo& _bucket_info,
+ LCObjsLister& _ol) : op(_op), store(_store), lc(_lc), bucket_info(_bucket_info), ol(_ol) {}
+};
+
+class LCRuleOp;
+
+/* Per-object context passed to filters and actions.  It aliases the
+ * fields of the op_env and the listing entry `o`, and additionally
+ * builds the rgw_obj for the entry (bucket + key) and an RGWObjectCtx
+ * on the store. */
+struct lc_op_ctx {
+ CephContext *cct;
+ op_env& env;
+ rgw_bucket_dir_entry& o;
+
+ RGWRados *store;
+ RGWBucketInfo& bucket_info;
+ lc_op& op;
+ LCObjsLister& ol;
+
+ rgw_obj obj;
+ RGWObjectCtx rctx;
+
+ /* the initializers read members declared earlier (env, o), so the
+ * declaration order above matters */
+ lc_op_ctx(op_env& _env, rgw_bucket_dir_entry& _o) : cct(_env.store->ctx()), env(_env), o(_o),
+ store(env.store), bucket_info(env.bucket_info), op(env.op), ol(env.ol),
+ obj(env.bucket_info.bucket, o.key), rctx(env.store) {}
+};
+
+/* Delete the object described by oc.
+ * With remove_indeed == false the instance is stripped from the key
+ * before the delete is issued; with remove_indeed == true the exact
+ * instance is targeted, using "null" when the entry has no instance.
+ * The delete is conditional on the entry's mtime (unmod_since).
+ * Returns the result of the delete operation. */
+static int remove_expired_obj(lc_op_ctx& oc, bool remove_indeed)
+{
+  auto target_key = oc.o.key;
+  if (remove_indeed) {
+    if (target_key.instance.empty()) {
+      target_key.instance = "null";
+    }
+  } else {
+    target_key.instance.clear();
+  }
+
+  rgw_obj obj(oc.bucket_info.bucket, target_key);
+
+  ACLOwner obj_owner;
+  obj_owner.set_id(rgw_user {oc.o.meta.owner});
+  obj_owner.set_name(oc.o.meta.owner_display_name);
+
+  RGWRados::Object del_target(oc.store, oc.bucket_info, oc.rctx, obj);
+  RGWRados::Object::Delete del_op(&del_target);
+
+  auto& params = del_op.params;
+  params.bucket_owner = oc.bucket_info.owner;
+  params.versioning_status = oc.bucket_info.versioning_status();
+  params.obj_owner = obj_owner;
+  params.unmod_since = oc.o.meta.mtime;
+
+  return del_op.delete_obj();
+}
+
+/* Base class of the lifecycle actions (expiration, transition, ...).
+ * The defaults describe an action that never applies. */
+class LCOpAction {
+public:
+ virtual ~LCOpAction() {}
+
+ /* Return true when the action applies to the object in oc.  The
+ * overrides in this file store the time at which the action became
+ * due in *exp_time. */
+ virtual bool check(lc_op_ctx& oc, ceph::real_time *exp_time) {
+ return false;
+ };
+
+ /* Called after check().  check() tells us whether this action is
+ * applicable.  If there are multiple actions, we end up executing the
+ * latest applicable one.
+ * For example:
+ * one action after 10 days, another after 20, a third after 40.
+ * After 10 days the latest applicable action is the first one, after
+ * 20 days it is the second one, and after 21 days it is still the
+ * second one.  So check() returns true for the second action at that
+ * point, while should_process() returns false if that action has
+ * already been applied.  For object removal this does not matter, but
+ * for object transition it does.
+ */
+ virtual bool should_process() {
+ return true;
+ }
+
+ /* Carry out the action; negative return values signal an error. */
+ virtual int process(lc_op_ctx& oc) {
+ return 0;
+ }
+};
+
+/* Base class of object filters applied before an action is executed.
+ * check() returns true when the object passes the filter; the default
+ * rejects everything. */
+class LCOpFilter {
+public:
+virtual ~LCOpFilter() {}
+ virtual bool check(lc_op_ctx& oc) {
+ return false;
+ }
+};
+
+/* One lifecycle rule in executable form: the filters and actions
+ * derived from the lc_op in env.  build() populates both lists and
+ * process() applies the rule to a single listing entry.  Only a
+ * reference to the op_env is kept, so it must outlive the rule. */
+class LCOpRule {
+ friend class LCOpAction;
+
+ op_env& env;
+
+ std::vector<unique_ptr<LCOpFilter> > filters;
+ std::vector<unique_ptr<LCOpAction> > actions;
+
+public:
+ LCOpRule(op_env& _env) : env(_env) {}
+
+ void build();
+ int process(rgw_bucket_dir_entry& o);
+};
+
+/* Evaluate the tag condition of the rule against the object in oc.
+ * *skip is set to false when the rule has no tags or the object carries
+ * all of them, and to true otherwise (including when the object's tags
+ * cannot be read or decoded).
+ * Returns 0, or -EIO when the stored tag set cannot be decoded. */
+static int check_tags(lc_op_ctx& oc, bool *skip)
+{
+  auto& op = oc.op;
+
+  if (op.obj_tags == boost::none) {
+    /* rule without tags: nothing to match */
+    *skip = false;
+    return 0;
+  }
+
+  /* pessimistic default for every early return below */
+  *skip = true;
+
+  bufferlist tags_bl;
+  const int ret = read_obj_tags(oc.store, oc.bucket_info, oc.obj, oc.rctx, tags_bl);
+  if (ret < 0) {
+    if (ret != -ENODATA) {
+      ldout(oc.cct, 5) << "ERROR: read_obj_tags returned r=" << ret << dendl;
+    }
+    return 0;
+  }
+
+  RGWObjTags dest_obj_tags;
+  try {
+    auto iter = tags_bl.cbegin();
+    dest_obj_tags.decode(iter);
+  } catch (buffer::error& err) {
+    ldout(oc.cct,0) << "ERROR: caught buffer::error, couldn't decode TagSet" << dendl;
+    return -EIO;
+  }
+
+  if (! has_all_tags(op, dest_obj_tags)) {
+    ldout(oc.cct, 20) << __func__ << "() skipping obj " << oc.obj << " as tags do not match in rule: " << op.id << dendl;
+    return 0;
+  }
+
+  *skip = false;
+  return 0;
+}
+
+/* Filter that lets an object through when it satisfies the tag
+ * condition of the rule.  Delete markers always pass. */
+class LCOpFilter_Tags : public LCOpFilter {
+public:
+  bool check(lc_op_ctx& oc) override {
+    if (oc.o.is_delete_marker()) {
+      return true;
+    }
+
+    bool skip;
+    const int ret = check_tags(oc, &skip);
+    if (ret < 0) {
+      /* -ENOENT is rejected quietly, anything else is logged */
+      if (ret != -ENOENT) {
+        ldout(oc.cct, 0) << "ERROR: check_tags on obj=" << oc.obj << " returned ret=" << ret << dendl;
+      }
+      return false;
+    }
+
+    return !skip;
+  }
+};
+
+/* Expiration of current object versions, by age in days or by a fixed
+ * date.  A current delete marker is treated as expired right away when
+ * the next listing entry belongs to a different object name. */
+class LCOpAction_CurrentExpiration : public LCOpAction {
+public:
+  bool check(lc_op_ctx& oc, ceph::real_time *exp_time) override {
+    auto& o = oc.o;
+    if (!o.is_current()) {
+      ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": not current, skipping" << dendl;
+      return false;
+    }
+    if (o.is_delete_marker()) {
+      if (oc.ol.next_has_same_name()) {
+        return false;
+      } else {
+        *exp_time = real_clock::now();
+        return true;
+      }
+    }
+
+    auto& mtime = o.meta.mtime;
+    bool is_expired;
+    auto& op = oc.op;
+    if (op.expiration <= 0) {
+      /* no day count: fall back to the expiration date, if any */
+      if (op.expiration_date == boost::none) {
+        ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": no expiration set in rule, skipping" << dendl;
+        return false;
+      }
+      is_expired = ceph_clock_now() >= ceph::real_clock::to_time_t(*op.expiration_date);
+      *exp_time = *op.expiration_date;
+    } else {
+      is_expired = obj_has_expired(oc.cct, mtime, op.expiration, exp_time);
+    }
+
+    ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": is_expired=" << (int)is_expired << dendl;
+    return is_expired;
+  }
+
+  /* Delete markers are removed outright; other entries are removed
+   * outright only when the bucket is not versioned.  Marked `override`
+   * so that a signature drift from LCOpAction::process() is a compile
+   * error instead of a silently unused method. */
+  int process(lc_op_ctx& oc) override {
+    auto& o = oc.o;
+    int r;
+    if (o.is_delete_marker()) {
+      r = remove_expired_obj(oc, true);
+    } else {
+      r = remove_expired_obj(oc, !oc.bucket_info.versioned());
+    }
+    if (r < 0) {
+      ldout(oc.cct, 0) << "ERROR: remove_expired_obj " << dendl;
+      return r;
+    }
+    ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key << dendl;
+    return 0;
+  }
+};
+
+/* Expiration of noncurrent object versions.  The age is measured from
+ * the mtime of the previous listing entry, and the object must also
+ * pass the object-lock check. */
+class LCOpAction_NonCurrentExpiration : public LCOpAction {
+public:
+  bool check(lc_op_ctx& oc, ceph::real_time *exp_time) override {
+    auto& o = oc.o;
+    if (o.is_current()) {
+      ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": current version, skipping" << dendl;
+      return false;
+    }
+
+    auto mtime = oc.ol.get_prev_obj().meta.mtime;
+    int expiration = oc.op.noncur_expiration;
+    bool is_expired = obj_has_expired(oc.cct, mtime, expiration, exp_time);
+
+    ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": is_expired=" << is_expired << dendl;
+    return is_expired && pass_object_lock_check(oc.store, oc.bucket_info, oc.obj, oc.rctx);
+  }
+
+  /* Remove the exact version.  Marked `override` to keep the signature
+   * tied to LCOpAction::process(). */
+  int process(lc_op_ctx& oc) override {
+    auto& o = oc.o;
+    int r = remove_expired_obj(oc, true);
+    if (r < 0) {
+      ldout(oc.cct, 0) << "ERROR: remove_expired_obj " << dendl;
+      return r;
+    }
+    ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key << " (non-current expiration)" << dendl;
+    return 0;
+  }
+};
+
+/* Removal of delete markers: applies to a delete marker whose next
+ * listing entry belongs to a different object name. */
+class LCOpAction_DMExpiration : public LCOpAction {
+public:
+  bool check(lc_op_ctx& oc, ceph::real_time *exp_time) override {
+    auto& o = oc.o;
+    if (!o.is_delete_marker()) {
+      ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": not a delete marker, skipping" << dendl;
+      return false;
+    }
+
+    if (oc.ol.next_has_same_name()) {
+      ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": next is same object, skipping" << dendl;
+      return false;
+    }
+
+    *exp_time = real_clock::now();
+
+    return true;
+  }
+
+  /* Remove the marker itself.  Marked `override` to keep the signature
+   * tied to LCOpAction::process(). */
+  int process(lc_op_ctx& oc) override {
+    auto& o = oc.o;
+    int r = remove_expired_obj(oc, true);
+    if (r < 0) {
+      ldout(oc.cct, 0) << "ERROR: remove_expired_obj " << dendl;
+      return r;
+    }
+    ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key << " (delete marker expiration)" << dendl;
+    return 0;
+  }
+};
+
+/* Common part of the storage-class transition actions.  Subclasses
+ * decide whether they handle current or noncurrent versions and which
+ * mtime the age is measured from.  Only a reference to the
+ * transition_action is kept, so it must outlive this object. */
+class LCOpAction_Transition : public LCOpAction {
+  const transition_action& transition;
+  /* set by check(): false when the object already is in the target class */
+  bool need_to_process{false};
+
+protected:
+  virtual bool check_current_state(bool is_current) = 0;
+  virtual ceph::real_time get_effective_mtime(lc_op_ctx& oc) = 0;
+public:
+  explicit LCOpAction_Transition(const transition_action& _transition) : transition(_transition) {}
+
+  bool check(lc_op_ctx& oc, ceph::real_time *exp_time) override {
+    auto& o = oc.o;
+
+    if (o.is_delete_marker()) {
+      return false;
+    }
+
+    if (!check_current_state(o.is_current())) {
+      return false;
+    }
+
+    auto mtime = get_effective_mtime(oc);
+    bool is_expired;
+    if (transition.days < 0) {
+      if (transition.date == boost::none) {
+        ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": no transition day/date set in rule, skipping" << dendl;
+        return false;
+      }
+      is_expired = ceph_clock_now() >= ceph::real_clock::to_time_t(*transition.date);
+      *exp_time = *transition.date;
+    } else {
+      is_expired = obj_has_expired(oc.cct, mtime, transition.days, exp_time);
+    }
+
+    ldout(oc.cct, 20) << __func__ << "(): key=" << o.key << ": is_expired=" << is_expired << dendl;
+
+    need_to_process = (rgw_placement_rule::get_canonical_storage_class(o.meta.storage_class) != transition.storage_class);
+
+    return is_expired;
+  }
+
+  bool should_process() override {
+    return need_to_process;
+  }
+
+  /* Move the object to the bucket's placement with the target storage
+   * class.  Returns -EINVAL when that placement does not exist in the
+   * zone, otherwise the result of transition_obj().  Marked `override`
+   * to keep the signature tied to LCOpAction::process(). */
+  int process(lc_op_ctx& oc) override {
+    auto& o = oc.o;
+
+    rgw_placement_rule target_placement;
+    target_placement.inherit_from(oc.bucket_info.placement_rule);
+    target_placement.storage_class = transition.storage_class;
+
+    if (!oc.store->svc.zone->get_zone_params().valid_placement(target_placement)) {
+      ldout(oc.cct, 0) << "ERROR: non existent dest placement: " << target_placement
+                       << " bucket="<< oc.bucket_info.bucket
+                       << " rule_id=" << oc.op.id << dendl;
+      return -EINVAL;
+    }
+
+    int r = oc.store->transition_obj(oc.rctx, oc.bucket_info, oc.obj,
+                                     target_placement, o.meta.mtime, o.versioned_epoch);
+    if (r < 0) {
+      ldout(oc.cct, 0) << "ERROR: failed to transition obj (r=" << r << ")" << dendl;
+      return r;
+    }
+    ldout(oc.cct, 2) << "TRANSITIONED:" << oc.bucket_info.bucket << ":" << o.key << " -> " << transition.storage_class << dendl;
+    return 0;
+  }
+};
+
+/* Transition of current versions; the age is measured from the
+ * object's own mtime. */
+class LCOpAction_CurrentTransition : public LCOpAction_Transition {
+protected:
+  bool check_current_state(bool is_current) override {
+    return is_current;
+  }
+
+  ceph::real_time get_effective_mtime(lc_op_ctx& oc) override {
+    return oc.o.meta.mtime;
+  }
+public:
+  /* explicit: a transition_action must not convert to an action implicitly */
+  explicit LCOpAction_CurrentTransition(const transition_action& _transition) : LCOpAction_Transition(_transition) {}
+};
+
+/* Transition of noncurrent versions; the age is measured from the mtime
+ * of the previous listing entry. */
+class LCOpAction_NonCurrentTransition : public LCOpAction_Transition {
+protected:
+  bool check_current_state(bool is_current) override {
+    return !is_current;
+  }
+
+  ceph::real_time get_effective_mtime(lc_op_ctx& oc) override {
+    return oc.ol.get_prev_obj().meta.mtime;
+  }
+public:
+  /* explicit: a transition_action must not convert to an action implicitly */
+  explicit LCOpAction_NonCurrentTransition(const transition_action& _transition) : LCOpAction_Transition(_transition) {}
+};
+
+/* Populate the filter and action lists from env.op: the tags filter is
+ * always installed, and one action is added for every kind of
+ * expiration or transition the op defines.
+ *
+ * Objects are created with std::make_unique so that ownership is never
+ * held by a raw pointer; passing `new T` to emplace_back() leaks the
+ * object if growing the vector throws. */
+void LCOpRule::build()
+{
+  filters.push_back(std::make_unique<LCOpFilter_Tags>());
+
+  auto& op = env.op;
+
+  if (op.expiration > 0 ||
+      op.expiration_date != boost::none) {
+    actions.push_back(std::make_unique<LCOpAction_CurrentExpiration>());
+  }
+
+  if (op.dm_expiration) {
+    actions.push_back(std::make_unique<LCOpAction_DMExpiration>());
+  }
+
+  if (op.noncur_expiration > 0) {
+    actions.push_back(std::make_unique<LCOpAction_NonCurrentExpiration>());
+  }
+
+  for (auto& iter : op.transitions) {
+    actions.push_back(std::make_unique<LCOpAction_CurrentTransition>(iter.second));
+  }
+
+  for (auto& iter : op.noncur_transitions) {
+    actions.push_back(std::make_unique<LCOpAction_NonCurrentTransition>(iter.second));
+  }
+}
+
+/* Apply this rule to one listing entry.
+ * Among the actions whose check() succeeds, the one with the latest
+ * due time is selected.  It is executed only if it reports
+ * should_process() and at least one filter accepts the object.
+ * Returns 0 when nothing had to be done or the action succeeded,
+ * otherwise the negative result of the action. */
+int LCOpRule::process(rgw_bucket_dir_entry& o)
+{
+  lc_op_ctx ctx(env, o);
+
+  unique_ptr<LCOpAction> *selected = nullptr;
+  real_time exp;
+
+  for (auto& a : actions) {
+    real_time action_exp;
+
+    if (a->check(ctx, &action_exp)) {
+      if (action_exp > exp) {
+        exp = action_exp;
+        selected = &a;
+      }
+    }
+  }
+
+  if (selected &&
+      (*selected)->should_process()) {
+
+    /*
+     * Calling filter checks after action checks because
+     * all action checks (as they are implemented now) do
+     * not access the objects themselves, but return result
+     * from info from bucket index listing. The current tags filter
+     * check does access the objects, so we avoid unnecessary rados calls
+     * having filters check later in the process.
+     */
+
+    bool cont = false;
+    for (auto& f : filters) {
+      if (f->check(ctx)) {
+        cont = true;
+        break;
+      }
+    }
+
+    if (!cont) {
+      ldout(env.store->ctx(), 20) << __func__ << "(): key=" << o.key << ": no rule match, skipping" << dendl;
+      return 0;
+    }
+
+    int r = (*selected)->process(ctx);
+    if (r < 0) {
+      /* the selected action may be a transition as well as a removal,
+       * so do not blame remove_expired_obj here; report key and code */
+      ldout(ctx.cct, 0) << "ERROR: lifecycle action failed, key=" << o.key << " r=" << r << dendl;
+      return r;
+    }
+    ldout(ctx.cct, 20) << "processed:" << env.bucket_info.bucket << ":" << o.key << dendl;
+  }
+
+  return 0;
+
+}
+
+/* Run lifecycle processing for one bucket.
+ * shard_id has the form "<tenant>:<bucket name>:<bucket marker>".
+ * Returns -EINVAL when shard_id does not have three fields, -ENOENT
+ * when the entry is stale (the bucket marker changed), -1 when the
+ * stored lifecycle configuration cannot be decoded, a negative error
+ * from bucket lookup or listing, and otherwise the result of the
+ * multipart expiration pass.  Errors on individual objects are logged
+ * and do not stop the pass. */
+int RGWLC::bucket_lc_process(string& shard_id)
+{
+  RGWLifecycleConfiguration config(cct);
+  RGWBucketInfo bucket_info;
+  map<string, bufferlist> bucket_attrs;
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  vector<std::string> result;
+  boost::split(result, shard_id, boost::is_any_of(":"));
+  if (result.size() < 3) {
+    /* indexing result[1]/result[2] below would be out of bounds */
+    ldpp_dout(this, 0) << "LC: malformed lc entry name " << shard_id << dendl;
+    return -EINVAL;
+  }
+  string bucket_tenant = result[0];
+  string bucket_name = result[1];
+  string bucket_marker = result[2];
+
+  int ret = store->get_bucket_info(obj_ctx, bucket_tenant, bucket_name, bucket_info, NULL, &bucket_attrs);
+  if (ret < 0) {
+    ldpp_dout(this, 0) << "LC:get_bucket_info for " << bucket_name << " failed" << dendl;
+    return ret;
+  }
+
+  if (bucket_info.bucket.marker != bucket_marker) {
+    ldpp_dout(this, 1) << "LC: deleting stale entry found for bucket=" << bucket_tenant
+                       << ":" << bucket_name << " cur_marker=" << bucket_info.bucket.marker
+                       << " orig_marker=" << bucket_marker << dendl;
+    return -ENOENT;
+  }
+
+  RGWRados::Bucket target(store, bucket_info);
+
+  map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_LC);
+  if (aiter == bucket_attrs.end())
+    return 0;
+
+  bufferlist::const_iterator iter{&aiter->second};
+  try {
+    config.decode(iter);
+  } catch (const buffer::error& e) {
+    ldpp_dout(this, 0) << __func__ << "() decode life cycle config failed" << dendl;
+    return -1;
+  }
+
+  multimap<string, lc_op>& prefix_map = config.get_prefix_map();
+
+  ldpp_dout(this, 10) << __func__ << "() prefix_map size="
+                      << prefix_map.size()
+                      << dendl;
+
+  for (auto& prefix_entry : prefix_map) {
+    auto& op = prefix_entry.second;
+    if (!is_valid_op(op)) {
+      continue;
+    }
+    ldpp_dout(this, 20) << __func__ << "(): prefix=" << prefix_entry.first << dendl;
+
+    /* every rule gets its own listing of the objects under its prefix */
+    LCObjsLister ol(store, bucket_info);
+    ol.set_prefix(prefix_entry.first);
+
+    ret = ol.init();
+
+    if (ret < 0) {
+      if (ret == (-ENOENT))
+        return 0;
+      ldpp_dout(this, 0) << "ERROR: store->list_objects():" <<dendl;
+      return ret;
+    }
+
+    op_env oenv(op, store, this, bucket_info, ol);
+
+    LCOpRule orule(oenv);
+
+    orule.build();
+
+    rgw_bucket_dir_entry o;
+    for (; ol.get_obj(&o); ol.next()) {
+      ldpp_dout(this, 20) << __func__ << "(): key=" << o.key << dendl;
+      /* named differently from the outer `ret` on purpose: a failure on
+       * one object must not become the result of the whole pass */
+      const int obj_ret = orule.process(o);
+      if (obj_ret < 0) {
+        ldpp_dout(this, 20) << "ERROR: orule.process() returned ret="
+                            << obj_ret
+                            << dendl;
+      }
+
+      if (going_down()) {
+        return 0;
+      }
+    }
+  }
+
+  ret = handle_multipart_expiration(&target, prefix_map);
+
+  return ret;
+}
+
+/* Record the outcome of processing one bucket in lc index object
+ * `index`, under the index lock.
+ *   result == -ENOENT : the entry is removed (stale bucket)
+ *   result <  0       : the entry is marked lc_failed
+ *   otherwise         : the entry is marked lc_complete
+ * While the lock is held by another processor the call retries every 5
+ * seconds.  Always returns 0; failures are only logged.
+ * max_lock_sec is not used: the lock duration comes from
+ * rgw_lc_lock_max_time. */
+int RGWLC::bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result)
+{
+  utime_t lock_duration(cct->_conf->rgw_lc_lock_max_time, 0);
+
+  rados::cls::lock::Lock l(lc_index_lock_name);
+  l.set_cookie(cookie);
+  l.set_duration(lock_duration);
+
+  for (;;) {
+    int ret = l.lock_exclusive(&store->lc_pool_ctx, obj_names[index]);
+    if (ret == -EBUSY || ret == -EEXIST) { /* already locked by another lc processor */
+      ldpp_dout(this, 0) << "RGWLC::bucket_lc_post() failed to acquire lock on "
+          << obj_names[index] << ", sleep 5, try again" << dendl;
+      sleep(5);
+      continue;
+    }
+    if (ret < 0) {
+      /* still best-effort, but no longer silent */
+      ldpp_dout(this, 0) << "RGWLC::bucket_lc_post() failed to acquire lock on "
+          << obj_names[index] << ", ret=" << ret << dendl;
+      return 0;
+    }
+    ldpp_dout(this, 20) << "RGWLC::bucket_lc_post() lock " << obj_names[index] << dendl;
+
+    if (result == -ENOENT) {
+      ret = cls_rgw_lc_rm_entry(store->lc_pool_ctx, obj_names[index], entry);
+      if (ret < 0) {
+        ldpp_dout(this, 0) << "RGWLC::bucket_lc_post() failed to remove entry "
+            << obj_names[index] << dendl;
+      }
+    } else {
+      entry.second = (result < 0) ? lc_failed : lc_complete;
+      ret = cls_rgw_lc_set_entry(store->lc_pool_ctx, obj_names[index], entry);
+      if (ret < 0) {
+        ldpp_dout(this, 0) << "RGWLC::bucket_lc_post() failed to set entry on "
+            << obj_names[index] << dendl;
+      }
+    }
+
+    l.unlock(&store->lc_pool_ctx, obj_names[index]);
+    ldpp_dout(this, 20) << "RGWLC::bucket_lc_post() unlock " << obj_names[index] << dendl;
+    return 0;
+  }
+}
+
+/* Collect the lifecycle entries of all lc index objects into
+ * *progress_map (which is cleared first).  marker and max_entries are
+ * passed unchanged to the listing of every index object.  Index
+ * objects that do not exist are skipped; any other listing error is
+ * returned. */
+int RGWLC::list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map)
+{
+  progress_map->clear();
+
+  for (int index = 0; index < max_objs; ++index) {
+    map<string, int > shard_entries;
+    const int ret = cls_rgw_lc_list(store->lc_pool_ctx, obj_names[index], marker, max_entries, shard_entries);
+    if (ret == -ENOENT) {
+      ldpp_dout(this, 10) << __func__ << "() ignoring unfound lc object="
+                          << obj_names[index] << dendl;
+      continue;
+    }
+    if (ret < 0) {
+      return ret;
+    }
+    progress_map->insert(shard_entries.begin(), shard_entries.end());
+  }
+
+  return 0;
+}
+
+/**
+ * Process every lifecycle shard once, starting at a randomly chosen shard
+ * and wrapping around.
+ *
+ * @return 0 when all shards were visited, otherwise the first negative
+ *         value returned by process(index, max_secs)
+ */
+int RGWLC::process()
+{
+  const int lock_secs = cct->_conf->rgw_lc_lock_max_time;
+  const int first = ceph::util::generate_random_number(0, max_objs - 1);
+
+  int visited = 0;
+  while (visited < max_objs) {
+    const int rc = process((first + visited) % max_objs, lock_secs);
+    if (rc < 0) {
+      return rc;
+    }
+    ++visited;
+  }
+
+  return 0;
+}
+
+/**
+ * Run lifecycle processing for the entries of one shard object.
+ *
+ * Each loop iteration takes the shard lock, reads the shard head, fetches the
+ * entry following head.marker, marks it lc_processing, advances the marker,
+ * releases the lock, and then processes that bucket and records the outcome
+ * via bucket_lc_post(). The loop ends when no further entry is returned or
+ * when any shard operation fails.
+ *
+ * @param index          index into obj_names of the shard object
+ * @param max_lock_secs  duration of the exclusive shard lock, in seconds
+ * @return -EAGAIN if max_lock_secs <= 0, otherwise 0 (shard errors are
+ *         logged, not returned)
+ */
+int RGWLC::process(int index, int max_lock_secs)
+{
+ rados::cls::lock::Lock l(lc_index_lock_name);
+ do {
+ utime_t now = ceph_clock_now();
+ pair<string, int > entry;//string = bucket_name:bucket_id ,int = LC_BUCKET_STATUS
+ if (max_lock_secs <= 0)
+ return -EAGAIN;
+
+ utime_t time(max_lock_secs, 0);
+ l.set_duration(time);
+
+ int ret = l.lock_exclusive(&store->lc_pool_ctx, obj_names[index]);
+ if (ret == -EBUSY || ret == -EEXIST) { /* already locked by another lc processor */
+ ldpp_dout(this, 0) << "RGWLC::process() failed to acquire lock on "
+ << obj_names[index] << ", sleep 5, try again" << dendl;
+ sleep(5);
+ continue;
+ }
+ // Lock not acquired for another reason: return without unlocking.
+ if (ret < 0)
+ return 0;
+
+ cls_rgw_lc_obj_head head;
+ ret = cls_rgw_lc_get_head(store->lc_pool_ctx, obj_names[index], head);
+ if (ret < 0) {
+ ldpp_dout(this, 0) << "RGWLC::process() failed to get obj head "
+ << obj_names[index] << ", ret=" << ret << dendl;
+ goto exit;
+ }
+
+ // First pass of the day for this shard: restart from the beginning.
+ if(!if_already_run_today(head.start_date)) {
+ head.start_date = now;
+ head.marker.clear();
+ ret = bucket_lc_prepare(index);
+ if (ret < 0) {
+ ldpp_dout(this, 0) << "RGWLC::process() failed to update lc object "
+ << obj_names[index] << ", ret=" << ret << dendl;
+ goto exit;
+ }
+ }
+
+ ret = cls_rgw_lc_get_next_entry(store->lc_pool_ctx, obj_names[index], head.marker, entry);
+ if (ret < 0) {
+ ldpp_dout(this, 0) << "RGWLC::process() failed to get obj entry "
+ << obj_names[index] << dendl;
+ goto exit;
+ }
+
+ // An empty entry name means there is nothing left after the marker.
+ if (entry.first.empty())
+ goto exit;
+
+ entry.second = lc_processing;
+ ret = cls_rgw_lc_set_entry(store->lc_pool_ctx, obj_names[index], entry);
+ if (ret < 0) {
+ ldpp_dout(this, 0) << "RGWLC::process() failed to set obj entry " << obj_names[index]
+ << " (" << entry.first << "," << entry.second << ")" << dendl;
+ goto exit;
+ }
+
+ head.marker = entry.first;
+ ret = cls_rgw_lc_put_head(store->lc_pool_ctx, obj_names[index], head);
+ if (ret < 0) {
+ ldpp_dout(this, 0) << "RGWLC::process() failed to put head " << obj_names[index] << dendl;
+ goto exit;
+ }
+ // The shard lock is released before the bucket itself is processed;
+ // bucket_lc_post() takes it again to store the result.
+ l.unlock(&store->lc_pool_ctx, obj_names[index]);
+ ret = bucket_lc_process(entry.first);
+ bucket_lc_post(index, max_lock_secs, entry, ret);
+ }while(1);
+
+exit:
+ // Reached only via goto while the shard lock is held.
+ l.unlock(&store->lc_pool_ctx, obj_names[index]);
+ return 0;
+}
+
+/// Allocate the lifecycle worker and start its thread under the name
+/// "lifecycle_thr". The worker is released again in stop_processor().
+void RGWLC::start_processor()
+{
+ worker = new LCWorker(this, cct, this);
+ worker->create("lifecycle_thr");
+}
+
+/// Raise the shutdown flag, stop and join the worker thread when one exists,
+/// then release the worker object.
+void RGWLC::stop_processor()
+{
+  down_flag = true;
+
+  if (worker != NULL) {
+    worker->stop();
+    worker->join();
+  }
+
+  // Deleting a null pointer is a no-op, so no extra check is needed here.
+  delete worker;
+  worker = NULL;
+}
+
+
+/// Logging subsystem id for messages emitted through this prefix provider
+/// (the file-level dout_subsys value).
+unsigned RGWLC::get_subsys() const
+{
+ return dout_subsys;
+}
+
+/// Write the "lifecycle: " log prefix to @p out and hand the stream back.
+std::ostream& RGWLC::gen_prefix(std::ostream& out) const
+{
+  out << "lifecycle: ";
+  return out;
+}
+
+/// Signal the worker's condition variable while holding its mutex.
+/// NOTE(review): presumably this wakes the worker loop out of its timed
+/// wait so it can observe the shutdown flag — confirm against entry().
+void RGWLC::LCWorker::stop()
+{
+ Mutex::Locker l(lock);
+ cond.Signal();
+}
+
+/// @return true once stop_processor() has raised the shutdown flag.
+bool RGWLC::going_down()
+{
+  // Explicit load; same default (sequentially consistent) ordering as the
+  // implicit atomic-to-bool conversion.
+  return down_flag.load();
+}
+
+/**
+ * Decide whether lifecycle processing may run at time @p now.
+ *
+ * With rgw_lc_debug_interval > 0 processing is always allowed. Otherwise the
+ * local time of day must fall inside the rgw_lifecycle_work_time window,
+ * formatted "HH:MM-HH:MM" (both ends inclusive). A window whose start is
+ * later than its end is treated as crossing midnight.
+ *
+ * @param now current time
+ * @return true when work may run; false outside the window or when the
+ *         configured window cannot be parsed
+ */
+bool RGWLC::LCWorker::should_work(utime_t& now)
+{
+  if (cct->_conf->rgw_lc_debug_interval > 0) {
+    /* We're debugging, so say we can run */
+    return true;
+  }
+
+  int start_hour = 0;
+  int start_minute = 0;
+  int end_hour = 0;
+  int end_minute = 0;
+  string worktime = cct->_conf->rgw_lifecycle_work_time;
+  // sscanf() leaves unmatched fields untouched, so insist on all four
+  // conversions before trusting any of the values.
+  const int parsed = sscanf(worktime.c_str(), "%d:%d-%d:%d",
+                            &start_hour, &start_minute, &end_hour, &end_minute);
+  if (parsed != 4) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to parse rgw_lifecycle_work_time="
+        << worktime << dendl;
+    return false;
+  }
+
+  struct tm bdt;
+  time_t tt = now.sec();
+  localtime_r(&tt, &bdt);
+
+  const int cur_min = bdt.tm_hour*60 + bdt.tm_min;
+  const int start_min = start_hour*60 + start_minute;
+  const int end_min = end_hour*60 + end_minute;
+
+  if (start_min <= end_min) {
+    return cur_min >= start_min && cur_min <= end_min;
+  }
+  // Window wraps past midnight, e.g. 22:00-06:00.
+  return cur_min >= start_min || cur_min <= end_min;
+}
+
+/**
+ * Compute how many seconds to wait before the next lifecycle run.
+ *
+ * With rgw_lc_debug_interval > 0 the next run is @p start plus that interval
+ * (never negative). Otherwise it is the next occurrence of the start time of
+ * the rgw_lifecycle_work_time window ("HH:MM-HH:MM") in local time.
+ *
+ * @param start time the previous run started
+ * @param now   current time
+ * @return number of seconds until the next run
+ */
+int RGWLC::LCWorker::schedule_next_start_time(utime_t &start, utime_t& now)
+{
+  int secs;
+
+  if (cct->_conf->rgw_lc_debug_interval > 0) {
+    secs = start + cct->_conf->rgw_lc_debug_interval - now;
+    if (secs < 0)
+      secs = 0;
+    return (secs);
+  }
+
+  // Zero-initialised so a malformed setting yields a deterministic schedule
+  // instead of reading indeterminate values.
+  int start_hour = 0;
+  int start_minute = 0;
+  int end_hour = 0;
+  int end_minute = 0;
+  string worktime = cct->_conf->rgw_lifecycle_work_time;
+  const int parsed = sscanf(worktime.c_str(), "%d:%d-%d:%d",
+                            &start_hour, &start_minute, &end_hour, &end_minute);
+  if (parsed != 4) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to parse rgw_lifecycle_work_time="
+        << worktime << dendl;
+  }
+
+  struct tm bdt;
+  time_t tt = now.sec();
+  time_t nt;
+  localtime_r(&tt, &bdt);
+  bdt.tm_hour = start_hour;
+  bdt.tm_min = start_minute;
+  bdt.tm_sec = 0;
+  nt = mktime(&bdt);
+  secs = nt - tt;
+
+  // Today's start time already passed (or is now): schedule for tomorrow.
+  return secs>0 ? secs : secs+24*60*60;
+}
+
+/// Append one default-constructed configuration to @p o. The instance is
+/// heap-allocated with new; nothing in this function frees it.
+void RGWLifecycleConfiguration::generate_test_instances(list<RGWLifecycleConfiguration*>& o)
+{
+ o.push_back(new RGWLifecycleConfiguration);
+}
+
+/**
+ * Map a lifecycle shard id to the name of the shard object that holds it.
+ *
+ * The object name is lc_oid_prefix followed by ".<index>", where the index is
+ * the hash of @p shard_id reduced modulo HASH_PRIME and then modulo the
+ * number of shard objects (rgw_lc_max_objs, capped at HASH_PRIME and never
+ * less than 1).
+ *
+ * @param cct      context providing the configuration
+ * @param shard_id identifier to hash
+ * @param oid      receives the resulting object name
+ */
+void get_lc_oid(CephContext *cct, const string& shard_id, string *oid)
+{
+  int max_objs = cct->_conf->rgw_lc_max_objs;
+  if (max_objs > HASH_PRIME) {
+    max_objs = HASH_PRIME;
+  } else if (max_objs < 1) {
+    // A zero shard count would make the modulo below divide by zero.
+    max_objs = 1;
+  }
+
+  const int index = ceph_str_hash_linux(shard_id.c_str(), shard_id.size()) % HASH_PRIME % max_objs;
+
+  char buf[32];
+  snprintf(buf, sizeof(buf), ".%d", index);
+  *oid = lc_oid_prefix;
+  oid->append(buf);
+}
+
+
+
+/// Build the lifecycle shard id of a bucket: tenant, name and marker joined
+/// with ':'.
+static std::string get_lc_shard_name(const rgw_bucket& bucket){
+ return string_join_reserve(':', bucket.tenant, bucket.name, bucket.marker);
+}
+
+/**
+ * Run @p f on a bucket's lifecycle shard object while holding the shard's
+ * exclusive lock.
+ *
+ * The lock is retried every 5 seconds while another holder has it. The
+ * callback receives the pool context, the shard object name and an entry
+ * initialised to (shard id, lc_uninitial). The lock is released only if it
+ * was actually acquired.
+ *
+ * @param store  store providing the context and the lifecycle pool
+ * @param bucket bucket whose shard entry is being modified
+ * @param cookie lock cookie
+ * @param f      callable invoked as f(IoCtx*, oid, entry), returning an int
+ * @return the locking error if the lock could not be taken, otherwise the
+ *         value returned by @p f
+ */
+template<typename F>
+static int guard_lc_modify(RGWRados* store, const rgw_bucket& bucket, const string& cookie, const F& f) {
+  CephContext *cct = store->ctx();
+
+  string shard_id = get_lc_shard_name(bucket);
+
+  string oid;
+  get_lc_oid(cct, shard_id, &oid);
+
+  pair<string, int> entry(shard_id, lc_uninitial);
+  int max_lock_secs = cct->_conf->rgw_lc_lock_max_time;
+
+  rados::cls::lock::Lock l(lc_index_lock_name);
+  utime_t time(max_lock_secs, 0);
+  l.set_duration(time);
+  l.set_cookie(cookie);
+
+  librados::IoCtx *ctx = store->get_lc_pool_ctx();
+
+  int ret;
+  for (;;) {
+    ret = l.lock_exclusive(ctx, oid);
+    if (ret != -EBUSY && ret != -EEXIST) {
+      break;
+    }
+    ldout(cct, 0) << "RGWLC::RGWPutLC() failed to acquire lock on "
+        << oid << ", sleep 5, try again" << dendl;
+    sleep(5); // XXX: return retryable error
+  }
+  if (ret < 0) {
+    ldout(cct, 0) << "RGWLC::RGWPutLC() failed to acquire lock on "
+        << oid << ", ret=" << ret << dendl;
+    // The lock was never taken, so there is nothing to unlock.
+    return ret;
+  }
+
+  ret = f(ctx, oid, entry);
+  if (ret < 0) {
+    ldout(cct, 0) << "RGWLC::RGWPutLC() failed to set entry on "
+        << oid << ", ret=" << ret << dendl;
+  }
+
+  l.unlock(ctx, oid);
+  return ret;
+}
+
+/**
+ * Store a lifecycle configuration on a bucket and register the bucket in its
+ * lifecycle shard.
+ *
+ * @param bucket_info  bucket to update; its objv_tracker is passed along
+ * @param bucket_attrs current bucket attributes (copied, not modified)
+ * @param config       configuration to encode into the RGW_ATTR_LC attribute
+ * @return negative error from the attribute update or the shard update,
+ *         otherwise the result of the shard update
+ */
+int RGWLC::set_bucket_config(RGWBucketInfo& bucket_info,
+                             const map<string, bufferlist>& bucket_attrs,
+                             RGWLifecycleConfiguration *config)
+{
+  bufferlist encoded_config;
+  config->encode(encoded_config);
+
+  map<string, bufferlist> new_attrs(bucket_attrs);
+  new_attrs[RGW_ATTR_LC] = std::move(encoded_config);
+
+  const int r = rgw_bucket_set_attrs(store, bucket_info, new_attrs, &bucket_info.objv_tracker);
+  if (r < 0) {
+    return r;
+  }
+
+  auto store_entry = [](librados::IoCtx *ctx, const string& oid,
+                        const pair<string, int>& entry) {
+    return cls_rgw_lc_set_entry(*ctx, oid, entry);
+  };
+  return guard_lc_modify(store, bucket_info.bucket, cookie, store_entry);
+}
+
+/**
+ * Remove the lifecycle configuration from a bucket and drop the bucket's
+ * entry from its lifecycle shard.
+ *
+ * @param bucket_info  bucket to update; its objv_tracker is passed along
+ * @param bucket_attrs current bucket attributes (copied, not modified)
+ * @return negative error from the attribute update or the shard update,
+ *         otherwise the result of the shard update
+ */
+int RGWLC::remove_bucket_config(RGWBucketInfo& bucket_info,
+                                const map<string, bufferlist>& bucket_attrs)
+{
+  map<string, bufferlist> remaining_attrs(bucket_attrs);
+  remaining_attrs.erase(RGW_ATTR_LC);
+
+  const int r = rgw_bucket_set_attrs(store, bucket_info, remaining_attrs,
+                                     &bucket_info.objv_tracker);
+  if (r < 0) {
+    ldout(cct, 0) << "RGWLC::RGWDeleteLC() failed to set attrs on bucket="
+        << bucket_info.bucket.name << " returned err=" << r << dendl;
+    return r;
+  }
+
+  auto drop_entry = [](librados::IoCtx *ctx, const string& oid,
+                       const pair<string, int>& entry) {
+    return cls_rgw_lc_rm_entry(*ctx, oid, entry);
+  };
+  return guard_lc_modify(store, bucket_info.bucket, cookie, drop_entry);
+}
+
+namespace rgw::lc {
+
+/**
+ * Make sure a bucket that carries a lifecycle attribute also has an entry in
+ * its lifecycle shard object, creating the entry when it is missing.
+ *
+ * @param store       store providing the context and the lifecycle pool
+ * @param bucket_info bucket to check
+ * @param battrs      attributes of that bucket
+ * @return 0 if there is no lifecycle attribute or the entry already exists;
+ *         the result of creating the entry if the lookup returned -ENOENT;
+ *         otherwise the lookup error
+ */
+int fix_lc_shard_entry(RGWRados* store, const RGWBucketInfo& bucket_info,
+                       const map<std::string,bufferlist>& battrs)
+{
+  if (battrs.count(RGW_ATTR_LC) == 0) {
+    return 0; // No entry, nothing to fix
+  }
+
+  auto shard_name = get_lc_shard_name(bucket_info.bucket);
+  std::string lc_oid;
+  get_lc_oid(store->ctx(), shard_name, &lc_oid);
+
+  // Cases handled here:
+  // 1. the entry exists and is keyed by the marker (plain buckets and newly
+  //    resharded buckets): nothing to do
+  // 2. the entry does not exist, which usually happens when a reshard took
+  //    place before the update and a later LC run dropped the entry: create it
+  // 3. an entry exists under the post-reshard bucket id and needs to be keyed
+  //    by the marker instead
+  // The old marker is not dropped here; the next LC run takes care of that.
+  rgw_lc_entry_t entry;
+  auto lc_pool_ctx = store->get_lc_pool_ctx();
+  int ret = cls_rgw_lc_get_entry(*lc_pool_ctx, lc_oid, shard_name, entry);
+  if (ret == 0) {
+    ldout(store->ctx(), 5) << "Entry already exists, nothing to do" << dendl;
+    return ret; // entry is already existing correctly set to marker
+  }
+
+  ldout(store->ctx(), 5) << "cls_rgw_lc_get_entry errored ret code=" << ret << dendl;
+  if (ret != -ENOENT) {
+    return ret;
+  }
+
+  ldout(store->ctx(), 1) << "No entry for bucket=" << bucket_info.bucket.name
+      << " creating " << dendl;
+  // TODO: we have too many ppl making cookies like this!
+  char cookie_buf[COOKIE_LEN + 1];
+  gen_rand_alphanumeric(store->ctx(), cookie_buf, sizeof(cookie_buf) - 1);
+  std::string cookie = cookie_buf;
+
+  return guard_lc_modify(store, bucket_info.bucket, cookie,
+      [&lc_pool_ctx, &lc_oid](librados::IoCtx *ctx, const string& oid,
+                              const pair<string, int>& entry) {
+        return cls_rgw_lc_set_entry(*lc_pool_ctx, lc_oid, entry);
+      });
+}
+
+/**
+ * Build the value of the S3 x-amz-expiration response header for an object.
+ *
+ * Decodes the bucket's lifecycle configuration, finds every enabled rule
+ * whose prefix and tags match the object, and reports the earliest resulting
+ * expiration together with the id of the rule that produced it.
+ *
+ * @param dpp          log prefix provider (also supplies the CephContext)
+ * @param obj_key      object key; an empty instance selects the rule's
+ *                     current-version expiration, otherwise the non-current one
+ * @param obj_tagset   tags of the object
+ * @param mtime        object modification time, base for "days" expirations
+ * @param bucket_attrs bucket attributes, searched for RGW_ATTR_LC
+ * @return expiry-date="...", rule-id="..." or an empty string when no rule
+ *         applies, the configuration cannot be decoded, or formatting fails
+ */
+std::string s3_expiration_header(
+  DoutPrefixProvider* dpp,
+  const rgw_obj_key& obj_key,
+  const RGWObjTags& obj_tagset,
+  const ceph::real_time& mtime,
+  const std::map<std::string, buffer::list>& bucket_attrs)
+{
+  CephContext* cct = dpp->get_cct();
+  RGWLifecycleConfiguration config(cct);
+  std::string hdr{""};
+
+  const auto& aiter = bucket_attrs.find(RGW_ATTR_LC);
+  if (aiter == bucket_attrs.end())
+    return hdr;
+
+  bufferlist::const_iterator iter{&aiter->second};
+  try {
+    config.decode(iter);
+  } catch (const buffer::error& e) {
+    ldpp_dout(dpp, 0) << __func__
+        << "() decode life cycle config failed"
+        << dendl;
+    return hdr;
+  } /* catch */
+
+  /* dump tags at debug level 16 */
+  const auto& obj_tag_map = obj_tagset.get_tags();
+  if (cct->_conf->subsys.should_gather(ceph_subsys_rgw, 16)) {
+    for (const auto& elt : obj_tag_map) {
+      ldout(cct, 16) << __func__
+          << "() key=" << elt.first << " val=" << elt.second
+          << dendl;
+    }
+  }
+
+  boost::optional<ceph::real_time> expiration_date;
+  boost::optional<std::string> rule_id;
+
+  const auto& rule_map = config.get_rule_map();
+  for (const auto& ri : rule_map) {
+    const auto& rule = ri.second;
+    auto& id = rule.get_id();
+    auto& filter = rule.get_filter();
+    // A prefix given inside <Filter> takes precedence over the legacy
+    // rule-level prefix.
+    const auto& prefix =
+      filter.has_prefix() ? filter.get_prefix() : rule.get_prefix();
+    auto& expiration = rule.get_expiration();
+    auto& noncur_expiration = rule.get_noncur_expiration();
+
+    ldpp_dout(dpp, 10) << "rule: " << ri.first
+        << " prefix: " << prefix
+        << " expiration: "
+        << " date: " << expiration.get_date()
+        << " days: " << expiration.get_days()
+        << " noncur_expiration: "
+        << " date: " << noncur_expiration.get_date()
+        << " days: " << noncur_expiration.get_days()
+        << dendl;
+
+    /* skip if rule !enabled
+     * if rule has prefix, skip iff object !match prefix
+     * if rule has tags, skip iff object !match tags
+     * note if object is current or non-current, compare accordingly
+     * if rule has days, construct date expression and save iff older
+     * than last saved
+     * if rule has date, convert date expression and save iff older
+     * than last saved
+     * if the date accum has a value, format it into hdr
+     */
+
+    if (!rule.is_enabled())
+      continue;
+
+    if (!prefix.empty()) {
+      if (!boost::starts_with(obj_key.name, prefix))
+        continue;
+    }
+
+    if (filter.has_tags()) {
+      bool tag_match = false;
+      const RGWObjTags& rule_tagset = filter.get_tags();
+      for (auto& tag : rule_tagset.get_tags()) {
+        /* remember, S3 tags are {key,value} tuples */
+        tag_match = true;
+        auto obj_tag = obj_tag_map.find(tag.first);
+        if (obj_tag == obj_tag_map.end() || obj_tag->second != tag.second) {
+          ldpp_dout(dpp, 10) << "tag does not match obj_key=" << obj_key
+              << " rule_id=" << id
+              << " tag=" << tag
+              << dendl;
+          tag_match = false;
+          break;
+        }
+      }
+      if (! tag_match)
+        continue;
+    }
+
+    // compute a uniform expiration date
+    boost::optional<ceph::real_time> rule_expiration_date;
+    const LCExpiration& rule_expiration =
+      (obj_key.instance.empty()) ? expiration : noncur_expiration;
+
+    if (rule_expiration.has_date()) {
+      // Read the date from the expiration that was actually selected.
+      rule_expiration_date = ceph::from_iso_8601(rule_expiration.get_date());
+    } else if (rule_expiration.has_days()) {
+      rule_expiration_date =
+        boost::optional<ceph::real_time>(
+          mtime + make_timespan(double(rule_expiration.get_days())*24*60*60));
+    }
+
+    // update earliest expiration; the rule id is recorded together with the
+    // date so both always describe the same rule
+    if (rule_expiration_date) {
+      if ((! expiration_date) ||
+          (*rule_expiration_date < *expiration_date)) {
+        expiration_date = rule_expiration_date;
+        rule_id = id;
+      }
+    }
+  }
+
+  // cond format header
+  if (expiration_date && rule_id) {
+    // Fri, 23 Dec 2012 00:00:00 GMT
+    char exp_buf[100];
+    time_t exp = ceph::real_clock::to_time_t(*expiration_date);
+    struct tm exp_tm;
+    // gmtime_r() is used because std::gmtime() returns a pointer to shared
+    // static storage.
+    if (gmtime_r(&exp, &exp_tm) &&
+        std::strftime(exp_buf, sizeof(exp_buf),
+                      "%a, %d %b %Y %T %Z", &exp_tm)) {
+      hdr = fmt::format("expiry-date=\"{0}\", rule-id=\"{1}\"", exp_buf,
+                        *rule_id);
+    } else {
+      ldpp_dout(dpp, 0) << __func__ <<
+        "() strftime of life cycle expiration header failed"
+        << dendl;
+    }
+  }
+
+  return hdr;
+
+} /* s3_expiration_header */
+
+} /* namespace rgw::lc */
diff --git a/src/rgw/rgw_lc.h b/src/rgw/rgw_lc.h
new file mode 100644
index 00000000..6a373502
--- /dev/null
+++ b/src/rgw/rgw_lc.h
@@ -0,0 +1,539 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_LC_H
+#define CEPH_RGW_LC_H
+
+#include <map>
+#include <string>
+#include <iostream>
+
+#include "common/debug.h"
+
+#include "include/types.h"
+#include "include/rados/librados.hpp"
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "common/iso_8601.h"
+#include "common/Thread.h"
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_multi.h"
+#include "cls/rgw/cls_rgw_types.h"
+#include "rgw_tag.h"
+
+#include <atomic>
+#include <tuple>
+
+// Upper bound for the number of lifecycle shard objects and modulus applied
+// to the shard-id hash (see get_lc_oid()).
+#define HASH_PRIME 7877
+// NOTE(review): presumably the maximum length of a lifecycle rule ID — no use
+// is visible in this header; confirm against callers.
+#define MAX_ID_LEN 255
+// Name prefix of the lifecycle shard objects ("lc.<index>").
+static string lc_oid_prefix = "lc";
+// Name of the cls lock taken on a shard object while it is read or modified.
+static string lc_index_lock_name = "lc_process";
+
+// Defined elsewhere.
+// NOTE(review): presumably printable names indexed by LC_BUCKET_STATUS — confirm.
+extern const char* LC_STATUS[];
+
+// Per-bucket processing state stored as the int half of a lifecycle shard
+// entry.
+typedef enum {
+ lc_uninitial = 0, // value used when an entry is first created
+ lc_processing, // set on the entry just before its bucket is processed
+ lc_failed, // bucket processing returned an error
+ lc_complete, // bucket processing finished without error
+} LC_BUCKET_STATUS;
+
+// Expiration action of a lifecycle rule, expressed either as a number of days
+// or as a date. Both values are kept as the strings they were given as.
+class LCExpiration
+{
+protected:
+ string days;
+ //At present only current object has expiration date
+ string date;
+public:
+ LCExpiration() {}
+ LCExpiration(const string& _days, const string& _date) : days(_days), date(_date) {}
+
+ // Encoded as struct version 3 (compat 2): days, then date.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(3, 2, bl);
+ encode(days, bl);
+ encode(date, bl);
+ ENCODE_FINISH(bl);
+ }
+ // The date field is only present from struct version 3 onwards.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
+ decode(days, bl);
+ if (struct_v >= 3) {
+ decode(date, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+// static void generate_test_instances(list<ACLOwner*>& o);
+ void set_days(const string& _days) { days = _days; }
+ string get_days_str() const {
+ return days;
+ }
+ // Numeric value of days via atoi(); text atoi() cannot convert yields 0.
+ int get_days() const {return atoi(days.c_str()); }
+ bool has_days() const {
+ return !days.empty();
+ }
+ void set_date(const string& _date) { date = _date; }
+ string get_date() const {
+ return date;
+ }
+ bool has_date() const {
+ return !date.empty();
+ }
+ bool empty() const {
+ return days.empty() && date.empty();
+ }
+ // Invalid when both days and date are set, or when days is set but not
+ // strictly positive.
+ bool valid() const {
+ if (!days.empty() && !date.empty()) {
+ return false;
+ } else if (!days.empty() && get_days() <= 0) {
+ return false;
+ }
+ //We've checked date in xml parsing
+ return true;
+ }
+};
+WRITE_CLASS_ENCODER(LCExpiration)
+
+// Transition action of a lifecycle rule: move objects to storage_class after
+// a number of days or on a date. Days and date are kept as strings.
+class LCTransition
+{
+protected:
+ string days;
+ string date;
+ string storage_class;
+
+public:
+ // Numeric value of days via atoi(); text atoi() cannot convert yields 0.
+ int get_days() const {
+ return atoi(days.c_str());
+ }
+
+ string get_date() const {
+ return date;
+ }
+
+ string get_storage_class() const {
+ return storage_class;
+ }
+
+ bool has_days() const {
+ return !days.empty();
+ }
+
+ bool has_date() const {
+ return !date.empty();
+ }
+
+ bool empty() const {
+ return days.empty() && date.empty();
+ }
+
+ // Invalid when both days and date are set, or when days is negative.
+ // Unlike LCExpiration::valid(), a value of 0 days is accepted here.
+ bool valid() const {
+ if (!days.empty() && !date.empty()) {
+ return false;
+ } else if (!days.empty() && get_days() < 0) {
+ return false;
+ }
+ //We've checked date in xml parsing
+ return true;
+ }
+
+ // Encoded as struct version 1: days, date, storage_class.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(days, bl);
+ encode(date, bl);
+ encode(storage_class, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(days, bl);
+ decode(date, bl);
+ decode(storage_class, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const {
+ f->dump_string("days", days);
+ f->dump_string("date", date);
+ f->dump_string("storage_class", storage_class);
+ }
+};
+WRITE_CLASS_ENCODER(LCTransition)
+
+// Filter of a lifecycle rule: an optional key prefix plus an optional set of
+// object tags.
+class LCFilter
+{
+ protected:
+ std::string prefix;
+ RGWObjTags obj_tags;
+
+ public:
+
+ const std::string& get_prefix() const {
+ return prefix;
+ }
+
+ const RGWObjTags& get_tags() const {
+ return obj_tags;
+ }
+
+ // True when neither a prefix nor any tag is set.
+ bool empty() const {
+ return !(has_prefix() || has_tags());
+ }
+
+ // Determine if we need AND tag when creating xml
+ // (more than one tag, or a prefix combined with at least one tag)
+ bool has_multi_condition() const {
+ if (obj_tags.count() > 1)
+ return true;
+ else if (has_prefix() && has_tags())
+ return true;
+
+ return false;
+ }
+
+ bool has_prefix() const {
+ return !prefix.empty();
+ }
+
+ bool has_tags() const {
+ return !obj_tags.empty();
+ }
+
+ // Encoded as struct version 2 (compat 1): prefix, then obj_tags.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(prefix, bl);
+ encode(obj_tags, bl);
+ ENCODE_FINISH(bl);
+ }
+ // obj_tags is only present from struct version 2 onwards.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(prefix, bl);
+ if (struct_v >= 2) {
+ decode(obj_tags, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(LCFilter)
+
+// One lifecycle rule: id, status, legacy rule-level prefix, filter, and the
+// expiration / transition actions for current versions, non-current versions
+// and incomplete multipart uploads.
+class LCRule
+{
+protected:
+ string id;
+ string prefix;
+ string status;
+ LCExpiration expiration;
+ LCExpiration noncur_expiration;
+ LCExpiration mp_expiration;
+ LCFilter filter;
+ // Both transition maps are keyed by storage class (see add_transition()).
+ map<string, LCTransition> transitions;
+ map<string, LCTransition> noncur_transitions;
+ bool dm_expiration = false;
+
+public:
+
+ LCRule(){};
+ ~LCRule(){};
+
+ const string& get_id() const {
+ return id;
+ }
+
+ const string& get_status() const {
+ return status;
+ }
+
+ // A rule is enabled only when its status is exactly "Enabled".
+ bool is_enabled() const {
+ return status == "Enabled";
+ }
+
+ void set_enabled(bool flag) {
+ status = (flag ? "Enabled" : "Disabled");
+ }
+
+ const string& get_prefix() const {
+ return prefix;
+ }
+
+ const LCFilter& get_filter() const {
+ return filter;
+ }
+
+ const LCExpiration& get_expiration() const {
+ return expiration;
+ }
+
+ const LCExpiration& get_noncur_expiration() const {
+ return noncur_expiration;
+ }
+
+ const LCExpiration& get_mp_expiration() const {
+ return mp_expiration;
+ }
+
+ bool get_dm_expiration() const {
+ return dm_expiration;
+ }
+
+ const map<string, LCTransition>& get_transitions() const {
+ return transitions;
+ }
+
+ const map<string, LCTransition>& get_noncur_transitions() const {
+ return noncur_transitions;
+ }
+
+ void set_id(const string& _id) {
+ id = _id;
+ }
+
+ void set_prefix(const string& _prefix) {
+ prefix = _prefix;
+ }
+
+ void set_status(const string& _status) {
+ status = _status;
+ }
+
+ void set_expiration(const LCExpiration& _expiration) {
+ expiration = _expiration;
+ }
+
+ void set_noncur_expiration(const LCExpiration& _noncur_expiration) {
+ noncur_expiration = _noncur_expiration;
+ }
+
+ void set_mp_expiration(const LCExpiration& _mp_expiration) {
+ mp_expiration = _mp_expiration;
+ }
+
+ void set_dm_expiration(bool _dm_expiration) {
+ dm_expiration = _dm_expiration;
+ }
+
+ // Returns false (and stores nothing) when a transition for the same
+ // storage class is already present.
+ bool add_transition(const LCTransition& _transition) {
+ auto ret = transitions.emplace(_transition.get_storage_class(), _transition);
+ return ret.second;
+ }
+
+ // Same contract as add_transition(), for non-current versions.
+ bool add_noncur_transition(const LCTransition& _noncur_transition) {
+ auto ret = noncur_transitions.emplace(_noncur_transition.get_storage_class(), _noncur_transition);
+ return ret.second;
+ }
+
+ bool valid() const;
+
+ // Encoded as struct version 6 (compat 1); the field order below is the
+ // on-disk order and must match decode().
+ void encode(bufferlist& bl) const {
+ ENCODE_START(6, 1, bl);
+ encode(id, bl);
+ encode(prefix, bl);
+ encode(status, bl);
+ encode(expiration, bl);
+ encode(noncur_expiration, bl);
+ encode(mp_expiration, bl);
+ encode(dm_expiration, bl);
+ encode(filter, bl);
+ encode(transitions, bl);
+ encode(noncur_transitions, bl);
+ ENCODE_FINISH(bl);
+ }
+ // Fields added in later struct versions are read only when the encoded
+ // version is new enough; otherwise they keep their current values.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(6, 1, 1, bl);
+ decode(id, bl);
+ decode(prefix, bl);
+ decode(status, bl);
+ decode(expiration, bl);
+ if (struct_v >=2) {
+ decode(noncur_expiration, bl);
+ }
+ if (struct_v >= 3) {
+ decode(mp_expiration, bl);
+ }
+ if (struct_v >= 4) {
+ decode(dm_expiration, bl);
+ }
+ if (struct_v >= 5) {
+ decode(filter, bl);
+ }
+ if (struct_v >= 6) {
+ decode(transitions, bl);
+ decode(noncur_transitions, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+
+ void init_simple_days_rule(std::string_view _id, std::string_view _prefix, int num_days);
+};
+WRITE_CLASS_ENCODER(LCRule)
+
+// Transition target in processed form: a storage class reached either after
+// a number of days or at a fixed date.
+struct transition_action
+{
+  int days;
+  boost::optional<ceph::real_time> date;
+  string storage_class;
+
+  transition_action() : days(0) {}
+
+  // Emits "date" when a date is set, otherwise "days".
+  void dump(Formatter *f) const {
+    if (date) {
+      utime_t ut(*date);
+      f->dump_stream("date") << ut;
+      return;
+    }
+    f->dump_int("days", days);
+  }
+};
+
+/* XXX why not LCRule? */
+// Processed form of one lifecycle rule, stored in
+// RGWLifecycleConfiguration::prefix_map.
+struct lc_op
+{
+  string id;
+  bool status{false};
+  bool dm_expiration{false};
+  int expiration{0};
+  int noncur_expiration{0};
+  int mp_expiration{0};
+  boost::optional<ceph::real_time> expiration_date;
+  boost::optional<RGWObjTags> obj_tags;
+  map<string, transition_action> transitions;
+  map<string, transition_action> noncur_transitions;
+
+  /* ctors are nice */
+  lc_op() = delete;
+
+  // Taken by const reference so the id is copied once, into the member.
+  lc_op(const std::string& id) : id(id)
+  {}
+
+  void dump(Formatter *f) const;
+};
+
+// Lifecycle configuration of a bucket: the rules as supplied (rule_map) plus
+// a derived map of lc_op entries (prefix_map).
+class RGWLifecycleConfiguration
+{
+protected:
+ CephContext *cct;
+ multimap<string, lc_op> prefix_map;
+ multimap<string, LCRule> rule_map;
+ bool _add_rule(const LCRule& rule);
+ bool has_same_action(const lc_op& first, const lc_op& second);
+public:
+ explicit RGWLifecycleConfiguration(CephContext *_cct) : cct(_cct) {}
+ RGWLifecycleConfiguration() : cct(NULL) {}
+
+ void set_ctx(CephContext *ctx) {
+ cct = ctx;
+ }
+
+ virtual ~RGWLifecycleConfiguration() {}
+
+// int get_perm(string& id, int perm_mask);
+// int get_group_perm(ACLGroupTypeEnum group, int perm_mask);
+ // Only rule_map is encoded; prefix_map is not part of the encoding.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(rule_map, bl);
+ ENCODE_FINISH(bl);
+ }
+ // Decodes rule_map and then calls _add_rule() for every decoded rule; the
+ // return value of _add_rule() is ignored here.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, bl);
+ decode(rule_map, bl);
+ multimap<string, LCRule>::iterator iter;
+ for (iter = rule_map.begin(); iter != rule_map.end(); ++iter) {
+ LCRule& rule = iter->second;
+ _add_rule(rule);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWLifecycleConfiguration*>& o);
+
+ void add_rule(const LCRule& rule);
+
+ int check_and_add_rule(const LCRule& rule);
+
+ bool valid();
+
+ multimap<string, LCRule>& get_rule_map() { return rule_map; }
+ multimap<string, lc_op>& get_prefix_map() { return prefix_map; }
+/*
+ void create_default(string id, string name) {
+ ACLGrant grant;
+ grant.set_canon(id, name, RGW_PERM_FULL_CONTROL);
+ add_grant(&grant);
+ }
+*/
+};
+WRITE_CLASS_ENCODER(RGWLifecycleConfiguration)
+
+// Lifecycle processor: owns the worker thread and implements the per-shard
+// and per-bucket processing steps. Also acts as the log prefix provider for
+// its own messages.
+class RGWLC : public DoutPrefixProvider {
+  CephContext *cct;
+  RGWRados *store;
+  int max_objs{0};              // number of lifecycle shard objects
+  string *obj_names{nullptr};   // shard object names, indexed by shard
+  std::atomic<bool> down_flag = { false };  // raised by stop_processor()
+  string cookie;                // cookie used for the shard locks
+
+  // Background thread driving lifecycle processing.
+  class LCWorker : public Thread {
+    const DoutPrefixProvider *dpp;
+    CephContext *cct;
+    RGWLC *lc;
+    Mutex lock;
+    Cond cond;
+
+  public:
+    LCWorker(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWLC *_lc) : dpp(_dpp), cct(_cct), lc(_lc), lock("LCWorker") {}
+    void *entry() override;
+    void stop();
+    bool should_work(utime_t& now);
+    int schedule_next_start_time(utime_t& start, utime_t& now);
+  };
+
+  public:
+  LCWorker *worker;
+  RGWLC() : cct(NULL), store(NULL), worker(NULL) {}
+  ~RGWLC() {
+    stop_processor();
+    finalize();
+  }
+
+  void initialize(CephContext *_cct, RGWRados *_store);
+  void finalize();
+
+  int process();
+  int process(int index, int max_secs);
+  bool if_already_run_today(time_t& start_date);
+  int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
+  int bucket_lc_prepare(int index);
+  int bucket_lc_process(string& shard_id);
+  int bucket_lc_post(int index, int max_lock_sec, pair<string, int >& entry, int& result);
+  bool going_down();
+  void start_processor();
+  void stop_processor();
+  int set_bucket_config(RGWBucketInfo& bucket_info,
+                        const map<string, bufferlist>& bucket_attrs,
+                        RGWLifecycleConfiguration *config);
+  int remove_bucket_config(RGWBucketInfo& bucket_info,
+                           const map<string, bufferlist>& bucket_attrs);
+
+  // DoutPrefixProvider interface. get_cct() dereferences store, so it must
+  // not be used before a store has been set.
+  CephContext *get_cct() const override { return store->ctx(); }
+  unsigned get_subsys() const override;
+  std::ostream& gen_prefix(std::ostream& out) const override;
+
+  private:
+
+  int handle_multipart_expiration(RGWRados::Bucket *target,
+                                  const multimap<string, lc_op>& prefix_map);
+};
+
+namespace rgw::lc {
+
+// Ensure a bucket that has a lifecycle attribute in battrs also has an entry
+// in its lifecycle shard object; creates the entry when it is missing.
+int fix_lc_shard_entry(RGWRados *store, const RGWBucketInfo& bucket_info,
+ const map<std::string,bufferlist>& battrs);
+
+// Build the expiry-date="...", rule-id="..." string for an object from the
+// bucket's lifecycle configuration; returns an empty string when no rule
+// applies.
+std::string s3_expiration_header(
+ DoutPrefixProvider* dpp,
+ const rgw_obj_key& obj_key,
+ const RGWObjTags& obj_tagset,
+ const ceph::real_time& mtime,
+ const std::map<std::string, buffer::list>& bucket_attrs);
+
+} // namespace rgw::lc
+
+#endif
diff --git a/src/rgw/rgw_lc_s3.cc b/src/rgw/rgw_lc_s3.cc
new file mode 100644
index 00000000..09eb216f
--- /dev/null
+++ b/src/rgw/rgw_lc_s3.cc
@@ -0,0 +1,344 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "rgw_user.h"
+#include "rgw_lc_s3.h"
+
+
+#define dout_subsys ceph_subsys_rgw
+
+// Accept a date only if it parses as ISO 8601 and falls exactly on a day
+// boundary of the epoch-based timestamp (whole multiple of 24h, no
+// nanoseconds).
+static bool check_date(const string& _date)
+{
+  const boost::optional<ceph::real_time> parsed = ceph::from_iso_8601(_date);
+  if (!parsed) {
+    return false;
+  }
+
+  const struct timespec ts = ceph::real_clock::to_timespec(*parsed);
+  const bool whole_day = (ts.tv_sec % (24*60*60)) == 0;
+  const bool no_fraction = ts.tv_nsec == 0;
+  return whole_day && no_fraction;
+}
+
+// Emit exactly one of ExpiredObjectDeleteMarker, Days or Date, in that order
+// of precedence.
+void LCExpiration_S3::dump_xml(Formatter *f) const {
+  if (dm_expiration) {
+    encode_xml("ExpiredObjectDeleteMarker", "true", f);
+    return;
+  }
+
+  if (days.empty()) {
+    encode_xml("Date", date, f);
+  } else {
+    encode_xml("Days", days, f);
+  }
+}
+
+// Parse an Expiration element. Exactly one of Days, Date and
+// ExpiredObjectDeleteMarker must be present, and a Date must pass
+// check_date(); otherwise RGWXMLDecoder::err is thrown.
+void LCExpiration_S3::decode_xml(XMLObj *obj)
+{
+  const bool has_days = RGWXMLDecoder::decode_xml("Days", days, obj);
+  const bool has_date = RGWXMLDecoder::decode_xml("Date", date, obj);
+  string dm;
+  const bool has_dm = RGWXMLDecoder::decode_xml("ExpiredObjectDeleteMarker", dm, obj);
+
+  const int present = (has_days ? 1 : 0) + (has_date ? 1 : 0) + (has_dm ? 1 : 0);
+  if (present != 1) {
+    throw RGWXMLDecoder::err("bad Expiration section");
+  }
+
+  if (has_date) {
+    if (!check_date(date)) {
+      //We need return xml error according to S3
+      throw RGWXMLDecoder::err("bad date in Date section");
+    }
+  }
+
+  if (has_dm) {
+    dm_expiration = (dm == "true");
+  }
+}
+
+// Read NoncurrentDays into days. The trailing `true` argument is passed to
+// the decoder — presumably marking the element as mandatory; confirm against
+// RGWXMLDecoder::decode_xml.
+void LCNoncurExpiration_S3::decode_xml(XMLObj *obj)
+{
+ RGWXMLDecoder::decode_xml("NoncurrentDays", days, obj, true);
+}
+
+// Emit days as a NoncurrentDays element.
+void LCNoncurExpiration_S3::dump_xml(Formatter *f) const
+{
+ encode_xml("NoncurrentDays", days, f);
+}
+
+// Read DaysAfterInitiation into days. The trailing `true` argument is passed
+// to the decoder — presumably marking the element as mandatory; confirm
+// against RGWXMLDecoder::decode_xml.
+void LCMPExpiration_S3::decode_xml(XMLObj *obj)
+{
+ RGWXMLDecoder::decode_xml("DaysAfterInitiation", days, obj, true);
+}
+
+// Emit days as a DaysAfterInitiation element.
+void LCMPExpiration_S3::dump_xml(Formatter *f) const
+{
+ encode_xml("DaysAfterInitiation", days, f);
+}
+
+// Parse all Rule elements into this configuration. Rules without an ID get a
+// random lower-case alphanumeric one. Throws RGWXMLDecoder::err when no
+// CephContext is set or when the resulting rule count exceeds
+// rgw_lc_max_rules.
+void RGWLifecycleConfiguration_S3::decode_xml(XMLObj *obj)
+{
+  if (cct == nullptr) {
+    throw RGWXMLDecoder::err("ERROR: RGWLifecycleConfiguration_S3 can't be decoded without cct initialized");
+  }
+
+  vector<LCRule_S3> parsed_rules;
+  RGWXMLDecoder::decode_xml("Rule", parsed_rules, obj, true);
+
+  // Length, in characters, of the IDs generated for rules that have none.
+  static constexpr auto LC_ID_LENGTH = 48;
+  for (auto it = parsed_rules.begin(); it != parsed_rules.end(); ++it) {
+    if (it->get_id().empty()) {
+      string generated_id = gen_rand_alphanumeric_lower(cct, LC_ID_LENGTH);
+      it->set_id(generated_id);
+    }
+    add_rule(*it);
+  }
+
+  if (cct->_conf->rgw_lc_max_rules < rule_map.size()) {
+    stringstream msg;
+    msg << "Warn: The lifecycle config has too many rules, rule number is:"
+        << rule_map.size() << ", max number is:" << cct->_conf->rgw_lc_max_rules;
+    throw RGWXMLDecoder::err(msg.str());
+  }
+}
+
+// Emit the filter conditions. When more than one condition is present they
+// are all wrapped in a single And element, prefix included, which is the
+// layout decode_xml() reads back.
+void LCFilter_S3::dump_xml(Formatter *f) const
+{
+  const bool multi = has_multi_condition();
+  if (multi) {
+    f->open_array_section("And");
+  }
+  // The prefix must be written inside And: decode_xml() looks for Prefix
+  // only under And when an And element exists.
+  if (has_prefix()) {
+    encode_xml("Prefix", prefix, f);
+  }
+  if (has_tags()) {
+    const auto& tagset_s3 = static_cast<const RGWObjTagSet_S3 &>(obj_tags);
+    tagset_s3.dump_xml(f);
+  }
+  if (multi) {
+    f->close_section();
+  }
+}
+
+// Parse a Filter element. Conditions are read from the And child when there
+// is one, otherwise from the Filter element itself; in the latter case more
+// than one condition is rejected with RGWXMLDecoder::err.
+void LCFilter_S3::decode_xml(XMLObj *obj)
+{
+  XMLObj *scope = obj->find_first("And");
+  const bool single_cond = (scope == nullptr);
+  if (single_cond) {
+    scope = obj;
+  }
+
+  int num_conditions = 0;
+
+  RGWXMLDecoder::decode_xml("Prefix", prefix, scope);
+  if (!prefix.empty()) {
+    ++num_conditions;
+  }
+
+  auto tags_iter = scope->find("Tag");
+  obj_tags.clear();
+  for (auto tag_xml = tags_iter.get_next(); tag_xml; tag_xml = tags_iter.get_next()) {
+    std::string tag_key;
+    std::string tag_val;
+    RGWXMLDecoder::decode_xml("Key", tag_key, tag_xml);
+    RGWXMLDecoder::decode_xml("Value", tag_val, tag_xml);
+    obj_tags.emplace_tag(std::move(tag_key), std::move(tag_val));
+    ++num_conditions;
+  }
+
+  if (single_cond && num_conditions > 1) {
+    throw RGWXMLDecoder::err("Bad filter: badly formed multiple conditions");
+  }
+}
+
+// Parse a Transition element: exactly one of Days / Date, a Date that passes
+// check_date(), and a mandatory StorageClass. Violations throw
+// RGWXMLDecoder::err.
+void LCTransition_S3::decode_xml(XMLObj *obj)
+{
+  const bool has_days = RGWXMLDecoder::decode_xml("Days", days, obj);
+  const bool has_date = RGWXMLDecoder::decode_xml("Date", date, obj);
+  // Both present or both absent is an error.
+  if (has_days == has_date) {
+    throw RGWXMLDecoder::err("bad Transition section");
+  }
+
+  if (has_date) {
+    if (!check_date(date)) {
+      //We need return xml error according to S3
+      throw RGWXMLDecoder::err("bad Date in Transition section");
+    }
+  }
+
+  const bool has_storage_class =
+    RGWXMLDecoder::decode_xml("StorageClass", storage_class, obj);
+  if (!has_storage_class) {
+    throw RGWXMLDecoder::err("missing StorageClass in Transition section");
+  }
+}
+
+// Emit Days when set, otherwise Date, followed by StorageClass.
+void LCTransition_S3::dump_xml(Formatter *f) const {
+  if (days.empty()) {
+    encode_xml("Date", date, f);
+  } else {
+    encode_xml("Days", days, f);
+  }
+  encode_xml("StorageClass", storage_class, f);
+}
+
+// Parse a NoncurrentVersionTransition element; both NoncurrentDays and
+// StorageClass are required, and a missing one throws RGWXMLDecoder::err.
+void LCNoncurTransition_S3::decode_xml(XMLObj *obj)
+{
+  const bool has_days = RGWXMLDecoder::decode_xml("NoncurrentDays", days, obj);
+  if (!has_days) {
+    throw RGWXMLDecoder::err("missing NoncurrentDays in NoncurrentVersionTransition section");
+  }
+
+  const bool has_storage_class =
+    RGWXMLDecoder::decode_xml("StorageClass", storage_class, obj);
+  if (!has_storage_class) {
+    throw RGWXMLDecoder::err("missing StorageClass in NoncurrentVersionTransition section");
+  }
+}
+
+// Emit NoncurrentDays followed by StorageClass.
+void LCNoncurTransition_S3::dump_xml(Formatter *f) const
+{
+ encode_xml("NoncurrentDays", days, f);
+ encode_xml("StorageClass", storage_class, f);
+}
+
+ // Parse one lifecycle <Rule>: ID, Filter (or the legacy top-level Prefix),
+ // Status, and at least one action (expiration / transition variant).
+ // Throws RGWXMLDecoder::err when a required part is missing or malformed.
+ void LCRule_S3::decode_xml(XMLObj *obj)
+ {
+   // Reset the simple fields before decoding.
+   id.clear();
+   prefix.clear();
+   status.clear();
+   dm_expiration = false;
+
+   RGWXMLDecoder::decode_xml("ID", id, obj);
+
+   LCFilter_S3 parsed_filter;
+   const bool filter_given = RGWXMLDecoder::decode_xml("Filter", parsed_filter, obj);
+   if (!filter_given) {
+     // Ideally the following code should be deprecated and we should return
+     // False here, The new S3 LC configuration xml spec. makes Filter mandatory
+     // and Prefix optional. However older clients including boto2 still generate
+     // xml according to the older spec, where Prefix existed outside of Filter
+     // and S3 itself seems to be sloppy on enforcing the mandatory Filter
+     // argument. A day will come when S3 enforces their own xml-spec, but it is
+     // not this day
+     const bool prefix_given = RGWXMLDecoder::decode_xml("Prefix", prefix, obj);
+     if (!prefix_given) {
+       throw RGWXMLDecoder::err("missing Prefix in Filter");
+     }
+   }
+   // Store only the LCFilter base part of the decoded filter.
+   filter = static_cast<LCFilter>(parsed_filter);
+
+   const bool status_given = RGWXMLDecoder::decode_xml("Status", status, obj);
+   if (!status_given) {
+     throw RGWXMLDecoder::err("missing Status in Filter");
+   }
+   const bool status_known = (status == "Enabled") || (status == "Disabled");
+   if (!status_known) {
+     throw RGWXMLDecoder::err("bad Status in Filter");
+   }
+
+   LCExpiration_S3 cur_expir;
+   LCNoncurExpiration_S3 noncur_expir;
+   LCMPExpiration_S3 mp_expir;
+
+   const bool got_expir =
+     RGWXMLDecoder::decode_xml("Expiration", cur_expir, obj);
+   const bool got_noncur_expir =
+     RGWXMLDecoder::decode_xml("NoncurrentVersionExpiration", noncur_expir, obj);
+   const bool got_mp_expir =
+     RGWXMLDecoder::decode_xml("AbortIncompleteMultipartUpload", mp_expir, obj);
+
+   vector<LCTransition_S3> parsed_transitions;
+   vector<LCNoncurTransition_S3> parsed_noncur_transitions;
+
+   const bool got_transition =
+     RGWXMLDecoder::decode_xml("Transition", parsed_transitions, obj);
+   const bool got_noncur_transition =
+     RGWXMLDecoder::decode_xml("NoncurrentVersionTransition", parsed_noncur_transitions, obj);
+
+   // A rule without any action is rejected.
+   const bool any_action = got_expir || got_noncur_expir || got_mp_expir ||
+                           got_transition || got_noncur_transition;
+   if (!any_action) {
+     throw RGWXMLDecoder::err("bad Rule");
+   }
+
+   if (got_expir) {
+     const bool timed = cur_expir.has_days() || cur_expir.has_date();
+     if (timed) {
+       expiration = cur_expir;
+     } else {
+       // Expiration carried neither Days nor Date: take its delete-marker flag.
+       dm_expiration = cur_expir.get_dm_expiration();
+     }
+   }
+   if (got_noncur_expir) {
+     noncur_expiration = noncur_expir;
+   }
+   if (got_mp_expir) {
+     mp_expiration = mp_expir;
+   }
+
+   for (auto& transition : parsed_transitions) {
+     if (!add_transition(transition)) {
+       throw RGWXMLDecoder::err("Failed to add transition");
+     }
+   }
+   for (auto& transition : parsed_noncur_transitions) {
+     if (!add_noncur_transition(transition)) {
+       throw RGWXMLDecoder::err("Failed to add non-current version transition");
+     }
+   }
+ }
+
+ // Serialize one lifecycle rule. Each optional section is written only when
+ // the corresponding member is non-empty.
+ void LCRule_S3::dump_xml(Formatter *f) const {
+   encode_xml("ID", id, f);
+
+   // In case of an empty filter and an empty Prefix, we defer to Prefix.
+   if (filter.empty()) {
+     encode_xml("Prefix", prefix, f);
+   } else {
+     const auto& s3_filter = static_cast<const LCFilter_S3&>(filter);
+     encode_xml("Filter", s3_filter, f);
+   }
+
+   encode_xml("Status", status, f);
+
+   const bool emit_expiration = !expiration.empty() || dm_expiration;
+   if (emit_expiration) {
+     LCExpiration_S3 s3_expir(expiration.get_days_str(), expiration.get_date(), dm_expiration);
+     encode_xml("Expiration", s3_expir, f);
+   }
+
+   if (!noncur_expiration.empty()) {
+     const auto& s3_noncur = static_cast<const LCNoncurExpiration_S3&>(noncur_expiration);
+     encode_xml("NoncurrentVersionExpiration", s3_noncur, f);
+   }
+
+   if (!mp_expiration.empty()) {
+     const auto& s3_mp = static_cast<const LCMPExpiration_S3&>(mp_expiration);
+     encode_xml("AbortIncompleteMultipartUpload", s3_mp, f);
+   }
+
+   // Iterating an empty container is a no-op, so no emptiness guard is needed.
+   for (auto& entry : transitions) {
+     const auto& s3_tran = static_cast<const LCTransition_S3&>(entry.second);
+     encode_xml("Transition", s3_tran, f);
+   }
+
+   for (auto& entry : noncur_transitions) {
+     const auto& s3_tran = static_cast<const LCNoncurTransition_S3&>(entry.second);
+     encode_xml("NoncurrentVersionTransition", s3_tran, f);
+   }
+ }
+
+ // Copy every rule of this configuration into `dest` via check_and_add_rule().
+ // Stops at the first negative result and returns it; otherwise returns
+ // -ERR_INVALID_REQUEST when dest.valid() is false, else the last result.
+ // `store` is not used here.
+ int RGWLifecycleConfiguration_S3::rebuild(RGWRados *store, RGWLifecycleConfiguration& dest)
+ {
+   int ret = 0;
+   for (auto& entry : rule_map) {
+     ret = dest.check_and_add_rule(entry.second);
+     if (ret < 0) {
+       return ret;
+     }
+   }
+
+   if (dest.valid()) {
+     return ret;
+   }
+   return -ERR_INVALID_REQUEST;
+ }
+
+
+ // Write every stored rule as a <Rule> element, in rule_map order.
+ void RGWLifecycleConfiguration_S3::dump_xml(Formatter *f) const
+ {
+   for (const auto& entry : rule_map) {
+     const auto& s3_rule = static_cast<const LCRule_S3&>(entry.second);
+     encode_xml("Rule", s3_rule, f);
+   }
+ }
+
diff --git a/src/rgw/rgw_lc_s3.h b/src/rgw/rgw_lc_s3.h
new file mode 100644
index 00000000..214ca54c
--- /dev/null
+++ b/src/rgw/rgw_lc_s3.h
@@ -0,0 +1,102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_LC_S3_H
+#define CEPH_RGW_LC_S3_H
+
+#include <map>
+#include <string>
+#include <iostream>
+#include <include/types.h>
+
+#include "include/str_list.h"
+#include "rgw_lc.h"
+#include "rgw_xml.h"
+#include "rgw_tag_s3.h"
+
+ // LCFilter with S3 XML encode/decode entry points.
+ class LCFilter_S3 : public LCFilter
+ {
+ public:
+   // Populate this filter from a parsed XML node.
+   void decode_xml(XMLObj *obj);
+
+   // Write this filter through the given formatter.
+   void dump_xml(Formatter *f) const;
+ };
+
+ // LCExpiration with S3 XML encode/decode entry points and an extra
+ // delete-marker expiration flag.
+ class LCExpiration_S3 : public LCExpiration
+ {
+ private:
+   bool dm_expiration{false};
+ public:
+   LCExpiration_S3() {}
+   LCExpiration_S3(string _days, string _date, bool _dm_expiration) : LCExpiration(_days, _date), dm_expiration(_dm_expiration) {}
+
+   void dump_xml(Formatter *f) const;
+   void decode_xml(XMLObj *obj);
+
+   void set_dm_expiration(bool _dm_expiration) {
+     dm_expiration = _dm_expiration;
+   }
+
+   // Read-only accessor; const-qualified so it is also usable on const
+   // objects (existing non-const callers are unaffected).
+   bool get_dm_expiration() const {
+     return dm_expiration;
+   }
+ };
+
+ // LCExpiration specialization used for the NoncurrentVersionExpiration
+ // element (see LCRule_S3::decode_xml / dump_xml).
+ class LCNoncurExpiration_S3 : public LCExpiration
+ {
+ public:
+   LCNoncurExpiration_S3() {}
+
+   void dump_xml(Formatter *f) const;
+   void decode_xml(XMLObj *obj);
+ };
+
+ // LCExpiration specialization used for the AbortIncompleteMultipartUpload
+ // element (see LCRule_S3::decode_xml / dump_xml).
+ class LCMPExpiration_S3 : public LCExpiration
+ {
+ public:
+   LCMPExpiration_S3() {}
+
+   void dump_xml(Formatter *f) const;
+   void decode_xml(XMLObj *obj);
+ };
+
+ // LCTransition with S3 XML encode/decode entry points for the Transition
+ // element.
+ class LCTransition_S3 : public LCTransition
+ {
+ public:
+   LCTransition_S3() {}
+
+   void dump_xml(Formatter *f) const;
+   void decode_xml(XMLObj *obj);
+ };
+
+ // LCTransition with S3 XML encode/decode entry points for the
+ // NoncurrentVersionTransition element.
+ class LCNoncurTransition_S3 : public LCTransition
+ {
+ public:
+   LCNoncurTransition_S3() {}
+   ~LCNoncurTransition_S3() {}
+
+   void dump_xml(Formatter *f) const;
+   void decode_xml(XMLObj *obj);
+ };
+
+
+ // LCRule with S3 XML encode/decode entry points for a single Rule element.
+ class LCRule_S3 : public LCRule
+ {
+ public:
+   LCRule_S3() {}
+
+   void decode_xml(XMLObj *obj);
+   void dump_xml(Formatter *f) const;
+ };
+
+ // RGWLifecycleConfiguration with S3 XML encode/decode entry points.
+ class RGWLifecycleConfiguration_S3 : public RGWLifecycleConfiguration
+ {
+ public:
+   // Default construction passes a null CephContext to the base class.
+   RGWLifecycleConfiguration_S3() : RGWLifecycleConfiguration(nullptr) {}
+   explicit RGWLifecycleConfiguration_S3(CephContext *_cct) : RGWLifecycleConfiguration(_cct) {}
+
+   void decode_xml(XMLObj *obj);
+   void dump_xml(Formatter *f) const;
+
+   // Re-add this configuration's rules to `dest`; returns a negative error
+   // code on failure.
+   int rebuild(RGWRados *store, RGWLifecycleConfiguration& dest);
+ };
+
+#endif
diff --git a/src/rgw/rgw_ldap.cc b/src/rgw/rgw_ldap.cc
new file mode 100644
index 00000000..f2009b06
--- /dev/null
+++ b/src/rgw/rgw_ldap.cc
@@ -0,0 +1,128 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_ldap.h"
+
+#include "common/ceph_crypto.h"
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+#include "common/safe_io.h"
+#include <boost/algorithm/string.hpp>
+
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+ /**
+  * Read the LDAP bind password from the file named by the rgw_ldap_secret
+  * config option.
+  *
+  * @param ctx context providing the configuration and log sink
+  * @return the file contents with surrounding whitespace (including the
+  *         trailing newline) removed; empty when the option is unset, the
+  *         file cannot be read, or it holds only whitespace
+  */
+ std::string parse_rgw_ldap_bindpw(CephContext* ctx)
+ {
+   string ldap_bindpw;
+   string ldap_secret = ctx->_conf->rgw_ldap_secret;
+
+   if (ldap_secret.empty()) {
+     ldout(ctx, 10)
+       << __func__ << " LDAP auth no rgw_ldap_secret file found in conf"
+       << dendl;
+   } else {
+     // FIPS zeroization audit 20191116: this memset is not intended to
+     // wipe out a secret after use.
+     char bindpw[1024];
+     memset(bindpw, 0, sizeof(bindpw));
+     // Read at most sizeof-1 bytes so the buffer stays NUL-terminated.
+     int pwlen = safe_read_file("" /* base */, ldap_secret.c_str(),
+                                bindpw, sizeof(bindpw) - 1);
+     if (pwlen > 0) {
+       ldap_bindpw = bindpw;
+       // trim() strips all leading/trailing whitespace, newline included, so
+       // no separate newline check is needed. The previous back() == '\n'
+       // test was dead code and undefined behaviour on an all-whitespace file
+       // (back() on an empty string).
+       boost::algorithm::trim(ldap_bindpw);
+     } else if (pwlen < 0) {
+       // Report the failure instead of silently returning an empty password.
+       ldout(ctx, 5)
+         << __func__ << " failed to read rgw_ldap_secret file, ret=" << pwlen
+         << dendl;
+     }
+     ::ceph::crypto::zeroize_for_security(bindpw, sizeof(bindpw));
+   }
+
+   return ldap_bindpw;
+ }
+
+#if defined(HAVE_OPENLDAP)
+namespace rgw {
+
+   /**
+    * Authenticate a user against the directory.
+    *
+    * Searches below searchdn for an entry matching uid, then attempts a
+    * simple bind as that entry's DN with pwd. If the search itself fails the
+    * service connection is rebound once and the search is retried.
+    *
+    * @param uid user name as supplied by the client (untrusted)
+    * @param pwd password to verify
+    * @return LDAP_SUCCESS on success, -EACCES otherwise
+    */
+   int LDAPHelper::auth(const std::string &uid, const std::string &pwd) {
+     /* uid is client-controlled: escape the characters that are special in
+      * an LDAP search filter (RFC 4515) so it can only ever match as a
+      * literal value and cannot alter the filter structure */
+     auto escape_filter_value = [](const std::string& in) {
+       static const char hexdigits[] = "0123456789abcdef";
+       std::string out;
+       out.reserve(in.size());
+       for (const unsigned char c : in) {
+         switch (c) {
+         case '\0':
+         case '(':
+         case ')':
+         case '*':
+         case '\\':
+           out += '\\';
+           out += hexdigits[c >> 4];
+           out += hexdigits[c & 0xf];
+           break;
+         default:
+           out += static_cast<char>(c);
+           break;
+         }
+       }
+       return out;
+     };
+     const std::string safe_uid = escape_filter_value(uid);
+
+     int ret;
+     std::string filter;
+     if (msad) {
+       filter = "(&(objectClass=user)(sAMAccountName=";
+       filter += safe_uid;
+       filter += "))";
+     } else {
+       /* openldap */
+       if (searchfilter.empty()) {
+         /* no search filter provided in config, we construct our own */
+         filter = "(";
+         filter += dnattr;
+         filter += "=";
+         filter += safe_uid;
+         filter += ")";
+       } else {
+         const std::string placeholder("@USERNAME@");
+         if (searchfilter.find(placeholder) != std::string::npos) {
+           /* substitute every @USERNAME@ placeholder; resume scanning after
+            * the inserted text so a uid containing the placeholder cannot
+            * cause a loop */
+           filter = searchfilter;
+           for (auto pos = filter.find(placeholder);
+                pos != std::string::npos;
+                pos = filter.find(placeholder, pos + safe_uid.length())) {
+             filter.replace(pos, placeholder.length(), safe_uid);
+           }
+         } else {
+           /* no placeholder for username, so we need to append our own username filter to the custom searchfilter */
+           filter = "(&(";
+           filter += searchfilter;
+           filter += ")(";
+           filter += dnattr;
+           filter += "=";
+           filter += safe_uid;
+           filter += "))";
+         }
+       }
+     }
+     ldout(g_ceph_context, 12)
+       << __func__ << " search filter: " << filter
+       << dendl;
+     char *attrs[] = { const_cast<char*>(dnattr.c_str()), nullptr };
+
+     lock_guard guard(mtx);
+
+     /* at most two attempts: the initial search, plus one retry after a
+      * rebind if the search itself failed */
+     for (int attempt = 0; attempt < 2; ++attempt) {
+       LDAPMessage *answer = nullptr;
+       ret = ldap_search_s(ldap, searchdn.c_str(), LDAP_SCOPE_SUBTREE,
+                           filter.c_str(), attrs, 0, &answer);
+       if (ret == LDAP_SUCCESS) {
+         LDAPMessage *entry = ldap_first_entry(ldap, answer);
+         if (entry) {
+           char *dn = ldap_get_dn(ldap, entry);
+           if (dn) {
+             ret = simple_bind(dn, pwd);
+             if (ret != LDAP_SUCCESS) {
+               ldout(g_ceph_context, 10)
+                 << __func__ << " simple_bind failed uid=" << uid
+                 << " ldap err=" << ret
+                 << dendl;
+             }
+             ldap_memfree(dn);
+           } else {
+             /* never bind with a null DN */
+             ldout(g_ceph_context, 10)
+               << __func__ << " ldap_get_dn failed uid=" << uid
+               << dendl;
+             ret = LDAP_OTHER;
+           }
+         } else {
+           ldout(g_ceph_context, 12)
+             << __func__ << " ldap_search_s no user matching uid=" << uid
+             << dendl;
+           ret = LDAP_NO_SUCH_ATTRIBUTE; // fixup result
+         }
+         ldap_msgfree(answer);
+         break;
+       }
+
+       ldout(g_ceph_context, 5)
+         << __func__ << " ldap_search_s error uid=" << uid
+         << " ldap err=" << ret
+         << dendl;
+       /* the result must be released even when the search failed */
+       if (answer) {
+         ldap_msgfree(answer);
+       }
+       /* search should never fail--try to rebind */
+       if (attempt == 0) {
+         rebind();
+       }
+     }
+     return (ret == LDAP_SUCCESS) ? ret : -EACCES;
+   } /* LDAPHelper::auth */
+}
+
+#endif /* defined(HAVE_OPENLDAP) */
diff --git a/src/rgw/rgw_ldap.h b/src/rgw/rgw_ldap.h
new file mode 100644
index 00000000..aeb5f613
--- /dev/null
+++ b/src/rgw/rgw_ldap.h
@@ -0,0 +1,143 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_LDAP_H
+#define RGW_LDAP_H
+
+#include "acconfig.h"
+
+#if defined(HAVE_OPENLDAP)
+#define LDAP_DEPRECATED 1
+#include "ldap.h"
+#endif
+
+#include <stdint.h>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <mutex>
+
+namespace rgw {
+
+#if defined(HAVE_OPENLDAP)
+
+   /**
+    * Thin wrapper around an OpenLDAP connection used to authenticate users.
+    *
+    * One long-lived handle (`ldap`), bound with the service credentials, is
+    * used for searches; each credential check opens and closes its own
+    * temporary handle in simple_bind().
+    */
+   class LDAPHelper
+   {
+     std::string uri;
+     std::string binddn;
+     std::string bindpw;
+     std::string searchdn;
+     std::string searchfilter;
+     std::string dnattr;
+     LDAP *ldap;
+     bool msad = false; /* TODO: possible future specialization */
+     std::mutex mtx;
+
+   public:
+     using lock_guard = std::lock_guard<std::mutex>;
+
+     LDAPHelper(std::string _uri, std::string _binddn, std::string _bindpw,
+                const std::string &_searchdn, const std::string &_searchfilter, const std::string &_dnattr)
+       : uri(std::move(_uri)), binddn(std::move(_binddn)),
+         bindpw(std::move(_bindpw)), searchdn(_searchdn), searchfilter(_searchfilter), dnattr(_dnattr),
+         ldap(nullptr) {
+       // nothing
+     }
+
+     /**
+      * Create the service handle and select LDAPv3 with referrals off.
+      * @return LDAP_SUCCESS, or -EINVAL on any failure
+      */
+     int init() {
+       int ret;
+       ret = ldap_initialize(&ldap, uri.c_str());
+       if (ret == LDAP_SUCCESS) {
+         /* LDAP_OPT_PROTOCOL_VERSION reads an int through the pointer; a
+          * wider type would yield the wrong value on big-endian LP64 */
+         const int ldap_ver = LDAP_VERSION3;
+         ret = ldap_set_option(ldap, LDAP_OPT_PROTOCOL_VERSION,
+                               &ldap_ver);
+       }
+       if (ret == LDAP_SUCCESS) {
+         ret = ldap_set_option(ldap, LDAP_OPT_REFERRALS, LDAP_OPT_OFF);
+       }
+       return (ret == LDAP_SUCCESS) ? ret : -EINVAL;
+     }
+
+     /**
+      * Bind the service handle with the configured binddn/bindpw.
+      * @return LDAP_SUCCESS, or -EINVAL on failure
+      */
+     int bind() {
+       int ret;
+       ret = ldap_simple_bind_s(ldap, binddn.c_str(), bindpw.c_str());
+       return (ret == LDAP_SUCCESS) ? ret : -EINVAL;
+     }
+
+     /**
+      * Drop the current service handle (if any), then re-create and re-bind
+      * it. If re-initialisation fails the error is returned and no bind is
+      * attempted on the invalid handle.
+      * @return LDAP_SUCCESS, or -EINVAL on failure
+      */
+     int rebind() {
+       if (ldap) {
+         (void) ldap_unbind(ldap);
+         ldap = nullptr; /* the handle is freed by ldap_unbind */
+       }
+       int ret = init();
+       if (ret != LDAP_SUCCESS) {
+         return ret;
+       }
+       return bind();
+     }
+
+     /**
+      * Verify a DN/password pair by binding on a temporary connection.
+      * The temporary handle is always released, whatever the outcome.
+      * @return an OpenLDAP result code (LDAP_SUCCESS on success)
+      */
+     int simple_bind(const char *dn, const std::string& pwd) {
+       LDAP* tldap = nullptr;
+       int ret = ldap_initialize(&tldap, uri.c_str());
+       if (ret == LDAP_SUCCESS) {
+         const int ldap_ver = LDAP_VERSION3;
+         ret = ldap_set_option(tldap, LDAP_OPT_PROTOCOL_VERSION,
+                               &ldap_ver);
+         if (ret == LDAP_SUCCESS) {
+           ret = ldap_simple_bind_s(tldap, dn, pwd.c_str());
+         }
+         /* unbind unconditionally: previously the handle leaked on every
+          * failed bind, i.e. on every rejected login */
+         (void) ldap_unbind(tldap);
+       }
+       return ret; // OpenLDAP client error space
+     }
+
+     int auth(const std::string &uid, const std::string &pwd);
+
+     ~LDAPHelper() {
+       if (ldap)
+         (void) ldap_unbind(ldap);
+     }
+
+   }; /* LDAPHelper */
+
+#else
+
+   /* Stand-in used when built without OpenLDAP: same public entry points,
+    * every operation reports failure. */
+   class LDAPHelper
+   {
+   public:
+     LDAPHelper(const std::string &_uri, const std::string &_binddn, const std::string &_bindpw,
+                const std::string &_searchdn, const std::string &_searchfilter, const std::string &_dnattr)
+     {
+       /* arguments intentionally ignored */
+     }
+
+     ~LDAPHelper() {}
+
+     /* LDAP is unavailable in this build */
+     int init() { return -ENOTSUP; }
+
+     /* LDAP is unavailable in this build */
+     int bind() { return -ENOTSUP; }
+
+     /* no directory to check against: always deny */
+     int auth(const std::string &uid, const std::string &pwd) { return -EACCES; }
+
+   }; /* LDAPHelper */
+
+
+#endif /* HAVE_OPENLDAP */
+
+} /* namespace rgw */
+
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+#include "common/safe_io.h"
+#include <boost/algorithm/string.hpp>
+
+#include "include/ceph_assert.h"
+
+std::string parse_rgw_ldap_bindpw(CephContext* ctx);
+
+#endif /* RGW_LDAP_H */
diff --git a/src/rgw/rgw_lib.h b/src/rgw/rgw_lib.h
new file mode 100644
index 00000000..9afd8d13
--- /dev/null
+++ b/src/rgw/rgw_lib.h
@@ -0,0 +1,225 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_LIB_H
+#define RGW_LIB_H
+
+#include <mutex>
+#include "include/unordered_map.h"
+#include "global/global_init.h"
+#include "rgw_common.h"
+#include "rgw_client_io.h"
+#include "rgw_rest.h"
+#include "rgw_request.h"
+#include "rgw_frontend.h"
+#include "rgw_process.h"
+#include "rgw_rest_s3.h" // RGW_Auth_S3
+#include "rgw_ldap.h"
+#include "services/svc_zone_utils.h"
+#include "include/ceph_assert.h"
+
+class OpsLogSocket;
+
+namespace rgw {
+
+ class RGWLibFrontend;
+
+   /* Library-wide RGW state: frontend, store and helper handles, with
+    * init()/stop() entry points declared here and defined elsewhere. */
+   class RGWLib {
+     RGWFrontendConfig* fec{nullptr};
+     RGWLibFrontend* fe{nullptr};
+     OpsLogSocket* olog{nullptr};
+     rgw::LDAPHelper* ldh{nullptr};
+     RGWREST rest; // XXX needed for RGWProcessEnv
+     RGWRados* store{nullptr};
+     boost::intrusive_ptr<CephContext> cct;
+
+   public:
+     RGWLib() {}
+     ~RGWLib() {}
+
+     /* plain accessors for the held pointers */
+     RGWRados* get_store() {
+       return store;
+     }
+
+     RGWLibFrontend* get_fe() {
+       return fe;
+     }
+
+     rgw::LDAPHelper* get_ldh() {
+       return ldh;
+     }
+
+     int init();
+     int init(vector<const char *>& args);
+     int stop();
+   };
+
+ extern RGWLib rgwlib;
+
+/* request interface */
+
+   /* Client-IO object for library requests: owns the request environment
+    * and a copy of the user info; the accounting hooks are no-ops. */
+   class RGWLibIO : public rgw::io::BasicClient,
+                    public rgw::io::Accounter
+   {
+     RGWUserInfo user_info;
+     RGWEnv env;
+   public:
+     /* default construction seeds an empty HTTP_HOST in the environment */
+     RGWLibIO() {
+       get_env().set("HTTP_HOST", "");
+     }
+
+     explicit RGWLibIO(const RGWUserInfo &_user_info)
+       : user_info(_user_info)
+     {}
+
+     int init_env(CephContext *cct) override {
+       env.init(cct);
+       return 0;
+     }
+
+     const RGWUserInfo& get_user() { return user_info; }
+
+     int set_uid(RGWRados* store, const rgw_user& uid);
+
+     int write_data(const char *buf, int len);
+     int read_data(char *buf, int len);
+     int send_status(int status, const char *status_name);
+     int send_100_continue();
+     int complete_header();
+     int send_content_length(uint64_t len);
+
+     RGWEnv& get_env() noexcept override { return env; }
+
+     size_t complete_request() override { /* XXX */
+       return 0;
+     };
+
+     /* accounting is not tracked for library I/O */
+     void set_account(bool) override {}
+
+     uint64_t get_bytes_sent() const override { return 0; }
+
+     uint64_t get_bytes_received() const override { return 0; }
+
+   }; /* RGWLibIO */
+
+/* XXX */
+   /* RGWRESTMgr subclass for the library front end; adds nothing of its own. */
+   class RGWRESTMgr_Lib : public RGWRESTMgr {
+   public:
+     RGWRESTMgr_Lib() {
+     }
+     ~RGWRESTMgr_Lib() override {
+     }
+   }; /* RGWRESTMgr_Lib */
+
+/* XXX */
+   /* RGWHandler for library requests: overrides authorize() and offers a
+    * static init_from_header() helper (both defined elsewhere). */
+   class RGWHandler_Lib : public RGWHandler {
+     friend class RGWRESTMgr_Lib;
+   public:
+     RGWHandler_Lib() {}
+     ~RGWHandler_Lib() override {}
+
+     int authorize(const DoutPrefixProvider *dpp) override;
+
+     static int init_from_header(struct req_state *s);
+   }; /* RGWHandler_Lib */
+
+ /* Base class for library requests: combines RGWRequest with
+ * RGWHandler_Lib. Concrete requests implement header_init(), op_init()
+ * and only_bucket(). */
+ class RGWLibRequest : public RGWRequest,
+ public RGWHandler_Lib {
+ public:
+ CephContext* cct;
+ RGWUserInfo* user;
+ boost::optional<RGWSysObjectCtx> sysobj_ctx;
+
+ /* unambiguously return req_state */
+ inline struct req_state* get_state() { return this->RGWRequest::s; }
+
+ /* the request id is drawn from the global rgwlib store */
+ RGWLibRequest(CephContext* _cct, RGWUserInfo* _user)
+ : RGWRequest(rgwlib.get_store()->get_new_req_id()), cct(_cct),
+ user(_user)
+ {}
+
+ RGWUserInfo* get_user() { return user; }
+
+ int postauth_init() override { return 0; }
+
+ /* descendant equivalent of *REST*::init_from_header(...):
+ * prepare request for execute()--should mean, fixup URI-alikes
+ * and any other expected stat vars in local req_state, for
+ * now */
+ virtual int header_init() = 0;
+
+ /* descendant initializer responsible to call RGWOp::init()--which
+ * descendants are required to inherit */
+ virtual int op_init() = 0;
+
+ using RGWHandler::init;
+
+ /* Bind this request to _s and io, fill in the object contexts and the
+ * request/transaction ids, then run header_init() and, if that returns 0,
+ * init_from_header(). Returns the first non-zero result, else 0. */
+ int init(const RGWEnv& rgw_env, RGWObjectCtx* rados_ctx,
+ RGWLibIO* io, struct req_state* _s) {
+
+ RGWRequest::init_state(_s);
+ RGWHandler::init(rados_ctx->get_store(), _s, io);
+
+ /* NOTE(review): `store` is presumably set by RGWHandler::init() above,
+ * so this must stay after it -- confirm against RGWHandler */
+ sysobj_ctx.emplace(store->svc.sysobj);
+
+ get_state()->obj_ctx = rados_ctx;
+ get_state()->sysobj_ctx = &(sysobj_ctx.get());
+ get_state()->req_id = store->svc.zone_utils->unique_id(id);
+ get_state()->trans_id = store->svc.zone_utils->unique_trans_id(id);
+
+ ldpp_dout(_s, 2) << "initializing for trans_id = "
+ << get_state()->trans_id.c_str() << dendl;
+
+ int ret = header_init();
+ if (ret == 0) {
+ ret = init_from_header(_s);
+ }
+ return ret;
+ }
+
+ virtual bool only_bucket() = 0;
+
+ int read_permissions(RGWOp *op) override;
+
+ }; /* RGWLibRequest */
+
+ /* Library request that owns its own io context, req_state and object
+ * context, and is driven through exec_start()/exec_continue()/
+ * exec_finish() instead of execute(). */
+ class RGWLibContinuedReq : public RGWLibRequest {
+ /* declaration order matters: rstate is built from io_ctx, and rados_ctx
+ * from rstate, in the constructor's initializer list */
+ RGWLibIO io_ctx;
+ struct req_state rstate;
+ RGWObjectCtx rados_ctx;
+ public:
+
+ /* performs the same state wiring as RGWLibRequest::init(), but against
+ * the members owned by this object, and does not run header_init() */
+ RGWLibContinuedReq(CephContext* _cct, RGWUserInfo* _user)
+ : RGWLibRequest(_cct, _user), io_ctx(),
+ rstate(_cct, &io_ctx.get_env(), _user, id),
+ rados_ctx(rgwlib.get_store(), &rstate)
+ {
+ io_ctx.init(_cct);
+
+ RGWRequest::init_state(&rstate);
+ RGWHandler::init(rados_ctx.get_store(), &rstate, &io_ctx);
+
+ sysobj_ctx.emplace(store->svc.sysobj);
+
+ get_state()->obj_ctx = &rados_ctx;
+ get_state()->sysobj_ctx = &(sysobj_ctx.get());
+ get_state()->req_id = store->svc.zone_utils->unique_id(id);
+ get_state()->trans_id = store->svc.zone_utils->unique_trans_id(id);
+
+ ldpp_dout(get_state(), 2) << "initializing for trans_id = "
+ << get_state()->trans_id.c_str() << dendl;
+ }
+
+ inline RGWRados* get_store() { return store; }
+
+ /* execute() must never be called on a continued request: it aborts */
+ virtual int execute() final { ceph_abort(); }
+ virtual int exec_start() = 0;
+ virtual int exec_continue() = 0;
+ virtual int exec_finish() = 0;
+
+ }; /* RGWLibContinuedReq */
+
+} /* namespace rgw */
+
+#endif /* RGW_LIB_H */
diff --git a/src/rgw/rgw_lib_frontend.h b/src/rgw/rgw_lib_frontend.h
new file mode 100644
index 00000000..ec4ede04
--- /dev/null
+++ b/src/rgw/rgw_lib_frontend.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_LIB_FRONTEND_H
+#define RGW_LIB_FRONTEND_H
+
+#include <boost/container/flat_map.hpp>
+
+#include "rgw_lib.h"
+#include "rgw_file.h"
+
+namespace rgw {
+
+   /* RGWProcess for the library front end. Keeps a registry of mounted
+    * RGWLibFS instances and a generation counter that is bumped whenever
+    * the registry changes.
+    * NOTE(review): register_fs()/unregister_fs() take mtx, stop() does not. */
+   class RGWLibProcess : public RGWProcess {
+     RGWAccessKey access_key;
+     std::mutex mtx;
+     std::condition_variable cv;
+     int gen;
+     bool shutdown;
+
+     using FSMAP = flat_map<RGWLibFS*, RGWLibFS*>;
+     FSMAP mounted_fs;
+
+     using lock_guard = std::lock_guard<std::mutex>;
+     using unique_lock = std::unique_lock<std::mutex>;
+
+   public:
+     RGWLibProcess(CephContext* cct, RGWProcessEnv* pe, int num_threads,
+                   RGWFrontendConfig* _conf)
+       : RGWProcess(cct, pe, num_threads, _conf),
+         gen(0),
+         shutdown(false)
+     {}
+
+     void run() override;
+     void checkpoint();
+
+     /* flag shutdown, stop every registered fs, wake all cv waiters */
+     void stop() {
+       shutdown = true;
+       for (const auto& mount : mounted_fs) {
+         mount.second->stop();
+       }
+       cv.notify_all();
+     }
+
+     /* add fs to the registry (keyed by its own address) */
+     void register_fs(RGWLibFS* fs) {
+       lock_guard guard(mtx);
+       mounted_fs.emplace(fs, fs);
+       ++gen;
+     }
+
+     /* remove fs from the registry; gen changes only if it was present */
+     void unregister_fs(RGWLibFS* fs) {
+       lock_guard guard(mtx);
+       const auto pos = mounted_fs.find(fs);
+       if (pos == mounted_fs.end()) {
+         return;
+       }
+       mounted_fs.erase(pos);
+       ++gen;
+     }
+
+     /* take one throttle unit, then hand the request to the work queue */
+     void enqueue_req(RGWLibRequest* req) {
+
+       lsubdout(g_ceph_context, rgw, 10)
+         << __func__ << " enqueue request req="
+         << hex << req << dec << dendl;
+
+       req_throttle.get(1);
+       req_wq.queue(req);
+     } /* enqueue_req */
+
+     /* "regular" requests */
+     void handle_request(RGWRequest* req) override; // async handler, deletes req
+     int process_request(RGWLibRequest* req);
+     int process_request(RGWLibRequest* req, RGWLibIO* io);
+     void set_access_key(RGWAccessKey& key) { access_key = key; }
+
+     /* requests w/continue semantics */
+     int start_request(RGWLibContinuedReq* req);
+     int finish_request(RGWLibContinuedReq* req);
+   }; /* RGWLibProcess */
+
+   /* Front end whose process object is an RGWLibProcess; the helpers below
+    * forward to that process. */
+   class RGWLibFrontend : public RGWProcessFrontend {
+   public:
+     RGWLibFrontend(RGWProcessEnv& pe, RGWFrontendConfig *_conf)
+       : RGWProcessFrontend(pe, _conf)
+     {}
+
+     int init() override;
+
+     /* stop the base front end first, then the lib process */
+     void stop() override {
+       RGWProcessFrontend::stop();
+       get_process()->stop();
+     }
+
+     /* pprocess viewed as the RGWLibProcess it is cast to here */
+     RGWLibProcess* get_process() {
+       return static_cast<RGWLibProcess*>(pprocess);
+     }
+
+     inline void enqueue_req(RGWLibRequest* req) {
+       get_process()->enqueue_req(req); // async
+     }
+
+     inline int execute_req(RGWLibRequest* req) {
+       return get_process()->process_request(req); // !async
+     }
+
+     inline int start_req(RGWLibContinuedReq* req) {
+       return get_process()->start_request(req);
+     }
+
+     inline int finish_req(RGWLibContinuedReq* req) {
+       return get_process()->finish_request(req);
+     }
+
+   }; /* RGWLibFrontend */
+
+} /* namespace rgw */
+
+#endif /* RGW_LIB_FRONTEND_H */
diff --git a/src/rgw/rgw_loadgen.cc b/src/rgw/rgw_loadgen.cc
new file mode 100644
index 00000000..e13520dd
--- /dev/null
+++ b/src/rgw/rgw_loadgen.cc
@@ -0,0 +1,128 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <algorithm>
+#include <sstream>
+#include <string.h>
+
+#include "rgw_loadgen.h"
+#include "rgw_auth_s3.h"
+
+
+#define dout_subsys ceph_subsys_rgw
+
+ // Store the asctime rendering of tm as this request's date string.
+ void RGWLoadGenRequestEnv::set_date(utime_t& tm)
+ {
+   this->date_str = rgw_to_asctime(tm);
+ }
+
+ // Build the S3 canonical header for this request and store the resulting
+ // AWS v2 Authorization header (plus HTTP_DATE) in `headers`.
+ // Returns 0 on success, or the int thrown while computing the signature.
+ int RGWLoadGenRequestEnv::sign(RGWAccessKey& access_key)
+ {
+   meta_map_t meta_map;
+   map<string, string> sub_resources;
+   string canonical_header;
+
+   rgw_create_s3_canonical_header(request_method.c_str(),
+                                  nullptr, /* const char *content_md5 */
+                                  content_type.c_str(),
+                                  date_str.c_str(),
+                                  meta_map,
+                                  meta_map_t{},
+                                  uri.c_str(),
+                                  sub_resources,
+                                  canonical_header);
+
+   headers["HTTP_DATE"] = date_str;
+
+   try {
+     /* FIXME(rzarzynski): kill the dependency on g_ceph_context. */
+     const std::string signature = static_cast<std::string>(
+       rgw::auth::s3::get_v2_signature(g_ceph_context, canonical_header,
+                                       access_key.key));
+
+     std::string authorization("AWS ");
+     authorization += access_key.id;
+     authorization += ":";
+     authorization += signature;
+     headers["HTTP_AUTHORIZATION"] = authorization;
+   } catch (int ret) {
+     return ret;
+   }
+
+   return 0;
+ }
+
+ // Outbound data is discarded; the full length is reported as written.
+ size_t RGWLoadGenIO::write_data(const char* const buf, const size_t len)
+ {
+   const size_t written = len;
+   return written;
+ }
+
+ // Pretend to read up to len bytes: nothing is copied into buf, only the
+ // remaining-byte counter is decremented. Returns the number of bytes
+ // "consumed", never more than what is left.
+ size_t RGWLoadGenIO::read_data(char* const buf, const size_t len)
+ {
+   const uint64_t wanted = static_cast<uint64_t>(len);
+   const size_t consumed = (wanted < left_to_read) ? wanted : left_to_read;
+   left_to_read -= consumed;
+   return consumed;
+ }
+
+ // Nothing is buffered, so there is nothing to flush.
+ void RGWLoadGenIO::flush()
+ {
+   return;
+ }
+
+ // No-op for the load generator; always reports 0.
+ size_t RGWLoadGenIO::complete_request()
+ {
+   const size_t sent = 0;
+   return sent;
+ }
+
+ // Populate the CGI-style environment from the synthetic request and arm
+ // the read counter with the request's content length. Always returns 0.
+ int RGWLoadGenIO::init_env(CephContext *cct)
+ {
+   env.init(cct);
+
+   left_to_read = req->content_length;
+
+   char length_str[32];
+   snprintf(length_str, sizeof(length_str), "%lld",
+            (long long)req->content_length);
+   env.set("CONTENT_LENGTH", length_str);
+
+   env.set("CONTENT_TYPE", req->content_type.c_str());
+   env.set("HTTP_DATE", req->date_str.c_str());
+
+   // Copy every request header through as-is.
+   for (const auto& header : req->headers) {
+     env.set(header.first.c_str(), header.second.c_str());
+   }
+
+   env.set("REQUEST_METHOD", req->request_method.c_str());
+   env.set("REQUEST_URI", req->uri.c_str());
+   env.set("QUERY_STRING", req->query_string.c_str());
+   env.set("SCRIPT_URI", req->uri.c_str());
+
+   char port_str[16];
+   snprintf(port_str, sizeof(port_str), "%d", req->port);
+   env.set("SERVER_PORT", port_str);
+
+   return 0;
+ }
+
+ // The status line is dropped; 0 bytes are reported as sent.
+ size_t RGWLoadGenIO::send_status(const int status, const char* const status_name)
+ {
+   const size_t sent = 0;
+   return sent;
+ }
+
+ // No interim response is emitted; 0 bytes are reported as sent.
+ size_t RGWLoadGenIO::send_100_continue()
+ {
+   const size_t sent = 0;
+   return sent;
+ }
+
+ // Response headers are dropped; 0 bytes are reported as sent.
+ size_t RGWLoadGenIO::send_header(const boost::string_ref& name, const boost::string_ref& value)
+ {
+   const size_t sent = 0;
+   return sent;
+ }
+
+ // No header terminator is written; 0 bytes are reported as sent.
+ size_t RGWLoadGenIO::complete_header()
+ {
+   const size_t sent = 0;
+   return sent;
+ }
+
+ // The Content-Length value is dropped; 0 bytes are reported as sent.
+ size_t RGWLoadGenIO::send_content_length(const uint64_t len)
+ {
+   const size_t sent = 0;
+   return sent;
+ }
diff --git a/src/rgw/rgw_loadgen.h b/src/rgw/rgw_loadgen.h
new file mode 100644
index 00000000..72aace76
--- /dev/null
+++ b/src/rgw/rgw_loadgen.h
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_LOADGEN_H
+#define CEPH_RGW_LOADGEN_H
+
+#include <map>
+#include <string>
+
+#include "rgw_client_io.h"
+
+
+ // Description of one synthetic request produced by the load generator:
+ // method, target, body size and the headers to present to RGW.
+ struct RGWLoadGenRequestEnv {
+   int port{0};
+   uint64_t content_length{0};
+   std::string content_type;
+   std::string request_method;
+   std::string uri;
+   std::string query_string;
+   std::string date_str;
+
+   std::map<std::string, std::string> headers;
+
+   RGWLoadGenRequestEnv() {}
+
+   // Set date_str from the given time.
+   void set_date(utime_t& tm);
+   // Fill in the date/authorization headers; returns 0 on success.
+   int sign(RGWAccessKey& access_key);
+ };
+
+/* XXX does RGWLoadGenIO actually want to perform stream/HTTP I/O,
+ * or (e.g) are these NOOPs? */
+ // RestfulClient backed by an RGWLoadGenRequestEnv rather than a socket:
+ // body transfers are routed to the private read_data()/write_data().
+ class RGWLoadGenIO : public rgw::io::RestfulClient
+ {
+   uint64_t left_to_read;
+   RGWLoadGenRequestEnv* req;
+   RGWEnv env;
+
+   int init_env(CephContext *cct) override;
+   size_t read_data(char *buf, size_t len);
+   size_t write_data(const char *buf, size_t len);
+
+ public:
+   // The request description is borrowed, not owned.
+   explicit RGWLoadGenIO(RGWLoadGenRequestEnv* const request_env)
+     : left_to_read(0),
+       req(request_env)
+   {}
+
+   size_t send_status(int status, const char *status_name) override;
+   size_t send_100_continue() override;
+   size_t send_header(const boost::string_ref& name,
+                      const boost::string_ref& value) override;
+   size_t complete_header() override;
+   size_t send_content_length(uint64_t len) override;
+
+   size_t recv_body(char* buf, size_t max) override { return read_data(buf, max); }
+
+   size_t send_body(const char* buf, size_t len) override { return write_data(buf, len); }
+
+   void flush() override;
+
+   RGWEnv& get_env() noexcept override { return env; }
+
+   size_t complete_request() override;
+ };
+
+#endif
diff --git a/src/rgw/rgw_loadgen_process.cc b/src/rgw/rgw_loadgen_process.cc
new file mode 100644
index 00000000..677599f0
--- /dev/null
+++ b/src/rgw/rgw_loadgen_process.cc
@@ -0,0 +1,149 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "common/WorkQueue.h"
+
+#include "rgw_rados.h"
+#include "rgw_rest.h"
+#include "rgw_frontend.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_loadgen.h"
+#include "rgw_client_io.h"
+
+#include <atomic>
+
+#define dout_subsys ceph_subsys_rgw
+
+extern void signal_shutdown();
+
+ // Drain the request work queue on the thread pool before returning.
+ void RGWLoadGenProcess::checkpoint()
+ {
+   auto* const queue = &req_wq;
+   m_tp.drain(queue);
+ }
+
+ /**
+  * Drive one load-generation cycle:
+  *   create buckets -> PUT objects -> GET objects -> DELETE objects ->
+  *   DELETE buckets,
+  * draining the work queue between phases. A failure in bucket or object
+  * creation skips the remaining phases. The thread pool is always stopped
+  * and shutdown signalled at the end.
+  *
+  * Counts come from the "num_objs" (default 1000) and "num_buckets"
+  * (default 1) frontend config values.
+  */
+ void RGWLoadGenProcess::run()
+ {
+   m_tp.start(); /* start thread pool */
+
+   int num_objs;
+   conf->get_val("num_objs", 1000, &num_objs);
+
+   int num_buckets;
+   conf->get_val("num_buckets", 1, &num_buckets);
+
+   std::atomic<bool> failed{false};
+   vector<string> buckets;
+   vector<string> objs; /* owns the object names; no manual delete[] needed */
+
+   /* objects are spread over buckets with i % num_buckets, so zero buckets
+    * would divide by zero; negative counts are meaningless as sizes */
+   if (num_buckets < 1 || num_objs < 0) {
+     derr << "ERROR: invalid loadgen configuration: num_buckets="
+          << num_buckets << " num_objs=" << num_objs << dendl;
+     failed = true;
+   } else {
+     buckets.resize(num_buckets);
+     for (int i = 0; i < num_buckets; i++) {
+       buckets[i] = "/loadgen";
+       string& bucket = buckets[i];
+       append_rand_alpha(cct, bucket, bucket, 16);
+
+       /* first create a bucket */
+       gen_request("PUT", bucket, 0, &failed);
+       checkpoint();
+     }
+
+     if (failed) {
+       derr << "ERROR: bucket creation failed" << dendl;
+     }
+   }
+
+   if (!failed) {
+     objs.reserve(num_objs);
+     for (int i = 0; i < num_objs; i++) {
+       char buf[16 + 1];
+       gen_rand_alphanumeric(cct, buf, sizeof(buf));
+       buf[16] = '\0';
+       objs.push_back(buckets[i % num_buckets] + "/" + buf);
+     }
+
+     for (const auto& obj : objs) {
+       gen_request("PUT", obj, 4096, &failed);
+     }
+
+     checkpoint();
+
+     if (failed) {
+       derr << "ERROR: object creation failed" << dendl;
+     }
+   }
+
+   if (!failed) {
+     for (const auto& obj : objs) {
+       gen_request("GET", obj, 4096, NULL);
+     }
+
+     checkpoint();
+
+     for (const auto& obj : objs) {
+       gen_request("DELETE", obj, 0, NULL);
+     }
+
+     checkpoint();
+
+     for (const auto& bucket : buckets) {
+       gen_request("DELETE", bucket, 0, NULL);
+     }
+   }
+
+   checkpoint();
+
+   m_tp.stop();
+
+   signal_shutdown();
+ } /* RGWLoadGenProcess::run() */
+
+ // Allocate a load-generator request for (method, resource) and queue it
+ // on the work queue after taking one throttle unit. fail_flag is handed
+ // to the request as-is (callers in this file also pass NULL).
+ void RGWLoadGenProcess::gen_request(const string& method,
+                                     const string& resource,
+                                     int content_length, std::atomic<bool>* fail_flag)
+ {
+   const auto req_id = store->get_new_req_id();
+   RGWLoadGenRequest* const req =
+     new RGWLoadGenRequest(req_id, method, resource, content_length, fail_flag);
+
+   dout(10) << "allocated request req=" << hex << req << dec << dendl;
+
+   req_throttle.get(1);
+   req_wq.queue(req);
+ } /* RGWLoadGenProcess::gen_request */
+
+ /**
+  * Work-queue callback: build a signed synthetic request environment for
+  * the queued load-generator request, run it through process_request(),
+  * record a failure in the request's fail flag (if one was supplied), and
+  * delete the request.
+  */
+ void RGWLoadGenProcess::handle_request(RGWRequest* r)
+ {
+   RGWLoadGenRequest* req = static_cast<RGWLoadGenRequest*>(r);
+
+   RGWLoadGenRequestEnv env;
+
+   utime_t tm = ceph_clock_now();
+
+   env.port = 80;
+   env.content_length = req->content_length;
+   env.content_type = "binary/octet-stream";
+   env.request_method = req->method;
+   env.uri = req->resource;
+   env.set_date(tm);
+
+   /* a signing failure is only logged: the request is still submitted, as
+    * before, and its outcome is reported through process_request() */
+   const int sign_ret = env.sign(access_key);
+   if (sign_ret != 0) {
+     dout(5) << "failed to sign loadgen request, ret=" << sign_ret << dendl;
+   }
+
+   RGWLoadGenIO real_client_io(&env);
+   RGWRestfulIO client_io(cct, &real_client_io);
+
+   int ret = process_request(store, rest, req, uri_prefix,
+                             *auth_registry, &client_io, olog,
+                             null_yield, nullptr);
+   if (ret < 0) {
+     /* we don't really care about return code */
+     dout(20) << "process_request() returned " << ret << dendl;
+
+     if (req->fail_flag) {
+       /* set the flag itself; the previous `req->fail_flag++` only advanced
+        * the pointer, so failures were never visible to run() */
+       *(req->fail_flag) = true;
+     }
+   }
+
+   delete req;
+ } /* RGWLoadGenProcess::handle_request */
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
new file mode 100644
index 00000000..f6722f1b
--- /dev/null
+++ b/src/rgw/rgw_log.cc
@@ -0,0 +1,467 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/Clock.h"
+#include "common/Timer.h"
+#include "common/utf8.h"
+#include "common/OutputDataSocket.h"
+#include "common/Formatter.h"
+
+#include "rgw_bucket.h"
+#include "rgw_log.h"
+#include "rgw_acl.h"
+#include "rgw_rados.h"
+#include "rgw_client_io.h"
+#include "rgw_rest.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Look up `name` in the request environment and, when the lookup yields a
+ * non-null pointer, assign that value to `str`.  Otherwise `str` is left
+ * unchanged.
+ */
+static void set_param_str(struct req_state *s, const char *name, string& str)
+{
+  const char * const value = s->info.env->get(name);
+  if (value == nullptr) {
+    return;
+  }
+  str = value;
+}
+
+/*
+ * Expand a log object name template.
+ *
+ * Recognized escapes (taken from *dt unless noted):
+ *   %%  literal '%'
+ *   %Y  4-digit year          %y  2-digit year
+ *   %m  month, 01-12          %d  day of month, 2 digits
+ *   %H  hour 00-23            %k  hour 0-23, not padded
+ *   %I  hour 01-12            %l  hour 1-12, not padded
+ *   %M  minute, 2 digits
+ *   %i  bucket_id             %n  bucket_name
+ * An unknown escape is copied through unchanged ("%x"), and a '%' that is
+ * the last character of the format is copied as-is.
+ *
+ * Returns the expanded name.
+ */
+string render_log_object_name(const string& format,
+                              struct tm *dt, string& bucket_id,
+                              const string& bucket_name)
+{
+  // 12-hour clock value in [1, 12]: 0 -> 12, 1..12 -> 1..12, 13..23 -> 1..11.
+  // The former "(tm_hour % 12) + 1" was off by one (midnight and noon both
+  // rendered as 1, 11 o'clock as 12).
+  const int hour12 = ((dt->tm_hour + 11) % 12) + 1;
+
+  string o;
+  for (unsigned i = 0; i < format.size(); i++) {
+    if (format[i] == '%' && i + 1 < format.size()) {
+      i++;
+      char buf[32];
+      switch (format[i]) {
+      case '%':
+        snprintf(buf, sizeof(buf), "%%");
+        break;
+      case 'Y':
+        snprintf(buf, sizeof(buf), "%.4d", dt->tm_year + 1900);
+        break;
+      case 'y':
+        snprintf(buf, sizeof(buf), "%.2d", dt->tm_year % 100);
+        break;
+      case 'm':
+        snprintf(buf, sizeof(buf), "%.2d", dt->tm_mon + 1);
+        break;
+      case 'd':
+        snprintf(buf, sizeof(buf), "%.2d", dt->tm_mday);
+        break;
+      case 'H':
+        snprintf(buf, sizeof(buf), "%.2d", dt->tm_hour);
+        break;
+      case 'I':
+        snprintf(buf, sizeof(buf), "%.2d", hour12);
+        break;
+      case 'k':
+        snprintf(buf, sizeof(buf), "%d", dt->tm_hour);
+        break;
+      case 'l':
+        snprintf(buf, sizeof(buf), "%d", hour12);
+        break;
+      case 'M':
+        snprintf(buf, sizeof(buf), "%.2d", dt->tm_min);
+        break;
+
+      case 'i':
+        // appended directly: the id may be longer than buf
+        o += bucket_id;
+        continue;
+      case 'n':
+        o += bucket_name;
+        continue;
+      default:
+        // unknown code
+        snprintf(buf, sizeof(buf), "%%%c", format[i]);
+        break;
+      }
+      o += buf;
+      continue;
+    }
+    o += format[i];
+  }
+  return o;
+}
+
+/* usage logger */
+/*
+ * Batches usage-log entries in memory and writes them to the store.
+ *
+ * Entries are accumulated in usage_map under `lock`.  They are flushed
+ * either from a self-rearming timer event (every
+ * rgw_usage_log_tick_interval) or from insert_user() once num_entries
+ * exceeds rgw_usage_log_flush_threshold.  Every flush() call made from
+ * this class happens with timer_lock held.
+ */
+class UsageLogger {
+  CephContext *cct;
+  RGWRados *store;
+  map<rgw_user_bucket, RGWUsageBatch> usage_map;
+  Mutex lock;
+  int32_t num_entries;
+  Mutex timer_lock;
+  SafeTimer timer;
+  utime_t round_timestamp;
+
+  /* Timer event: flush what has accumulated, then schedule the next tick. */
+  class C_UsageLogTimeout : public Context {
+    UsageLogger *logger;
+  public:
+    explicit C_UsageLogTimeout(UsageLogger *_l) : logger(_l) {}
+    void finish(int r) override {
+      logger->flush();
+      logger->set_timer();
+    }
+  };
+
+  /* Schedule one C_UsageLogTimeout after the configured tick interval. */
+  void set_timer() {
+    timer.add_event_after(cct->_conf->rgw_usage_log_tick_interval, new C_UsageLogTimeout(this));
+  }
+public:
+
+  UsageLogger(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store), lock("UsageLogger"), num_entries(0), timer_lock("UsageLogger::timer_lock"), timer(cct, timer_lock) {
+    timer.init();
+    Mutex::Locker timer_guard(timer_lock);
+    set_timer();
+    utime_t now = ceph_clock_now();
+    recalc_round_timestamp(now);
+  }
+
+  /* Flush whatever is pending, then cancel and stop the timer. */
+  ~UsageLogger() {
+    Mutex::Locker timer_guard(timer_lock);
+    flush();
+    timer.cancel_all_events();
+    timer.shutdown();
+  }
+
+  /* Re-base round_timestamp on `ts`, rounded to the hour. */
+  void recalc_round_timestamp(utime_t& ts) {
+    round_timestamp = ts.round_to_hour();
+  }
+
+  /*
+   * Record `entry` under (user, entry.bucket).  entry.epoch is overwritten
+   * with the current round timestamp.  Triggers a flush when the number of
+   * accounted entries exceeds the configured threshold.
+   */
+  void insert_user(utime_t& timestamp, const rgw_user& user, rgw_usage_log_entry& entry) {
+    bool need_flush;
+    {
+      Mutex::Locker map_guard(lock);
+      if (timestamp.sec() > round_timestamp + 3600)
+        recalc_round_timestamp(timestamp);
+      entry.epoch = round_timestamp.sec();
+
+      bool account;
+      string user_str = user.to_str();
+      rgw_user_bucket key(user_str, entry.bucket);
+      real_time round_rt = round_timestamp.to_real_time();
+      usage_map[key].insert(round_rt, entry, &account);
+      if (account)
+        num_entries++;
+
+      need_flush = (num_entries > cct->_conf->rgw_usage_log_flush_threshold);
+    }
+
+    // flush outside `lock`; flush() takes it again on its own
+    if (need_flush) {
+      Mutex::Locker timer_guard(timer_lock);
+      flush();
+    }
+  }
+
+  /* Account the entry to its payer when one is set, else to its owner. */
+  void insert(utime_t& timestamp, rgw_usage_log_entry& entry) {
+    if (!entry.payer.empty()) {
+      insert_user(timestamp, entry.payer, entry);
+    } else {
+      insert_user(timestamp, entry.owner, entry);
+    }
+  }
+
+  /*
+   * Detach the accumulated map under `lock`, reset the counter, and pass
+   * the detached map to the store after the lock has been released.
+   */
+  void flush() {
+    map<rgw_user_bucket, RGWUsageBatch> pending;
+    {
+      Mutex::Locker map_guard(lock);
+      pending.swap(usage_map);
+      num_entries = 0;
+    }
+
+    store->log_usage(pending);
+  }
+};
+
+static UsageLogger *usage_logger = NULL;
+
+/*
+ * Create the file-wide UsageLogger and store it in usage_logger.
+ * NOTE(review): a previously created logger is not released here, so this
+ * looks intended to be called once per rgw_log_usage_finalize() - confirm.
+ */
+void rgw_log_usage_init(CephContext *cct, RGWRados *store)
+{
+ usage_logger = new UsageLogger(cct, store);
+}
+
+/*
+ * Destroy the file-wide UsageLogger (its destructor flushes pending
+ * entries and stops the timer) and clear the pointer, after which
+ * log_usage() becomes a no-op.
+ */
+void rgw_log_usage_finalize()
+{
+  UsageLogger * const doomed = usage_logger;
+  delete doomed;
+  usage_logger = nullptr;
+}
+
+/*
+ * Account one finished operation in the usage log.
+ *
+ * Nothing is recorded for system requests or when no UsageLogger exists.
+ * With a bucket name present the entry is attributed to the bucket owner
+ * (and, for requester-pays buckets, the requesting user is recorded as
+ * payer); without one it is attributed to the requesting user.  An error
+ * with HTTP status 404 is recorded under the placeholder bucket name "-".
+ */
+static void log_usage(struct req_state *s, const string& op_name)
+{
+  /* don't log system user operations; nothing to do without a logger */
+  if (s->system_request || !usage_logger)
+    return;
+
+  rgw_user user;
+  rgw_user payer;
+  string bucket_name = s->bucket_name;
+
+  if (bucket_name.empty()) {
+    user = s->user->user_id;
+  } else {
+    user = s->bucket_owner.get_id();
+    if (s->bucket_info.requester_pays) {
+      payer = s->user->user_id;
+    }
+  }
+
+  const bool failed = s->err.is_err();
+  if (failed && s->err.http_ret == 404) {
+    bucket_name = "-"; /* bucket not found, use the invalid '-' as bucket name */
+  }
+
+  string user_str = user.to_str();
+  string payer_str = payer.to_str();
+  rgw_usage_log_entry entry(user_str, payer_str, bucket_name);
+
+  uint64_t bytes_sent = ACCOUNTING_IO(s)->get_bytes_sent();
+  uint64_t bytes_received = ACCOUNTING_IO(s)->get_bytes_received();
+
+  rgw_usage_data data(bytes_sent, bytes_received);
+
+  // every call counts as one op; it is also a successful op unless the
+  // request state reports an error
+  data.ops = 1;
+  if (!s->is_err())
+    data.successful_ops = 1;
+
+  ldout(s->cct, 30) << "log_usage: bucket_name=" << bucket_name
+    << " tenant=" << s->bucket_tenant
+    << ", bytes_sent=" << bytes_sent << ", bytes_received="
+    << bytes_received << ", success=" << data.successful_ops << dendl;
+
+  entry.add(op_name, data);
+
+  utime_t now = ceph_clock_now();
+  usage_logger->insert(now, entry);
+}
+
+/*
+ * Dump `entry` into `formatter` as one "log_entry" object section.
+ *
+ * "object_owner" is emitted only when non-empty, "total_time" is emitted
+ * in milliseconds, and "http_x_headers" is emitted only when the entry
+ * carries at least one captured header.
+ */
+void rgw_format_ops_log_entry(struct rgw_log_entry& entry, Formatter *formatter)
+{
+  formatter->open_object_section("log_entry");
+  formatter->dump_string("bucket", entry.bucket);
+  {
+    utime_t stamp{entry.time};
+    stamp.gmtime(formatter->dump_stream("time")); // UTC
+    stamp.localtime(formatter->dump_stream("time_local"));
+  }
+  formatter->dump_string("remote_addr", entry.remote_addr);
+
+  string owner_str = entry.object_owner.to_str();
+  if (!owner_str.empty())
+    formatter->dump_string("object_owner", owner_str);
+
+  formatter->dump_string("user", entry.user);
+  formatter->dump_string("operation", entry.op);
+  formatter->dump_string("uri", entry.uri);
+  formatter->dump_string("http_status", entry.http_status);
+  formatter->dump_string("error_code", entry.error_code);
+  formatter->dump_int("bytes_sent", entry.bytes_sent);
+  formatter->dump_int("bytes_received", entry.bytes_received);
+  formatter->dump_int("object_size", entry.obj_size);
+  {
+    using namespace std::chrono;
+    uint64_t elapsed_ms = duration_cast<milliseconds>(entry.total_time).count();
+    formatter->dump_int("total_time", elapsed_ms);
+  }
+  formatter->dump_string("user_agent", entry.user_agent);
+  formatter->dump_string("referrer", entry.referrer);
+
+  if (!entry.x_headers.empty()) {
+    // one single-field object per captured header, named after the header
+    formatter->open_array_section("http_x_headers");
+    for (const auto& hdr : entry.x_headers) {
+      const char *hdr_name = hdr.first.c_str();
+      formatter->open_object_section(hdr_name);
+      formatter->dump_string(hdr_name, hdr.second);
+      formatter->close_section();
+    }
+    formatter->close_section();
+  }
+  formatter->close_section();
+}
+
+/* Flush the formatter's pending output into a string and append it to `bl`. */
+void OpsLogSocket::formatter_to_bl(bufferlist& bl)
+{
+  stringstream rendered;
+  formatter->flush(rendered);
+
+  const string text = rendered.str();
+  bl.append(text);
+}
+
+/*
+ * Connection preamble: appends "[" to `bl`.
+ * NOTE(review): together with the ",\n" delimiter set in the constructor
+ * this presumably opens a JSON array of log entries - confirm against
+ * OutputDataSocket.
+ */
+void OpsLogSocket::init_connection(bufferlist& bl)
+{
+ bl.append("[");
+}
+
+/*
+ * Set up the socket with the given backlog, allocate the JSON formatter
+ * used to render entries, and use ",\n" as the delimiter.
+ */
+OpsLogSocket::OpsLogSocket(CephContext *cct, uint64_t _backlog)
+  : OutputDataSocket(cct, _backlog),
+    formatter(new JSONFormatter),
+    lock("OpsLogSocket")
+{
+  delim.append(",\n");
+}
+
+/* Release the formatter allocated by the constructor. */
+OpsLogSocket::~OpsLogSocket()
+{
+ delete formatter;
+}
+
+/*
+ * Render `entry` as JSON and append it to the socket output.  The shared
+ * formatter is used only while `lock` is held; the rendered bytes are
+ * appended after the lock has been released.
+ */
+void OpsLogSocket::log(struct rgw_log_entry& entry)
+{
+  bufferlist rendered;
+  {
+    Mutex::Locker guard(lock);
+    rgw_format_ops_log_entry(entry, formatter);
+    formatter_to_bl(rendered);
+  }
+
+  append_output(rendered);
+}
+
+/*
+ * Record one finished request in the usage log and/or the ops log.
+ *
+ * store    - used to append the encoded entry to the zone's log pool
+ * rest     - optional; when set, selects which environment entries are
+ *            captured into entry.x_headers
+ * s        - state of the request being logged
+ * op_name  - operation name stored in the entry
+ * olog     - optional ops-log socket that also receives the entry
+ *
+ * Returns 0 when nothing needed logging or logging succeeded, -EINVAL when
+ * ops logging is enabled but the request has no bucket name, or the
+ * negative error from the rados append / pool creation.
+ */
+int rgw_log_op(RGWRados *store, RGWREST* const rest, struct req_state *s,
+ const string& op_name, OpsLogSocket *olog)
+{
+ struct rgw_log_entry entry;
+ string bucket_id;
+
+ // usage logging is independent of the ops log and runs first
+ if (s->enable_usage_log)
+ log_usage(s, op_name);
+
+ if (!s->enable_ops_log)
+ return 0;
+
+ if (s->bucket_name.empty()) {
+ ldout(s->cct, 5) << "nothing to log for operation" << dendl;
+ return -EINVAL;
+ }
+ // ops on a nonexistent bucket are logged only when explicitly enabled,
+ // and then with an empty bucket id
+ if (s->err.ret == -ERR_NO_SUCH_BUCKET) {
+ if (!s->cct->_conf->rgw_log_nonexistent_bucket) {
+ ldout(s->cct, 5) << "bucket " << s->bucket << " doesn't exist, not logging" << dendl;
+ return 0;
+ }
+ bucket_id = "";
+ } else {
+ bucket_id = s->bucket.bucket_id;
+ }
+ rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name, entry.bucket);
+
+ if (check_utf8(entry.bucket.c_str(), entry.bucket.size()) != 0) {
+ ldout(s->cct, 5) << "not logging op on bucket with non-utf8 name" << dendl;
+ return 0;
+ }
+
+ // "-" stands in for the object key when the request names no object
+ if (!s->object.empty()) {
+ entry.obj = s->object;
+ } else {
+ entry.obj = rgw_obj_key("-");
+ }
+
+ entry.obj_size = s->obj_size;
+
+ // the remote address is read from a configurable environment key,
+ // falling back to REMOTE_ADDR when none is configured
+ if (s->cct->_conf->rgw_remote_addr_param.length())
+ set_param_str(s, s->cct->_conf->rgw_remote_addr_param.c_str(),
+ entry.remote_addr);
+ else
+ set_param_str(s, "REMOTE_ADDR", entry.remote_addr);
+ set_param_str(s, "HTTP_USER_AGENT", entry.user_agent);
+ // legacy apps are still using misspelling referer, such as curl -e option
+ if (s->info.env->exists("HTTP_REFERRER"))
+ set_param_str(s, "HTTP_REFERRER", entry.referrer);
+ else
+ set_param_str(s, "HTTP_REFERER", entry.referrer);
+
+ // rebuild a request line: "<METHOD> <URI>[?<QUERY>] HTTP/<VERSION>",
+ // each part included only when present in the environment
+ std::string uri;
+ if (s->info.env->exists("REQUEST_METHOD")) {
+ uri.append(s->info.env->get("REQUEST_METHOD"));
+ uri.append(" ");
+ }
+
+ if (s->info.env->exists("REQUEST_URI")) {
+ uri.append(s->info.env->get("REQUEST_URI"));
+ }
+
+ if (s->info.env->exists("QUERY_STRING")) {
+ const char* qs = s->info.env->get("QUERY_STRING");
+ if(qs && (*qs != '\0')) {
+ uri.append("?");
+ uri.append(qs);
+ }
+ }
+
+ if (s->info.env->exists("HTTP_VERSION")) {
+ uri.append(" ");
+ uri.append("HTTP/");
+ uri.append(s->info.env->get("HTTP_VERSION"));
+ }
+
+ entry.uri = std::move(uri);
+
+ entry.op = op_name;
+
+ /* custom header logging */
+ if (rest) {
+ if (rest->log_x_headers()) {
+ for (const auto& iter : s->info.env->get_map()) {
+ if (rest->log_x_header(iter.first)) {
+ entry.x_headers.insert(
+ rgw_log_entry::headers_map::value_type(iter.first, iter.second));
+ }
+ }
+ }
+ }
+
+ entry.user = s->user->user_id.to_str();
+ if (s->object_acl)
+ entry.object_owner = s->object_acl->get_owner().get_id();
+ entry.bucket_owner = s->bucket_owner.get_id();
+
+ uint64_t bytes_sent = ACCOUNTING_IO(s)->get_bytes_sent();
+ uint64_t bytes_received = ACCOUNTING_IO(s)->get_bytes_received();
+
+ entry.time = s->time;
+ entry.total_time = s->time_elapsed();
+ entry.bytes_sent = bytes_sent;
+ entry.bytes_received = bytes_received;
+ // an http_ret of 0 is reported as status "200"
+ if (s->err.http_ret) {
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", s->err.http_ret);
+ entry.http_status = buf;
+ } else
+ entry.http_status = "200"; // default
+
+ entry.error_code = s->err.err_code;
+ entry.bucket_id = bucket_id;
+
+ bufferlist bl;
+ encode(entry, bl);
+
+ // broken-down request time (UTC or local, per config) for the object name
+ struct tm bdt;
+ time_t t = req_state::Clock::to_time_t(entry.time);
+ if (s->cct->_conf->rgw_log_object_name_utc)
+ gmtime_r(&t, &bdt);
+ else
+ localtime_r(&t, &bdt);
+
+ int ret = 0;
+
+ if (s->cct->_conf->rgw_ops_log_rados) {
+ // NOTE(review): the object name is rendered from s->bucket.bucket_id,
+ // not from the local bucket_id that was cleared for nonexistent
+ // buckets - confirm this is intended
+ string oid = render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt,
+ s->bucket.bucket_id, entry.bucket);
+
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().log_pool, oid);
+
+ ret = store->append_async(obj, bl.length(), bl);
+ if (ret == -ENOENT) {
+ // on -ENOENT create the log pool and retry the append once
+ ret = store->create_pool(store->svc.zone->get_zone_params().log_pool);
+ if (ret < 0)
+ goto done;
+ // retry
+ ret = store->append_async(obj, bl.length(), bl);
+ }
+ }
+
+ // the socket also receives the entry, except when pool creation failed
+ // above (that path jumps straight to done)
+ if (olog) {
+ olog->log(entry);
+ }
+done:
+ if (ret < 0)
+ ldout(s->cct, 0) << "ERROR: failed to log entry" << dendl;
+
+ return ret;
+}
+
diff --git a/src/rgw/rgw_log.h b/src/rgw/rgw_log.h
new file mode 100644
index 00000000..9614624d
--- /dev/null
+++ b/src/rgw/rgw_log.h
@@ -0,0 +1,144 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_LOG_H
+#define CEPH_RGW_LOG_H
+#include <boost/container/flat_map.hpp>
+#include "rgw_common.h"
+#include "common/Formatter.h"
+#include "common/OutputDataSocket.h"
+
+class RGWRados;
+
+/*
+ * One ops-log record describing a finished request.
+ *
+ * Wire format: encoded as struct version 9 (compat 5).  decode() also
+ * accepts older versions and fills the fields they lack:
+ *   v >= 2  bytes_received (else 0)
+ *   v >= 3  bucket_id (a numeric id for v <= 5, a string afterwards)
+ *   v >  3  bucket_owner id
+ *   v >= 7  full object key
+ *   v >= 8  full object_owner / bucket_owner
+ *   v >= 9  x_headers
+ */
+struct rgw_log_entry {
+
+  using headers_map = boost::container::flat_map<std::string, std::string>;
+  using Clock = req_state::Clock;
+
+  rgw_user object_owner;
+  rgw_user bucket_owner;
+  string bucket;
+  Clock::time_point time;
+  string remote_addr;
+  string user;
+  rgw_obj_key obj;
+  string op;
+  string uri;
+  string http_status;
+  string error_code;
+  // counters and the duration start at zero so that a default-constructed
+  // entry never encodes or dumps indeterminate values
+  uint64_t bytes_sent = 0;
+  uint64_t bytes_received = 0;
+  uint64_t obj_size = 0;
+  Clock::duration total_time{};
+  string user_agent;
+  string referrer;
+  string bucket_id;
+  headers_map x_headers;
+
+  void encode(bufferlist &bl) const {
+    ENCODE_START(9, 5, bl);
+    encode(object_owner.id, bl);
+    encode(bucket_owner.id, bl);
+    encode(bucket, bl);
+    encode(time, bl);
+    encode(remote_addr, bl);
+    encode(user, bl);
+    encode(obj.name, bl);
+    encode(op, bl);
+    encode(uri, bl);
+    encode(http_status, bl);
+    encode(error_code, bl);
+    encode(bytes_sent, bl);
+    encode(obj_size, bl);
+    encode(total_time, bl);
+    encode(user_agent, bl);
+    encode(referrer, bl);
+    encode(bytes_received, bl);
+    encode(bucket_id, bl);
+    encode(obj, bl);
+    encode(object_owner, bl);
+    encode(bucket_owner, bl);
+    encode(x_headers, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator &p) {
+    // the declared version matches ENCODE_START above; it was 8 although
+    // this code already reads the v9 field (x_headers)
+    DECODE_START_LEGACY_COMPAT_LEN(9, 5, 5, p);
+    decode(object_owner.id, p);
+    if (struct_v > 3)
+      decode(bucket_owner.id, p);
+    decode(bucket, p);
+    decode(time, p);
+    decode(remote_addr, p);
+    decode(user, p);
+    decode(obj.name, p);
+    decode(op, p);
+    decode(uri, p);
+    decode(http_status, p);
+    decode(error_code, p);
+    decode(bytes_sent, p);
+    decode(obj_size, p);
+    decode(total_time, p);
+    decode(user_agent, p);
+    decode(referrer, p);
+    if (struct_v >= 2)
+      decode(bytes_received, p);
+    else
+      bytes_received = 0;
+
+    if (struct_v >= 3) {
+      if (struct_v <= 5) {
+        // versions 3..5 stored the bucket id as a number; keep it as text
+        uint64_t id;
+        decode(id, p);
+        char buf[32];
+        snprintf(buf, sizeof(buf), "%" PRIu64, id);
+        bucket_id = buf;
+      } else {
+        decode(bucket_id, p);
+      }
+    } else {
+      bucket_id = "";
+    }
+    if (struct_v >= 7) {
+      decode(obj, p);
+    }
+    if (struct_v >= 8) {
+      decode(object_owner, p);
+      decode(bucket_owner, p);
+    }
+    if (struct_v >= 9) {
+      decode(x_headers, p);
+    }
+    DECODE_FINISH(p);
+  }
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<rgw_log_entry*>& o);
+};
+WRITE_CLASS_ENCODER(rgw_log_entry)
+
+/*
+ * OutputDataSocket that emits rgw_log_entry records rendered through a
+ * Formatter.
+ */
+class OpsLogSocket : public OutputDataSocket {
+ // formatter used to render entries; owned by this object
+ Formatter *formatter;
+ // serializes use of the shared formatter in log()
+ Mutex lock;
+
+ // flush the formatter's pending output into bl
+ void formatter_to_bl(bufferlist& bl);
+
+protected:
+ void init_connection(bufferlist& bl) override;
+
+public:
+ OpsLogSocket(CephContext *cct, uint64_t _backlog);
+ ~OpsLogSocket() override;
+
+ // render entry and append it to the socket output
+ void log(struct rgw_log_entry& entry);
+};
+
+class RGWREST;
+
+/* Record a finished request in the usage log and/or ops log; olog may be NULL. */
+int rgw_log_op(RGWRados *store, RGWREST* const rest, struct req_state *s,
+ const string& op_name, OpsLogSocket *olog);
+/* Create / destroy the usage logger that rgw_log_op() feeds. */
+void rgw_log_usage_init(CephContext *cct, RGWRados *store);
+void rgw_log_usage_finalize();
+/* Dump entry into formatter as a "log_entry" object section. */
+void rgw_format_ops_log_entry(struct rgw_log_entry& entry,
+ Formatter *formatter);
+
+#endif /* CEPH_RGW_LOG_H */
+
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
new file mode 100644
index 00000000..c0c43a6e
--- /dev/null
+++ b/src/rgw/rgw_main.cc
@@ -0,0 +1,637 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/safe_io.h"
+#include "common/TracepointProvider.h"
+#include "include/compat.h"
+#include "include/str_list.h"
+#include "include/stringify.h"
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_otp.h"
+#include "rgw_period_pusher.h"
+#include "rgw_realm_reloader.h"
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_swift.h"
+#include "rgw_rest_admin.h"
+#include "rgw_rest_usage.h"
+#include "rgw_rest_user.h"
+#include "rgw_rest_bucket.h"
+#include "rgw_rest_metadata.h"
+#include "rgw_rest_log.h"
+#include "rgw_rest_config.h"
+#include "rgw_rest_realm.h"
+#include "rgw_rest_sts.h"
+#include "rgw_swift_auth.h"
+#include "rgw_log.h"
+#include "rgw_tools.h"
+#include "rgw_resolve.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_frontend.h"
+#include "rgw_http_client_curl.h"
+#include "rgw_perf_counters.h"
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+#include "rgw_amqp.h"
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+#include "rgw_kafka.h"
+#endif
+#if defined(WITH_RADOSGW_BEAST_FRONTEND)
+#include "rgw_asio_frontend.h"
+#endif /* WITH_RADOSGW_BEAST_FRONTEND */
+
+#include "rgw_dmclock_scheduler_ctx.h"
+
+#include "services/svc_zone.h"
+
+#ifdef HAVE_SYS_PRCTL_H
+#include <sys/prctl.h>
+#endif
+
+#define dout_subsys ceph_subsys_rgw
+
+// Tracepoint provider descriptors, registered with
+// TracepointProvider::initialize<>() in main().  Each is built from a
+// shared-library name and a second string.
+// NOTE(review): the second string looks like the config option that
+// enables the provider - confirm against TracepointProvider.
+namespace {
+TracepointProvider::Traits rgw_op_tracepoint_traits("librgw_op_tp.so",
+ "rgw_op_tracing");
+TracepointProvider::Traits rgw_rados_tracepoint_traits("librgw_rados_tp.so",
+ "rgw_rados_tracing");
+}
+
+// receives the previous SIGALRM disposition returned by signal() in main()
+static sig_t sighandler_alrm;
+
+class RGWProcess;
+
+// socket pair used to wake main(): signal_shutdown() writes to [0],
+// wait_shutdown() reads from [1]; created by signal_fd_init()
+static int signal_fd[2] = {0, 0};
+
+/*
+ * Request shutdown by writing one int to signal_fd[0], which unblocks
+ * wait_shutdown().  A failed write is only reported.
+ */
+void signal_shutdown()
+{
+  int token = 0;
+  if (write(signal_fd[0], &token, sizeof(token)) < 0) {
+    derr << "ERROR: " << __func__ << ": write() returned "
+         << cpp_strerror(errno) << dendl;
+  }
+}
+
+/*
+ * Block until one int has been read from signal_fd[1], i.e. until
+ * signal_shutdown() has been called.  A read error is only reported.
+ */
+static void wait_shutdown()
+{
+  int token;
+  if (safe_read_exact(signal_fd[1], &token, sizeof(token)) < 0) {
+    derr << "safe_read_exact returned with error" << dendl;
+  }
+}
+
+/*
+ * Create the connected AF_UNIX stream socket pair stored in signal_fd.
+ * Returns the result of socketpair().
+ */
+static int signal_fd_init()
+{
+ return socketpair(AF_UNIX, SOCK_STREAM, 0, signal_fd);
+}
+
+/* Close both ends of the shutdown socket pair, [0] first. */
+static void signal_fd_finalize()
+{
+  for (int fd : signal_fd) {
+    close(fd);
+  }
+}
+
+/*
+ * Handler registered for SIGTERM, SIGINT and SIGUSR1.
+ *
+ * Always marks FCGX shutdown as pending when the fcgi frontend is built
+ * in.  For every signal except SIGUSR1 it additionally wakes main() via
+ * signal_shutdown() and, when rgw_exit_timeout_secs is non-zero, arms an
+ * alarm for that many seconds.
+ */
+static void handle_sigterm(int signum)
+{
+  dout(1) << __func__ << dendl;
+#if defined(WITH_RADOSGW_FCGI_FRONTEND)
+  FCGX_ShutdownPending();
+#endif
+
+  // send a signal to make fcgi's accept(2) wake up. unfortunately the
+  // initial signal often isn't sufficient because we race with accept's
+  // check of the flag set by ShutdownPending() above.
+  if (signum == SIGUSR1) {
+    return;
+  }
+
+  signal_shutdown();
+
+  // safety net in case we get stuck doing an orderly shutdown.
+  const uint64_t secs = g_ceph_context->_conf->rgw_exit_timeout_secs;
+  if (secs != 0) {
+    alarm(secs);
+  }
+  dout(1) << __func__ << " set alarm for " << secs << dendl;
+}
+
+/*
+ * SIGALRM handler installed in main(): terminates the process immediately
+ * with status 0 via _exit().
+ */
+static void godown_alarm(int signum)
+{
+ _exit(0);
+}
+
+
+/*
+ * Timer event scheduled by main() after rgw_init_timeout: when it fires
+ * it reports the initialization timeout and exits with status 1.
+ */
+class C_InitTimeout : public Context {
+public:
+  C_InitTimeout() = default;
+
+  void finish(int r) override {
+    derr << "Initialization timeout, failed to initialize" << dendl;
+    exit(1);
+  }
+};
+
+/*
+ * Print the radosgw command-line help to stdout, followed by the generic
+ * server options.  Always returns 0.
+ */
+static int usage()
+{
+  cout << "usage: radosgw [options...]" << std::endl
+       << "options:\n"
+       << " --rgw-region=<region> region in which radosgw runs\n"
+       << " --rgw-zone=<zone> zone in which radosgw runs\n"
+       << " --rgw-socket-path=<path> specify a unix domain socket path\n"
+       << " -m monaddress[:port] connect to specified monitor\n"
+       << " --keyring=<path> path to radosgw keyring\n"
+       << " --logfile=<logfile> file to log debug output\n"
+       << " --debug-rgw=<log-level>/<memory-level> set radosgw debug level\n";
+  generic_server_usage();
+
+  return 0;
+}
+
+/*
+ * Turn logging on for `mgr` and return the same pointer, so the call can
+ * be nested inside a register_*() expression.
+ */
+static RGWRESTMgr *set_logging(RGWRESTMgr *mgr)
+{
+ mgr->set_logging(true);
+ return mgr;
+}
+
+/*
+ * Let the store's sync module, if there is one, wrap or replace the REST
+ * manager `orig` for the given dialect.  Returns `orig` unchanged when the
+ * store has no sync module.
+ */
+static RGWRESTMgr *rest_filter(RGWRados *store, int dialect, RGWRESTMgr *orig)
+{
+  RGWSyncModuleInstanceRef sync_module = store->get_sync_module();
+  if (!sync_module) {
+    return orig;
+  }
+  return sync_module->get_rest_filter(dialect, orig);
+}
+
+/*
+ * start up the RADOS connection and then handle HTTP messages as they come in
+ */
+/*
+ * radosgw entry point.
+ *
+ * Phases, in order:
+ *   1. redirect stderr to stdout and parse arguments;
+ *   2. pre-init the config, parse rgw_frontends into RGWFrontendConfig
+ *      objects, then finish global init;
+ *   3. bring up tools, resolver, curl, the RADOS store, perf counters and
+ *      the REST layer, guarded by an initialization timeout;
+ *   4. register REST managers for the APIs listed in rgw_enable_apis;
+ *   5. install signal handlers, start one frontend per configuration and
+ *      register in the service map;
+ *   6. block in wait_shutdown() until a shutdown signal arrives;
+ *   7. stop and join the frontends and tear everything down.
+ *
+ * Returns 0 after an orderly shutdown, or a positive errno-style value
+ * when startup fails.
+ */
+int main(int argc, const char **argv)
+{
+  // dout() messages will be sent to stderr, but FCGX wants messages on stdout
+  // Redirect stderr to stdout.
+  TEMP_FAILURE_RETRY(close(STDERR_FILENO));
+  if (TEMP_FAILURE_RETRY(dup2(STDOUT_FILENO, STDERR_FILENO)) < 0) {
+    int err = errno;
+    cout << "failed to redirect stderr to stdout: " << cpp_strerror(err)
+         << std::endl;
+    return ENOSYS;
+  }
+
+  /* alternative default for module */
+  map<string,string> defaults = {
+    { "debug_rgw", "1/5" },
+    { "keyring", "$rgw_data/keyring" },
+    { "objecter_inflight_ops", "24576" }
+  };
+
+  vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  if (args.empty()) {
+    cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    exit(1);
+  }
+  if (ceph_argparse_need_usage(args)) {
+    usage();
+    exit(0);
+  }
+
+  // First, let's determine which frontends are configured.
+  int flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS;
+  global_pre_init(
+    &defaults, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON,
+    flags);
+
+  list<string> frontends;
+  g_conf().early_expand_meta(g_conf()->rgw_frontends, &cerr);
+  get_str_list(g_conf()->rgw_frontends, ",", frontends);
+  multimap<string, RGWFrontendConfig *> fe_map;
+  list<RGWFrontendConfig *> configs;
+  if (frontends.empty()) {
+    frontends.push_back("civetweb");
+  }
+  for (list<string>::iterator iter = frontends.begin(); iter != frontends.end(); ++iter) {
+    string& f = *iter;
+
+    if (f.find("civetweb") != string::npos || f.find("beast") != string::npos) {
+      // If civetweb or beast is configured as a frontend, prevent global_init() from
+      // dropping permissions by setting the appropriate flag.
+      flags |= CINIT_FLAG_DEFER_DROP_PRIVILEGES;
+      if (f.find("port") != string::npos) {
+        // check for the most common ws problems
+        if ((f.find("port=") == string::npos) ||
+            (f.find("port= ") != string::npos)) {
+          derr << "WARNING: radosgw frontend config found unexpected spacing around 'port' "
+               << "(ensure frontend port parameter has the form 'port=80' with no spaces "
+               << "before or after '=')" << dendl;
+        }
+      }
+    }
+
+    RGWFrontendConfig *config = new RGWFrontendConfig(f);
+    int r = config->init();
+    if (r < 0) {
+      delete config;
+      cerr << "ERROR: failed to init config: " << f << std::endl;
+      return EINVAL;
+    }
+
+    configs.push_back(config);
+
+    string framework = config->get_framework();
+    fe_map.insert(pair<string, RGWFrontendConfig*>(framework, config));
+  }
+
+  // Now that we've determined which frontend(s) to use, continue with global
+  // initialization. Passing false as the final argument ensures that
+  // global_pre_init() is not invoked twice.
+  // claim the reference and release it after subsequent destructors have fired
+  auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON,
+                         flags, "rgw_data", false);
+
+  // maintain existing region root pool for new multisite objects
+  if (!g_conf()->rgw_region_root_pool.empty()) {
+    const char *root_pool = g_conf()->rgw_region_root_pool.c_str();
+    if (g_conf()->rgw_zonegroup_root_pool.empty()) {
+      g_conf().set_val_or_die("rgw_zonegroup_root_pool", root_pool);
+    }
+    if (g_conf()->rgw_period_root_pool.empty()) {
+      g_conf().set_val_or_die("rgw_period_root_pool", root_pool);
+    }
+    if (g_conf()->rgw_realm_root_pool.empty()) {
+      g_conf().set_val_or_die("rgw_realm_root_pool", root_pool);
+    }
+  }
+
+  // for region -> zonegroup conversion (must happen before common_init_finish())
+  if (!g_conf()->rgw_region.empty() && g_conf()->rgw_zonegroup.empty()) {
+    g_conf().set_val_or_die("rgw_zonegroup", g_conf()->rgw_region.c_str());
+  }
+
+  if (g_conf()->daemonize) {
+    global_init_daemonize(g_ceph_context);
+  }
+
+  // watchdog: exit if initialization takes longer than rgw_init_timeout
+  Mutex mutex("main");
+  SafeTimer init_timer(g_ceph_context, mutex);
+  init_timer.init();
+  mutex.Lock();
+  init_timer.add_event_after(g_conf()->rgw_init_timeout, new C_InitTimeout);
+  mutex.Unlock();
+
+  common_init_finish(g_ceph_context);
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGHUP, sighup_handler);
+
+  TracepointProvider::initialize<rgw_rados_tracepoint_traits>(g_ceph_context);
+  TracepointProvider::initialize<rgw_op_tracepoint_traits>(g_ceph_context);
+
+  int r = rgw_tools_init(g_ceph_context);
+  if (r < 0) {
+    derr << "ERROR: unable to initialize rgw tools" << dendl;
+    return -r;
+  }
+
+  rgw_init_resolver();
+  rgw::curl::setup_curl(fe_map);
+  rgw_http_client_init(g_ceph_context);
+
+#if defined(WITH_RADOSGW_FCGI_FRONTEND)
+  FCGX_Init();
+#endif
+
+  RGWRados *store =
+    RGWStoreManager::get_storage(g_ceph_context,
+                                 g_conf()->rgw_enable_gc_threads,
+                                 g_conf()->rgw_enable_lc_threads,
+                                 g_conf()->rgw_enable_quota_threads,
+                                 g_conf()->rgw_run_sync_thread,
+                                 g_conf().get_val<bool>("rgw_dynamic_resharding"),
+                                 g_conf()->rgw_cache_enabled);
+  if (!store) {
+    mutex.Lock();
+    init_timer.cancel_all_events();
+    init_timer.shutdown();
+    mutex.Unlock();
+
+    derr << "Couldn't init storage provider (RADOS)" << dendl;
+    return EIO;
+  }
+  r = rgw_perf_start(g_ceph_context);
+  if (r < 0) {
+    derr << "ERROR: failed starting rgw perf" << dendl;
+    return -r;
+  }
+
+  rgw_rest_init(g_ceph_context, store, store->svc.zone->get_zonegroup());
+
+  // initialization finished in time: disarm the watchdog
+  mutex.Lock();
+  init_timer.cancel_all_events();
+  init_timer.shutdown();
+  mutex.Unlock();
+
+  rgw_user_init(store);
+  rgw_bucket_init(store->meta_mgr);
+  rgw_otp_init(store);
+  rgw_log_usage_init(g_ceph_context, store);
+
+  RGWREST rest;
+
+  list<string> apis;
+
+  get_str_list(g_conf()->rgw_enable_apis, apis);
+
+  map<string, bool> apis_map;
+  for (list<string>::iterator li = apis.begin(); li != apis.end(); ++li) {
+    apis_map[*li] = true;
+  }
+
+  /* warn about insecure keystone secret config options */
+  // warn when EITHER inline secret is configured; the previous condition
+  // only fired when both were set at the same time
+  if (!g_ceph_context->_conf->rgw_keystone_admin_token.empty() ||
+      !g_ceph_context->_conf->rgw_keystone_admin_password.empty()) {
+    dout(0) << "WARNING: rgw_keystone_admin_token and rgw_keystone_admin_password should be avoided as they can expose secrets. Prefer the new rgw_keystone_admin_token_path and rgw_keystone_admin_password_path options, which read their secrets from files." << dendl;
+  }
+
+  // S3 website mode is a specialization of S3
+  const bool s3website_enabled = apis_map.count("s3website") > 0;
+  const bool sts_enabled = apis_map.count("sts") > 0;
+  const bool iam_enabled = apis_map.count("iam") > 0;
+  const bool pubsub_enabled = apis_map.count("pubsub") > 0;
+  // Swift API entrypoint could placed in the root instead of S3
+  const bool swift_at_root = g_conf()->rgw_swift_url_prefix == "/";
+  if (apis_map.count("s3") > 0 || s3website_enabled) {
+    if (! swift_at_root) {
+      rest.register_default_mgr(set_logging(rest_filter(store, RGW_REST_S3,
+        new RGWRESTMgr_S3(s3website_enabled, sts_enabled, iam_enabled, pubsub_enabled))));
+    } else {
+      derr << "Cannot have the S3 or S3 Website enabled together with "
+           << "Swift API placed in the root of hierarchy" << dendl;
+      return EINVAL;
+    }
+  }
+
+  if (pubsub_enabled) {
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+    if (!rgw::amqp::init(cct.get())) {
+      dout(1) << "ERROR: failed to initialize AMQP manager" << dendl;
+    }
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+    if (!rgw::kafka::init(cct.get())) {
+      dout(1) << "ERROR: failed to initialize Kafka manager" << dendl;
+    }
+#endif
+  }
+
+  if (apis_map.count("swift") > 0) {
+    RGWRESTMgr_SWIFT* const swift_resource = new RGWRESTMgr_SWIFT;
+
+    if (! g_conf()->rgw_cross_domain_policy.empty()) {
+      swift_resource->register_resource("crossdomain.xml",
+                                        set_logging(new RGWRESTMgr_SWIFT_CrossDomain));
+    }
+
+    swift_resource->register_resource("healthcheck",
+                                      set_logging(new RGWRESTMgr_SWIFT_HealthCheck));
+
+    swift_resource->register_resource("info",
+                                      set_logging(new RGWRESTMgr_SWIFT_Info));
+
+    if (! swift_at_root) {
+      rest.register_resource(g_conf()->rgw_swift_url_prefix,
+                             set_logging(rest_filter(store, RGW_REST_SWIFT,
+                                                     swift_resource)));
+    } else {
+      if (store->svc.zone->get_zonegroup().zones.size() > 1) {
+        derr << "Placing Swift API in the root of URL hierarchy while running"
+             << " multi-site configuration requires another instance of RadosGW"
+             << " with S3 API enabled!" << dendl;
+      }
+
+      rest.register_default_mgr(set_logging(swift_resource));
+    }
+  }
+
+  if (apis_map.count("swift_auth") > 0) {
+    rest.register_resource(g_conf()->rgw_swift_auth_entry,
+                           set_logging(new RGWRESTMgr_SWIFT_Auth));
+  }
+
+  if (apis_map.count("admin") > 0) {
+    RGWRESTMgr_Admin *admin_resource = new RGWRESTMgr_Admin;
+    admin_resource->register_resource("usage", new RGWRESTMgr_Usage);
+    admin_resource->register_resource("user", new RGWRESTMgr_User);
+    admin_resource->register_resource("bucket", new RGWRESTMgr_Bucket);
+
+    /*Registering resource for /admin/metadata */
+    admin_resource->register_resource("metadata", new RGWRESTMgr_Metadata);
+    admin_resource->register_resource("log", new RGWRESTMgr_Log);
+    admin_resource->register_resource("config", new RGWRESTMgr_Config);
+    admin_resource->register_resource("realm", new RGWRESTMgr_Realm);
+    rest.register_resource(g_conf()->rgw_admin_entry, admin_resource);
+  }
+
+  /* Initialize the registry of auth strategies which will coordinate
+   * the dynamic reconfiguration. */
+  rgw::auth::ImplicitTenants implicit_tenant_context{g_conf()};
+  g_conf().add_observer(&implicit_tenant_context);
+  auto auth_registry =
+    rgw::auth::StrategyRegistry::create(g_ceph_context, implicit_tenant_context, store);
+
+  /* Header custom behavior */
+  rest.register_x_headers(g_conf()->rgw_log_http_headers);
+
+  if (cct->_conf.get_val<std::string>("rgw_scheduler_type") == "dmclock" &&
+      !cct->check_experimental_feature_enabled("dmclock")){
+    // the two literals are concatenated by the stream, so the first one
+    // needs its trailing space (it used to print "needs to beset")
+    derr << "dmclock scheduler type is experimental and needs to be "
+         << "set in the option enable experimental data corrupting features"
+         << dendl;
+    return EINVAL;
+  }
+
+  rgw::dmclock::SchedulerCtx sched_ctx{cct.get()};
+
+  OpsLogSocket *olog = NULL;
+
+  if (!g_conf()->rgw_ops_log_socket_path.empty()) {
+    olog = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog);
+    olog->init(g_conf()->rgw_ops_log_socket_path);
+  }
+
+  r = signal_fd_init();
+  if (r < 0) {
+    derr << "ERROR: unable to initialize signal fds" << dendl;
+    exit(1);
+  }
+
+  register_async_signal_handler(SIGTERM, handle_sigterm);
+  register_async_signal_handler(SIGINT, handle_sigterm);
+  register_async_signal_handler(SIGUSR1, handle_sigterm);
+  sighandler_alrm = signal(SIGALRM, godown_alarm);
+
+  map<string, string> service_map_meta;
+  service_map_meta["pid"] = stringify(getpid());
+
+  list<RGWFrontend *> fes;
+
+  int fe_count = 0;
+
+  // instantiate, init and run one frontend per parsed configuration
+  for (multimap<string, RGWFrontendConfig *>::iterator fiter = fe_map.begin();
+       fiter != fe_map.end(); ++fiter, ++fe_count) {
+    RGWFrontendConfig *config = fiter->second;
+    string framework = config->get_framework();
+    RGWFrontend *fe = NULL;
+
+    if (framework == "civetweb" || framework == "mongoose") {
+      framework = "civetweb";
+      std::string uri_prefix;
+      config->get_val("prefix", "", &uri_prefix);
+
+      RGWProcessEnv env = { store, &rest, olog, 0, uri_prefix, auth_registry };
+      //TODO: move all of scheduler initializations to frontends?
+
+      fe = new RGWCivetWebFrontend(env, config, sched_ctx);
+    }
+    else if (framework == "loadgen") {
+      int port;
+      config->get_val("port", 80, &port);
+      std::string uri_prefix;
+      config->get_val("prefix", "", &uri_prefix);
+
+      RGWProcessEnv env = { store, &rest, olog, port, uri_prefix, auth_registry };
+
+      fe = new RGWLoadGenFrontend(env, config);
+    }
+#if defined(WITH_RADOSGW_BEAST_FRONTEND)
+    else if (framework == "beast") {
+      int port;
+      config->get_val("port", 80, &port);
+      std::string uri_prefix;
+      config->get_val("prefix", "", &uri_prefix);
+      RGWProcessEnv env{ store, &rest, olog, port, uri_prefix, auth_registry };
+      fe = new RGWAsioFrontend(env, config, sched_ctx);
+    }
+#endif /* WITH_RADOSGW_BEAST_FRONTEND */
+#if defined(WITH_RADOSGW_FCGI_FRONTEND)
+    else if (framework == "fastcgi" || framework == "fcgi") {
+      framework = "fastcgi";
+      std::string uri_prefix;
+      config->get_val("prefix", "", &uri_prefix);
+      RGWProcessEnv fcgi_pe = { store, &rest, olog, 0, uri_prefix, auth_registry };
+
+      fe = new RGWFCGXFrontend(fcgi_pe, config);
+    }
+#endif /* WITH_RADOSGW_FCGI_FRONTEND */
+
+    // recorded even for frameworks that end up being skipped below
+    service_map_meta["frontend_type#" + stringify(fe_count)] = framework;
+    service_map_meta["frontend_config#" + stringify(fe_count)] = config->get_config();
+
+    if (fe == NULL) {
+      dout(0) << "WARNING: skipping unknown framework: " << framework << dendl;
+      continue;
+    }
+
+    dout(0) << "starting handler: " << fiter->first << dendl;
+    int r = fe->init();
+    if (r < 0) {
+      derr << "ERROR: failed initializing frontend" << dendl;
+      return -r;
+    }
+    r = fe->run();
+    if (r < 0) {
+      derr << "ERROR: failed run" << dendl;
+      return -r;
+    }
+
+    fes.push_back(fe);
+  }
+
+  r = store->register_to_service_map("rgw", service_map_meta);
+  if (r < 0) {
+    derr << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl;
+
+    /* ignore error */
+  }
+
+
+  // add a watcher to respond to realm configuration changes
+  RGWPeriodPusher pusher(store);
+  RGWFrontendPauser pauser(fes, implicit_tenant_context, &pusher);
+  auto reloader = std::make_unique<RGWRealmReloader>(store,
+                                                     service_map_meta, &pauser);
+
+  RGWRealmWatcher realm_watcher(g_ceph_context, store->svc.zone->get_realm());
+  realm_watcher.add_watcher(RGWRealmNotify::Reload, *reloader);
+  realm_watcher.add_watcher(RGWRealmNotify::ZonesNeedPeriod, pusher);
+
+#if defined(HAVE_SYS_PRCTL_H)
+  if (prctl(PR_SET_DUMPABLE, 1) == -1) {
+    cerr << "warning: unable to set dumpable flag: " << cpp_strerror(errno) << std::endl;
+  }
+#endif
+
+  // serve until a shutdown signal wakes us up
+  wait_shutdown();
+
+  derr << "shutting down" << dendl;
+
+  reloader.reset(); // stop the realm reloader
+
+  // stop every frontend first, then join and destroy them
+  for (list<RGWFrontend *>::iterator liter = fes.begin(); liter != fes.end();
+       ++liter) {
+    RGWFrontend *fe = *liter;
+    fe->stop();
+  }
+
+  for (list<RGWFrontend *>::iterator liter = fes.begin(); liter != fes.end();
+       ++liter) {
+    RGWFrontend *fe = *liter;
+    fe->join();
+    delete fe;
+  }
+
+  for (list<RGWFrontendConfig *>::iterator liter = configs.begin();
+       liter != configs.end(); ++liter) {
+    RGWFrontendConfig *fec = *liter;
+    delete fec;
+  }
+
+  unregister_async_signal_handler(SIGHUP, sighup_handler);
+  unregister_async_signal_handler(SIGTERM, handle_sigterm);
+  unregister_async_signal_handler(SIGINT, handle_sigterm);
+  unregister_async_signal_handler(SIGUSR1, handle_sigterm);
+  shutdown_async_signal_handler();
+
+  rgw_log_usage_finalize();
+
+  delete olog;
+
+  RGWStoreManager::close_storage(store);
+  rgw::auth::s3::LDAPEngine::shutdown();
+  rgw_tools_cleanup();
+  rgw_shutdown_resolver();
+  rgw_http_client_cleanup();
+  rgw::curl::cleanup_curl();
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+  rgw::amqp::shutdown();
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+  rgw::kafka::shutdown();
+#endif
+  g_conf().remove_observer(&implicit_tenant_context);
+
+  rgw_perf_stop(g_ceph_context);
+
+  dout(1) << "final shutdown" << dendl;
+
+  signal_fd_finalize();
+
+  return 0;
+}
diff --git a/src/rgw/rgw_meta_sync_status.h b/src/rgw/rgw_meta_sync_status.h
new file mode 100644
index 00000000..a3174e3e
--- /dev/null
+++ b/src/rgw/rgw_meta_sync_status.h
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_META_SYNC_STATUS_H
+#define RGW_META_SYNC_STATUS_H
+
+#include <string>
+
+#include "common/ceph_time.h"
+
+struct rgw_meta_sync_info { // sync phase, shard count and period/realm epoch; embedded as sync_info in rgw_meta_sync_status
+ enum SyncState {
+ StateInit = 0,
+ StateBuildingFullSyncMaps = 1,
+ StateSync = 2,
+ };
+
+ uint16_t state; // holds a SyncState value; the constructor stores StateInit
+ uint32_t num_shards; // set to 0 by the constructor
+ std::string period; //< period id of current metadata log
+ epoch_t realm_epoch = 0; //< realm epoch of period
+
+ void encode(bufferlist& bl) const { // writes struct version 2 with compat version 1
+ ENCODE_START(2, 1, bl);
+ encode(state, bl);
+ encode(num_shards, bl);
+ encode(period, bl);
+ encode(realm_epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl); // NOTE(review): encode() writes version 2 while this passes 1 -- confirm whether this argument should be 2
+ decode(state, bl);
+ decode(num_shards, bl);
+ if (struct_v >= 2) { // period and realm_epoch are only read from version 2 onwards
+ decode(period, bl);
+ decode(realm_epoch, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void decode_json(JSONObj *obj);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(std::list<rgw_meta_sync_info*>& ls);
+
+ rgw_meta_sync_info() : state((int)StateInit), num_shards(0) {}
+};
+WRITE_CLASS_ENCODER(rgw_meta_sync_info)
+
+struct rgw_meta_sync_marker { // sync position record; rgw_meta_sync_status keeps one per uint32_t key in sync_markers
+ enum SyncState {
+ FullSync = 0,
+ IncrementalSync = 1,
+ };
+ uint16_t state; // holds a SyncState value; the constructor stores FullSync
+ string marker;
+ string next_step_marker;
+ uint64_t total_entries; // set to 0 by the constructor
+ uint64_t pos; // set to 0 by the constructor
+ real_time timestamp;
+ epoch_t realm_epoch{0}; //< realm_epoch of period marker
+
+ rgw_meta_sync_marker() : state(FullSync), total_entries(0), pos(0) {}
+
+ void encode(bufferlist& bl) const { // writes struct version 2 with compat version 1
+ ENCODE_START(2, 1, bl);
+ encode(state, bl);
+ encode(marker, bl);
+ encode(next_step_marker, bl);
+ encode(total_entries, bl);
+ encode(pos, bl);
+ encode(timestamp, bl);
+ encode(realm_epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // reads the fields in the order encode() writes them
+ DECODE_START(2, bl);
+ decode(state, bl);
+ decode(marker, bl);
+ decode(next_step_marker, bl);
+ decode(total_entries, bl);
+ decode(pos, bl);
+ decode(timestamp, bl);
+ if (struct_v >= 2) { // realm_epoch is only read from version 2 onwards; otherwise it keeps its default of 0
+ decode(realm_epoch, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void decode_json(JSONObj *obj);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(std::list<rgw_meta_sync_marker*>& ls);
+};
+WRITE_CLASS_ENCODER(rgw_meta_sync_marker)
+
+struct rgw_meta_sync_status { // aggregate of one rgw_meta_sync_info plus a map of rgw_meta_sync_marker
+ rgw_meta_sync_info sync_info;
+ map<uint32_t, rgw_meta_sync_marker> sync_markers; // presumably keyed by log shard id -- confirm against callers
+
+ rgw_meta_sync_status() {}
+
+ void encode(bufferlist& bl) const { // writes struct version 1: sync_info followed by sync_markers
+ ENCODE_START(1, 1, bl);
+ encode(sync_info, bl);
+ encode(sync_markers, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // reads the fields in the order encode() writes them
+ DECODE_START(1, bl);
+ decode(sync_info, bl);
+ decode(sync_markers, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(std::list<rgw_meta_sync_status*>& ls);
+};
+WRITE_CLASS_ENCODER(rgw_meta_sync_status)
+
+#endif
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc
new file mode 100644
index 00000000..9741cba9
--- /dev/null
+++ b/src/rgw/rgw_metadata.cc
@@ -0,0 +1,1178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/intrusive_ptr.hpp>
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "rgw_metadata.h"
+#include "rgw_coroutine.h"
+#include "cls/version/cls_version_types.h"
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_tools.h"
+
+#include "rgw_cr_rados.h"
+
+#include "services/svc_zone.h"
+
+#include "include/ceph_assert.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+// Emit the wrapped mdlog status as a JSON "status" string. Values without a
+// dedicated name are written as "unknown".
+void LogStatusDump::dump(Formatter *f) const {
+  const char *label = "unknown";
+  if (status == MDLOG_STATUS_WRITE) {
+    label = "write";
+  } else if (status == MDLOG_STATUS_SETATTRS) {
+    label = "set_attrs";
+  } else if (status == MDLOG_STATUS_REMOVE) {
+    label = "remove";
+  } else if (status == MDLOG_STATUS_COMPLETE) {
+    label = "complete";
+  } else if (status == MDLOG_STATUS_ABORT) {
+    label = "abort";
+  }
+
+  string s(label);
+  encode_json("status", s, f);
+}
+
+// Serialize the read and write versions followed by the status, which is
+// written as a 32-bit unsigned value.
+void RGWMetadataLogData::encode(bufferlist& bl) const {
+  ENCODE_START(1, 1, bl);
+  encode(read_version, bl);
+  encode(write_version, bl);
+  const uint32_t status_code = static_cast<uint32_t>(status);
+  encode(status_code, bl);
+  ENCODE_FINISH(bl);
+}
+
+// Inverse of encode(): read both versions, then the 32-bit status value and
+// convert it back to RGWMDLogStatus.
+void RGWMetadataLogData::decode(bufferlist::const_iterator& bl) {
+  DECODE_START(1, bl);
+  decode(read_version, bl);
+  decode(write_version, bl);
+  uint32_t status_code;
+  decode(status_code, bl);
+  status = static_cast<RGWMDLogStatus>(status_code);
+  DECODE_FINISH(bl);
+}
+
+// Dump both versions and the status; the status goes through LogStatusDump so
+// it is written by name.
+void RGWMetadataLogData::dump(Formatter *f) const {
+  encode_json("read_version", read_version, f);
+  encode_json("write_version", write_version, f);
+
+  const LogStatusDump status_dump(status);
+  encode_json("status", status_dump, f);
+}
+
+// Read the "status" string from obj and map it to an RGWMDLogStatus.
+// Any string that is not one of the known names yields MDLOG_STATUS_UNKNOWN.
+void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) {
+  static const struct {
+    const char *name;
+    RGWMDLogStatus value;
+  } known_statuses[] = {
+    {"complete", MDLOG_STATUS_COMPLETE},
+    {"write", MDLOG_STATUS_WRITE},
+    {"remove", MDLOG_STATUS_REMOVE},
+    {"set_attrs", MDLOG_STATUS_SETATTRS},
+    {"abort", MDLOG_STATUS_ABORT},
+  };
+
+  string s;
+  JSONDecoder::decode_json("status", s, obj);
+
+  status = MDLOG_STATUS_UNKNOWN;
+  for (const auto& candidate : known_statuses) {
+    if (s == candidate.name) {
+      status = candidate.value;
+      break;
+    }
+  }
+}
+
+// Fill the three fields from the JSON keys that dump() writes.
+void RGWMetadataLogData::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("read_version", this->read_version, obj);
+  JSONDecoder::decode_json("write_version", this->write_version, obj);
+  JSONDecoder::decode_json("status", this->status, obj);
+}
+
+
+// Append one entry to the log shard chosen from the handler's hash key and
+// mark that shard as modified. Returns 0 without doing anything when the zone
+// reports that metadata does not need to be logged.
+int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) {
+  const bool logging_needed = store->svc.zone->need_to_log_metadata();
+  if (!logging_needed) {
+    return 0;
+  }
+
+  string hash_key;
+  handler->get_hash_key(section, key, hash_key);
+
+  string shard_oid;
+  int shard_id;
+  store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, shard_oid, &shard_id);
+  mark_modified(shard_id);
+
+  real_time now = real_clock::now();
+  return store->time_log_add(shard_oid, now, section, key, bl);
+}
+
+// Mark the shard as modified and submit the prepared entries to its log
+// object, using the caller's completion.
+int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion)
+{
+  mark_modified(shard_id);
+
+  string shard_oid;
+  store->shard_name(prefix, shard_id, shard_oid);
+
+  return store->time_log_add(shard_oid, entries, completion, false);
+}
+
+// Allocate a listing context for one shard and hand it back through *handle.
+// The context is freed by complete_list_entries().
+void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time,
+                                       string& marker, void **handle)
+{
+  auto *list_ctx = new LogListCtx();
+
+  list_ctx->cur_shard = shard_id;
+  list_ctx->from_time = from_time;
+  list_ctx->end_time = end_time;
+  list_ctx->marker = marker;
+  get_shard_oid(list_ctx->cur_shard, list_ctx->cur_oid);
+
+  *handle = static_cast<void *>(list_ctx);
+}
+
+// Free the listing context created by init_list_entries().
+void RGWMetadataLog::complete_list_entries(void *handle)
+{
+  delete static_cast<LogListCtx *>(handle);
+}
+
+// Fetch up to max_entries entries for the shard bound to handle and advance
+// the stored marker. A missing log object (-ENOENT) is not an error: it is
+// reported as success with *truncated = false.
+int RGWMetadataLog::list_entries(void *handle,
+                                 int max_entries,
+                                 list<cls_log_entry>& entries,
+                                 string *last_marker,
+                                 bool *truncated) {
+  auto *list_ctx = static_cast<LogListCtx *>(handle);
+
+  if (max_entries == 0) {
+    *truncated = false;
+    return 0;
+  }
+
+  std::string next_marker;
+  const int ret = store->time_log_list(list_ctx->cur_oid, list_ctx->from_time,
+                                       list_ctx->end_time, max_entries, entries,
+                                       list_ctx->marker, &next_marker, truncated);
+  const bool log_missing = (ret == -ENOENT);
+  if (ret < 0 && !log_missing) {
+    return ret;
+  }
+
+  list_ctx->marker = std::move(next_marker);
+  if (last_marker) {
+    *last_marker = list_ctx->marker;
+  }
+  if (log_missing) {
+    *truncated = false;
+  }
+
+  return 0;
+}
+
+// Read the shard's log header and copy its max marker and max time into info.
+// -ENOENT from the store is tolerated; info is then filled from the
+// default-constructed header.
+int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info)
+{
+  string shard_oid;
+  get_shard_oid(shard_id, shard_oid);
+
+  cls_log_header header;
+  const int ret = store->time_log_info(shard_oid, &header);
+  if (ret < 0 && ret != -ENOENT) {
+    return ret;
+  }
+
+  info->marker = header.max_marker;
+  info->last_update = header.max_time.to_real_time();
+  return 0;
+}
+
+// librados completion callback: forwards to the RGWMetadataLogInfoCompletion
+// passed as arg, then releases the reference taken in get_info_async().
+static void _mdlog_info_completion(librados::completion_t cb, void *arg)
+{
+  RGWMetadataLogInfoCompletion *info_completion =
+      static_cast<RGWMetadataLogInfoCompletion *>(arg);
+  info_completion->finish(cb);
+  info_completion->put(); // drop the ref from get_info_async()
+}
+
+// Create the librados completion, registering this object as the callback
+// argument for _mdlog_info_completion, and remember the user callback.
+RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb)
+  : completion(librados::Rados::aio_create_completion(
+        static_cast<void *>(this), nullptr, _mdlog_info_completion)),
+    callback(cb)
+{}
+
+// Release the librados completion created by the constructor.
+RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion() {
+  completion->release();
+}
+
+// Start an asynchronous read of the shard's log header into the completion
+// object. A reference is taken on the completion here and dropped by
+// _mdlog_info_completion() when the callback runs.
+// NOTE(review): if time_log_info_async() fails before the callback can fire,
+// nothing visible here drops that reference -- confirm against the store.
+int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion)
+{
+  string shard_oid;
+  get_shard_oid(shard_id, shard_oid);
+
+  completion->get(); // hold a ref until the completion fires
+
+  auto& io_ctx = completion->get_io_ctx();
+  auto *header = &completion->get_header();
+  auto *aio_completion = completion->get_completion();
+  return store->time_log_info_async(io_ctx, shard_oid, header, aio_completion);
+}
+
+// Trim the shard's log object over the given time and marker range. The last
+// argument to time_log_trim() is nullptr.
+int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time,
+                         const string& start_marker, const string& end_marker)
+{
+  string shard_oid;
+  get_shard_oid(shard_id, shard_oid);
+
+  const int ret = store->time_log_trim(shard_oid, from_time, end_time,
+                                       start_marker, end_marker, nullptr);
+  return ret;
+}
+
+// Take an exclusive lock on the shard's log object in the zone's log pool.
+int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id)
+{
+  string shard_oid;
+  get_shard_oid(shard_id, shard_oid);
+
+  auto& log_pool = store->svc.zone->get_zone_params().log_pool;
+  return store->lock_exclusive(log_pool, shard_oid, duration, zone_id, owner_id);
+}
+
+// Release the lock on the shard's log object in the zone's log pool.
+int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id)
+{
+  string shard_oid;
+  get_shard_oid(shard_id, shard_oid);
+
+  auto& log_pool = store->svc.zone->get_zone_params().log_pool;
+  return store->unlock(log_pool, shard_oid, zone_id, owner_id);
+}
+
+// Record that shard_id has been written to. The read lock is taken first so
+// that an already-marked shard returns without taking the write lock.
+// Both locks are held through scoped guards, so every exit path releases them.
+void RGWMetadataLog::mark_modified(int shard_id)
+{
+  {
+    RWLock::RLocker rl(lock);
+    if (modified_shards.find(shard_id) != modified_shards.end()) {
+      return;
+    }
+  }
+
+  // Another thread may insert the same id between the two locks; inserting
+  // into the set again is harmless.
+  RWLock::WLocker wl(lock);
+  modified_shards.insert(shard_id);
+}
+
+// Under the write lock, give the caller the current set of modified shards
+// and leave the internal set empty. Whatever the caller's set held before is
+// discarded.
+void RGWMetadataLog::read_clear_modified(set<int> &modified)
+{
+  RWLock::WLocker wl(lock);
+  modified_shards.swap(modified);
+  modified_shards.clear();
+}
+
+// Mutable access to the object's stored version.
+obj_version& RGWMetadataObject::get_version() {
+  return this->objv;
+}
+
+// Handler with an empty type name. It stores no objects of its own (get, put
+// and remove return -ENOTSUP); key listing enumerates the section names
+// registered with the store's metadata manager.
+class RGWMetadataTopHandler : public RGWMetadataHandler {
+  // listing state: sorted copy of the section names plus the read position
+  struct iter_data {
+    set<string> sections;
+    set<string>::iterator iter;
+  };
+
+public:
+  RGWMetadataTopHandler() {}
+
+  string get_type() override { return string(); }
+
+  int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override {
+    return -ENOTSUP;
+  }
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          real_time mtime, JSONObj *obj, sync_type_t sync_type) override {
+    return -ENOTSUP;
+  }
+
+  virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {}
+
+  int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+    return -ENOTSUP;
+  }
+
+  // Copy the section names and position the iterator at the first name that
+  // is not less than marker.
+  int list_keys_init(RGWRados *store, const string& marker, void **phandle) override {
+    auto *state = new iter_data;
+
+    list<string> names;
+    store->meta_mgr->get_sections(names);
+    for (const auto& name : names) {
+      state->sections.insert(name);
+    }
+    state->iter = state->sections.lower_bound(marker);
+
+    *phandle = state;
+    return 0;
+  }
+
+  // Append up to max names to keys; *truncated reports whether names remain.
+  int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
+    auto *state = static_cast<iter_data *>(handle);
+
+    int emitted = 0;
+    while (emitted < max && state->iter != state->sections.end()) {
+      keys.push_back(*state->iter);
+      ++state->iter;
+      ++emitted;
+    }
+
+    *truncated = (state->iter != state->sections.end());
+    return 0;
+  }
+
+  void list_keys_complete(void *handle) override {
+    delete static_cast<iter_data *>(handle);
+  }
+
+  // The next name that would be returned, or an empty string at the end.
+  virtual string get_marker(void *handle) override {
+    auto *state = static_cast<iter_data *>(handle);
+
+    if (state->iter == state->sections.end()) {
+      return string();
+    }
+    return *(state->iter);
+  }
+};
+
+static RGWMetadataTopHandler md_top_handler;
+
+
+RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store)
+ : cct(_cct), store(_store)
+{
+}
+
+// The manager deletes every handler that was registered with it.
+RGWMetadataManager::~RGWMetadataManager()
+{
+  for (auto& registered : handlers) {
+    delete registered.second;
+  }
+  handlers.clear();
+}
+
+const std::string RGWMetadataLogHistory::oid = "meta.history";
+
+namespace {
+
+// Read and decode the mdlog history object from the zone's log pool.
+// Returns 0 on success, the read error on failure, -EIO when the object
+// cannot be decoded, and -ENOENT when the object exists but is empty (the
+// empty object is removed first; a failed removal returns that error).
+int read_history(RGWRados *store, RGWMetadataLogHistory *state,
+                 RGWObjVersionTracker *objv_tracker)
+{
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  auto& pool = store->svc.zone->get_zone_params().log_pool;
+  const auto& oid = RGWMetadataLogHistory::oid;
+
+  bufferlist bl;
+  const int read_ret = rgw_get_system_obj(store, obj_ctx, pool, oid, bl, objv_tracker, nullptr);
+  if (read_ret < 0) {
+    return read_ret;
+  }
+
+  if (bl.length() > 0) {
+    try {
+      auto p = bl.cbegin();
+      state->decode(p);
+    } catch (buffer::error& e) {
+      ldout(store->ctx(), 1) << "failed to decode the mdlog history: "
+          << e.what() << dendl;
+      return -EIO;
+    }
+    return 0;
+  }
+
+  /* bad history object, remove it */
+  rgw_raw_obj obj(pool, oid);
+  auto sysobj = obj_ctx.get_obj(obj);
+  const int remove_ret = sysobj.wop().remove();
+  if (remove_ret < 0) {
+    ldout(store->ctx(), 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-remove_ret) << ")" << dendl;
+    return remove_ret;
+  }
+  return -ENOENT;
+}
+
+// Encode state and store it as the mdlog history object in the zone's log
+// pool. exclusive is forwarded to rgw_put_system_obj(); the mtime argument is
+// a default-constructed real_time.
+int write_history(RGWRados *store, const RGWMetadataLogHistory& state,
+                  RGWObjVersionTracker *objv_tracker, bool exclusive = false)
+{
+  auto& pool = store->svc.zone->get_zone_params().log_pool;
+  const auto& oid = RGWMetadataLogHistory::oid;
+
+  bufferlist encoded;
+  state.encode(encoded);
+
+  return rgw_put_system_obj(store, pool, oid, encoded,
+                            exclusive, objv_tracker, real_time{});
+}
+
+using Cursor = RGWPeriodHistory::Cursor;
+
+/// read the mdlog history and use it to initialize the given cursor; fails with the read error, or with the cursor's error when the stored epoch is not in the period history
+class ReadHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ Cursor *cursor; // output: assigned from period_history->lookup() after the history has been read
+ RGWObjVersionTracker *objv_tracker; // forwarded to the read coroutine
+ RGWMetadataLogHistory state; // receives the decoded history object
+ public:
+ ReadHistoryCR(RGWRados *store, Cursor *cursor,
+ RGWObjVersionTracker *objv_tracker)
+ : RGWCoroutine(store->ctx()), store(store), cursor(cursor),
+ objv_tracker(objv_tracker)
+ {}
+
+ int operate() {
+ reenter(this) {
+ yield { // spawn the read of the history object from the zone's log pool
+ rgw_raw_obj obj{store->svc.zone->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+ constexpr bool empty_on_enoent = false; // presumably makes a missing object an error instead of an empty result -- confirm against RGWSimpleRadosReadCR
+
+ using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
+ call(new ReadCR(store->get_async_rados(), store->svc.sysobj, obj,
+ &state, empty_on_enoent, objv_tracker));
+ }
+ if (retcode < 0) { // result of the ReadCR called above
+ ldout(cct, 1) << "failed to read mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ *cursor = store->period_history->lookup(state.oldest_realm_epoch);
+ if (!*cursor) { // lookup failed: fail the coroutine with the error carried by the cursor
+ return set_cr_error(cursor->get_error());
+ }
+
+ ldout(cct, 10) << "read mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// write the given cursor to the mdlog history (its period id and epoch become the stored oldest period)
+class WriteHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ Cursor cursor; // copy of the cursor whose period is written
+ RGWObjVersionTracker *objv; // forwarded to the write coroutine
+ RGWMetadataLogHistory state; // filled from cursor at the start of operate()
+ public:
+ WriteHistoryCR(RGWRados *store, const Cursor& cursor,
+ RGWObjVersionTracker *objv)
+ : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv)
+ {}
+
+ int operate() {
+ reenter(this) {
+ state.oldest_period_id = cursor.get_period().get_id();
+ state.oldest_realm_epoch = cursor.get_epoch();
+
+ yield { // spawn the write of the history object to the zone's log pool
+ rgw_raw_obj obj{store->svc.zone->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+
+ using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
+ call(new WriteCR(store->get_async_rados(), store->svc.sysobj, obj, state, objv));
+ }
+ if (retcode < 0) { // result of the WriteCR called above
+ ldout(cct, 1) << "failed to write mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ ldout(cct, 10) << "wrote mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// update the mdlog history to reflect trimmed logs: reads the stored history, then writes the period after the trimmed one as the new oldest
+class TrimHistoryCR : public RGWCoroutine {
+ RGWRados *store;
+ const Cursor cursor; //< cursor to trimmed period
+ RGWObjVersionTracker *objv; //< to prevent racing updates
+ Cursor next; //< target cursor for oldest log period
+ Cursor existing; //< existing cursor read from disk
+
+ public:
+ TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv)
+ : RGWCoroutine(store->ctx()),
+ store(store), cursor(cursor), objv(objv), next(cursor)
+ {
+ next.next(); // advance past cursor
+ }
+
+ int operate() {
+ reenter(this) {
+ // read an existing history, and write the new history if it's newer
+ yield call(new ReadHistoryCR(store, &existing, objv));
+ if (retcode < 0) { // the read failed: propagate its error
+ return set_cr_error(retcode);
+ }
+ // reject older trims with ECANCELED
+ if (cursor.get_epoch() < existing.get_epoch()) {
+ ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
+ << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
+ return set_cr_error(-ECANCELED);
+ }
+ // overwrite with updated history
+ yield call(new WriteHistoryCR(store, next, objv));
+ if (retcode < 0) { // the write failed: propagate its error
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// traverse all the way back to the beginning of the period history, and
+// return a cursor to the first period in a fully attached history
+Cursor find_oldest_period(RGWRados *store)
+{
+ auto cct = store->ctx();
+ auto cursor = store->period_history->get_current(); // start from the current period and walk backwards
+
+ while (cursor) {
+ // advance to the period's predecessor
+ if (!cursor.has_prev()) { // predecessor not linked in the local history yet
+ auto& predecessor = cursor.get_period().get_predecessor();
+ if (predecessor.empty()) {
+ // this is the first period, so our logs must start here
+ ldout(cct, 10) << "find_oldest_period returning first "
+ "period " << cursor.get_period().get_id() << dendl;
+ return cursor;
+ }
+ // pull the predecessor and add it to our history
+ RGWPeriod period;
+ int r = store->period_puller->pull(predecessor, period);
+ if (r < 0) { // pull failed: return the oldest period reached so far; the error code r is not returned
+ return cursor;
+ }
+ auto prev = store->period_history->insert(std::move(period));
+ if (!prev) { // insert failed: return the invalid cursor it produced
+ return prev;
+ }
+ ldout(cct, 20) << "find_oldest_period advancing to "
+ "predecessor period " << predecessor << dendl;
+ ceph_assert(cursor.has_prev()); // the insert above is expected to have linked the predecessor
+ }
+ cursor.prev();
+ }
+ ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
+ return cursor;
+}
+
+} // anonymous namespace
+
+// Return a cursor to the oldest period that has a metadata log, creating or
+// rewriting the stored mdlog history object as needed.
+//
+// - No history object (-ENOENT): find the oldest period, write it with an
+//   exclusive create (-EEXIST is tolerated) and return its cursor.
+// - History read and its realm epoch is in the period history: return that
+//   cursor.
+// - History read but the epoch is unknown: find the oldest period again,
+//   rewrite the history (-ECANCELED is tolerated) and return its cursor.
+//
+// On failure the returned cursor is invalid: either the one produced by
+// find_oldest_period() or Cursor{error}.
+Cursor RGWMetadataManager::init_oldest_log_period()
+{
+  // read the mdlog history
+  RGWMetadataLogHistory state;
+  RGWObjVersionTracker objv;
+  int ret = read_history(store, &state, &objv);
+
+  if (ret == -ENOENT) {
+    // initialize the mdlog history and write it
+    ldout(cct, 10) << "initializing mdlog history" << dendl;
+    auto cursor = find_oldest_period(store);
+    if (!cursor) {
+      return cursor;
+    }
+
+    // write the initial history
+    state.oldest_realm_epoch = cursor.get_epoch();
+    state.oldest_period_id = cursor.get_period().get_id();
+
+    constexpr bool exclusive = true; // don't overwrite
+    ret = write_history(store, state, &objv, exclusive);
+    if (ret < 0 && ret != -EEXIST) {
+      ldout(cct, 1) << "failed to write mdlog history: "
+          << cpp_strerror(ret) << dendl;
+      return Cursor{ret};
+    }
+    return cursor;
+  }
+  if (ret < 0) {
+    ldout(cct, 1) << "failed to read mdlog history: "
+        << cpp_strerror(ret) << dendl;
+    return Cursor{ret};
+  }
+
+  // if it's already in the history, return it
+  auto cursor = store->period_history->lookup(state.oldest_realm_epoch);
+  if (cursor) {
+    return cursor;
+  }
+
+  // the recorded epoch is not in our period history: recompute the oldest
+  // period and rewrite the history object
+  cursor = find_oldest_period(store);
+  if (!cursor) {
+    // same guard as the -ENOENT branch: find_oldest_period() can return an
+    // invalid cursor, which must not be dereferenced through get_period()
+    return cursor;
+  }
+  state.oldest_realm_epoch = cursor.get_epoch();
+  state.oldest_period_id = cursor.get_period().get_id();
+  ldout(cct, 10) << "rewriting mdlog history" << dendl;
+  ret = write_history(store, state, &objv);
+  if (ret < 0 && ret != -ECANCELED) {
+    ldout(cct, 1) << "failed to write mdlog history: "
+        << cpp_strerror(ret) << dendl;
+    return Cursor{ret};
+  }
+  return cursor;
+}
+
+// Read the stored mdlog history (without version tracking) and look up its
+// oldest realm epoch in the period history. A read failure yields a cursor
+// carrying the error code.
+Cursor RGWMetadataManager::read_oldest_log_period() const
+{
+  RGWMetadataLogHistory state;
+  const int ret = read_history(store, &state, nullptr);
+
+  if (ret >= 0) {
+    ldout(store->ctx(), 10) << "read mdlog history with oldest period id="
+        << state.oldest_period_id << " realm_epoch="
+        << state.oldest_realm_epoch << dendl;
+
+    return store->period_history->lookup(state.oldest_realm_epoch);
+  }
+
+  ldout(store->ctx(), 1) << "failed to read mdlog history: "
+      << cpp_strerror(ret) << dendl;
+  return Cursor{ret};
+}
+
+// Coroutine form of the history read: allocates a ReadHistoryCR that fills
+// *period. The returned coroutine is heap-allocated with new.
+RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period,
+        RGWObjVersionTracker *objv) const
+{
+  RGWCoroutine *cr = new ReadHistoryCR(store, period, objv);
+  return cr;
+}
+
+// Allocates a TrimHistoryCR that advances the stored history past the given
+// period. The returned coroutine is heap-allocated with new.
+RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period,
+        RGWObjVersionTracker *objv) const
+{
+  RGWCoroutine *cr = new TrimHistoryCR(store, period, objv);
+  return cr;
+}
+
+// Open (creating if needed) the log for the current period and remember it as
+// current_log. Always returns 0.
+int RGWMetadataManager::init(const std::string& current_period)
+{
+  RGWMetadataLog *period_log = get_log(current_period);
+  current_log = period_log;
+  return 0;
+}
+
+// Return the log for the given period, constructing it in place inside
+// md_logs on first use.
+RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period)
+{
+  auto result = md_logs.emplace(std::piecewise_construct,
+                                std::forward_as_tuple(period),
+                                std::forward_as_tuple(cct, store, period));
+  auto& slot = *result.first;
+  return &slot.second;
+}
+
+// Register handler under its type name. Returns -EINVAL, leaving the existing
+// registration untouched, when that type already has a handler.
+int RGWMetadataManager::register_handler(RGWMetadataHandler *handler)
+{
+  const string type = handler->get_type();
+
+  // emplace() does not overwrite: .second is false when the key exists
+  const bool inserted = handlers.emplace(type, handler).second;
+  return inserted ? 0 : -EINVAL;
+}
+
+// Look up the handler registered for type; null when there is none.
+RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type)
+{
+  auto found = handlers.find(type);
+  return (found != handlers.end()) ? found->second : nullptr;
+}
+
+// Split "type:entry" at the first ':'. Without a ':' the whole key becomes
+// the type and entry is left exactly as the caller passed it.
+void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry)
+{
+  const auto colon = metadata_key.find(':');
+  if (colon != string::npos) {
+    type = metadata_key.substr(0, colon);
+    entry = metadata_key.substr(colon + 1);
+    return;
+  }
+  type = metadata_key;
+}
+
+// Resolve the handler for a metadata key and extract the entry part.
+// An empty type selects the top-level handler; an unregistered type returns
+// -ENOENT and leaves *handler unset.
+int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry)
+{
+  string type;
+  parse_metadata_key(metadata_key, type, entry);
+
+  if (type.empty()) {
+    *handler = &md_top_handler;
+    return 0;
+  }
+
+  auto found = handlers.find(type);
+  if (found == handlers.end()) {
+    return -ENOENT;
+  }
+
+  *handler = found->second;
+  return 0;
+}
+
+// Fetch the object named by metadata_key and write it to f as a
+// "metadata_info" section with key, ver, optional mtime and data. The mtime
+// field is omitted when the object's mtime is zero.
+int RGWMetadataManager::get(string& metadata_key, Formatter *f)
+{
+  string entry;
+  RGWMetadataHandler *handler;
+  const int find_ret = find_handler(metadata_key, &handler, entry);
+  if (find_ret < 0) {
+    return find_ret;
+  }
+
+  RGWMetadataObject *obj;
+  const int get_ret = handler->get(store, entry, &obj);
+  if (get_ret < 0) {
+    return get_ret;
+  }
+
+  f->open_object_section("metadata_info");
+  encode_json("key", metadata_key, f);
+  encode_json("ver", obj->get_version(), f);
+
+  const real_time mtime = obj->get_mtime();
+  const bool has_mtime = !real_clock::is_zero(mtime);
+  if (has_mtime) {
+    utime_t ut(mtime);
+    encode_json("mtime", ut, f);
+  }
+
+  encode_json("data", *obj, f);
+  f->close_section();
+
+  // the handler allocated obj for us
+  delete obj;
+  return 0;
+}
+
+// Parse the JSON in bl (key, ver, mtime, data) and hand the data object to
+// the handler selected from the incoming metadata_key. Note that decoding
+// "key" overwrites metadata_key; the entry passed to the handler was
+// extracted before that. When existing_version is set it receives the
+// tracker's read version whether or not the put succeeded.
+int RGWMetadataManager::put(string& metadata_key, bufferlist& bl,
+                            RGWMetadataHandler::sync_type_t sync_type,
+                            obj_version *existing_version)
+{
+  string entry;
+  RGWMetadataHandler *handler;
+  int ret = find_handler(metadata_key, &handler, entry);
+  if (ret < 0) {
+    return ret;
+  }
+
+  JSONParser parser;
+  const bool parsed = parser.parse(bl.c_str(), bl.length());
+  if (!parsed) {
+    return -EINVAL;
+  }
+
+  RGWObjVersionTracker objv_tracker;
+  obj_version& write_ver = objv_tracker.write_version;
+  utime_t mtime;
+
+  try {
+    JSONDecoder::decode_json("key", metadata_key, &parser);
+    JSONDecoder::decode_json("ver", write_ver, &parser);
+    JSONDecoder::decode_json("mtime", mtime, &parser);
+  } catch (JSONDecoder::err& e) {
+    return -EINVAL;
+  }
+
+  JSONObj *data = parser.find_obj("data");
+  if (data == nullptr) {
+    return -EINVAL;
+  }
+
+  ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), data, sync_type);
+  if (existing_version) {
+    *existing_version = objv_tracker.read_version;
+  }
+  return ret;
+}
+
+// Read the current object (if any) to populate objv_tracker's read version,
+// decide whether the incoming write applies under sync_mode, and make sure a
+// write version is set. Returns STATUS_NO_APPLY when check_versions() rejects
+// the write against an existing object, a negative error when the read fails
+// for a reason other than -ENOENT, and 0 otherwise.
+int RGWMetadataManager::prepare_mutate(RGWRados *store,
+                                       rgw_pool& pool, const string& oid,
+                                       const real_time& mtime,
+                                       RGWObjVersionTracker *objv_tracker,
+                                       RGWMetadataHandler::sync_type_t sync_mode)
+{
+  bufferlist bl;
+  real_time orig_mtime;
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  const int ret = rgw_get_system_obj(store, obj_ctx, pool, oid,
+                                     bl, objv_tracker, &orig_mtime,
+                                     nullptr, nullptr);
+  const bool not_found = (ret == -ENOENT);
+  if (ret < 0 && !not_found) {
+    return ret;
+  }
+  if (!not_found &&
+      !RGWMetadataHandler::check_versions(objv_tracker->read_version, orig_mtime,
+                                          objv_tracker->write_version, mtime, sync_mode)) {
+    return STATUS_NO_APPLY;
+  }
+
+  // a caller-supplied write version is kept as is
+  if (!objv_tracker->write_version.tag.empty()) {
+    return 0;
+  }
+
+  if (objv_tracker->read_version.tag.empty()) {
+    objv_tracker->generate_new_write_ver(store->ctx());
+  } else {
+    // continue from the version that was read
+    objv_tracker->write_version = objv_tracker->read_version;
+    objv_tracker->write_version.ver++;
+  }
+  return 0;
+}
+
+// Remove the object named by metadata_key. The object is fetched first so
+// that its current version can be passed to the handler as the read version.
+int RGWMetadataManager::remove(string& metadata_key)
+{
+  string entry;
+  RGWMetadataHandler *handler;
+  const int find_ret = find_handler(metadata_key, &handler, entry);
+  if (find_ret < 0) {
+    return find_ret;
+  }
+
+  RGWMetadataObject *current;
+  const int get_ret = handler->get(store, entry, &current);
+  if (get_ret < 0) {
+    return get_ret;
+  }
+
+  RGWObjVersionTracker objv_tracker;
+  objv_tracker.read_version = current->get_version();
+  delete current;
+
+  return handler->remove(store, entry, objv_tracker);
+}
+
+// Take an exclusive lock on the object backing metadata_key. The zone id
+// passed to the store is an empty string.
+int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id)
+{
+  string entry;
+  RGWMetadataHandler *handler;
+  const int ret = find_handler(metadata_key, &handler, entry);
+  if (ret < 0) {
+    return ret;
+  }
+
+  rgw_pool pool;
+  string oid;
+  handler->get_pool_and_oid(store, entry, pool, oid);
+
+  string zone_id;
+  return store->lock_exclusive(pool, oid, duration, zone_id, owner_id);
+}
+
+// Release the lock on the object backing metadata_key. The zone id passed to
+// the store is an empty string, matching lock_exclusive().
+// Returns the find_handler() error when the key has no handler, otherwise
+// the result of the store's unlock().
+int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) {
+  RGWMetadataHandler *handler;
+  string entry;
+  string zone_id;
+
+  int ret = find_handler(metadata_key, &handler, entry);
+  if (ret < 0)
+    return ret;
+
+  rgw_pool pool;
+  string oid;
+
+  handler->get_pool_and_oid(store, entry, pool, oid);
+
+  return store->unlock(pool, oid, zone_id, owner_id);
+}
+
+struct list_keys_handle { // opaque listing handle returned by RGWMetadataManager::list_keys_init()
+ void *handle; // handler-specific listing state, filled in by handler->list_keys_init()
+ RGWMetadataHandler *handler; // handler that owns handle; the list_keys_* calls are forwarded to it
+};
+
+// Convenience overload: start listing a section from an empty marker.
+int RGWMetadataManager::list_keys_init(const string& section, void **handle)
+{
+  const string no_marker;
+  return list_keys_init(section, no_marker, handle);
+}
+
+// Start listing the keys of a section from marker. On success *handle owns a
+// list_keys_handle that must be released with list_keys_complete(). Any
+// failure to resolve the section is reported as -ENOENT.
+int RGWMetadataManager::list_keys_init(const string& section,
+                                       const string& marker, void **handle)
+{
+  string entry;
+  RGWMetadataHandler *handler;
+  if (find_handler(section, &handler, entry) < 0) {
+    return -ENOENT;
+  }
+
+  auto *listing = new list_keys_handle;
+  listing->handler = handler;
+
+  const int ret = handler->list_keys_init(store, marker, &listing->handle);
+  if (ret < 0) {
+    delete listing;
+    return ret;
+  }
+
+  *handle = static_cast<void *>(listing);
+  return 0;
+}
+
+// Forward to the handler that owns this listing.
+int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated)
+{
+  auto *listing = static_cast<list_keys_handle *>(handle);
+  return listing->handler->list_keys_next(listing->handle, max, keys, truncated);
+}
+
+// Let the handler release its listing state, then free the wrapper itself.
+void RGWMetadataManager::list_keys_complete(void *handle)
+{
+  auto *listing = static_cast<list_keys_handle *>(handle);
+  listing->handler->list_keys_complete(listing->handle);
+  delete listing;
+}
+
+// Ask the owning handler for the listing's current marker.
+string RGWMetadataManager::get_marker(void *handle)
+{
+  auto *listing = static_cast<list_keys_handle *>(handle);
+  RGWMetadataHandler *owner = listing->handler;
+  return owner->get_marker(listing->handle);
+}
+
+// Write one log entry to f as an "entry" section. The "data" field is emitted
+// only when the entry payload decodes as RGWMetadataLogData; a decode failure
+// is logged and the section is closed without it.
+void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f)
+{
+  f->open_object_section("entry");
+  f->dump_string("id", entry.id);
+  f->dump_string("section", entry.section);
+  f->dump_string("name", entry.name);
+  entry.timestamp.gmtime_nsec(f->dump_stream("timestamp"));
+
+  try {
+    auto payload = entry.data.cbegin();
+    RGWMetadataLogData log_data;
+    decode(log_data, payload);
+
+    encode_json("data", log_data, f);
+  } catch (buffer::error& err) {
+    lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl;
+  }
+
+  f->close_section();
+}
+
+// Append the type name of every registered handler to sections, in the
+// handlers map's iteration order.
+void RGWMetadataManager::get_sections(list<string>& sections)
+{
+  for (const auto& registered : handlers) {
+    sections.push_back(registered.first);
+  }
+}
+
+// Log the start of a metadata change. Sets section to the handler's type,
+// fills log_data with the tracker's versions and op_type, and appends it to
+// the current period's log. init() must have been called first.
+int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
+                                   RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
+                                   RGWMDLogStatus op_type)
+{
+  section = handler->get_type();
+
+  if (objv_tracker) {
+    auto& read_ver = objv_tracker->read_version;
+    auto& write_ver = objv_tracker->write_version;
+
+    /* if write version has not been set, and there's a read version, set it so that we can
+     * log it
+     */
+    if (read_ver.ver && !write_ver.ver) {
+      write_ver = read_ver;
+      write_ver.ver++;
+    }
+    log_data.read_version = read_ver;
+    log_data.write_version = write_ver;
+  }
+
+  log_data.status = op_type;
+
+  bufferlist logbl;
+  encode(log_data, logbl);
+
+  ceph_assert(current_log); // must have called init()
+  const int ret = current_log->add_entry(handler, section, key, logbl);
+  return (ret < 0) ? ret : 0;
+}
+
+// Log the outcome of a metadata change: COMPLETE when ret >= 0, ABORT
+// otherwise. The log entry is written in both cases. The operation's own
+// error takes precedence over a logging error in the return value.
+int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
+                                    RGWObjVersionTracker *objv_tracker, int ret)
+{
+  const bool op_succeeded = (ret >= 0);
+  log_data.status = op_succeeded ? MDLOG_STATUS_COMPLETE : MDLOG_STATUS_ABORT;
+
+  bufferlist logbl;
+  encode(log_data, logbl);
+
+  ceph_assert(current_log); // must have called init()
+  const int log_ret = current_log->add_entry(handler, section, key, logbl);
+
+  if (!op_succeeded) {
+    return ret;
+  }
+  return (log_ret < 0) ? log_ret : 0;
+}
+
+// Build the heap object name ".meta:<type>:<key>:<tag>:<ver>".
+// The name is assembled with std::string instead of formatting into a
+// variable-length array, which is not standard C++. The version is printed
+// as a signed long long, as the "%lld" format did. A tag containing an
+// embedded NUL is now appended in full rather than cut at the NUL.
+string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv)
+{
+  const string version_suffix =
+      objv.tag + ":" + std::to_string(static_cast<long long>(objv.ver));
+  return string(".meta:") + handler->get_type() + ":" + key + ":" + version_suffix;
+}
+
+// Store a copy of the object in the zone's metadata heap pool under the name
+// built by heap_oid() from the tracker's write version. Returns -EINVAL
+// without a tracker and 0 without doing anything when no heap pool is
+// configured.
+int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
+                                      RGWObjVersionTracker *objv_tracker, real_time mtime,
+                                      map<string, bufferlist> *pattrs)
+{
+  if (objv_tracker == nullptr) {
+    return -EINVAL;
+  }
+
+  rgw_pool heap_pool(store->svc.zone->get_zone_params().metadata_heap);
+  if (heap_pool.empty()) {
+    return 0;
+  }
+
+  // the heap copy gets its own tracker carrying only the write version
+  RGWObjVersionTracker heap_tracker;
+  heap_tracker.write_version = objv_tracker->write_version;
+
+  const string oid = heap_oid(handler, key, objv_tracker->write_version);
+  const int ret = rgw_put_system_obj(store, heap_pool, oid,
+                                     bl, false, &heap_tracker, mtime, pattrs);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << " returned ret=" << ret << dendl;
+  return ret;
+}
+
+// Remove the heap copy named by heap_oid() for the tracker's write version.
+// Returns -EINVAL without a tracker and 0 without doing anything when no
+// heap pool is configured.
+int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker)
+{
+  if (objv_tracker == nullptr) {
+    return -EINVAL;
+  }
+
+  rgw_pool heap_pool(store->svc.zone->get_zone_params().metadata_heap);
+  if (heap_pool.empty()) {
+    return 0;
+  }
+
+  const string oid = heap_oid(handler, key, objv_tracker->write_version);
+  rgw_raw_obj obj(heap_pool, oid);
+
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+  const int ret = sysobj.wop().remove();
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: sysobj.wop().remove() oid=" << oid << " returned ret=" << ret << dendl;
+  return ret;
+}
+
+// Write a metadata object, bracketed by mdlog entries: pre_modify() logs the
+// write, the object is stored in the heap and then in its own pool, and
+// post_modify() logs COMPLETE or ABORT from the result. If the pool write
+// fails the heap copy is removed again (a failure there is only logged).
+int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
+                                  RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs)
+{
+  string section;
+  RGWMetadataLogData log_data;
+  int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE);
+  if (ret < 0) {
+    return ret;
+  }
+
+  string oid;
+  rgw_pool pool;
+  handler->get_pool_and_oid(store, key, pool, oid);
+
+  ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs);
+  if (ret < 0) {
+    ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl;
+  } else {
+    ret = rgw_put_system_obj(store, pool, oid, bl, exclusive,
+                             objv_tracker, mtime, pattrs);
+    if (ret < 0) {
+      const int r = remove_from_heap(handler, key, objv_tracker);
+      if (r < 0) {
+        ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl;
+      }
+    }
+  }
+
+  /* cascading ret into post_modify() */
+  ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
+  return (ret < 0) ? ret : 0;
+}
+
+// Remove a metadata object, bracketed by mdlog entries: pre_modify() logs the
+// removal and post_modify() logs COMPLETE or ABORT from the result of the
+// remove operation.
+int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler,
+                                     const string& key,
+                                     RGWObjVersionTracker *objv_tracker)
+{
+  string section;
+  RGWMetadataLogData log_data;
+  const int pre_ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE);
+  if (pre_ret < 0) {
+    return pre_ret;
+  }
+
+  string oid;
+  rgw_pool pool;
+  handler->get_pool_and_oid(store, key, pool, oid);
+
+  rgw_raw_obj obj(pool, oid);
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+  const int remove_ret = sysobj.wop()
+                               .set_objv_tracker(objv_tracker)
+                               .remove();
+
+  /* cascading ret into post_modify() */
+  const int post_ret = post_modify(handler, section, key, log_data, objv_tracker, remove_ret);
+  return (post_ret < 0) ? post_ret : 0;
+}
+
+// Compute the mdlog shard for a section/key pair from the handler's hash key
+// and rgw_md_log_max_shards. Returns -EINVAL when the section has no
+// registered handler.
+int RGWMetadataManager::get_log_shard_id(const string& section,
+                                         const string& key, int *shard_id)
+{
+  RGWMetadataHandler *handler = get_handler(section);
+  if (handler == nullptr) {
+    return -EINVAL;
+  }
+
+  string hash_key;
+  handler->get_hash_key(section, key, hash_key);
+
+  const auto max_shards = cct->_conf->rgw_md_log_max_shards;
+  *shard_id = store->key_to_shard_id(hash_key, max_shards);
+  return 0;
+}
diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h
new file mode 100644
index 00000000..e4107677
--- /dev/null
+++ b/src/rgw/rgw_metadata.h
@@ -0,0 +1,426 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_METADATA_H
+#define CEPH_RGW_METADATA_H
+
+#include <string>
+#include <utility>
+#include <boost/optional.hpp>
+
+#include "include/types.h"
+#include "rgw_common.h"
+#include "rgw_period_history.h"
+#include "cls/version/cls_version_types.h"
+#include "cls/log/cls_log_types.h"
+#include "common/RWLock.h"
+#include "common/RefCountedObj.h"
+#include "common/ceph_time.h"
+
+
+class RGWRados;
+class RGWCoroutine;
+class JSONObj;
+struct RGWObjVersionTracker;
+
+struct obj_version;
+
+
+/* Operation status carried by a metadata log record (RGWMetadataLogData::status). */
+enum RGWMDLogStatus {
+ MDLOG_STATUS_UNKNOWN, // value a default-constructed RGWMetadataLogData starts with
+ MDLOG_STATUS_WRITE, // passed to pre_modify() by RGWMetadataManager::mutate()
+ MDLOG_STATUS_SETATTRS,
+ MDLOG_STATUS_REMOVE, // passed to pre_modify() by RGWMetadataManager::remove_entry()
+ MDLOG_STATUS_COMPLETE,
+ MDLOG_STATUS_ABORT,
+};
+
+/* Abstract base for a metadata object: carries an object version and a
+ * modification time, and requires subclasses to implement dump(). */
+class RGWMetadataObject {
+protected:
+ obj_version objv;
+ ceph::real_time mtime;
+
+public:
+ RGWMetadataObject() {}
+ virtual ~RGWMetadataObject() {}
+ // declared here only; the definition is not in this header
+ obj_version& get_version();
+ // returns a copy of the stored modification time
+ real_time get_mtime() { return mtime; }
+
+ // write this object to the given Formatter; must be provided by subclasses
+ virtual void dump(Formatter *f) const = 0;
+};
+
+class RGWMetadataManager;
+
+/* Interface implemented once per metadata type. RGWMetadataManager is a
+ * friend so that it can reach the protected helpers (get_pool_and_oid(),
+ * check_versions(), parse_bucket()). */
+class RGWMetadataHandler {
+ friend class RGWMetadataManager;
+
+public:
+ /* Policy deciding whether an incoming entry replaces the stored one;
+ * evaluated by check_versions() below. */
+ enum sync_type_t {
+ APPLY_ALWAYS,
+ APPLY_UPDATES,
+ APPLY_NEWER
+ };
+ /* Map "update-by-version" / "update-by-timestamp" / "always" to the
+ * matching sync_type_t. Returns false (leaving `type` untouched) for any
+ * other string. */
+ static bool string_to_sync_type(const string& sync_string,
+ sync_type_t& type) {
+ if (sync_string.compare("update-by-version") == 0)
+ type = APPLY_UPDATES;
+ else if (sync_string.compare("update-by-timestamp") == 0)
+ type = APPLY_NEWER;
+ else if (sync_string.compare("always") == 0)
+ type = APPLY_ALWAYS;
+ else
+ return false;
+ return true;
+ }
+
+ virtual ~RGWMetadataHandler() {}
+ virtual string get_type() = 0;
+
+ // per-entry operations; semantics are defined by each concrete handler
+ virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0;
+ virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+ real_time mtime, JSONObj *obj, sync_type_t type) = 0;
+ virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0;
+
+ // key listing: init hands back an opaque handle through phandle, which is
+ // then passed to next / complete / get_marker
+ virtual int list_keys_init(RGWRados *store, const string& marker, void **phandle) = 0;
+ virtual int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) = 0;
+ virtual void list_keys_complete(void *handle) = 0;
+
+ virtual string get_marker(void *handle) = 0;
+
+ /* key to use for hashing entries for log shard placement */
+ virtual void get_hash_key(const string& section, const string& key, string& hash_key) {
+ hash_key = section + ":" + key;
+ }
+
+protected:
+ // resolve the pool and object id for the given metadata key
+ virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) = 0;
+ /**
+ * Compare an incoming versus on-disk tag/version+mtime combo against
+ * the sync mode to see if the new one should replace the on-disk one.
+ *
+ * @return true if the update should proceed, false otherwise.
+ */
+ static bool check_versions(const obj_version& ondisk, const real_time& ondisk_time,
+ const obj_version& incoming, const real_time& incoming_time,
+ sync_type_t sync_mode) {
+ switch (sync_mode) {
+ case APPLY_UPDATES:
+ // reject when the tags differ or the incoming version is not strictly newer
+ if ((ondisk.tag != incoming.tag) ||
+ (ondisk.ver >= incoming.ver))
+ return false;
+ break;
+ case APPLY_NEWER:
+ // reject when the incoming mtime is not strictly newer
+ if (ondisk_time >= incoming_time)
+ return false;
+ break;
+ case APPLY_ALWAYS: //deliberate fall-thru -- we always apply!
+ default: break;
+ }
+ return true;
+ }
+
+ /*
+ * Split "[tenant/]name[:instance]" into its parts.
+ *
+ * The tenant_name is always returned on purpose. May be empty, of course.
+ * bucket_instance is written only when a ':' is present and the pointer is
+ * non-null; otherwise it is left untouched.
+ */
+ static void parse_bucket(const string& bucket,
+ string *tenant_name,
+ string *bucket_name,
+ string *bucket_instance = nullptr /* optional */)
+ {
+ // NOTE(review): find() returns size_t; this relies on string::npos
+ // narrowing to a negative int, so that `pos + 1` below becomes 0
+ int pos = bucket.find('/');
+ if (pos >= 0) {
+ *tenant_name = bucket.substr(0, pos);
+ } else {
+ tenant_name->clear();
+ }
+ string bn = bucket.substr(pos + 1);
+ pos = bn.find (':');
+ if (pos < 0) {
+ *bucket_name = std::move(bn);
+ return;
+ }
+ *bucket_name = bn.substr(0, pos);
+ if (bucket_instance) {
+ *bucket_instance = bn.substr(pos + 1);
+ }
+ }
+};
+
+/* Base of the shard object names built by RGWMetadataLog::make_prefix(). */
+#define META_LOG_OBJ_PREFIX "meta.log."
+
+/* Log information filled in by RGWMetadataLog::get_info(): a marker
+ * string and a last-update timestamp. */
+struct RGWMetadataLogInfo {
+ string marker;
+ real_time last_update;
+
+ // serialisation helpers; defined outside this header
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+
+class RGWCompletionManager;
+
+/* Ref-counted state passed to RGWMetadataLog::get_info_async(): holds the
+ * cls_log_header to be filled, an IoCtx and an AioCompletion, and forwards
+ * the result to a user callback unless cancel() ran first. */
+class RGWMetadataLogInfoCompletion : public RefCountedObject {
+ public:
+ // callback signature: (return value of the completion, log header)
+ using info_callback_t = std::function<void(int, const cls_log_header&)>;
+ private:
+ cls_log_header header;
+ librados::IoCtx io_ctx;
+ librados::AioCompletion *completion;
+ std::mutex mutex; //< protects callback between cancel/complete
+ boost::optional<info_callback_t> callback; //< cleared on cancel
+ public:
+ // constructor and destructor are defined outside this header
+ explicit RGWMetadataLogInfoCompletion(info_callback_t callback);
+ ~RGWMetadataLogInfoCompletion() override;
+
+ librados::IoCtx& get_io_ctx() { return io_ctx; }
+ cls_log_header& get_header() { return header; }
+ librados::AioCompletion* get_completion() { return completion; }
+
+ // invoke the callback (if still set) with the completion's return value
+ // and the header; the `cb` argument itself is not used here
+ void finish(librados::completion_t cb) {
+ std::lock_guard<std::mutex> lock(mutex);
+ if (callback) {
+ (*callback)(completion->get_return_value(), header);
+ }
+ }
+ // drop the callback under the mutex so that a later finish() is a no-op
+ void cancel() {
+ std::lock_guard<std::mutex> lock(mutex);
+ callback = boost::none;
+ }
+};
+
+/* Metadata log for one period. Shard object names are derived from a
+ * prefix that embeds the period id (see make_prefix()). */
+class RGWMetadataLog {
+ CephContext *cct;
+ RGWRados *store;
+ const string prefix;
+
+ // "meta.log." for an empty period, otherwise "meta.log.<period>."
+ static std::string make_prefix(const std::string& period) {
+ if (period.empty())
+ return META_LOG_OBJ_PREFIX;
+ return META_LOG_OBJ_PREFIX + period + ".";
+ }
+
+ // NOTE(review): presumably guards modified_shards -- confirm in rgw_metadata.cc
+ RWLock lock;
+ set<int> modified_shards;
+
+ void mark_modified(int shard_id);
+public:
+ RGWMetadataLog(CephContext *_cct, RGWRados *_store, const std::string& period)
+ : cct(_cct), store(_store),
+ prefix(make_prefix(period)),
+ lock("RGWMetaLog::lock") {}
+
+ // build the object id of a shard: prefix followed by the decimal shard id
+ void get_shard_oid(int id, string& oid) const {
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", id);
+ oid = prefix + buf;
+ }
+
+ int add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl);
+ int store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion);
+
+ // listing state; default-constructed with cur_shard 0 and done false
+ struct LogListCtx {
+ int cur_shard;
+ string marker;
+ real_time from_time;
+ real_time end_time;
+
+ string cur_oid;
+
+ bool done;
+
+ LogListCtx() : cur_shard(0), done(false) {}
+ };
+
+ // listing API: init hands back an opaque handle that is passed to
+ // list_entries() and finally to complete_list_entries()
+ void init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, string& marker, void **handle);
+ void complete_list_entries(void *handle);
+ int list_entries(void *handle,
+ int max_entries,
+ list<cls_log_entry>& entries,
+ string *out_marker,
+ bool *truncated);
+
+ int trim(int shard_id, const real_time& from_time, const real_time& end_time, const string& start_marker, const string& end_marker);
+ int get_info(int shard_id, RGWMetadataLogInfo *info);
+ int get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion);
+ int lock_exclusive(int shard_id, timespan duration, string&zone_id, string& owner_id);
+ int unlock(int shard_id, string& zone_id, string& owner_id);
+
+ int update_shards(list<int>& shards);
+
+ void read_clear_modified(set<int> &modified);
+};
+
+/* Small wrapper around an RGWMDLogStatus value that provides a dump()
+ * method; dump() is defined outside this header. */
+struct LogStatusDump {
+ RGWMDLogStatus status;
+
+ explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {}
+ void dump(Formatter *f) const;
+};
+
+/* Metadata log record payload: a read version, a write version and an
+ * operation status. Filled through pre_modify() / post_modify() in
+ * RGWMetadataManager. */
+struct RGWMetadataLogData {
+ obj_version read_version;
+ obj_version write_version;
+ RGWMDLogStatus status;
+
+ // status starts as UNKNOWN until a caller sets it
+ RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN) {}
+
+ // (de)serialisation; all four are defined outside this header
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWMetadataLogData)
+
+/* Record of the oldest log period: its realm epoch and its period id.
+ * The field order in encode() and decode() must stay in sync. */
+struct RGWMetadataLogHistory {
+ epoch_t oldest_realm_epoch;
+ std::string oldest_period_id;
+
+ // encoding version 1, compat version 1
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(oldest_realm_epoch, bl);
+ encode(oldest_period_id, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& p) {
+ DECODE_START(1, p);
+ decode(oldest_realm_epoch, p);
+ decode(oldest_period_id, p);
+ DECODE_FINISH(p);
+ }
+
+ // defined outside this header
+ static const std::string oid;
+};
+WRITE_CLASS_ENCODER(RGWMetadataLogHistory)
+
+/* Registry of RGWMetadataHandler instances plus the per-period metadata
+ * logs. Mutating entry points (put_entry(), remove_entry(), mutate())
+ * bracket the change with pre_modify() / post_modify(). */
+class RGWMetadataManager {
+ // NOTE(review): presumably keyed by RGWMetadataHandler::get_type() --
+ // confirm in register_handler()
+ map<string, RGWMetadataHandler *> handlers;
+ CephContext *cct;
+ RGWRados *store;
+
+ // maintain a separate metadata log for each period
+ std::map<std::string, RGWMetadataLog> md_logs;
+ // use the current period's log for mutating operations
+ RGWMetadataLog* current_log = nullptr;
+
+ int find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry);
+ // hooks run before and after a change; post_modify() receives the result
+ // of the change in `ret` and its own return value is what callers
+ // propagate
+ int pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
+ RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
+ RGWMDLogStatus op_type);
+ int post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
+ RGWObjVersionTracker *objv_tracker, int ret);
+
+ // "heap" helpers used by put_entry(): store_in_heap() runs before the
+ // system object write and remove_from_heap() undoes it if that write fails
+ string heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv);
+ int store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
+ RGWObjVersionTracker *objv_tracker, real_time mtime,
+ map<string, bufferlist> *pattrs);
+ int remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker);
+ // called first by mutate(); a negative value or STATUS_NO_APPLY makes
+ // mutate() return immediately
+ int prepare_mutate(RGWRados *store, rgw_pool& pool, const string& oid,
+ const real_time& mtime,
+ RGWObjVersionTracker *objv_tracker,
+ RGWMetadataHandler::sync_type_t sync_mode);
+
+public:
+ RGWMetadataManager(CephContext *_cct, RGWRados *_store);
+ ~RGWMetadataManager();
+
+ RGWRados* get_store() { return store; }
+
+ int init(const std::string& current_period);
+
+ /// initialize the oldest log period if it doesn't exist, and attach it to
+ /// our current history
+ RGWPeriodHistory::Cursor init_oldest_log_period();
+
+ /// read the oldest log period, and return a cursor to it in our existing
+ /// period history
+ RGWPeriodHistory::Cursor read_oldest_log_period() const;
+
+ /// read the oldest log period asynchronously and write its result to the
+ /// given cursor pointer
+ RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period,
+ RGWObjVersionTracker *objv) const;
+
+ /// try to advance the oldest log period when the given period is trimmed,
+ /// using a rados lock to provide atomicity
+ RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period,
+ RGWObjVersionTracker *objv) const;
+
+ /// find or create the metadata log for the given period
+ RGWMetadataLog* get_log(const std::string& period);
+
+ int register_handler(RGWMetadataHandler *handler);
+
+ /// run f() between pre_modify() and post_modify(); defined below the class
+ template <typename F>
+ int mutate(RGWMetadataHandler *handler, const string& key,
+ const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker,
+ RGWMDLogStatus op_type,
+ RGWMetadataHandler::sync_type_t sync_mode,
+ F&& f);
+
+ RGWMetadataHandler *get_handler(const string& type);
+
+ int put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
+ RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs = NULL);
+ int remove_entry(RGWMetadataHandler *handler,
+ const string& key,
+ RGWObjVersionTracker *objv_tracker);
+ int get(string& metadata_key, Formatter *f);
+ int put(string& metadata_key, bufferlist& bl,
+ RGWMetadataHandler::sync_type_t sync_mode,
+ obj_version *existing_version = NULL);
+ int remove(string& metadata_key);
+
+ // key listing: init hands back an opaque handle through phandle
+ int list_keys_init(const string& section, void **phandle);
+ int list_keys_init(const string& section, const string& marker, void **phandle);
+ int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated);
+ void list_keys_complete(void *handle);
+
+ string get_marker(void *handle);
+
+ void dump_log_entry(cls_log_entry& entry, Formatter *f);
+
+ void get_sections(list<string>& sections);
+ int lock_exclusive(string& metadata_key, timespan duration, string& owner_id);
+ int unlock(string& metadata_key, string& owner_id);
+
+ /// compute the log shard for (section, key); -EINVAL if the section has
+ /// no registered handler
+ int get_log_shard_id(const string& section, const string& key, int *shard_id);
+
+ void parse_metadata_key(const string& metadata_key, string& type, string& entry);
+};
+
+/**
+ * Run a caller-supplied operation on a metadata entry, bracketed by
+ * pre_modify() / post_modify().
+ *
+ * prepare_mutate() is consulted first; a negative result or
+ * STATUS_NO_APPLY is returned as-is without running @p f. The result of
+ * @p f is cascaded into post_modify().
+ *
+ * Note: @p op_type is accepted but not used in this body; pre_modify() is
+ * always called with MDLOG_STATUS_WRITE.
+ *
+ * @return the early-exit value described above, a negative value from
+ *         pre_modify() or post_modify(), or 0
+ */
+template <typename F>
+int RGWMetadataManager::mutate(RGWMetadataHandler *handler, const string& key,
+                               const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker,
+                               RGWMDLogStatus op_type,
+                               RGWMetadataHandler::sync_type_t sync_mode,
+                               F&& f)
+{
+  string target_oid;
+  rgw_pool target_pool;
+  handler->get_pool_and_oid(store, key, target_pool, target_oid);
+
+  const int prep_ret = prepare_mutate(store, target_pool, target_oid, mtime,
+                                      objv_tracker, sync_mode);
+  if (prep_ret < 0) {
+    return prep_ret;
+  }
+  if (prep_ret == STATUS_NO_APPLY) {
+    return prep_ret;
+  }
+
+  string log_section;
+  RGWMetadataLogData md_log_data;
+  const int pre_ret = pre_modify(handler, log_section, key, md_log_data,
+                                 objv_tracker, MDLOG_STATUS_WRITE);
+  if (pre_ret < 0) {
+    return pre_ret;
+  }
+
+  const int op_ret = std::forward<F>(f)();
+
+  /* the operation's result is cascaded into post_modify() */
+  const int post_ret = post_modify(handler, log_section, key, md_log_data,
+                                   objv_tracker, op_ret);
+  return (post_ret < 0) ? post_ret : 0;
+}
+
+#endif
diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc
new file mode 100644
index 00000000..d055d98b
--- /dev/null
+++ b/src/rgw/rgw_multi.cc
@@ -0,0 +1,384 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "rgw_xml.h"
+#include "rgw_multi.h"
+#include "rgw_op.h"
+
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+
+/*
+ * Accept names that end in MP_META_SUFFIX and contain a '.' somewhere
+ * before that suffix; on success `key` receives everything that precedes
+ * the last such '.'. `key` is left untouched when false is returned.
+ */
+bool MultipartMetaFilter::filter(const string& name, string& key) {
+  // computed once; used to locate where the suffix has to start
+  static const size_t suffix_len = MP_META_SUFFIX.length();
+
+  const size_t name_len = name.size();
+
+  // the name must hold the suffix plus at least one more character
+  if (name_len <= suffix_len) {
+    return false;
+  }
+
+  const size_t suffix_pos = name.find(MP_META_SUFFIX, name_len - suffix_len);
+  if (suffix_pos == string::npos) {
+    return false;
+  }
+
+  // last '.' strictly before the suffix
+  const size_t dot_pos = name.rfind('.', suffix_pos - 1);
+  if (dot_pos == string::npos) {
+    return false;
+  }
+
+  key = name.substr(0, dot_pos);
+  return true;
+}
+
+
+/*
+ * Finalise a <Part> element: both a PartNumber and an ETag child must be
+ * present and the PartNumber text must be non-empty. The number is
+ * converted with atoi() and the ETag text is stored verbatim.
+ */
+bool RGWMultiPart::xml_end(const char *el)
+{
+  auto *number_el = static_cast<RGWMultiPartNumber *>(find_first("PartNumber"));
+  auto *etag_el = static_cast<RGWMultiETag *>(find_first("ETag"));
+
+  if (number_el == nullptr || etag_el == nullptr) {
+    return false;
+  }
+
+  const string number_text = number_el->get_data();
+  if (number_text.empty()) {
+    return false;
+  }
+
+  num = atoi(number_text.c_str());
+  etag = etag_el->get_data();
+
+  return true;
+}
+
+/*
+ * Collect every <Part> child into `parts`, keyed by part number. A part
+ * number that occurs more than once keeps the ETag of its last occurrence.
+ * Always returns true.
+ */
+bool RGWMultiCompleteUpload::xml_end(const char *el) {
+  XMLObjIter part_iter = find("Part");
+  for (auto *part = static_cast<RGWMultiPart *>(part_iter.get_next());
+       part != nullptr;
+       part = static_cast<RGWMultiPart *>(part_iter.get_next())) {
+    const int part_num = part->get_num();
+    parts[part_num] = part->get_etag();
+  }
+  return true;
+}
+
+
+/*
+ * Allocate the object that represents the XML element named `el`.
+ * Returns a null pointer for element names this parser does not handle.
+ */
+XMLObj *RGWMultiXMLParser::alloc_obj(const char *el) {
+  // both root element spellings map to the same object type
+  if (strcmp(el, "CompleteMultipartUpload") == 0 ||
+      strcmp(el, "MultipartUpload") == 0) {
+    return new RGWMultiCompleteUpload();
+  }
+  if (strcmp(el, "Part") == 0) {
+    return new RGWMultiPart();
+  }
+  if (strcmp(el, "PartNumber") == 0) {
+    return new RGWMultiPartNumber();
+  }
+  if (strcmp(el, "ETag") == 0) {
+    return new RGWMultiETag();
+  }
+
+  return NULL;
+}
+
+/*
+ * True when the upload id starts with MULTIPART_UPLOAD_ID_PREFIX or with
+ * MULTIPART_UPLOAD_ID_PREFIX_LEGACY.
+ */
+bool is_v2_upload_id(const string& upload_id)
+{
+  // sizeof() of the literal counts the trailing NUL, hence the "- 1"
+  const bool has_current_prefix =
+    upload_id.compare(0, sizeof(MULTIPART_UPLOAD_ID_PREFIX) - 1,
+                      MULTIPART_UPLOAD_ID_PREFIX) == 0;
+  if (has_current_prefix) {
+    return true;
+  }
+
+  return upload_id.compare(0, sizeof(MULTIPART_UPLOAD_ID_PREFIX_LEGACY) - 1,
+                           MULTIPART_UPLOAD_ID_PREFIX_LEGACY) == 0;
+}
+
+/*
+ * List the parts recorded in the omap of a multipart upload's meta object.
+ *
+ * @param upload_id        decides (via is_v2_upload_id()) whether the omap
+ *                         keys are treated as sorted
+ * @param meta_oid         name of the upload's meta object
+ * @param num_parts        maximum number of parts to return
+ * @param marker           only parts numbered above this are returned
+ * @param parts            [out] cleared, then filled with part number -> info
+ * @param next_marker      [out, optional] number of the last part returned
+ *                         (0 if none)
+ * @param truncated        [out, optional] whether more parts remain
+ * @param assume_unsorted  force the unsorted path (read all keys, filter
+ *                         and trim in memory)
+ * @return 0 on success, -EIO if a part entry cannot be decoded, or the
+ *         negative value returned by the omap read
+ */
+int list_multipart_parts(RGWRados *store, RGWBucketInfo& bucket_info,
+ CephContext *cct,
+ const string& upload_id,
+ const string& meta_oid, int num_parts,
+ int marker, map<uint32_t, RGWUploadPartInfo>& parts,
+ int *next_marker, bool *truncated,
+ bool assume_unsorted)
+{
+ map<string, bufferlist> parts_map;
+ map<string, bufferlist>::iterator iter;
+
+ // the meta object lives in the multipart namespace, in extra data
+ rgw_obj obj;
+ obj.init_ns(bucket_info.bucket, meta_oid, RGW_OBJ_NS_MULTIPART);
+ obj.set_in_extra_data(true);
+
+ rgw_raw_obj raw_obj;
+ store->obj_to_raw(bucket_info.placement_rule, obj, &raw_obj);
+
+ bool sorted_omap = is_v2_upload_id(upload_id) && !assume_unsorted;
+
+ parts.clear();
+
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ auto sysobj = obj_ctx.get_obj(raw_obj);
+ int ret;
+ if (sorted_omap) {
+ // start key is "part." followed by the zero-padded 8-digit marker
+ string p;
+ p = "part.";
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%08d", marker);
+ p.append(buf);
+
+ // ask for one entry more than requested so truncation can be detected
+ ret = sysobj.omap().get_vals(p, num_parts + 1, &parts_map, nullptr);
+ } else {
+ ret = sysobj.omap().get_all(&parts_map);
+ }
+ if (ret < 0) {
+ return ret;
+ }
+
+ int i;
+ int last_num = 0;
+
+ uint32_t expected_next = marker + 1;
+
+ // sorted: stop after num_parts entries; unsorted: walk every entry
+ for (i = 0, iter = parts_map.begin();
+ (i < num_parts || !sorted_omap) && iter != parts_map.end();
+ ++iter, ++i) {
+ bufferlist& bl = iter->second;
+ auto bli = bl.cbegin();
+ RGWUploadPartInfo info;
+ try {
+ decode(info, bli);
+ } catch (buffer::error& err) {
+ // NOTE(review): the message presumably meant "could not decode part info"
+ ldout(cct, 0) << "ERROR: could not part info, caught buffer::error" <<
+ dendl;
+ return -EIO;
+ }
+ if (sorted_omap) {
+ if (info.num != expected_next) {
+ /* ouch, we expected a specific part num here, but we got a
+ * different one. Either a part is missing, or it could be a
+ * case of mixed rgw versions working on the same upload,
+ * where one gateway doesn't support correctly sorted omap
+ * keys for multipart upload just assume data is unsorted.
+ */
+ return list_multipart_parts(store, bucket_info, cct, upload_id,
+ meta_oid, num_parts, marker, parts,
+ next_marker, truncated, true);
+ }
+ expected_next++;
+ }
+ // unsorted entries must be filtered against the marker by hand
+ if (sorted_omap ||
+ (int)info.num > marker) {
+ parts[info.num] = info;
+ last_num = info.num;
+ }
+ }
+
+ if (sorted_omap) {
+ if (truncated) {
+ // an unconsumed entry means there is more to list
+ *truncated = (iter != parts_map.end());
+ }
+ } else {
+ /* rebuild a map with only num_parts entries */
+ map<uint32_t, RGWUploadPartInfo> new_parts;
+ map<uint32_t, RGWUploadPartInfo>::iterator piter;
+ for (i = 0, piter = parts.begin();
+ i < num_parts && piter != parts.end();
+ ++i, ++piter) {
+ new_parts[piter->first] = piter->second;
+ last_num = piter->first;
+ }
+
+ if (truncated) {
+ *truncated = (piter != parts.end());
+ }
+
+ parts.swap(new_parts);
+ }
+
+ if (next_marker) {
+ *next_marker = last_num;
+ }
+
+ return 0;
+}
+
+/*
+ * Convenience overload: takes the bucket info and the CephContext from the
+ * request state and forwards every other argument unchanged to the
+ * bucket_info overload.
+ */
+int list_multipart_parts(RGWRados *store, struct req_state *s,
+                         const string& upload_id,
+                         const string& meta_oid, int num_parts,
+                         int marker, map<uint32_t, RGWUploadPartInfo>& parts,
+                         int *next_marker, bool *truncated,
+                         bool assume_unsorted)
+{
+  auto& req_bucket_info = s->bucket_info;
+  auto *req_cct = s->cct;
+
+  return list_multipart_parts(store, req_bucket_info, req_cct,
+                              upload_id, meta_oid,
+                              num_parts, marker, parts,
+                              next_marker, truncated,
+                              assume_unsorted);
+}
+
+/*
+ * Abort a multipart upload: walk its parts in batches of 1000, delete
+ * parts without a manifest directly, queue the rest for garbage
+ * collection, then delete the upload's meta object.
+ *
+ * @return the result of deleting the meta object, or an earlier negative
+ *         error; -ENOENT from the listing, the GC hand-off or the final
+ *         delete is reported as -ERR_NO_SUCH_UPLOAD
+ */
+int abort_multipart_upload(RGWRados *store, CephContext *cct,
+ RGWObjectCtx *obj_ctx, RGWBucketInfo& bucket_info,
+ RGWMPObj& mp_obj)
+{
+ rgw_obj meta_obj;
+ meta_obj.init_ns(bucket_info.bucket, mp_obj.get_meta(), RGW_OBJ_NS_MULTIPART);
+ meta_obj.set_in_extra_data(true);
+ meta_obj.index_hash_source = mp_obj.get_key();
+ cls_rgw_obj_chain chain;
+ list<rgw_obj_index_key> remove_objs;
+ map<uint32_t, RGWUploadPartInfo> obj_parts;
+ bool truncated;
+ int marker = 0;
+ int ret;
+ uint64_t parts_accounted_size = 0;
+
+ do {
+ // marker is both input and output, so each pass resumes after the last
+ // part of the previous batch
+ ret = list_multipart_parts(store, bucket_info, cct,
+ mp_obj.get_upload_id(), mp_obj.get_meta(),
+ 1000, marker, obj_parts, &marker, &truncated);
+ if (ret < 0) {
+ ldout(cct, 20) << __func__ << ": list_multipart_parts returned " <<
+ ret << dendl;
+ return (ret == -ENOENT) ? -ERR_NO_SUCH_UPLOAD : ret;
+ }
+
+ for (auto obj_iter = obj_parts.begin();
+ obj_iter != obj_parts.end();
+ ++obj_iter) {
+ RGWUploadPartInfo& obj_part = obj_iter->second;
+ rgw_obj obj;
+ if (obj_part.manifest.empty()) {
+ // no manifest: delete the part object directly; a missing part is
+ // tolerated
+ string oid = mp_obj.get_part(obj_iter->second.num);
+ obj.init_ns(bucket_info.bucket, oid, RGW_OBJ_NS_MULTIPART);
+ obj.index_hash_source = mp_obj.get_key();
+ ret = store->delete_obj(*obj_ctx, bucket_info, obj, 0);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+ } else {
+ // manifest present: add it to the GC chain and remember the index
+ // key of the manifest's first object for the final delete
+ store->update_gc_chain(meta_obj, obj_part.manifest, &chain);
+ RGWObjManifest::obj_iterator oiter = obj_part.manifest.obj_begin();
+ if (oiter != obj_part.manifest.obj_end()) {
+ rgw_obj head;
+ rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store);
+ rgw_raw_obj_to_obj(bucket_info.bucket, raw_head, &head);
+
+ rgw_obj_index_key key;
+ head.key.get_index_key(&key);
+ remove_objs.push_back(key);
+ }
+ }
+ parts_accounted_size += obj_part.accounted_size;
+ }
+ } while (truncated);
+
+ /* use upload id as tag and do it asynchronously */
+ ret = store->send_chain_to_gc(chain, mp_obj.get_upload_id(), false);
+ if (ret < 0) {
+ ldout(cct, 5) << __func__ << ": gc->send_chain() returned " << ret << dendl;
+ return (ret == -ENOENT) ? -ERR_NO_SUCH_UPLOAD : ret;
+ }
+
+ RGWRados::Object del_target(store, bucket_info, *obj_ctx, meta_obj);
+ RGWRados::Object::Delete del_op(&del_target);
+ del_op.params.bucket_owner = bucket_info.owner;
+ del_op.params.versioning_status = 0;
+ if (!remove_objs.empty()) {
+ del_op.params.remove_objs = &remove_objs;
+ }
+
+ // flag this delete as a multipart abort and pass along the summed
+ // accounted size of the parts
+ del_op.params.abortmp = true;
+ del_op.params.parts_accounted_size = parts_accounted_size;
+
+ // and also remove the metadata obj
+ ret = del_op.delete_obj();
+ if (ret < 0) {
+ ldout(cct, 20) << __func__ << ": del_op.delete_obj returned " <<
+ ret << dendl;
+ }
+ return (ret == -ENOENT) ? -ERR_NO_SUCH_UPLOAD : ret;
+}
+
+/*
+ * List a bucket's entries in the multipart namespace, filtered through
+ * MultipartMetaFilter.
+ *
+ * @param max_uploads      maximum number of entries to list
+ * @param objs             [out] the listed directory entries
+ * @param common_prefixes  [out] passed through to the bucket listing
+ * @param is_truncated     [out] passed through to the bucket listing
+ * @return the result of the underlying list_objects() call
+ */
+int list_bucket_multiparts(RGWRados *store, RGWBucketInfo& bucket_info,
+                           const string& prefix, const string& marker,
+                           const string& delim,
+                           const int& max_uploads,
+                           vector<rgw_bucket_dir_entry> *objs,
+                           map<string, bool> *common_prefixes, bool *is_truncated)
+{
+  RGWRados::Bucket bucket_target(store, bucket_info);
+  RGWRados::Bucket::List lister(&bucket_target);
+  MultipartMetaFilter meta_filter;
+
+  auto& params = lister.params;
+  params.ns = RGW_OBJ_NS_MULTIPART;
+  params.filter = &meta_filter;
+  params.prefix = prefix;
+  params.delim = delim;
+  params.marker = marker;
+
+  const int list_ret = lister.list_objects(max_uploads, objs,
+                                           common_prefixes, is_truncated);
+  return list_ret;
+}
+
+/*
+ * Abort the multipart uploads of a bucket that match prefix/delim, in
+ * batches of up to 1000 listed entries.
+ *
+ * Uploads whose abort reports -ENOENT or -ERR_NO_SUCH_UPLOAD are logged
+ * and skipped; any other failure ends the run with that error.
+ *
+ * NOTE(review): `marker` is never advanced inside the loop, so every pass
+ * lists from the beginning again. This presumably relies on aborted
+ * uploads disappearing from the listing -- confirm, since entries that are
+ * skipped or not found could otherwise be listed again indefinitely.
+ *
+ * @return 0 on success, otherwise the first fatal error
+ */
+int abort_bucket_multiparts(RGWRados *store, CephContext *cct, RGWBucketInfo& bucket_info,
+ string& prefix, string& delim)
+{
+ constexpr int max = 1000;
+ int ret, num_deleted = 0;
+ vector<rgw_bucket_dir_entry> objs;
+ RGWObjectCtx obj_ctx(store);
+ string marker;
+ bool is_truncated;
+
+ do {
+ ret = list_bucket_multiparts(store, bucket_info, prefix, marker, delim,
+ max, &objs, nullptr, &is_truncated);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << __func__ <<
+ " ERROR : calling list_bucket_multiparts; ret=" << ret <<
+ "; bucket=\"" << bucket_info.bucket << "\"; prefix=\"" <<
+ prefix << "\"; delim=\"" << delim << "\"" << dendl;
+ return ret;
+ }
+ ldout(store->ctx(), 20) << __func__ <<
+ " INFO: aborting and cleaning up multipart upload(s); bucket=\"" <<
+ bucket_info.bucket << "\"; objs.size()=" << objs.size() <<
+ "; is_truncated=" << is_truncated << dendl;
+
+ if (!objs.empty()) {
+ RGWMPObj mp;
+ for (const auto& obj : objs) {
+ rgw_obj_key key(obj.key);
+ // skip entries whose name from_meta() does not accept
+ if (!mp.from_meta(key.name))
+ continue;
+ ret = abort_multipart_upload(store, cct, &obj_ctx, bucket_info, mp);
+ if (ret < 0) {
+ // we're doing a best-effort; if something cannot be found,
+ // log it and keep moving forward
+ if (ret != -ENOENT && ret != -ERR_NO_SUCH_UPLOAD) {
+ ldout(store->ctx(), 0) << __func__ <<
+ " ERROR : failed to abort and clean-up multipart upload \"" <<
+ key.get_oid() << "\"" << dendl;
+ return ret;
+ } else {
+ ldout(store->ctx(), 10) << __func__ <<
+ " NOTE : unable to find part(s) of "
+ "aborted multipart upload of \"" << key.get_oid() <<
+ "\" for cleaning up" << dendl;
+ }
+ }
+ // counted even when the upload was reported as not found above
+ num_deleted++;
+ }
+ // num_deleted accumulates across batches, so this reports a running total
+ if (num_deleted) {
+ ldout(store->ctx(), 0) << __func__ <<
+ " WARNING : aborted " << num_deleted <<
+ " incomplete multipart uploads" << dendl;
+ }
+ }
+ } while (is_truncated);
+
+ return 0;
+}
diff --git a/src/rgw/rgw_multi.h b/src/rgw/rgw_multi.h
new file mode 100644
index 00000000..8c682888
--- /dev/null
+++ b/src/rgw/rgw_multi.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_MULTI_H
+#define CEPH_RGW_MULTI_H
+
+#include <map>
+#include "rgw_xml.h"
+#include "rgw_rados.h"
+
+/* Upload-id prefixes recognised by is_v2_upload_id(). */
+#define MULTIPART_UPLOAD_ID_PREFIX_LEGACY "2/"
+#define MULTIPART_UPLOAD_ID_PREFIX "2~" // must contain a unique char that may not come up in gen_rand_alpha()
+
+/* XML object allocated for the "CompleteMultipartUpload" and
+ * "MultipartUpload" elements; xml_end() gathers the child parts. */
+class RGWMultiCompleteUpload : public XMLObj
+{
+public:
+ RGWMultiCompleteUpload() {}
+ ~RGWMultiCompleteUpload() override {}
+ bool xml_end(const char *el) override;
+
+ // part number -> ETag, filled by xml_end()
+ std::map<int, string> parts;
+};
+
+/* XML object for a "Part" element: a part number and its ETag, both set
+ * by xml_end(). */
+class RGWMultiPart : public XMLObj
+{
+ string etag;
+ int num;
+public:
+ // num starts at 0 until xml_end() parses the PartNumber child
+ RGWMultiPart() : num(0) {}
+ ~RGWMultiPart() override {}
+ bool xml_end(const char *el) override;
+
+ string& get_etag() { return etag; }
+ int get_num() { return num; }
+};
+
+/* XML object for a "PartNumber" element; adds nothing to XMLObj, its text
+ * is read through get_data() by RGWMultiPart::xml_end(). */
+class RGWMultiPartNumber : public XMLObj
+{
+public:
+ RGWMultiPartNumber() {}
+ ~RGWMultiPartNumber() override {}
+};
+
+/* XML object for an "ETag" element; adds nothing to XMLObj, its text is
+ * read through get_data() by RGWMultiPart::xml_end(). */
+class RGWMultiETag : public XMLObj
+{
+public:
+ RGWMultiETag() {}
+ ~RGWMultiETag() override {}
+};
+
+/* XML parser for multipart-completion documents; alloc_obj() maps element
+ * names to the RGWMulti* classes declared above. */
+class RGWMultiXMLParser : public RGWXMLParser
+{
+ // returns NULL for element names it does not recognise
+ XMLObj *alloc_obj(const char *el) override;
+public:
+ RGWMultiXMLParser() {}
+ ~RGWMultiXMLParser() override {}
+};
+
+/**
+ * A filter to a) test whether an object name is a multipart meta
+ * object, and b) filter out just the key used to determine the bucket
+ * index shard.
+ *
+ * Objects for multipart meta have names adorned with an upload id and
+ * other elements -- specifically a ".", MULTIPART_UPLOAD_ID_PREFIX,
+ * unique id, and MP_META_SUFFIX. This filter will return true when
+ * the name provided is such. It will also extract the key used for
+ * bucket index shard calculation from the adorned name.
+ *
+ * Note that the implementation checks only for the MP_META_SUFFIX ending
+ * and for a "." somewhere before it; the upload id prefix itself is not
+ * inspected.
+ */
+class MultipartMetaFilter : public RGWAccessListFilter {
+public:
+ MultipartMetaFilter() {}
+
+ /**
+ * @param name [in] The object name as it appears in the bucket index.
+ * @param key [out] An output parameter that will contain the bucket
+ * index key if this entry is in the form of a multipart meta object.
+ * It is left unmodified when false is returned.
+ * @return true if the name provided is in the form of a multipart meta
+ * object, false otherwise
+ */
+ bool filter(const string& name, string& key) override;
+}; // class MultipartMetaFilter
+
+// true when upload_id starts with one of the MULTIPART_UPLOAD_ID_PREFIX* values
+extern bool is_v2_upload_id(const string& upload_id);
+
+// list up to num_parts parts numbered above `marker` from the upload's meta
+// object; next_marker and truncated are optional outputs
+extern int list_multipart_parts(RGWRados *store, RGWBucketInfo& bucket_info,
+ CephContext *cct,
+ const string& upload_id,
+ const string& meta_oid, int num_parts,
+ int marker, map<uint32_t, RGWUploadPartInfo>& parts,
+ int *next_marker, bool *truncated,
+ bool assume_unsorted = false);
+
+// same as above, taking bucket info and CephContext from the request state
+extern int list_multipart_parts(RGWRados *store, struct req_state *s,
+ const string& upload_id,
+ const string& meta_oid, int num_parts,
+ int marker, map<uint32_t, RGWUploadPartInfo>& parts,
+ int *next_marker, bool *truncated,
+ bool assume_unsorted = false);
+
+// remove the parts and the meta object of one upload; a missing upload is
+// reported as -ERR_NO_SUCH_UPLOAD
+extern int abort_multipart_upload(RGWRados *store, CephContext *cct, RGWObjectCtx *obj_ctx,
+ RGWBucketInfo& bucket_info, RGWMPObj& mp_obj);
+
+// list multipart meta entries of a bucket
+extern int list_bucket_multiparts(RGWRados *store, RGWBucketInfo& bucket_info,
+ const string& prefix,
+ const string& marker,
+ const string& delim,
+ const int& max_uploads,
+ vector<rgw_bucket_dir_entry> *objs,
+ map<string, bool> *common_prefixes, bool *is_truncated);
+
+// abort every multipart upload of a bucket that matches prefix/delim
+extern int abort_bucket_multiparts(RGWRados *store, CephContext *cct, RGWBucketInfo& bucket_info,
+ string& prefix, string& delim);
+#endif
diff --git a/src/rgw/rgw_multi_del.cc b/src/rgw/rgw_multi_del.cc
new file mode 100644
index 00000000..2faa8069
--- /dev/null
+++ b/src/rgw/rgw_multi_del.cc
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+
+#include "include/types.h"
+
+#include "rgw_xml.h"
+#include "rgw_multi_del.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+
+/*
+ * Finalise an <Object> element: a non-empty Key child is mandatory, a
+ * VersionId child is optional.
+ */
+bool RGWMultiDelObject::xml_end(const char *el)
+{
+  auto *key_el = static_cast<RGWMultiDelKey *>(find_first("Key"));
+  auto *version_el = static_cast<RGWMultiDelVersionId *>(find_first("VersionId"));
+
+  if (key_el == nullptr) {
+    return false;
+  }
+
+  const string key_text = key_el->get_data();
+  if (key_text.empty()) {
+    return false;
+  }
+  key = key_text;
+
+  // version_id keeps its previous value when no VersionId child exists
+  if (version_el != nullptr) {
+    version_id = version_el->get_data();
+  }
+
+  return true;
+}
+
+/*
+ * Finalise a <Delete> element: read the optional Quiet flag (compared
+ * case-insensitively against "true") and append one rgw_obj_key per
+ * <Object> child to `objects`. Always returns true.
+ */
+bool RGWMultiDelDelete::xml_end(const char *el) {
+  auto *quiet_el = static_cast<RGWMultiDelQuiet *>(find_first("Quiet"));
+  if (quiet_el != nullptr) {
+    const string quiet_text = quiet_el->get_data();
+    quiet = (strcasecmp(quiet_text.c_str(), "true") == 0);
+  }
+
+  XMLObjIter object_iter = find("Object");
+  for (auto *entry = static_cast<RGWMultiDelObject *>(object_iter.get_next());
+       entry != nullptr;
+       entry = static_cast<RGWMultiDelObject *>(object_iter.get_next())) {
+    rgw_obj_key obj_key(entry->get_key(), entry->get_version_id());
+    objects.push_back(obj_key);
+  }
+  return true;
+}
+
+/*
+ * Allocate the object that represents the XML element named `el`.
+ * Returns a null pointer for element names this parser does not handle.
+ */
+XMLObj *RGWMultiDelXMLParser::alloc_obj(const char *el) {
+  if (strcmp(el, "Delete") == 0) {
+    return new RGWMultiDelDelete();
+  }
+  if (strcmp(el, "Quiet") == 0) {
+    return new RGWMultiDelQuiet();
+  }
+  if (strcmp(el, "Object") == 0) {
+    return new RGWMultiDelObject();
+  }
+  if (strcmp(el, "Key") == 0) {
+    return new RGWMultiDelKey();
+  }
+  if (strcmp(el, "VersionId") == 0) {
+    return new RGWMultiDelVersionId();
+  }
+
+  return NULL;
+}
+
diff --git a/src/rgw/rgw_multi_del.h b/src/rgw/rgw_multi_del.h
new file mode 100644
index 00000000..1ac8e491
--- /dev/null
+++ b/src/rgw/rgw_multi_del.h
@@ -0,0 +1,66 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_MULTI_DELETE_H_
+#define RGW_MULTI_DELETE_H_
+
+#include <vector>
+#include "rgw_xml.h"
+#include "rgw_common.h"
+
+/* XML object for the "Delete" root element of a multi-object delete
+ * request; xml_end() fills `objects` and `quiet`. */
+class RGWMultiDelDelete : public XMLObj
+{
+public:
+ RGWMultiDelDelete() :quiet(false) {}
+ ~RGWMultiDelDelete() override {}
+ bool xml_end(const char *el) override;
+
+ // one key (name + version id) per <Object> child
+ std::vector<rgw_obj_key> objects;
+ // false unless a <Quiet> child equal to "true" (any case) was seen
+ bool quiet;
+ bool is_quiet() { return quiet; }
+};
+
+/* XML object for a "Quiet" element; its text is read through get_data()
+ * by RGWMultiDelDelete::xml_end(). */
+class RGWMultiDelQuiet : public XMLObj
+{
+public:
+ RGWMultiDelQuiet() {}
+ ~RGWMultiDelQuiet() override {}
+};
+
+/* XML object for an "Object" element: the key to delete and an optional
+ * version id, both set by xml_end(). */
+class RGWMultiDelObject : public XMLObj
+{
+ string key;
+ // stays empty when the element has no VersionId child
+ string version_id;
+public:
+ RGWMultiDelObject() {}
+ ~RGWMultiDelObject() override {}
+ bool xml_end(const char *el) override;
+
+ const string& get_key() { return key; }
+ const string& get_version_id() { return version_id; }
+};
+
+/* XML object for a "Key" element; its text is read through get_data() by
+ * RGWMultiDelObject::xml_end(). */
+class RGWMultiDelKey : public XMLObj
+{
+public:
+ RGWMultiDelKey() {}
+ ~RGWMultiDelKey() override {}
+};
+
+/* XML object for a "VersionId" element; its text is read through
+ * get_data() by RGWMultiDelObject::xml_end(). */
+class RGWMultiDelVersionId : public XMLObj
+{
+public:
+ RGWMultiDelVersionId() {}
+ ~RGWMultiDelVersionId() override {}
+};
+
+/* XML parser for multi-object delete requests; alloc_obj() maps element
+ * names to the RGWMultiDel* classes declared above. */
+class RGWMultiDelXMLParser : public RGWXMLParser
+{
+ // returns NULL for element names it does not recognise
+ XMLObj *alloc_obj(const char *el) override;
+public:
+ RGWMultiDelXMLParser() {}
+ ~RGWMultiDelXMLParser() override {}
+};
+
+
+#endif
diff --git a/src/rgw/rgw_multiparser.cc b/src/rgw/rgw_multiparser.cc
new file mode 100644
index 00000000..63d70d72
--- /dev/null
+++ b/src/rgw/rgw_multiparser.cc
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include "include/types.h"
+
+#include "rgw_multi.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+/*
+ * Feed stdin through RGWMultiXMLParser in chunks of up to 1024 bytes.
+ *
+ * Exit status: 0 when all input was parsed; 1 when the parser could not
+ * be initialised or rejected the input; -1 on a read error.
+ */
+int main(int argc, char **argv) {
+  RGWMultiXMLParser parser;
+
+  if (!parser.init())
+    exit(1);
+
+  char buf[1024];
+
+  for (;;) {
+    // fread() returns size_t; it is bounded by sizeof(buf), so the cast to
+    // the parser's int length below cannot overflow
+    const size_t nread = fread(buf, 1, sizeof(buf), stdin);
+    if (ferror(stdin)) {
+      fprintf(stderr, "Read error\n");
+      exit(-1);
+    }
+    const int done = feof(stdin);
+
+    if (!parser.parse(buf, static_cast<int>(nread), done)) {
+      cerr << "failed to parse!" << std::endl;
+      // stop here: feeding more data to a failed parser is pointless, and
+      // a parse failure must not be reported as success to the caller
+      exit(1);
+    }
+
+    if (done)
+      break;
+  }
+
+  exit(0);
+}
+
diff --git a/src/rgw/rgw_notify.cc b/src/rgw/rgw_notify.cc
new file mode 100644
index 00000000..2104031a
--- /dev/null
+++ b/src/rgw/rgw_notify.cc
@@ -0,0 +1,141 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_notify.h"
+#include <memory>
+#include <boost/algorithm/hex.hpp>
+#include "rgw_pubsub.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_perf_counters.h"
+#include "common/dout.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw::notify {
+
+// Fill `record` with the details of the object change described by the
+// request state `s` and the explicit key/size/mtime/etag/event_type arguments.
+// configurationId and opaque_data are not set here; per the comments below
+// they are filled in from the notification and topic configuration.
+void populate_record_from_request(const req_state *s,
+ const rgw_obj_key& key,
+ uint64_t size,
+ const ceph::real_time& mtime,
+ const std::string& etag,
+ EventType event_type,
+ rgw_pubsub_s3_record& record) {
+ record.eventTime = mtime;
+ record.eventName = to_string(event_type);
+ record.userIdentity = s->user->user_id.id; // user that triggered the change
+ record.x_amz_request_id = s->req_id; // request ID of the original change
+ record.x_amz_id_2 = s->host_id; // RGW on which the change was made
+ // configurationId is filled from notification configuration
+ record.bucket_name = s->bucket_name;
+ record.bucket_ownerIdentity = s->bucket_owner.get_id().id;
+ record.bucket_arn = to_string(rgw::ARN(s->bucket));
+ record.object_key = key.name;
+ record.object_size = size;
+ record.object_etag = etag;
+ record.object_versionId = key.instance;
+ // use timestamp as per key sequence id (hex encoded)
+ // note: the timestamp is the current time, not mtime, and the hex string is
+ // built from the raw in-memory bytes of the utime_t object
+ const utime_t ts(real_clock::now());
+ boost::algorithm::hex((const char*)&ts, (const char*)&ts + sizeof(utime_t),
+ std::back_inserter(record.object_sequencer));
+ // the same timestamp and the etag are passed to set_event_id() for record.id
+ set_event_id(record.id, etag, ts);
+ record.bucket_id = s->bucket.bucket_id;
+ // pass meta data
+ record.x_meta_map = s->info.x_meta_map;
+ // pass tags
+ record.tags = s->tagset.get_tags();
+ // opaque data will be filled from topic configuration
+}
+
+// Returns true only when the request passes every part of the topic filter:
+// event type, object key, metadata and tags. The checks run in that order and
+// stop at the first one that fails.
+bool match(const rgw_pubsub_topic_filter& filter, const req_state* s, EventType event) {
+  const auto& s3_filter = filter.s3_filter;
+  return ::match(filter.events, event) &&
+         ::match(s3_filter.key_filter, s->object.name) &&
+         ::match(s3_filter.metadata_filter, s->info.x_meta_map) &&
+         ::match(s3_filter.tag_filter, s->tagset.get_tags());
+}
+
+// Publish a notification about an object event to every topic configured on
+// the request's bucket whose filter matches the request.
+//
+// Returns 0 when all matching notifications were pushed (or none matched),
+// the negative error from fetching the bucket topics or from the first failed
+// push, or -EINVAL when a push endpoint could not be created. Processing
+// stops at the first failure.
+//
+// Perf counters: each push is counted as ok/failed; the event itself is
+// counted once as "triggered" when at least one topic matched, and as "lost"
+// when no push for it succeeded. This accounting is also done on the
+// early-return failure paths, which previously skipped it.
+int publish(const req_state* s,
+            const rgw_obj_key& key,
+            uint64_t size,
+            const ceph::real_time& mtime,
+            const std::string& etag,
+            EventType event_type,
+            RGWRados* store) {
+  RGWUserPubSub ps_user(store, s->user->user_id);
+  RGWUserPubSub::Bucket ps_bucket(&ps_user, s->bucket);
+  rgw_pubsub_bucket_topics bucket_topics;
+  const auto rc = ps_bucket.get_topics(&bucket_topics);
+  if (rc < 0) {
+    // failed to fetch bucket topics
+    return rc;
+  }
+  rgw_pubsub_s3_record record;
+  populate_record_from_request(s, key, size, mtime, etag, event_type, record);
+  bool event_handled = false;
+  bool event_should_be_handled = false;
+
+  // per-event accounting, shared by the normal exit and the bail-out paths:
+  // not counting events with no notifications or events that are filtered,
+  // counting a single event regardless of the number of notifications it sends
+  const auto account_event = [&event_handled, &event_should_be_handled]() {
+    if (!event_should_be_handled || !perfcounter) {
+      return;
+    }
+    perfcounter->inc(l_rgw_pubsub_event_triggered);
+    if (!event_handled) {
+      // no notification for this event was delivered
+      perfcounter->inc(l_rgw_pubsub_event_lost);
+    }
+  };
+
+  for (const auto& bucket_topic : bucket_topics.topics) {
+    const rgw_pubsub_topic_filter& topic_filter = bucket_topic.second;
+    const rgw_pubsub_topic& topic_cfg = topic_filter.topic;
+    if (!match(topic_filter, s, event_type)) {
+      // topic does not apply to req_state
+      continue;
+    }
+    event_should_be_handled = true;
+    record.configurationId = topic_filter.s3_id;
+    record.opaque_data = topic_cfg.opaque_data;
+    ldout(s->cct, 20) << "notification: '" << topic_filter.s3_id <<
+        "' on topic: '" << topic_cfg.dest.arn_topic <<
+        "' and bucket: '" << s->bucket.name <<
+        "' (unique topic: '" << topic_cfg.name <<
+        "') apply to event of type: '" << to_string(event_type) << "'" << dendl;
+    try {
+      // TODO add endpoint LRU cache
+      const auto push_endpoint = RGWPubSubEndpoint::create(topic_cfg.dest.push_endpoint,
+          topic_cfg.dest.arn_topic,
+          RGWHTTPArgs(topic_cfg.dest.push_endpoint_args),
+          s->cct);
+      const std::string push_endpoint_str = push_endpoint->to_str();
+      ldout(s->cct, 20) << "push endpoint created: " << push_endpoint_str << dendl;
+      // distinct name: do not shadow the rc of the topic fetch above
+      const auto push_rc = push_endpoint->send_to_completion_async(s->cct, record, s->yield);
+      if (push_rc < 0) {
+        // bail out on first error
+        // TODO: add conf for bail out policy
+        ldout(s->cct, 1) << "push to endpoint " << push_endpoint_str << " failed, with error: " << push_rc << dendl;
+        if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed);
+        account_event();
+        return push_rc;
+      }
+      if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok);
+      ldout(s->cct, 20) << "successful push to endpoint " << push_endpoint_str << dendl;
+      event_handled = true;
+    } catch (const RGWPubSubEndpoint::configuration_error& e) {
+      ldout(s->cct, 1) << "ERROR: failed to create push endpoint: "
+        << topic_cfg.dest.push_endpoint << " due to: " << e.what() << dendl;
+      if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed);
+      account_event();
+      return -EINVAL;
+    }
+  }
+
+  account_event();
+
+  return 0;
+}
+
+}
+
diff --git a/src/rgw/rgw_notify.h b/src/rgw/rgw_notify.h
new file mode 100644
index 00000000..5b480c0e
--- /dev/null
+++ b/src/rgw/rgw_notify.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+#include "common/ceph_time.h"
+#include "rgw_notify_event_type.h"
+
+// forward declarations
+class RGWRados;
+class req_state;
+struct rgw_obj_key;
+
+namespace rgw::notify {
+
+// publish notification
+// s          - state of the request that triggered the event
+// key        - key of the affected object
+// size       - object size
+// mtime      - used as the event time of the record
+// etag       - object etag
+// event_type - kind of event being published
+// store      - store used to look up the bucket's notification topics
+// returns a negative error code on failure, zero otherwise
+int publish(const req_state* s,
+ const rgw_obj_key& key,
+ uint64_t size,
+ const ceph::real_time& mtime,
+ const std::string& etag,
+ EventType event_type,
+ RGWRados* store);
+
+}
+
diff --git a/src/rgw/rgw_notify_event_type.cc b/src/rgw/rgw_notify_event_type.cc
new file mode 100644
index 00000000..10c77c28
--- /dev/null
+++ b/src/rgw/rgw_notify_event_type.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_notify_event_type.h"
+#include "include/str_list.h"
+
+namespace rgw::notify {
+
+ // Convert an event type to its S3 event name (e.g. "s3:ObjectCreated:Put").
+ // UnknownEvent, and any value that is not one of the enumerators, yields
+ // "s3:UnknownEvent"; the explicit UnknownEvent case used to return the
+ // misspelt "s3:UnknownEvet", disagreeing with the fallthrough.
+ std::string to_string(EventType t) {
+   switch (t) {
+     case ObjectCreated:
+       return "s3:ObjectCreated:*";
+     case ObjectCreatedPut:
+       return "s3:ObjectCreated:Put";
+     case ObjectCreatedPost:
+       return "s3:ObjectCreated:Post";
+     case ObjectCreatedCopy:
+       return "s3:ObjectCreated:Copy";
+     case ObjectCreatedCompleteMultipartUpload:
+       return "s3:ObjectCreated:CompleteMultipartUpload";
+     case ObjectRemoved:
+       return "s3:ObjectRemoved:*";
+     case ObjectRemovedDelete:
+       return "s3:ObjectRemoved:Delete";
+     case ObjectRemovedDeleteMarkerCreated:
+       return "s3:ObjectRemoved:DeleteMarkerCreated";
+     case UnknownEvent:
+       return "s3:UnknownEvent";
+   }
+   // reached only for values outside the enumerator list
+   return "s3:UnknownEvent";
+ }
+
+ // Convert an event type to the coarser Ceph event name. Every creation
+ // event maps to "OBJECT_CREATE"; the removal wildcard, UnknownEvent and any
+ // unlisted value map to "UNKNOWN_EVENT".
+ std::string to_ceph_string(EventType t) {
+   const char* ceph_name = "UNKNOWN_EVENT";
+   switch (t) {
+     case ObjectCreated:
+     case ObjectCreatedPut:
+     case ObjectCreatedPost:
+     case ObjectCreatedCopy:
+     case ObjectCreatedCompleteMultipartUpload:
+       ceph_name = "OBJECT_CREATE";
+       break;
+     case ObjectRemovedDelete:
+       ceph_name = "OBJECT_DELETE";
+       break;
+     case ObjectRemovedDeleteMarkerCreated:
+       ceph_name = "DELETE_MARKER_CREATE";
+       break;
+     case ObjectRemoved:
+     case UnknownEvent:
+       // keep the default
+       break;
+   }
+   return ceph_name;
+ }
+
+ // Parse an event name, accepting both the S3 names and the Ceph names.
+ // The comparison is exact; anything unrecognised yields UnknownEvent.
+ EventType from_string(const std::string& s) {
+   struct NamedEvent {
+     const char* name;
+     EventType type;
+   };
+   static const NamedEvent known_events[] = {
+     {"s3:ObjectCreated:*", ObjectCreated},
+     {"OBJECT_CREATE", ObjectCreated},
+     {"s3:ObjectCreated:Put", ObjectCreatedPut},
+     {"s3:ObjectCreated:Post", ObjectCreatedPost},
+     {"s3:ObjectCreated:Copy", ObjectCreatedCopy},
+     {"s3:ObjectCreated:CompleteMultipartUpload", ObjectCreatedCompleteMultipartUpload},
+     {"s3:ObjectRemoved:*", ObjectRemoved},
+     {"s3:ObjectRemoved:Delete", ObjectRemovedDelete},
+     {"OBJECT_DELETE", ObjectRemovedDelete},
+     {"s3:ObjectRemoved:DeleteMarkerCreated", ObjectRemovedDeleteMarkerCreated},
+     {"DELETE_MARKER_CREATE", ObjectRemovedDeleteMarkerCreated},
+   };
+   for (const auto& candidate : known_events) {
+     if (s == candidate.name) {
+       return candidate.type;
+     }
+   }
+   return UnknownEvent;
+ }
+
+// Not a strict equality: two event types compare equal when their bit
+// patterns share at least one set bit.
+bool operator==(EventType lhs, EventType rhs) {
+  const int shared_bits = static_cast<int>(lhs) & static_cast<int>(rhs);
+  return shared_bits != 0;
+}
+
+// Replace the contents of event_list with the event types named in
+// string_list, using "," as the separator. Each token goes through
+// from_string(), so an unrecognised name is stored as UnknownEvent rather
+// than being dropped.
+void from_string_list(const std::string& string_list, EventTypeList& event_list) {
+ event_list.clear();
+ ceph::for_each_substr(string_list, ",", [&event_list] (auto token) {
+ event_list.push_back(rgw::notify::from_string(std::string(token.begin(), token.end())));
+ });
+}
+}
diff --git a/src/rgw/rgw_notify_event_type.h b/src/rgw/rgw_notify_event_type.h
new file mode 100644
index 00000000..0d86bf3f
--- /dev/null
+++ b/src/rgw/rgw_notify_event_type.h
@@ -0,0 +1,35 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+#include <string>
+#include <vector>
+
+namespace rgw::notify {
+ // Event types are bit flags. The specific creation events occupy the low
+ // four bits and ObjectCreated (0xF) is the union of them; the specific
+ // removal events occupy bits of the next nibble and ObjectRemoved (0xF0)
+ // covers that whole nibble. UnknownEvent has a bit of its own.
+ enum EventType {
+ ObjectCreated = 0xF,
+ ObjectCreatedPut = 0x1,
+ ObjectCreatedPost = 0x2,
+ ObjectCreatedCopy = 0x4,
+ ObjectCreatedCompleteMultipartUpload = 0x8,
+ ObjectRemoved = 0xF0,
+ ObjectRemovedDelete = 0x10,
+ ObjectRemovedDeleteMarkerCreated = 0x20,
+ UnknownEvent = 0x100
+ };
+
+ using EventTypeList = std::vector<EventType>;
+
+ // two event types are considered equal if their bits intersect
+ // (so a wildcard compares equal to each specific event it covers)
+ bool operator==(EventType lhs, EventType rhs);
+
+ // S3 event name of an event type, e.g. "s3:ObjectCreated:Put"
+ std::string to_string(EventType t);
+
+ // Ceph event name of an event type, e.g. "OBJECT_CREATE"
+ std::string to_ceph_string(EventType t);
+
+ // parse an event name into an event type
+ EventType from_string(const std::string& s);
+
+ // create a vector of event types from comma separated list of event types
+ void from_string_list(const std::string& string_list, EventTypeList& event_list);
+}
+
diff --git a/src/rgw/rgw_object_expirer.cc b/src/rgw/rgw_object_expirer.cc
new file mode 100644
index 00000000..93258b42
--- /dev/null
+++ b/src/rgw/rgw_object_expirer.cc
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+
+#include "auth/Crypto.h"
+
+#include "common/armor.h"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "global/global_init.h"
+
+#include "include/utime.h"
+#include "include/str_list.h"
+
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_log.h"
+#include "rgw_formats.h"
+#include "rgw_usage.h"
+#include "rgw_object_expirer_core.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+static RGWRados *store = NULL;
+
+// Scope guard that closes the given store through
+// RGWStoreManager::close_storage() when it is destroyed. A null store is
+// allowed and results in no action.
+//
+// Copying is disabled: a copy would hold the same pointer and close the store
+// a second time.
+class StoreDestructor {
+  RGWRados *store;
+
+public:
+  explicit StoreDestructor(RGWRados *_s) : store(_s) {}
+  StoreDestructor(const StoreDestructor&) = delete;
+  StoreDestructor& operator=(const StoreDestructor&) = delete;
+  ~StoreDestructor() {
+    if (store) {
+      RGWStoreManager::close_storage(store);
+    }
+  }
+};
+
+// Print the command-line help; this program only has the generic server
+// usage text.
+static void usage()
+{
+ generic_server_usage();
+}
+
+// Entry point of the object expirer program: initialises the Ceph context,
+// opens the RGW store, starts the expirer worker thread and then sleeps
+// forever while the worker does its job.
+//
+// Exits with 1 when no arguments are given, with 0 after printing usage, and
+// returns EIO when the storage provider cannot be initialised.
+int main(const int argc, const char **argv)
+{
+  vector<const char *> args;
+  argv_to_vec(argc, argv, args);
+  if (args.empty()) {
+    cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    exit(1);
+  }
+  if (ceph_argparse_need_usage(args)) {
+    usage();
+    exit(0);
+  }
+
+  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON,
+                         CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS, "rgw_data");
+
+  for (std::vector<const char *>::iterator i = args.begin(); i != args.end(); ) {
+    if (ceph_argparse_double_dash(args, i)) {
+      break;
+    } else {
+      // Not a "--": step over the argument. Without this the iterator never
+      // moves and any leftover argument makes the loop spin forever.
+      ++i;
+    }
+  }
+
+  if (g_conf()->daemonize) {
+    global_init_daemonize(g_ceph_context);
+  }
+
+  common_init_finish(g_ceph_context);
+
+  store = RGWStoreManager::get_storage(g_ceph_context, false, false, false, false, false);
+  if (!store) {
+    std::cerr << "couldn't init storage provider" << std::endl;
+    return EIO;
+  }
+
+  rgw_user_init(store);
+  rgw_bucket_init(store->meta_mgr);
+
+  /* Guard to not forget about closing the rados store. */
+  StoreDestructor store_dtor(store);
+
+  RGWObjectExpirer objexp(store);
+  objexp.start_processor();
+
+  /* The worker thread does the actual processing; the main thread only has
+   * to stay alive. */
+  const utime_t interval(g_ceph_context->_conf->rgw_objexp_gc_interval, 0);
+  while (true) {
+    interval.sleep();
+  }
+
+  /* unreachable */
+
+  return EXIT_SUCCESS;
+}
diff --git a/src/rgw/rgw_object_expirer_core.cc b/src/rgw/rgw_object_expirer_core.cc
new file mode 100644
index 00000000..b2e302ba
--- /dev/null
+++ b/src/rgw/rgw_object_expirer_core.cc
@@ -0,0 +1,294 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+
+#include "auth/Crypto.h"
+
+#include "common/armor.h"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "global/global_init.h"
+
+#include "include/utime.h"
+#include "include/str_list.h"
+
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_log.h"
+#include "rgw_formats.h"
+#include "rgw_usage.h"
+#include "rgw_object_expirer_core.h"
+
+#include "services/svc_sys_obj.h"
+
+#include "cls/lock/cls_lock_client.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+static string objexp_lock_name = "gc_process";
+
+/*
+ * Load the bucket info of the bucket identified by tenant and bucket name.
+ *
+ * The lookup is done by tenant + name. bucket_id is accepted but not
+ * consulted here: bucket ids are ephemeral, so the tenant carried in the
+ * removal hint is what makes the lookup reliable.
+ *
+ * Returns the result of RGWRados::get_bucket_info().
+ */
+int RGWObjectExpirer::init_bucket_info(const string& tenant_name,
+                                       const string& bucket_name,
+                                       const string& bucket_id,
+                                       RGWBucketInfo& bucket_info)
+{
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  const int ret = store->get_bucket_info(sysobj_ctx, tenant_name, bucket_name,
+                                         bucket_info, nullptr, nullptr);
+  return ret;
+}
+
+/*
+ * Delete the object described by a single removal hint.
+ *
+ * Returns -ERR_PRECONDITION_FAILED when the bucket named in the hint no
+ * longer exists, the (negative) bucket lookup error for any other lookup
+ * failure, and otherwise the result of RGWRados::delete_obj().
+ */
+int RGWObjectExpirer::garbage_single_object(objexp_hint_entry& hint)
+{
+  RGWBucketInfo bucket_info;
+
+  int ret = init_bucket_info(hint.tenant, hint.bucket_name,
+                             hint.bucket_id, bucket_info);
+  if (-ENOENT == ret) {
+    ldout(store->ctx(), 15) << "NOTICE: cannot find bucket = "
+        << hint.bucket_name << ". The object must be already removed" << dendl;
+    return -ERR_PRECONDITION_FAILED;
+  } else if (ret < 0) {
+    /* the separating space before "due" was missing, gluing the bucket name
+     * to the rest of the message */
+    ldout(store->ctx(), 1) << "ERROR: could not init bucket = "
+        << hint.bucket_name << " due to ret = " << ret << dendl;
+    return ret;
+  }
+
+  RGWObjectCtx rctx(store);
+
+  rgw_obj_key key = hint.obj_key;
+  if (key.instance.empty()) {
+    /* a hint without an instance is addressed with the "null" instance */
+    key.instance = "null";
+  }
+
+  rgw_obj obj(bucket_info.bucket, key);
+  store->set_atomic(&rctx, obj);
+  ret = store->delete_obj(rctx, bucket_info, obj,
+                          bucket_info.versioning_status(), 0, hint.exp_time);
+
+  return ret;
+}
+
+/*
+ * Process one chunk of removal hints: parse each entry and try to delete the
+ * object it describes.
+ *
+ * need_trim is set to true as soon as at least one entry was parsed, whether
+ * or not the deletion itself succeeded; entries that cannot be parsed are
+ * skipped and do not count.
+ */
+void RGWObjectExpirer::garbage_chunk(list<cls_timeindex_entry>& entries,      /* in  */
+                                     bool& need_trim)                         /* out */
+{
+  need_trim = false;
+
+  for (auto& entry : entries) {
+    objexp_hint_entry hint;
+    ldout(store->ctx(), 15) << "got removal hint for: " << entry.key_ts.sec()
+        << " - " << entry.key_ext << dendl;
+
+    if (store->objexp_hint_parse(entry, hint) < 0) {
+      ldout(store->ctx(), 1) << "cannot parse removal hint for " << hint.obj_key << dendl;
+      continue;
+    }
+
+    /* A PRECONDITION_FAILED result only tells us the hint is stale, so it is
+     * logged quietly; any other error is logged more loudly. */
+    const int removal_ret = garbage_single_object(hint);
+    if (removal_ret == -ERR_PRECONDITION_FAILED) {
+      ldout(store->ctx(), 15) << "not actual hint for object: " << hint.obj_key << dendl;
+    } else if (removal_ret < 0) {
+      ldout(store->ctx(), 1) << "cannot remove expired object: " << hint.obj_key << dendl;
+    }
+
+    need_trim = true;
+  }
+}
+
+/*
+ * Trim the removal hints of a shard in the given time and marker range.
+ * A failure is logged and otherwise ignored.
+ */
+void RGWObjectExpirer::trim_chunk(const string& shard,
+                                  const utime_t& from,
+                                  const utime_t& to,
+                                  const string& from_marker,
+                                  const string& to_marker)
+{
+  ldout(store->ctx(), 20) << "trying to trim removal hints to=" << to
+                          << ", to_marker=" << to_marker << dendl;
+
+  const real_time trim_from = from.to_real_time();
+  const real_time trim_to = to.to_real_time();
+
+  const int trim_ret = store->objexp_hint_trim(shard, trim_from, trim_to,
+                                               from_marker, to_marker);
+  if (trim_ret >= 0) {
+    return;
+  }
+  ldout(store->ctx(), 0) << "ERROR during trim: " << trim_ret << dendl;
+}
+
+/*
+ * Process the removal hints of one shard that fall between last_run and
+ * round_start, chunk by chunk, under an exclusive lock on the shard.
+ *
+ * Returns true when the whole range was processed. Returns false when the
+ * shard could not be locked, when listing the hints failed, or when the time
+ * budget (rgw_objexp_gc_interval seconds) ran out, so that the caller does
+ * not treat the range as done.
+ */
+bool RGWObjectExpirer::process_single_shard(const string& shard,
+                                            const utime_t& last_run,
+                                            const utime_t& round_start)
+{
+  string marker;
+  string out_marker;
+  bool truncated = false;
+  bool done = true;
+
+  CephContext *cct = store->ctx();
+  int num_entries = cct->_conf->rgw_objexp_chunk_size;
+
+  int max_secs = cct->_conf->rgw_objexp_gc_interval;
+  utime_t end = ceph_clock_now();
+  end += max_secs;
+
+  rados::cls::lock::Lock l(objexp_lock_name);
+
+  utime_t time(max_secs, 0);
+  l.set_duration(time);
+
+  int ret = l.lock_exclusive(&store->objexp_pool_ctx, shard);
+  if (ret == -EBUSY) { /* already locked by another processor */
+    dout(5) << __func__ << "(): failed to acquire lock on " << shard << dendl;
+    return false;
+  }
+  if (ret < 0) {
+    /* any other error also means the lock is not held: do not process the
+     * shard unlocked, retry it in the next round instead */
+    ldout(cct, 1) << __func__ << "(): cannot lock " << shard
+                  << ", ret = " << ret << dendl;
+    return false;
+  }
+
+  /* the time range does not change between chunks */
+  const real_time rt_last = last_run.to_real_time();
+  const real_time rt_start = round_start.to_real_time();
+
+  do {
+    list<cls_timeindex_entry> entries;
+    ret = store->objexp_hint_list(shard, rt_last, rt_start,
+                                  num_entries, marker, entries,
+                                  &out_marker, &truncated);
+    if (ret < 0) {
+      ldout(cct, 10) << "cannot get removal hints from shard: " << shard
+                     << dendl;
+      /* Give up on this shard for now and report it as not done. A
+       * `continue` here would jump to the loop condition: with a stale
+       * `truncated` it retried forever without checking the time budget, and
+       * otherwise it reported the shard as fully processed. */
+      done = false;
+      break;
+    }
+
+    bool need_trim;
+    garbage_chunk(entries, need_trim);
+
+    if (need_trim) {
+      trim_chunk(shard, last_run, round_start, marker, out_marker);
+    }
+
+    utime_t now = ceph_clock_now();
+    if (now >= end) {
+      /* out of time: the remaining hints are left for the next round */
+      done = false;
+      break;
+    }
+
+    marker = out_marker;
+  } while (truncated);
+
+  l.unlock(&store->objexp_pool_ctx, shard);
+  return done;
+}
+
+/* Runs process_single_shard() on every hint shard and returns true only if
+ * each of them reported success. A failing shard does not stop the scan. */
+bool RGWObjectExpirer::inspect_all_shards(const utime_t& last_run,
+                                          const utime_t& round_start)
+{
+  CephContext * const cct = store->ctx();
+  const int shard_count = cct->_conf->rgw_objexp_hints_num_shards;
+
+  bool all_done = true;
+  for (int shard_idx = 0; shard_idx < shard_count; ++shard_idx) {
+    string shard;
+    store->objexp_get_shard(shard_idx, shard);
+
+    ldout(cct, 20) << "processing shard = " << shard << dendl;
+
+    const bool shard_done = process_single_shard(shard, last_run, round_start);
+    all_done = all_done && shard_done;
+  }
+
+  return all_done;
+}
+
+/* Reports whether shutdown was requested, i.e. whether down_flag was set by
+ * stop_processor(). */
+bool RGWObjectExpirer::going_down()
+{
+ return down_flag;
+}
+
+/* Creates the worker and starts its thread under the name "rgw_obj_expirer".
+ * NOTE(review): an already existing worker is overwritten without being
+ * stopped, so this looks intended to be called once per stop_processor() --
+ * confirm with callers. */
+void RGWObjectExpirer::start_processor()
+{
+ worker = new OEWorker(store->ctx(), this);
+ worker->create("rgw_obj_expirer");
+}
+
+/* Requests shutdown and, if a worker exists, wakes it up, waits for its
+ * thread to finish and destroys it. Safe to call when no worker is running. */
+void RGWObjectExpirer::stop_processor()
+{
+  down_flag = true;
+  if (worker == nullptr) {
+    return;
+  }
+
+  worker->stop();
+  worker->join();
+  delete worker;
+  worker = nullptr;
+}
+
+/*
+ * Worker thread body: repeatedly inspects all shards, then sleeps for the
+ * rest of the rgw_objexp_gc_interval period, until shutdown is requested.
+ *
+ * last_run only advances when a round processed every shard, so an
+ * incomplete round is repeated from the same starting point.
+ */
+void *RGWObjectExpirer::OEWorker::entry() {
+  utime_t last_run;
+  do {
+    utime_t start = ceph_clock_now();
+    ldout(cct, 2) << "object expiration: start" << dendl;
+    if (oe->inspect_all_shards(last_run, start)) {
+      /* All shards have been processed properly. Next time we can start
+       * from this moment. */
+      last_run = start;
+    }
+    ldout(cct, 2) << "object expiration: stop" << dendl;
+
+
+    if (oe->going_down())
+      break;
+
+    utime_t end = ceph_clock_now();
+    end -= start;
+    int secs = cct->_conf->rgw_objexp_gc_interval;
+
+    if (secs <= end.sec())
+      continue; // next round
+
+    secs -= end.sec();
+
+    lock.Lock();
+    /* Re-check under the lock. stop() signals the condition while holding
+     * this lock and the flag is set before stop() is called, so a shutdown
+     * requested after the check above is either seen here or wakes the wait.
+     * Without the re-check that signal could be missed and the thread would
+     * sleep for the whole interval. */
+    if (!oe->going_down()) {
+      cond.WaitInterval(lock, utime_t(secs, 0));
+    }
+    lock.Unlock();
+  } while (!oe->going_down());
+
+  return NULL;
+}
+
+/* Wakes the worker thread if it is waiting between rounds. The signal is
+ * sent while holding the worker's lock. This does not set the shutdown flag
+ * itself; RGWObjectExpirer::stop_processor() does that before calling here. */
+void RGWObjectExpirer::OEWorker::stop()
+{
+ Mutex::Locker l(lock);
+ cond.Signal();
+}
+
diff --git a/src/rgw/rgw_object_expirer_core.h b/src/rgw/rgw_object_expirer_core.h
new file mode 100644
index 00000000..c3caff5c
--- /dev/null
+++ b/src/rgw/rgw_object_expirer_core.h
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_OBJEXP_H
+#define CEPH_OBJEXP_H
+
+#include <atomic>
+#include <string>
+#include <cerrno>
+#include <sstream>
+#include <iostream>
+
+#include "auth/Crypto.h"
+
+#include "common/armor.h"
+#include "common/ceph_json.h"
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "common/Thread.h"
+
+#include "global/global_init.h"
+
+#include "include/utime.h"
+#include "include/str_list.h"
+
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_log.h"
+#include "rgw_formats.h"
+#include "rgw_usage.h"
+
+// Deletes expired objects based on removal hints kept in sharded time
+// indexes. The work is done by a background worker thread that is started
+// with start_processor() and stopped with stop_processor() (also called from
+// the destructor).
+class RGWObjectExpirer {
+protected:
+ RGWRados *store;
+
+ // Look up the bucket info by tenant and bucket name.
+ int init_bucket_info(const std::string& tenant_name,
+ const std::string& bucket_name,
+ const std::string& bucket_id,
+ RGWBucketInfo& bucket_info);
+
+ // Background thread running the expiration rounds; lock/cond are used to
+ // sleep between rounds and to be woken up by stop().
+ class OEWorker : public Thread {
+ CephContext *cct;
+ RGWObjectExpirer *oe;
+ Mutex lock;
+ Cond cond;
+
+ public:
+ OEWorker(CephContext * const cct,
+ RGWObjectExpirer * const oe)
+ : cct(cct),
+ oe(oe),
+ lock("OEWorker") {
+ }
+
+ void *entry() override;
+ void stop();
+ };
+
+ // Owned by this object: allocated in start_processor(), deleted in
+ // stop_processor().
+ OEWorker *worker{nullptr};
+ // Set once shutdown has been requested; read by the worker thread.
+ std::atomic<bool> down_flag = { false };
+
+public:
+ explicit RGWObjectExpirer(RGWRados *_store)
+ : store(_store), worker(NULL) {
+ }
+ ~RGWObjectExpirer() {
+ stop_processor();
+ }
+
+ // Delete the object described by one removal hint.
+ int garbage_single_object(objexp_hint_entry& hint);
+
+ // Process a chunk of removal hints; need_trim reports whether any entry
+ // was parsed.
+ void garbage_chunk(std::list<cls_timeindex_entry>& entries, /* in */
+ bool& need_trim); /* out */
+
+ // Trim the hints of a shard in the given time/marker range.
+ void trim_chunk(const std::string& shard,
+ const utime_t& from,
+ const utime_t& to,
+ const string& from_marker,
+ const string& to_marker);
+
+ // Process the hints of one shard; returns whether the shard was completed.
+ bool process_single_shard(const std::string& shard,
+ const utime_t& last_run,
+ const utime_t& round_start);
+
+ // Process every shard; returns whether all of them were completed.
+ bool inspect_all_shards(const utime_t& last_run,
+ const utime_t& round_start);
+
+ bool going_down();
+ void start_processor();
+ void stop_processor();
+};
+#endif /* CEPH_OBJEXP_H */
diff --git a/src/rgw/rgw_object_lock.cc b/src/rgw/rgw_object_lock.cc
new file mode 100644
index 00000000..69da8881
--- /dev/null
+++ b/src/rgw/rgw_object_lock.cc
@@ -0,0 +1,96 @@
+#include "rgw_object_lock.h"
+
+// Decode a default retention setting from XML. "Mode" is mandatory and must
+// be GOVERNANCE or COMPLIANCE; exactly one of "Days" and "Years" must be
+// present. Violations are reported by throwing RGWXMLDecoder::err.
+void DefaultRetention::decode_xml(XMLObj *obj) {
+  RGWXMLDecoder::decode_xml("Mode", mode, obj, true);
+  const bool mode_is_known = (mode == "GOVERNANCE") || (mode == "COMPLIANCE");
+  if (!mode_is_known) {
+    throw RGWXMLDecoder::err("bad Mode in lock rule");
+  }
+
+  const bool has_days = RGWXMLDecoder::decode_xml("Days", days, obj);
+  const bool has_years = RGWXMLDecoder::decode_xml("Years", years, obj);
+  // both present or both absent is an error
+  if (has_days == has_years) {
+    throw RGWXMLDecoder::err("either Days or Years must be specified, but not both");
+  }
+}
+
+// Write the retention setting as XML: the mode, followed by "Days" when a
+// positive day count is set and by "Years" otherwise.
+void DefaultRetention::dump_xml(Formatter *f) const {
+  encode_xml("Mode", mode, f);
+  if (days <= 0) {
+    encode_xml("Years", years, f);
+    return;
+  }
+  encode_xml("Days", days, f);
+}
+
+// Decode the rule from XML; the "DefaultRetention" child is mandatory.
+void ObjectLockRule::decode_xml(XMLObj *obj) {
+ RGWXMLDecoder::decode_xml("DefaultRetention", defaultRetention, obj, true);
+}
+
+// Write the rule as XML: a single "DefaultRetention" element.
+void ObjectLockRule::dump_xml(Formatter *f) const {
+ encode_xml("DefaultRetention", defaultRetention, f);
+}
+
+// Decode an object lock configuration from XML. "ObjectLockEnabled" is
+// mandatory and must be exactly "Enabled", otherwise RGWXMLDecoder::err is
+// thrown. "Rule" is optional; rule_exist records whether it was present.
+void RGWObjectLock::decode_xml(XMLObj *obj) {
+  string lock_state;
+  RGWXMLDecoder::decode_xml("ObjectLockEnabled", lock_state, obj, true);
+  if (lock_state != "Enabled") {
+    throw RGWXMLDecoder::err("invalid ObjectLockEnabled value");
+  }
+  enabled = true;
+  rule_exist = RGWXMLDecoder::decode_xml("Rule", rule, obj);
+}
+
+// Write the configuration as XML. "ObjectLockEnabled" is emitted only when
+// the lock is enabled, and "Rule" only when a rule exists.
+void RGWObjectLock::dump_xml(Formatter *f) const {
+ if (enabled) {
+ encode_xml("ObjectLockEnabled", "Enabled", f);
+ }
+ if (rule_exist) {
+ encode_xml("Rule", rule, f);
+ }
+}
+
+// Compute the date until which an object modified at `mtime` is locked by
+// the default retention rule.
+//
+// Returns a default-constructed real_time when no rule exists. Otherwise the
+// retention period is the rule's day count or, when that is not positive,
+// its year count taken as 365 days per year.
+ceph::real_time RGWObjectLock::get_lock_until_date(const ceph::real_time& mtime) const {
+  if (!rule_exist) {
+    return ceph::real_time();
+  }
+  // Do the arithmetic in double: in int, days*24*60*60 overflows once the
+  // period reaches roughly 69 years.
+  double retention_days = get_days();
+  if (retention_days <= 0) {
+    retention_days = get_years() * 365.0;
+  }
+  const double retention_secs = retention_days * 24 * 60 * 60;
+  return mtime + make_timespan(retention_secs);
+}
+
+// Decode an object retention setting from XML. Both "Mode" (GOVERNANCE or
+// COMPLIANCE) and "RetainUntilDate" (ISO 8601) are mandatory; an invalid
+// value is reported by throwing RGWXMLDecoder::err.
+void RGWObjectRetention::decode_xml(XMLObj *obj) {
+  RGWXMLDecoder::decode_xml("Mode", mode, obj, true);
+  const bool mode_is_known = (mode == "GOVERNANCE") || (mode == "COMPLIANCE");
+  if (!mode_is_known) {
+    throw RGWXMLDecoder::err("bad Mode in retention");
+  }
+
+  string raw_date;
+  RGWXMLDecoder::decode_xml("RetainUntilDate", raw_date, obj, true);
+  const boost::optional<ceph::real_time> parsed_date = ceph::from_iso_8601(raw_date);
+  if (!parsed_date) {
+    throw RGWXMLDecoder::err("invalid RetainUntilDate value");
+  }
+  retain_until_date = *parsed_date;
+}
+
+// Write the retention setting as XML: the mode and the retain-until date in
+// ISO 8601 form.
+void RGWObjectRetention::dump_xml(Formatter *f) const {
+  encode_xml("Mode", mode, f);
+
+  const string iso_date = ceph::to_iso_8601(retain_until_date);
+  encode_xml("RetainUntilDate", iso_date, f);
+}
+
+// Decode a legal hold from XML. "Status" is mandatory and must be "ON" or
+// "OFF"; anything else is reported by throwing RGWXMLDecoder::err.
+void RGWObjectLegalHold::decode_xml(XMLObj *obj) {
+  RGWXMLDecoder::decode_xml("Status", status, obj, true);
+  const bool status_is_known = (status == "ON") || (status == "OFF");
+  if (!status_is_known) {
+    throw RGWXMLDecoder::err("bad status in legal hold");
+  }
+}
+
+// Write the legal hold as XML: a single "Status" element.
+void RGWObjectLegalHold::dump_xml(Formatter *f) const {
+ encode_xml("Status", status, f);
+}
+
+// The hold is in effect only when the status is exactly "ON".
+bool RGWObjectLegalHold::is_enabled() const {
+  return status == "ON";
+}
diff --git a/src/rgw/rgw_object_lock.h b/src/rgw/rgw_object_lock.h
new file mode 100644
index 00000000..63990d62
--- /dev/null
+++ b/src/rgw/rgw_object_lock.h
@@ -0,0 +1,221 @@
+#ifndef CEPH_RGW_OBJECT_LOCK_H
+#define CEPH_RGW_OBJECT_LOCK_H
+
+#include <string>
+#include "common/ceph_time.h"
+#include "common/iso_8601.h"
+#include "rgw_xml.h"
+
+// Default retention setting of an object lock rule: a mode plus a period
+// given either in days or in years. Both counters start at zero.
+class DefaultRetention
+{
+protected:
+ string mode;
+ int days;
+ int years;
+
+public:
+ DefaultRetention(): days(0), years(0) {};
+
+ int get_days() const {
+ return days;
+ }
+
+ int get_years() const {
+ return years;
+ }
+
+ string get_mode() const {
+ return mode;
+ }
+
+ // Serialisation (version 1): mode, days, years, in that order.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(mode, bl);
+ encode(days, bl);
+ encode(years, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // Must read the fields in the same order encode() writes them.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(mode, bl);
+ decode(days, bl);
+ decode(years, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(DefaultRetention)
+
+// Object lock rule: a thin wrapper around a DefaultRetention, to which all
+// getters forward.
+class ObjectLockRule
+{
+protected:
+ DefaultRetention defaultRetention;
+public:
+ int get_days() const {
+ return defaultRetention.get_days();
+ }
+
+ int get_years() const {
+ return defaultRetention.get_years();
+ }
+
+ string get_mode() const {
+ return defaultRetention.get_mode();
+ }
+
+ // Serialisation (version 1): the default retention only.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(defaultRetention, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(defaultRetention, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(ObjectLockRule)
+
+// Object lock configuration: an enabled flag and an optional rule.
+// A default-constructed configuration is enabled and has no rule.
+class RGWObjectLock
+{
+protected:
+ bool enabled;
+ bool rule_exist;
+ ObjectLockRule rule;
+
+public:
+ RGWObjectLock():enabled(true), rule_exist(false) {}
+
+ // The getters below forward to the rule without checking rule_exist.
+ int get_days() const {
+ return rule.get_days();
+ }
+
+ int get_years() const {
+ return rule.get_years();
+ }
+
+ string get_mode() const {
+ return rule.get_mode();
+ }
+
+ // True when exactly one of years and days is positive.
+ bool retention_period_valid() const {
+ // DefaultRetention requires either Days or Years.
+ // You can't specify both at the same time.
+ // see https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketPUTObjectLockConfiguration.html
+ return (get_years() > 0) != (get_days() > 0);
+ }
+
+ bool has_rule() const {
+ return rule_exist;
+ }
+
+ // Serialisation (version 1): enabled, rule_exist and, only when a rule
+ // exists, the rule itself.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(enabled, bl);
+ encode(rule_exist, bl);
+ if (rule_exist) {
+ encode(rule, bl);
+ }
+ ENCODE_FINISH(bl);
+ }
+
+ // Mirrors encode(): the rule is read only when rule_exist was stored as true.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(enabled, bl);
+ decode(rule_exist, bl);
+ if (rule_exist) {
+ decode(rule, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+ // Date until which an object modified at mtime is locked by the rule.
+ ceph::real_time get_lock_until_date(const ceph::real_time& mtime) const;
+};
+WRITE_CLASS_ENCODER(RGWObjectLock)
+
+// Retention setting of a single object: a mode and the date until which the
+// object is retained.
+class RGWObjectRetention
+{
+protected:
+ string mode;
+ ceph::real_time retain_until_date;
+public:
+ RGWObjectRetention() {}
+ RGWObjectRetention(string _mode, ceph::real_time _date): mode(_mode), retain_until_date(_date) {}
+
+ // The setters store the value as given; validation of the mode happens
+ // only in decode_xml().
+ void set_mode(string _mode) {
+ mode = _mode;
+ }
+
+ string get_mode() const {
+ return mode;
+ }
+
+ void set_retain_until_date(ceph::real_time _retain_until_date) {
+ retain_until_date = _retain_until_date;
+ }
+
+ ceph::real_time get_retain_until_date() const {
+ return retain_until_date;
+ }
+
+ // Serialisation (version 1): mode, then retain_until_date.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(mode, bl);
+ encode(retain_until_date, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(mode, bl);
+ decode(retain_until_date, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(RGWObjectRetention)
+
+// Legal hold of a single object, kept as a status string.
+class RGWObjectLegalHold
+{
+protected:
+ string status;
+public:
+ RGWObjectLegalHold() {}
+ // Note: not explicit, so a string converts implicitly to a legal hold.
+ RGWObjectLegalHold(string _status): status(_status) {}
+ // Stores the value as given; validation happens only in decode_xml().
+ void set_status(string _status) {
+ status = _status;
+ }
+
+ string get_status() const {
+ return status;
+ }
+
+ // Serialisation (version 1): the status string only.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(status, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(status, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+ bool is_enabled() const;
+};
+WRITE_CLASS_ENCODER(RGWObjectLegalHold)
+#endif //CEPH_RGW_OBJECT_LOCK_H
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
new file mode 100644
index 00000000..c2501b78
--- /dev/null
+++ b/src/rgw/rgw_op.cc
@@ -0,0 +1,7942 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <stdlib.h>
+#include <system_error>
+#include <unistd.h>
+
+#include <sstream>
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/bind.hpp>
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+#include <boost/utility/string_view.hpp>
+
+#include "include/scope_guard.h"
+#include "common/Clock.h"
+#include "common/armor.h"
+#include "common/errno.h"
+#include "common/mime.h"
+#include "common/utf8.h"
+#include "common/ceph_json.h"
+#include "common/static_ptr.h"
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h"
+#include "rgw_acl_swift.h"
+#include "rgw_aio_throttle.h"
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_log.h"
+#include "rgw_multi.h"
+#include "rgw_multi_del.h"
+#include "rgw_cors.h"
+#include "rgw_cors_s3.h"
+#include "rgw_rest_conn.h"
+#include "rgw_rest_s3.h"
+#include "rgw_tar.h"
+#include "rgw_client_io.h"
+#include "rgw_compression.h"
+#include "rgw_role.h"
+#include "rgw_tag_s3.h"
+#include "rgw_putobj_processor.h"
+#include "rgw_crypt.h"
+#include "rgw_perf_counters.h"
+#include "rgw_notify.h"
+#include "rgw_notify_event_type.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_quota.h"
+#include "services/svc_sys_obj.h"
+
+#include "cls/lock/cls_lock_client.h"
+#include "cls/rgw/cls_rgw_client.h"
+
+
+#include "include/ceph_assert.h"
+
+#include "compressor/Compressor.h"
+
+#ifdef WITH_LTTNG
+#define TRACEPOINT_DEFINE
+#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#include "tracing/rgw_op.h"
+#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#undef TRACEPOINT_DEFINE
+#else
+#define tracepoint(...)
+#endif
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace librados;
+using ceph::crypto::MD5;
+using boost::optional;
+using boost::none;
+
+using rgw::ARN;
+using rgw::IAM::Effect;
+using rgw::IAM::Policy;
+
+static string mp_ns = RGW_OBJ_NS_MULTIPART;
+static string shadow_ns = RGW_OBJ_NS_SHADOW;
+
+static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name);
+static int forward_request_to_master(struct req_state *s, obj_version *objv, RGWRados *store,
+ bufferlist& in_data, JSONParser *jp, req_info *forward_info = nullptr);
+
+static MultipartMetaFilter mp_filter;
+
+// This probably belongs in rgw_iam_policy_keywords; I'll get to it
+// at some point.
+static constexpr auto S3_EXISTING_OBJTAG = "s3:ExistingObjectTag";
+
+/**
+ * Parse the Range header value held in range_str.
+ *
+ * On success sets ofs/end (end == -1 for a suffix range), partial_content and
+ * range_parsed. A header whose unit is not "bytes" is ignored (returns 0 with
+ * no range set).
+ *
+ * Returns: 0 on success or when the range is ignored; -ERANGE for a malformed
+ * byte range unless rgw_ignore_get_invalid_range is set, in which case the
+ * range is reset to the whole object and 0 is returned.
+ */
+int RGWGetObj::parse_range(void)
+{
+  int r = -ERANGE;
+  string rs(range_str);
+  string ofs_str;
+  string end_str;
+
+  ignore_invalid_range = s->cct->_conf->rgw_ignore_get_invalid_range;
+  partial_content = false;
+
+  size_t pos = rs.find("bytes=");
+  if (pos == string::npos) {
+    // Lenient path: allow surrounding whitespace and any letter case, e.g.
+    // " Bytes =0-9". rs[rs.size()] is '\0', which ends every scan below.
+    // The ctype functions require a value representable as unsigned char.
+    const size_t unit_len = sizeof("bytes") - 1;
+    pos = 0;
+    while (isspace(static_cast<unsigned char>(rs[pos])))
+      pos++;
+    size_t unit_end = pos;
+    while (isalpha(static_cast<unsigned char>(rs[unit_end])))
+      unit_end++;
+    // The unit must be exactly "bytes": compare from the first non-space
+    // character and reject empty or abbreviated units ("=0-5", "b=0-5").
+    if (unit_end - pos != unit_len ||
+        strncasecmp(rs.c_str() + pos, "bytes", unit_len) != 0)
+      return 0;
+    while (isspace(static_cast<unsigned char>(rs[unit_end])))
+      unit_end++;
+    if (rs[unit_end] != '=')
+      return 0;
+    rs = rs.substr(unit_end + 1);
+  } else {
+    rs = rs.substr(pos + 6); /* size of("bytes=") */
+  }
+  pos = rs.find('-');
+  if (pos == string::npos)
+    goto done;
+
+  partial_content = true;
+
+  ofs_str = rs.substr(0, pos);
+  end_str = rs.substr(pos + 1);
+  if (end_str.length()) {
+    end = atoll(end_str.c_str());
+    if (end < 0)
+      goto done;
+  }
+
+  if (ofs_str.length()) {
+    ofs = atoll(ofs_str.c_str());
+  } else { // RFC2616 suffix-byte-range-spec
+    ofs = -end;
+    end = -1;
+  }
+
+  if (end >= 0 && end < ofs)
+    goto done;
+
+  range_parsed = true;
+  return 0;
+
+done:
+  if (ignore_invalid_range) {
+    partial_content = false;
+    ofs = 0;
+    end = -1;
+    range_parsed = false; // allow retry
+    r = 0;
+  }
+
+  return r;
+}
+
+// Decode an encoded access control policy from `bl` into `*policy`.
+// Returns 0 on success, or -EIO if decoding throws buffer::error.
+static int decode_policy(CephContext *cct,
+ bufferlist& bl,
+ RGWAccessControlPolicy *policy)
+{
+ auto iter = bl.cbegin();
+ try {
+ policy->decode(iter);
+ } catch (buffer::error& err) {
+ ldout(cct, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+ return -EIO;
+ }
+ // Only when rgw log level 15 is being gathered: append the policy, rendered
+ // as XML, to the log entry opened by ldout() and close it with dendl.
+ if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+ ldout(cct, 15) << __func__ << " Read AccessControlPolicy";
+ // NOTE(review): this downcast assumes the object can be treated as an
+ // RGWAccessControlPolicy_S3 regardless of its dynamic type -- confirm.
+ RGWAccessControlPolicy_S3 *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy);
+ s3policy->to_xml(*_dout);
+ *_dout << dendl;
+ }
+ return 0;
+}
+
+
+// Decode the ACL stored under RGW_ATTR_ACL in `attrs` into `policy`.
+// Returns 0 on success, -ENOENT when the attribute is absent, or the
+// negative error reported by decode_policy().
+static int get_user_policy_from_attr(CephContext * const cct,
+                                     RGWRados * const store,
+                                     map<string, bufferlist>& attrs,
+                                     RGWAccessControlPolicy& policy /* out */)
+{
+  const auto acl_attr = attrs.find(RGW_ATTR_ACL);
+  if (acl_attr == attrs.end()) {
+    return -ENOENT;
+  }
+
+  const int rc = decode_policy(cct, acl_attr->second, &policy);
+  return (rc < 0) ? rc : 0;
+}
+
+// Fill `*policy` from the bucket's RGW_ATTR_ACL attribute. When the attribute
+// is missing, a default policy owned by the bucket owner is generated instead.
+// Returns 0 on success or a negative error from decoding / the user lookup.
+static int get_bucket_instance_policy_from_attr(CephContext *cct,
+                                                RGWRados *store,
+                                                RGWBucketInfo& bucket_info,
+                                                map<string, bufferlist>& bucket_attrs,
+                                                RGWAccessControlPolicy *policy)
+{
+  auto acl_attr = bucket_attrs.find(RGW_ATTR_ACL);
+  if (acl_attr != bucket_attrs.end()) {
+    const int rc = decode_policy(cct, acl_attr->second, policy);
+    return (rc < 0) ? rc : 0;
+  }
+
+  ldout(cct, 0) << "WARNING: couldn't find acl header for bucket, generating default" << dendl;
+  /* object exists, but policy is broken */
+  RGWUserInfo owner_info;
+  const int rc = rgw_get_user_info_by_uid(store, bucket_info.owner, owner_info);
+  if (rc < 0) {
+    return rc;
+  }
+
+  policy->create_default(bucket_info.owner, owner_info.display_name);
+  return 0;
+}
+
+// Read the ACL attribute of `obj` and decode it into `*policy`; optionally
+// also report the object's storage class.
+//  - attribute read OK: decode it (a decode failure is returned immediately).
+//  - -ENODATA: a default policy owned by the bucket owner is generated.
+//  - any other read error: left in `ret` and returned at the end.
+// If `storage_class` is non-null it is set from RGW_ATTR_STORAGE_CLASS, or
+// cleared when that attribute cannot be read.
+// `bucket_attrs` is not used by this function.
+static int get_obj_policy_from_attr(CephContext *cct,
+ RGWRados *store,
+ RGWObjectCtx& obj_ctx,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy,
+ string *storage_class,
+ rgw_obj& obj)
+{
+ bufferlist bl;
+ int ret = 0;
+
+ RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+ RGWRados::Object::Read rop(&op_target);
+
+ ret = rop.get_attr(RGW_ATTR_ACL, bl);
+ if (ret >= 0) {
+ ret = decode_policy(cct, bl, policy);
+ if (ret < 0)
+ return ret;
+ } else if (ret == -ENODATA) {
+ /* object exists, but policy is broken */
+ ldout(cct, 0) << "WARNING: couldn't find acl header for object, generating default" << dendl;
+ RGWUserInfo uinfo;
+ ret = rgw_get_user_info_by_uid(store, bucket_info.owner, uinfo);
+ if (ret < 0)
+ return ret;
+
+ policy->create_default(bucket_info.owner, uinfo.display_name);
+ }
+
+ // The storage class lookup uses its own result variable so it never
+ // changes the value returned for the ACL read above.
+ if (storage_class) {
+ bufferlist scbl;
+ int r = rop.get_attr(RGW_ATTR_STORAGE_CLASS, scbl);
+ if (r >= 0) {
+ *storage_class = scbl.to_str();
+ } else {
+ storage_class->clear();
+ }
+ }
+
+ return ret;
+}
+
+
+/**
+ * Get the AccessControlPolicy for a bucket from its attributes.
+ * bucket_info: info of the bucket; its owner is used if a default policy
+ * has to be generated.
+ * bucket_attrs: the bucket's attributes, searched for the ACL attribute.
+ * policy: must point to a valid RGWAccessControlPolicy; filled upon return.
+ * Returns: 0 on success, -ERR# otherwise.
+ * Thin public wrapper around get_bucket_instance_policy_from_attr().
+ */
+int rgw_op_get_bucket_policy_from_attr(CephContext *cct,
+ RGWRados *store,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy)
+{
+ return get_bucket_instance_policy_from_attr(cct, store, bucket_info, bucket_attrs, policy);
+}
+
+// Build the IAM Policy stored under RGW_ATTR_IAM_POLICY in `attrs`, or return
+// boost::none when that attribute is not present.
+static boost::optional<Policy> get_iam_policy_from_attr(CephContext* cct,
+                                                        RGWRados* store,
+                                                        map<string, bufferlist>& attrs,
+                                                        const string& tenant) {
+  const auto found = attrs.find(RGW_ATTR_IAM_POLICY);
+  if (found == attrs.end()) {
+    return none;
+  }
+  return Policy(cct, tenant, found->second);
+}
+
+// Build the list of IAM user policies stored under RGW_ATTR_USER_POLICY.
+// The attribute holds an encoded map<string, string> whose values are policy
+// documents; each value is turned into a Policy. Returns an empty vector when
+// the attribute is absent. Exceptions thrown by decode() or the Policy
+// constructor propagate to the caller.
+vector<Policy> get_iam_user_policy_from_attr(CephContext* cct,
+                                             RGWRados* store,
+                                             map<string, bufferlist>& attrs,
+                                             const string& tenant) {
+  vector<Policy> policies;
+  if (auto it = attrs.find(RGW_ATTR_USER_POLICY); it != attrs.end()) {
+    // Use the entry already located by find(); no second lookup, no copy.
+    bufferlist& out_bl = it->second;
+    map<string, string> policy_map;
+    decode(policy_map, out_bl);
+    policies.reserve(policy_map.size());
+    for (auto& entry : policy_map) {
+      bufferlist bl = bufferlist::static_from_string(entry.second);
+      policies.emplace_back(cct, tenant, bl);
+    }
+  }
+  return policies;
+}
+
+// Prepare a read operation on `obj` that fills `attrs` with the object's
+// attributes and, if `target_obj` is non-null, passes it as the read's
+// target_obj parameter. Returns the result of read_op.prepare().
+static int get_obj_attrs(RGWRados *store, struct req_state *s, const rgw_obj& obj, map<string, bufferlist>& attrs, rgw_obj *target_obj = nullptr)
+{
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ read_op.params.attrs = &attrs;
+ read_op.params.target_obj = target_obj;
+
+ return read_op.prepare();
+}
+
+// Read the attributes of `obj` into `*attrs` and, when `pbl` is non-null,
+// up to rgw_max_chunk_size bytes from offset 0 of the object into `*pbl`.
+// Returns a negative error if prepare() fails, 0 otherwise.
+static int get_obj_head(RGWRados *store, struct req_state *s,
+ const rgw_obj& obj,
+ map<string, bufferlist> *attrs,
+ bufferlist *pbl)
+{
+ store->set_prefetch_data(s->obj_ctx, obj);
+
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ read_op.params.attrs = attrs;
+
+ int ret = read_op.prepare();
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (!pbl) {
+ return 0;
+ }
+
+ // NOTE(review): the result of read() is stored but never checked, so a
+ // failed read still returns 0 and leaves *pbl as read() left it. This may
+ // be intentional best-effort behaviour -- confirm before changing.
+ ret = read_op.read(0, s->cct->_conf->rgw_max_chunk_size, *pbl);
+
+ return 0;
+}
+
+// Encodable record holding the destination placement rule of a multipart
+// upload; get_multipart_info() decodes it from the head data of the upload's
+// meta object.
+struct multipart_upload_info
+{
+ rgw_placement_rule dest_placement;
+
+ // Encoded as struct version 1 (compat 1) containing only dest_placement.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(dest_placement, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // Mirror of encode().
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(dest_placement, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(multipart_upload_info)
+
+// Load information about a multipart upload from its meta object `obj`.
+//  - attrs: if non-null, receives the meta object's attributes.
+//  - upload_info: if non-null, decoded from the meta object's head data
+//    (left untouched when the head is empty).
+//  - policy: if non-null (and attrs is non-null), decoded from the
+//    RGW_ATTR_ACL attribute when present.
+// Returns 0 on success, -ERR_NO_SUCH_UPLOAD if the meta object does not
+// exist, -EIO on a decode failure, or another negative error from the read.
+static int get_multipart_info(RGWRados *store, struct req_state *s,
+                              const rgw_obj& obj,
+                              RGWAccessControlPolicy *policy,
+                              map<string, bufferlist> *attrs,
+                              multipart_upload_info *upload_info)
+{
+  bufferlist headbl;
+  // Only ask for the head data when the caller wants the upload info.
+  bufferlist *pheadbl = (upload_info ? &headbl : nullptr);
+
+  int op_ret = get_obj_head(store, s, obj, attrs, pheadbl);
+  if (op_ret < 0) {
+    if (op_ret == -ENOENT) {
+      return -ERR_NO_SUCH_UPLOAD;
+    }
+    return op_ret;
+  }
+
+  if (upload_info && headbl.length() > 0) {
+    auto hiter = headbl.cbegin();
+    try {
+      decode(*upload_info, hiter);
+    } catch (buffer::error& err) {
+      ldpp_dout(s, 0) << "ERROR: failed to decode multipart upload info" << dendl;
+      return -EIO;
+    }
+  }
+
+  if (policy && attrs) {
+    // Keys are unique, so a direct lookup replaces scanning every attribute.
+    auto acl = attrs->find(RGW_ATTR_ACL);
+    if (acl != attrs->end()) {
+      auto bli = acl->second.cbegin();
+      try {
+        decode(*policy, bli);
+      } catch (buffer::error& err) {
+        ldpp_dout(s, 0) << "ERROR: could not decode policy" << dendl;
+        return -EIO;
+      }
+    }
+  }
+
+  return 0;
+}
+
+// Convenience overload: build the multipart meta object for `meta_oid` in the
+// request's bucket (multipart namespace, stored in the extra-data pool) and
+// delegate to the rgw_obj overload. Return values are those of that overload.
+static int get_multipart_info(RGWRados *store, struct req_state *s,
+                              const string& meta_oid,
+                              RGWAccessControlPolicy *policy,
+                              map<string, bufferlist> *attrs,
+                              multipart_upload_info *upload_info)
+{
+  rgw_obj meta_obj;
+  meta_obj.init_ns(s->bucket, meta_oid, mp_ns);
+  meta_obj.set_in_extra_data(true);
+
+  return get_multipart_info(store, s, meta_obj, policy, attrs, upload_info);
+}
+
+// Set a single attribute on `obj`: read the object's current attributes,
+// replace (or add) `attr_name` with `attr_val`, and write the whole attribute
+// set back. Returns a negative error from the read or from set_attrs().
+static int modify_obj_attr(RGWRados *store, struct req_state *s, const rgw_obj& obj, const char* attr_name, bufferlist& attr_val)
+{
+ map<string, bufferlist> attrs;
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ read_op.params.attrs = &attrs;
+
+ int r = read_op.prepare();
+ if (r < 0) {
+ return r;
+ }
+ // The write targets read_op.state.obj (the object as resolved by the read),
+ // not the `obj` argument.
+ store->set_atomic(s->obj_ctx, read_op.state.obj);
+ attrs[attr_name] = attr_val;
+ return store->set_attrs(s->obj_ctx, s->bucket_info, read_op.state.obj, attrs, NULL);
+}
+
+// Load the bucket ACL into `*policy`.
+// Returns -ERR_USER_SUSPENDED for a suspended bucket on a non-system request,
+// 0 without touching `policy` when `bucket` has no name, -ERR_NO_SUCH_BUCKET
+// when the policy lookup reports -ENOENT, otherwise the lookup's result.
+static int read_bucket_policy(RGWRados *store,
+                              struct req_state *s,
+                              RGWBucketInfo& bucket_info,
+                              map<string, bufferlist>& bucket_attrs,
+                              RGWAccessControlPolicy *policy,
+                              rgw_bucket& bucket)
+{
+  const bool bucket_suspended = (bucket_info.flags & BUCKET_SUSPENDED) != 0;
+  if (!s->system_request && bucket_suspended) {
+    ldpp_dout(s, 0) << "NOTICE: bucket " << bucket_info.bucket.name
+                    << " is suspended" << dendl;
+    return -ERR_USER_SUSPENDED;
+  }
+
+  if (bucket.name.empty()) {
+    return 0;
+  }
+
+  const int rc = rgw_op_get_bucket_policy_from_attr(s->cct, store, bucket_info,
+                                                    bucket_attrs, policy);
+  return (rc == -ENOENT) ? -ERR_NO_SUCH_BUCKET : rc;
+}
+
+// Load the ACL of an object (or, when the request carries an "uploadId"
+// argument, of the corresponding multipart meta object) into `*acl`, and set
+// `policy` to the bucket's IAM policy, if any.
+//
+// When the object does not exist, the bucket's permissions are consulted so
+// that a requester who may not list the bucket gets -EACCES rather than
+// -ENOENT.
+//
+// Returns 0 on success, -ERR_USER_SUSPENDED for a suspended bucket on a
+// non-system request, -ENOENT / -EACCES as described above, or another
+// negative error from the lookups.
+static int read_obj_policy(RGWRados *store,
+ struct req_state *s,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy* acl,
+ string *storage_class,
+ boost::optional<Policy>& policy,
+ rgw_bucket& bucket,
+ rgw_obj_key& object)
+{
+ string upload_id;
+ upload_id = s->info.args.get("uploadId");
+ rgw_obj obj;
+
+ if (!s->system_request && bucket_info.flags & BUCKET_SUSPENDED) {
+ ldpp_dout(s, 0) << "NOTICE: bucket " << bucket_info.bucket.name
+ << " is suspended" << dendl;
+ return -ERR_USER_SUSPENDED;
+ }
+
+ if (!upload_id.empty()) {
+ /* multipart upload */
+ RGWMPObj mp(object.name, upload_id);
+ string oid = mp.get_meta();
+ obj.init_ns(bucket, oid, mp_ns);
+ obj.set_in_extra_data(true);
+ } else {
+ obj = rgw_obj(bucket, object);
+ }
+ policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs, bucket.tenant);
+
+ RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
+ int ret = get_obj_policy_from_attr(s->cct, store, *obj_ctx,
+ bucket_info, bucket_attrs, acl, storage_class, obj);
+ if (ret == -ENOENT) {
+ /* object does not exist checking the bucket's ACL to make sure
+ that we send a proper error code */
+ RGWAccessControlPolicy bucket_policy(s->cct);
+ ret = rgw_op_get_bucket_policy_from_attr(s->cct, store, bucket_info, bucket_attrs, &bucket_policy);
+ if (ret < 0) {
+ return ret;
+ }
+ const rgw_user& bucket_owner = bucket_policy.get_owner().get_id();
+ // The bucket owner and admins of the owner always get -ENOENT; for anyone
+ // else the answer depends on whether they could list the bucket.
+ if (bucket_owner.compare(s->user->user_id) != 0 &&
+ ! s->auth.identity->is_admin_of(bucket_owner)) {
+ if (policy) {
+ // An explicit Allow/Deny from the IAM policy decides; any other
+ // result falls through to the bucket ACL check below.
+ auto r = policy->eval(s->env, *s->auth.identity, rgw::IAM::s3ListBucket, ARN(bucket));
+ if (r == Effect::Allow)
+ return -ENOENT;
+ if (r == Effect::Deny)
+ return -EACCES;
+ }
+ if (! bucket_policy.verify_permission(s, *s->auth.identity, s->perm_mask, RGW_PERM_READ))
+ ret = -EACCES;
+ else
+ ret = -ENOENT;
+ } else {
+ ret = -ENOENT;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * Populate the request state with bucket-level information and policies:
+ * the bucket ACL (type chosen by s->dialect), the account ACL (Swift only),
+ * bucket info/attrs, zonegroup endpoint data, destination placement, the
+ * user's IAM policies and the bucket's IAM policy.
+ * store: the RGWRados instance used for all lookups.
+ * s: The req_state to draw information from and to fill in.
+ * Returns: 0 on success, -ERR# otherwise.
+ *
+ * NOTE(review): `ret` is reused by several steps; a value set while handling
+ * the bucket (e.g. -ERR_NO_SUCH_BUCKET) is overwritten by the later
+ * user-attribute reads when those run -- confirm callers do not rely on it.
+ */
+int rgw_build_bucket_policies(RGWRados* store, struct req_state* s)
+{
+ int ret = 0;
+ rgw_obj_key obj;
+ RGWUserInfo bucket_owner_info;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ // An explicit bucket instance may be supplied as a system request parameter.
+ string bi = s->info.args.get(RGW_SYS_PARAM_PREFIX "bucket-instance");
+ if (!bi.empty()) {
+ ret = rgw_bucket_parse_bucket_instance(bi, &s->bucket_instance_id, &s->bucket_instance_shard_id);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ // Pick the ACL implementation matching the API dialect of the request.
+ if(s->dialect.compare("s3") == 0) {
+ s->bucket_acl = std::make_unique<RGWAccessControlPolicy_S3>(s->cct);
+ } else if(s->dialect.compare("swift") == 0) {
+ /* We aren't allocating the account policy for those operations using
+ * the Swift's infrastructure that don't really need req_state::user.
+ * Typical example here is the implementation of /info. */
+ if (!s->user->user_id.empty()) {
+ s->user_acl = std::make_unique<RGWAccessControlPolicy_SWIFTAcct>(s->cct);
+ }
+ s->bucket_acl = std::make_unique<RGWAccessControlPolicy_SWIFT>(s->cct);
+ } else {
+ s->bucket_acl = std::make_unique<RGWAccessControlPolicy>(s->cct);
+ }
+
+ /* check if copy source is within the current domain */
+ if (!s->src_bucket_name.empty()) {
+ RGWBucketInfo source_info;
+
+ if (s->bucket_instance_id.empty()) {
+ ret = store->get_bucket_info(obj_ctx, s->src_tenant_name, s->src_bucket_name, source_info, NULL);
+ } else {
+ ret = store->get_bucket_instance_info(obj_ctx, s->bucket_instance_id, source_info, NULL, NULL);
+ }
+ // A failed source lookup is not treated as an error here; local_source is
+ // simply left unchanged.
+ if (ret == 0) {
+ string& zonegroup = source_info.zonegroup;
+ s->local_source = store->svc.zone->get_zonegroup().equals(zonegroup);
+ }
+ }
+
+ // Identity used for the account ACL: the requesting user by default,
+ // replaced by the bucket owner below when the bucket exists.
+ struct {
+ rgw_user uid;
+ std::string display_name;
+ } acct_acl_user = {
+ s->user->user_id,
+ s->user->display_name,
+ };
+
+ if (!s->bucket_name.empty()) {
+ s->bucket_exists = true;
+ if (s->bucket_instance_id.empty()) {
+ ret = store->get_bucket_info(obj_ctx, s->bucket_tenant, s->bucket_name,
+ s->bucket_info, &s->bucket_mtime,
+ &s->bucket_attrs);
+ } else {
+ ret = store->get_bucket_instance_info(obj_ctx, s->bucket_instance_id,
+ s->bucket_info, &s->bucket_mtime,
+ &s->bucket_attrs);
+ }
+ if (ret < 0) {
+ // -ENOENT only marks the bucket as missing; any other error aborts.
+ if (ret != -ENOENT) {
+ string bucket_log;
+ rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name, bucket_log);
+ ldpp_dout(s, 0) << "NOTICE: couldn't get bucket from bucket_name (name="
+ << bucket_log << ")" << dendl;
+ return ret;
+ }
+ s->bucket_exists = false;
+ }
+ s->bucket = s->bucket_info.bucket;
+
+ if (s->bucket_exists) {
+ ret = read_bucket_policy(store, s, s->bucket_info, s->bucket_attrs,
+ s->bucket_acl.get(), s->bucket);
+ acct_acl_user = {
+ s->bucket_info.owner,
+ s->bucket_acl->get_owner().get_display_name(),
+ };
+ } else {
+ s->bucket_acl->create_default(s->user->user_id, s->user->display_name);
+ ret = -ERR_NO_SUCH_BUCKET;
+ }
+
+ s->bucket_owner = s->bucket_acl->get_owner();
+
+ RGWZoneGroup zonegroup;
+ int r = store->svc.zone->get_zonegroup(s->bucket_info.zonegroup, zonegroup);
+ if (!r) {
+ if (!zonegroup.endpoints.empty()) {
+ s->zonegroup_endpoint = zonegroup.endpoints.front();
+ } else {
+ // use zonegroup's master zone endpoints
+ auto z = zonegroup.zones.find(zonegroup.master_zone);
+ if (z != zonegroup.zones.end() && !z->second.endpoints.empty()) {
+ s->zonegroup_endpoint = z->second.endpoints.front();
+ }
+ }
+ s->zonegroup_name = zonegroup.get_name();
+ }
+ // A zonegroup lookup failure is reported only if nothing failed earlier.
+ if (r < 0 && ret == 0) {
+ ret = r;
+ }
+
+ if (s->bucket_exists && !store->svc.zone->get_zonegroup().equals(s->bucket_info.zonegroup)) {
+ ldpp_dout(s, 0) << "NOTICE: request for data in a different zonegroup ("
+ << s->bucket_info.zonegroup << " != "
+ << store->svc.zone->get_zonegroup().get_id() << ")" << dendl;
+ /* we now need to make sure that the operation actually requires copy source, that is
+ * it's a copy operation
+ */
+ if (store->svc.zone->get_zonegroup().is_master_zonegroup() && s->system_request) {
+ /*If this is the master, don't redirect*/
+ } else if (s->op_type == RGW_OP_GET_BUCKET_LOCATION ) {
+ /* If op is get bucket location, don't redirect */
+ } else if (!s->local_source ||
+ (s->op != OP_PUT && s->op != OP_COPY) ||
+ s->object.empty()) {
+ return -ERR_PERMANENT_REDIRECT;
+ }
+ }
+
+ /* init dest placement -- only if bucket exists, otherwise request is either not relevant, or
+ * it's a create_bucket request, in which case the op will deal with the placement later */
+ if (s->bucket_exists) {
+ s->dest_placement.storage_class = s->info.storage_class;
+ s->dest_placement.inherit_from(s->bucket_info.placement_rule);
+
+ if (!store->svc.zone->get_zone_params().valid_placement(s->dest_placement)) {
+ ldpp_dout(s, 0) << "NOTICE: invalid dest placement: " << s->dest_placement.to_str() << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* handle user ACL only for those APIs which support it */
+ if (s->user_acl) {
+ map<string, bufferlist> uattrs;
+ ret = rgw_get_user_attrs_by_uid(store, acct_acl_user.uid, uattrs);
+ if (!ret) {
+ ret = get_user_policy_from_attr(s->cct, store, uattrs, *s->user_acl);
+ }
+ if (-ENOENT == ret) {
+ /* In already existing clusters users won't have ACL. In such case
+ * assuming that only account owner has the rights seems to be
+ * reasonable. That allows to have only one verification logic.
+ * NOTE: there is small compatibility kludge for global, empty tenant:
+ * 1. if we try to reach an existing bucket, its owner is considered
+ * as account owner.
+ * 2. otherwise account owner is identity stored in s->user->user_id. */
+ s->user_acl->create_default(acct_acl_user.uid,
+ acct_acl_user.display_name);
+ ret = 0;
+ } else if (ret < 0) {
+ // NOTE(review): the message prints s->user->user_id although the lookup
+ // above used acct_acl_user.uid, which may be the bucket owner.
+ ldpp_dout(s, 0) << "NOTICE: couldn't get user attrs for handling ACL "
+ "(user_id=" << s->user->user_id << ", ret=" << ret << ")" << dendl;
+ return ret;
+ }
+ }
+ // We don't need user policies in case of STS token returned by AssumeRole,
+ // hence the check for user type
+ if (! s->user->user_id.empty() && s->auth.identity->get_identity_type() != TYPE_ROLE) {
+ try {
+ map<string, bufferlist> uattrs;
+ if (ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, uattrs); ! ret) {
+ if (s->iam_user_policies.empty()) {
+ s->iam_user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->user_id.tenant);
+ } else {
+ // This scenario can happen when a STS token has a policy, then we need to append other user policies
+ // to the existing ones. (e.g. token returned by GetSessionToken)
+ auto user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->user_id.tenant);
+ s->iam_user_policies.insert(s->iam_user_policies.end(), user_policies.begin(), user_policies.end());
+ }
+ } else {
+ // Missing user attributes are not an error; anything else is mapped to
+ // -EACCES.
+ if (ret == -ENOENT)
+ ret = 0;
+ else ret = -EACCES;
+ }
+ } catch (const std::exception& e) {
+ lderr(s->cct) << "Error reading IAM User Policy: " << e.what() << dendl;
+ ret = -EACCES;
+ }
+ }
+
+ try {
+ s->iam_policy = get_iam_policy_from_attr(s->cct, store, s->bucket_attrs,
+ s->bucket_tenant);
+ } catch (const std::exception& e) {
+ // Really this is a can't happen condition. We parse the policy
+ // when it's given to us, so perhaps we should abort or otherwise
+ // raise bloody murder.
+ ldpp_dout(s, 0) << "Error reading IAM Policy: " << e.what() << dendl;
+ ret = -EACCES;
+ }
+
+ bool success = store->svc.zone->get_redirect_zone_endpoint(&s->redirect_zone_endpoint);
+ if (success) {
+ ldpp_dout(s, 20) << "redirect_zone_endpoint=" << s->redirect_zone_endpoint << dendl;
+ }
+
+ return ret;
+}
+
+/**
+ * Load the ACL of the request's object into s->object_acl.
+ * store: the RGWRados instance used for the lookup.
+ * s: The req_state to draw information from.
+ * prefetch_data: If true, mark the object so its data is prefetched.
+ * Returns: 0 on success or when the request names no object,
+ * -ERR_NO_SUCH_BUCKET if the bucket does not exist, -ERR# otherwise.
+ */
+int rgw_build_object_policies(RGWRados *store, struct req_state *s,
+                              bool prefetch_data)
+{
+  // Nothing to do for requests that do not address an object.
+  if (s->object.empty()) {
+    return 0;
+  }
+  if (!s->bucket_exists) {
+    return -ERR_NO_SUCH_BUCKET;
+  }
+
+  s->object_acl = std::make_unique<RGWAccessControlPolicy>(s->cct);
+
+  rgw_obj target(s->bucket, s->object);
+  store->set_atomic(s->obj_ctx, target);
+  if (prefetch_data) {
+    store->set_prefetch_data(s->obj_ctx, target);
+  }
+
+  return read_obj_policy(store, s, s->bucket_info, s->bucket_attrs,
+                         s->object_acl.get(), nullptr, s->iam_policy, s->bucket,
+                         s->object);
+}
+
+// Add a key/value pair to the IAM environment. Pairs with an empty key are
+// dropped; an empty value is allowed (this happens e.g. with tagging).
+void rgw_add_to_iam_environment(rgw::IAM::Environment& e, std::string_view key, std::string_view val){
+  if (key.empty()) {
+    return;
+  }
+  e.emplace(key, val);
+}
+
+// Decode an encoded tag set from `bl` into s->tagset and expose every tag to
+// IAM policy evaluation as "s3:ExistingObjectTag/<key>" in s->env.
+// Returns 0 on success, -EIO if the tag set cannot be decoded.
+static int rgw_iam_add_tags_from_bl(struct req_state* s, bufferlist& bl){
+  try {
+    auto cursor = bl.cbegin();
+    s->tagset.decode(cursor);
+  } catch (buffer::error& err) {
+    ldpp_dout(s, 0) << "ERROR: caught buffer::error, couldn't decode TagSet" << dendl;
+    return -EIO;
+  }
+
+  for (const auto& [tag_key, tag_value] : s->tagset.get_tags()) {
+    rgw_add_to_iam_environment(s->env, "s3:ExistingObjectTag/" + tag_key, tag_value);
+  }
+  return 0;
+}
+
+// Read the attributes of `obj` and, if it carries a tag attribute, add its
+// tags to the IAM environment of the request. `action` is currently unused.
+// Returns 0 when there are no tags or they were added, or a negative error
+// from the attribute read / tag decoding.
+static int rgw_iam_add_existing_objtags(RGWRados* store, struct req_state* s, rgw_obj& obj, std::uint64_t action){
+  store->set_atomic(s->obj_ctx, obj);
+
+  map <string, bufferlist> obj_attrs;
+  const int rc = get_obj_attrs(store, s, obj, obj_attrs);
+  if (rc < 0) {
+    return rc;
+  }
+
+  const auto tags_attr = obj_attrs.find(RGW_ATTR_TAGS);
+  if (tags_attr == obj_attrs.end()) {
+    return 0;
+  }
+  return rgw_iam_add_tags_from_bl(s, tags_attr->second);
+}
+
+// Copy the x-amz-grant-* request headers into the IAM environment `e` under
+// their s3:x-amz-grant-* condition keys. Does nothing unless the request is
+// flagged as carrying ACL headers; headers that are absent are skipped.
+static void rgw_add_grant_to_iam_environment(rgw::IAM::Environment& e, struct req_state *s){
+
+  using header_pair_t = std::pair <const char*, const char*>;
+  // {request env variable, IAM condition key}
+  static const std::initializer_list <header_pair_t> acl_header_conditionals {
+    {"HTTP_X_AMZ_GRANT_READ", "s3:x-amz-grant-read"},
+    {"HTTP_X_AMZ_GRANT_WRITE", "s3:x-amz-grant-write"},
+    {"HTTP_X_AMZ_GRANT_READ_ACP", "s3:x-amz-grant-read-acp"},
+    {"HTTP_X_AMZ_GRANT_WRITE_ACP", "s3:x-amz-grant-write-acp"},
+    {"HTTP_X_AMZ_GRANT_FULL_CONTROL", "s3:x-amz-grant-full-control"}
+  };
+
+  if (!s->has_acl_header) {
+    return;
+  }
+
+  for (const auto& [env_name, condition_key] : acl_header_conditionals) {
+    auto value = s->info.env->get(env_name);
+    if (value) {
+      e[condition_key] = value;
+    }
+  }
+}
+
+/**
+ * Populate s->env with the request-wide IAM condition keys derived from the
+ * current time and the request environment: aws:CurrentTime, aws:EpochTime,
+ * aws:PrincipalType, aws:Referer, aws:SecureTransport, aws:SourceIp,
+ * aws:UserAgent, aws:username and sts:authentication.
+ * store: unused here.
+ * s: the req_state whose environment is read and whose env map is filled.
+ */
+void rgw_build_iam_environment(RGWRados* store,
+                               struct req_state* s)
+{
+  const auto& m = s->info.env->get_map();
+  auto t = ceph::real_clock::now();
+  // aws:CurrentTime carries the ISO 8601 date-time and aws:EpochTime the
+  // number of seconds since the epoch (the two values used to be swapped).
+  s->env.emplace("aws:CurrentTime", ceph::to_iso_8601(t));
+  s->env.emplace("aws:EpochTime", std::to_string(ceph::real_clock::to_time_t(t)));
+  // TODO: This is fine for now, but once we have STS we'll need to
+  // look and see. Also this won't work with the IdentityApplier
+  // model, since we need to know the actual credential.
+  s->env.emplace("aws:PrincipalType", "User");
+
+  auto i = m.find("HTTP_REFERER");
+  if (i != m.end()) {
+    s->env.emplace("aws:Referer", i->second);
+  }
+
+  if (rgw_transport_is_secure(s->cct, *s->info.env)) {
+    s->env.emplace("aws:SecureTransport", "true");
+  }
+
+  // The client address comes from the configured env variable, falling back
+  // to REMOTE_ADDR when none is configured.
+  const auto remote_addr_param = s->cct->_conf->rgw_remote_addr_param;
+  if (remote_addr_param.length()) {
+    i = m.find(remote_addr_param);
+  } else {
+    i = m.find("REMOTE_ADDR");
+  }
+  if (i != m.end()) {
+    const string* ip = &(i->second);
+    string temp;
+    if (remote_addr_param == "HTTP_X_FORWARDED_FOR") {
+      // X-Forwarded-For may hold a comma-separated list; use the first entry.
+      const auto comma = ip->find(',');
+      if (comma != string::npos) {
+        temp.assign(*ip, 0, comma);
+        ip = &temp;
+      }
+    }
+    s->env.emplace("aws:SourceIp", *ip);
+  }
+
+  i = m.find("HTTP_USER_AGENT");
+  if (i != m.end()) {
+    s->env.emplace("aws:UserAgent", i->second);
+  }
+
+  if (s->user) {
+    // What to do about aws::userid? One can have multiple access
+    // keys so that isn't really suitable. Do we have a durable
+    // identifier that can persist through name changes?
+    s->env.emplace("aws:username", s->user->user_id.id);
+  }
+
+  // Presence of a security token header marks the request as STS-authenticated.
+  i = m.find("HTTP_X_AMZ_SECURITY_TOKEN");
+  if (i != m.end()) {
+    s->env.emplace("sts:authentication", "true");
+  } else {
+    s->env.emplace("sts:authentication", "false");
+  }
+}
+
+// Common pre-execution step for bucket/object operations: emit the continue
+// response via dump_continue() when the request state has expect_cont set,
+// then call dump_bucket_from_state().
+void rgw_bucket_object_pre_exec(struct req_state *s)
+{
+ if (s->expect_cont)
+ dump_continue(s);
+
+ dump_bucket_from_state(s);
+}
+
+// Now and then, while we try to update bucket information, the bucket
+// changes underneath us during the operation. (Or we have a cache
+// consistency problem that Watch/Notify isn't ruling out completely.)
+//
+// When that happens we have to refresh the bucket info and try again,
+// but we must retry the right *part*: simply re-sending would
+// obliterate the previous update.
+//
+// Callers should therefore pass a function that includes everything
+// that merges the changes into the bucket information as well as the
+// call that stores it.
+//
+// The passed function must return an integer, negative on error. In
+// general it should just return op_ret. It is re-run, after refreshing
+// s->bucket_info and s->bucket_attrs, for as long as the outcome is
+// -ECANCELED, at most 15 more times.
+namespace {
+template<typename F>
+int retry_raced_bucket_write(RGWRados* g, req_state* s, const F& f) {
+  constexpr unsigned max_retries = 15u;
+
+  auto result = f();
+  unsigned attempt = 0u;
+  while (attempt < max_retries && result == -ECANCELED) {
+    result = g->try_refresh_bucket_info(s->bucket_info, nullptr,
+                                        &s->bucket_attrs);
+    if (result >= 0) {
+      result = f();
+    }
+    ++attempt;
+  }
+  return result;
+}
+}
+
+
+// Check that the requester may read the object.
+// Chooses the IAM action from the torrent flag and whether a version is
+// addressed, loads the object's tags into the IAM environment when a policy
+// has an ExistingObjectTag condition, and verifies the permission. On
+// object-lock-enabled buckets it also records whether retention and legal
+// hold may be read.
+// Returns 0 if permitted, -EACCES otherwise.
+int RGWGetObj::verify_permission()
+{
+  obj = rgw_obj(s->bucket, s->object);
+  store->set_atomic(s->obj_ctx, obj);
+  if (get_data) {
+    store->set_prefetch_data(s->obj_ctx, obj);
+  }
+
+  if (torrent.get_flag()) {
+    if (obj.key.instance.empty()) {
+      action = rgw::IAM::s3GetObjectTorrent;
+    } else {
+      action = rgw::IAM::s3GetObjectVersionTorrent;
+    }
+  } else {
+    if (obj.key.instance.empty()) {
+      action = rgw::IAM::s3GetObject;
+    } else {
+      action = rgw::IAM::s3GetObjectVersion;
+    }
+    // Load the tags at most once, no matter how many policies ask for them.
+    bool need_tags = s->iam_policy &&
+      s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG);
+    if (!need_tags) {
+      for (auto& user_policy : s->iam_user_policies) {
+        if (user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) {
+          need_tags = true;
+          break;
+        }
+      }
+    }
+    if (need_tags) {
+      // The result is not checked: on failure the tag keys are simply
+      // absent from the environment.
+      rgw_iam_add_existing_objtags(store, s, obj, action);
+    }
+  }
+
+  if (!verify_object_permission(this, s, action)) {
+    return -EACCES;
+  }
+
+  if (s->bucket_info.obj_lock_enabled()) {
+    get_retention = verify_object_permission(this, s, rgw::IAM::s3GetObjectRetention);
+    get_legal_hold = verify_object_permission(this, s, rgw::IAM::s3GetObjectLegalHold);
+  }
+
+  return 0;
+}
+
+// Cache the object's tags in the request state: decode the tag attribute of
+// `attrs`, if present, into s->tagset.
+// Call inside try/catch, as decode() may throw.
+void populate_tags_in_request(req_state* s, const std::map<std::string, bufferlist>& attrs) {
+  const auto tags_attr = attrs.find(RGW_ATTR_TAGS);
+  if (tags_attr == attrs.end()) {
+    return;
+  }
+  auto cursor = tags_attr->second.cbegin();
+  decode(s->tagset, cursor);
+}
+
+// Cache the object's user metadata in the request state: every attribute
+// whose name starts with RGW_ATTR_META_PREFIX is added to
+// s->info.x_meta_map, keyed by the attribute name with the leading
+// RGW_ATTR_PREFIX stripped.
+void populate_metadata_in_request(req_state* s, std::map<std::string, bufferlist>& attrs) {
+  for (auto& [attr_name, attr_value] : attrs) {
+    if (!boost::algorithm::starts_with(attr_name, RGW_ATTR_META_PREFIX)) {
+      continue;
+    }
+    std::string_view meta_key(attr_name);
+    meta_key.remove_prefix(sizeof(RGW_ATTR_PREFIX)-1);
+    s->info.x_meta_map.emplace(meta_key, attr_value.c_str());
+  }
+}
+
+// Check the operation against the user's op mask and the zone's writability.
+// Returns -EPERM when the user lacks any of the op types this operation
+// requires, or when a non-system request wants to modify data in a zone that
+// is not writeable; 0 otherwise.
+int RGWOp::verify_op_mask()
+{
+  const uint32_t required_mask = op_mask();
+
+  ldpp_dout(this, 20) << "required_mask= " << required_mask
+                      << " user.op_mask=" << s->user->op_mask << dendl;
+
+  const bool user_has_all_ops =
+    (s->user->op_mask & required_mask) == required_mask;
+  if (!user_has_all_ops) {
+    return -EPERM;
+  }
+
+  const bool is_modify = (required_mask & RGW_OP_TYPE_MODIFY) != 0;
+  if (!s->system_request && is_modify && !store->svc.zone->zone_is_writeable()) {
+    ldpp_dout(this, 5) << "NOTICE: modify request to a read-only zone by a "
+      "non-system user, permission denied" << dendl;
+    return -EPERM;
+  }
+
+  return 0;
+}
+
+// Check that the requester may read the object's tags (or the tags of a
+// specific version when one is addressed). The object's existing tags are
+// loaded into the IAM environment when any policy has an ExistingObjectTag
+// condition. Returns 0 if permitted, -EACCES otherwise.
+int RGWGetObjTags::verify_permission()
+{
+  auto iam_action = s->object.instance.empty()?
+    rgw::IAM::s3GetObjectTagging:
+    rgw::IAM::s3GetObjectVersionTagging;
+  // TODO since we are parsing the bl now anyway, we probably change
+  // the send_response function to accept RGWObjTag instead of a bl
+
+  // Load the tags at most once, no matter how many policies ask for them.
+  bool need_tags = s->iam_policy &&
+    s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG);
+  if (!need_tags) {
+    for (auto& user_policy : s->iam_user_policies) {
+      if (user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) {
+        need_tags = true;
+        break;
+      }
+    }
+  }
+  if (need_tags) {
+    rgw_obj obj = rgw_obj(s->bucket, s->object);
+    // The result is not checked: on failure the tag keys are simply absent
+    // from the environment.
+    rgw_iam_add_existing_objtags(store, s, obj, iam_action);
+  }
+
+  if (!verify_object_permission(this, s,iam_action))
+    return -EACCES;
+
+  return 0;
+}
+
+// Runs the shared bucket/object pre-execution step for this request.
+void RGWGetObjTags::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+// Fetch the object's attributes and send its tag attribute, if any, to the
+// client. On a read failure op_ret holds the error and nothing is sent.
+void RGWGetObjTags::execute()
+{
+  rgw_obj obj(s->bucket, s->object);
+  store->set_atomic(s->obj_ctx, obj);
+
+  map<string,bufferlist> attrs;
+  op_ret = get_obj_attrs(store, s, obj, attrs);
+  if (op_ret < 0) {
+    ldpp_dout(this, 0) << "ERROR: failed to get obj attrs, obj=" << obj
+                       << " ret=" << op_ret << dendl;
+    return;
+  }
+
+  if (auto tags = attrs.find(RGW_ATTR_TAGS); tags != attrs.end()) {
+    has_tags = true;
+    tags_bl.append(tags->second);
+  }
+  // Sent even without tags; tags_bl then holds whatever it held before.
+  send_response_data(tags_bl);
+}
+
+// Check that the requester may set the object's tags (or the tags of a
+// specific version when one is addressed). The object's existing tags are
+// loaded into the IAM environment when any policy has an ExistingObjectTag
+// condition. Returns 0 if permitted, -EACCES otherwise.
+int RGWPutObjTags::verify_permission()
+{
+  auto iam_action = s->object.instance.empty() ?
+    rgw::IAM::s3PutObjectTagging:
+    rgw::IAM::s3PutObjectVersionTagging;
+
+  // Load the tags at most once, no matter how many policies ask for them.
+  bool need_tags = s->iam_policy &&
+    s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG);
+  if (!need_tags) {
+    for (auto& user_policy : s->iam_user_policies) {
+      if (user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) {
+        need_tags = true;
+        break;
+      }
+    }
+  }
+  if (need_tags) {
+    rgw_obj obj = rgw_obj(s->bucket, s->object);
+    // The result is not checked: on failure the tag keys are simply absent
+    // from the environment.
+    rgw_iam_add_existing_objtags(store, s, obj, iam_action);
+  }
+
+  if (!verify_object_permission(this, s,iam_action))
+    return -EACCES;
+  return 0;
+}
+
+ // Store the request's tag set (tags_bl) as the object's tagging attribute.
+ // A request without an object name is rejected with -EINVAL, and an
+ // -ECANCELED result from the attribute update is reported as
+ // -ERR_TAG_CONFLICT.
+ void RGWPutObjTags::execute()
+ {
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   // we only support tagging on existing objects
+   if (s->object.empty()) {
+     op_ret = -EINVAL;
+     return;
+   }
+
+   rgw_obj target(s->bucket, s->object);
+   store->set_atomic(s->obj_ctx, target);
+
+   op_ret = modify_obj_attr(store, s, target, RGW_ATTR_TAGS, tags_bl);
+   if (op_ret == -ECANCELED) {
+     op_ret = -ERR_TAG_CONFLICT;
+   }
+ }
+
+ // Pre-execution hook for the delete-object-tagging op: delegates to the
+ // rgw_bucket_object_pre_exec() helper with the current request state.
+ void RGWDeleteObjTags::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+
+ // Authorize a delete-object-tagging request.
+ //
+ // A request that names no object is let through (returns 0) without any
+ // check. Otherwise the existing object tags are loaded for every policy
+ // that conditions on them, and the (versioned or unversioned) IAM action
+ // is verified. Returns 0 when allowed, -EACCES otherwise.
+ int RGWDeleteObjTags::verify_permission()
+ {
+   if (s->object.empty()) {
+     return 0;
+   }
+
+   const auto iam_action = s->object.instance.empty() ?
+     rgw::IAM::s3DeleteObjectTagging:
+     rgw::IAM::s3DeleteObjectVersionTagging;
+
+   auto load_existing_tags = [&] {
+     auto obj = rgw_obj(s->bucket, s->object);
+     rgw_iam_add_existing_objtags(store, s, obj, iam_action);
+   };
+
+   if (s->iam_policy && s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG)) {
+     load_existing_tags();
+   }
+   // An empty policy list simply yields zero iterations.
+   for (auto& user_policy : s->iam_user_policies) {
+     if (user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) {
+       load_existing_tags();
+     }
+   }
+
+   if (!verify_object_permission(this, s, iam_action)) {
+     return -EACCES;
+   }
+   return 0;
+ }
+
+ // Remove the tagging attribute from the requested object. Does nothing
+ // when the request names no object.
+ void RGWDeleteObjTags::execute()
+ {
+   if (s->object.empty()) {
+     return;
+   }
+
+   rgw_obj target(s->bucket, s->object);
+   store->set_atomic(s->obj_ctx, target);
+
+   // Nothing to set; only the tags attribute is listed for removal.
+   map <string, bufferlist> attrs_to_set;
+   map <string, bufferlist> attrs_to_remove;
+   attrs_to_remove[RGW_ATTR_TAGS] = bufferlist();
+
+   op_ret = store->set_attrs(s->obj_ctx, s->bucket_info, target,
+                             attrs_to_set, &attrs_to_remove);
+ }
+
+ // Run the deferred auth completer, if one is attached to the request.
+ //
+ // Returns -ERR_AMZ_CONTENT_SHA256_MISMATCH when the completer reports
+ // failure, 0 otherwise (including when there is no completer).
+ int RGWOp::do_aws4_auth_completion()
+ {
+   ldpp_dout(this, 5) << "NOTICE: call to do_aws4_auth_completion" << dendl;
+
+   if (!s->auth.completer) {
+     return 0;
+   }
+
+   if (!s->auth.completer->complete()) {
+     return -ERR_AMZ_CONTENT_SHA256_MISMATCH;
+   }
+   ldpp_dout(this, 10) << "v4 auth ok -- do_aws4_auth_completion" << dendl;
+
+   /* TODO(rzarzynski): yes, we're really called twice on PUTs. Only first
+    * call passes, so we disable second one. This is old behaviour, sorry!
+    * Plan for tomorrow: seek and destroy. */
+   s->auth.completer = nullptr;
+
+   return 0;
+ }
+
+ // Select the bucket and user quota settings that apply to this request.
+ //
+ // Skipped (returns 0, quotas untouched) for system requests, for users
+ // whose op_mask lacks RGW_OP_TYPE_MODIFY, and for requests without an
+ // object. Bucket quota precedence: bucket's own setting, then the bucket
+ // owner's per-bucket setting, then the quota service default. User quota:
+ // the owner's setting, else the service default.
+ // Returns a negative error if the bucket owner's info cannot be loaded.
+ int RGWOp::init_quota()
+ {
+   /* no quota enforcement for system requests */
+   if (s->system_request) {
+     return 0;
+   }
+
+   /* init quota related stuff */
+   const bool may_modify = (s->user->op_mask & RGW_OP_TYPE_MODIFY) != 0;
+   if (!may_modify) {
+     return 0;
+   }
+
+   /* only interested in object related ops */
+   if (s->object.empty()) {
+     return 0;
+   }
+
+   // Quotas are taken from the bucket owner, who may differ from the caller.
+   RGWUserInfo owner_info;
+   RGWUserInfo *uinfo = s->user;
+   if (!(s->user->user_id == s->bucket_owner.get_id())) {
+     const int r = rgw_get_user_info_by_uid(store, s->bucket_info.owner, owner_info);
+     if (r < 0) {
+       return r;
+     }
+     uinfo = &owner_info;
+   }
+
+   if (s->bucket_info.quota.enabled) {
+     bucket_quota = s->bucket_info.quota;
+   } else if (uinfo->bucket_quota.enabled) {
+     bucket_quota = uinfo->bucket_quota;
+   } else {
+     bucket_quota = store->svc.quota->get_bucket_quota();
+   }
+
+   if (uinfo->user_quota.enabled) {
+     user_quota = uinfo->user_quota;
+   } else {
+     user_quota = store->svc.quota->get_user_quota();
+   }
+
+   return 0;
+ }
+
+ // Check whether `req_meth` is among the methods allowed by `rule`.
+ //
+ // Only GET, POST, PUT, DELETE and HEAD map to a CORS flag; any other
+ // method name maps to no flag and is therefore reported as unsupported.
+ // A null `req_meth` is rejected.
+ static bool validate_cors_rule_method(RGWCORSRule *rule, const char *req_meth) {
+   if (req_meth == nullptr) {
+     dout(5) << "req_meth is null" << dendl;
+     return false;
+   }
+
+   static const struct {
+     const char *name;
+     uint8_t flag;
+   } known_methods[] = {
+     { "GET",    RGW_CORS_GET },
+     { "POST",   RGW_CORS_POST },
+     { "PUT",    RGW_CORS_PUT },
+     { "DELETE", RGW_CORS_DELETE },
+     { "HEAD",   RGW_CORS_HEAD },
+   };
+
+   uint8_t flags = 0;
+   for (const auto& candidate : known_methods) {
+     if (strcmp(req_meth, candidate.name) == 0) {
+       flags = candidate.flag;
+       break;
+     }
+   }
+
+   if (!(rule->get_allowed_methods() & flags)) {
+     dout(5) << "Method " << req_meth << " is not supported" << dendl;
+     return false;
+   }
+
+   dout(10) << "Method " << req_meth << " is supported" << dendl;
+   return true;
+ }
+
+ // Check that every header named in `req_hdrs` is allowed by `rule`.
+ // A null `req_hdrs` trivially passes; the first disallowed header is
+ // logged and fails the check.
+ static bool validate_cors_rule_header(RGWCORSRule *rule, const char *req_hdrs) {
+   if (req_hdrs == nullptr) {
+     return true;
+   }
+
+   vector<string> requested;
+   get_str_vec(req_hdrs, requested);
+   for (auto it = requested.begin(); it != requested.end(); ++it) {
+     const string& hdr = *it;
+     if (rule->is_header_allowed(hdr.c_str(), hdr.length())) {
+       continue;
+     }
+     dout(5) << "Header " << hdr << " is not registered in this rule" << dendl;
+     return false;
+   }
+   return true;
+ }
+
+ // Decode the bucket's CORS configuration from its attributes.
+ //
+ // Sets cors_exist to whether a CORS attribute is present and, when it is,
+ // decodes it into bucket_cors.
+ // Returns 0 on success (including "no CORS attribute"), -EIO when the
+ // stored attribute cannot be decoded.
+ int RGWOp::read_bucket_cors()
+ {
+   bufferlist bl;
+
+   map<string, bufferlist>::iterator aiter = s->bucket_attrs.find(RGW_ATTR_CORS);
+   if (aiter == s->bucket_attrs.end()) {
+     ldpp_dout(this, 20) << "no CORS configuration attr found" << dendl;
+     cors_exist = false;
+     return 0; /* no CORS configuration found */
+   }
+
+   cors_exist = true;
+
+   bl = aiter->second;
+
+   auto iter = bl.cbegin();
+   try {
+     bucket_cors.decode(iter);
+   } catch (const buffer::error& err) {
+     // This is the CORS attribute, not an access policy; say so in the log
+     // so operators look at the right piece of bucket metadata.
+     ldpp_dout(this, 0) << "ERROR: could not decode CORS, caught buffer::error" << dendl;
+     return -EIO;
+   }
+   // Dump the decoded configuration as XML only when level-15 rgw logging
+   // is being gathered.
+   if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+     RGWCORSConfiguration_S3 *s3cors = static_cast<RGWCORSConfiguration_S3 *>(&bucket_cors);
+     ldpp_dout(this, 15) << "Read RGWCORSConfiguration";
+     s3cors->to_xml(*_dout);
+     *_dout << dendl;
+   }
+   return 0;
+ }
+
+ /** CORS 6.2.6.
+  * Collect the requested header field-names that the rule allows.
+  *
+  * Each name from the comma-separated `req_hdrs` list that is permitted by
+  * `rule` is appended to `hdrs` (comma-joined); names that are not permitted
+  * are logged and left out. `exp_hdrs` and `*max_age` are always filled from
+  * the rule, whether or not `req_hdrs` is null.
+  * */
+ static void get_cors_response_headers(RGWCORSRule *rule, const char *req_hdrs, string& hdrs, string& exp_hdrs, unsigned *max_age) {
+   if (req_hdrs != nullptr) {
+     list<string> requested;
+     get_str_list(req_hdrs, requested);
+     for (const string& name : requested) {
+       if (rule->is_header_allowed(name.c_str(), name.length())) {
+         if (hdrs.length() > 0) {
+           hdrs.append(",");
+         }
+         hdrs.append(name);
+       } else {
+         dout(5) << "Header " << name << " is not registered in this rule" << dendl;
+       }
+     }
+   }
+   rule->format_exp_headers(exp_hdrs);
+   *max_age = rule->get_max_age();
+ }
+
+/**
+ * Generate the CORS header response
+ *
+ * This is described in the CORS standard, section 6.2.
+ */
+bool RGWOp::generate_cors_headers(string& origin, string& method, string& headers, string& exp_headers, unsigned *max_age)
+{
+ /* CORS 6.2.1. */
+ const char *orig = s->info.env->get("HTTP_ORIGIN");
+ if (!orig) {
+ return false;
+ }
+
+ /* Custom: */
+ origin = orig;
+ op_ret = read_bucket_cors();
+ if (op_ret < 0) {
+ return false;
+ }
+
+ if (!cors_exist) {
+ ldpp_dout(this, 2) << "No CORS configuration set yet for this bucket" << dendl;
+ return false;
+ }
+
+ /* CORS 6.2.2. */
+ RGWCORSRule *rule = bucket_cors.host_name_rule(orig);
+ if (!rule)
+ return false;
+
+ /*
+ * Set the Allowed-Origin header to a asterisk if this is allowed in the rule
+ * and no Authorization was send by the client
+ *
+ * The origin parameter specifies a URI that may access the resource. The browser must enforce this.
+ * For requests without credentials, the server may specify "*" as a wildcard,
+ * thereby allowing any origin to access the resource.
+ */
+ const char *authorization = s->info.env->get("HTTP_AUTHORIZATION");
+ if (!authorization && rule->has_wildcard_origin())
+ origin = "*";
+
+ /* CORS 6.2.3. */
+ const char *req_meth = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_METHOD");
+ if (!req_meth) {
+ req_meth = s->info.method;
+ }
+
+ if (req_meth) {
+ method = req_meth;
+ /* CORS 6.2.5. */
+ if (!validate_cors_rule_method(rule, req_meth)) {
+ return false;
+ }
+ }
+
+ /* CORS 6.2.4. */
+ const char *req_hdrs = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS");
+
+ /* CORS 6.2.6. */
+ get_cors_response_headers(rule, req_hdrs, headers, exp_headers, max_age);
+
+ return true;
+}
+
+ // Read one segment of a large object and stream it through the response
+ // filter chain.
+ //
+ // bucket        - bucket holding the segment
+ // ent           - directory entry describing the segment (name, sizes, etag)
+ // bucket_acl    - ACL of that bucket, used for the permission check
+ // bucket_policy - optional IAM policy of that bucket
+ // start_ofs     - first byte of the segment to send
+ // end_ofs       - last byte of the segment to send
+ // swift_slo     - when true the segment's etag is not enforced on read
+ //
+ // Returns 0 (or the non-negative result of the final flush) on success,
+ // -EIO when the stored object does not match the directory entry or its
+ // compression info cannot be decoded, -EPERM when the caller may not read
+ // the segment, or another negative error from the read path.
+ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket,
+                                        const rgw_bucket_dir_entry& ent,
+                                        RGWAccessControlPolicy * const bucket_acl,
+                                        const boost::optional<Policy>& bucket_policy,
+                                        const off_t start_ofs,
+                                        const off_t end_ofs,
+                                        bool swift_slo)
+ {
+   ldpp_dout(this, 20) << "user manifest obj=" << ent.key.name
+       << "[" << ent.key.instance << "]" << dendl;
+   RGWGetObj_CB cb(this);
+   RGWGetObj_Filter* filter = &cb;
+   boost::optional<RGWGetObj_Decompress> decompress;
+
+   int64_t cur_ofs = start_ofs;
+   int64_t cur_end = end_ofs;
+
+   rgw_obj part(bucket, ent.key);
+
+   map<string, bufferlist> attrs;
+
+   uint64_t obj_size;
+   RGWObjectCtx obj_ctx(store);
+   RGWAccessControlPolicy obj_policy(s->cct);
+
+   ldpp_dout(this, 20) << "reading obj=" << part << " ofs=" << cur_ofs
+       << " end=" << cur_end << dendl;
+
+   obj_ctx.set_atomic(part);
+   store->set_prefetch_data(&obj_ctx, part);
+
+   RGWRados::Object op_target(store, s->bucket_info, obj_ctx, part);
+   RGWRados::Object::Read read_op(&op_target);
+
+   if (!swift_slo) {
+     /* SLO etag is optional */
+     read_op.conds.if_match = ent.meta.etag.c_str();
+   }
+   read_op.params.attrs = &attrs;
+   read_op.params.obj_size = &obj_size;
+
+   op_ret = read_op.prepare();
+   if (op_ret < 0)
+     return op_ret;
+   op_ret = read_op.range_to_ofs(ent.meta.accounted_size, cur_ofs, cur_end);
+   if (op_ret < 0)
+     return op_ret;
+   bool need_decompress;
+   op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "ERROR: failed to decode compression info" << dendl;
+     return -EIO;
+   }
+
+   if (need_decompress)
+   {
+     if (cs_info.orig_size != ent.meta.accounted_size) {
+       // hmm.. something wrong, object not as expected, abort!
+       // Log the value that was actually compared (accounted_size), not the
+       // entry's stored size.
+       ldpp_dout(this, 0) << "ERROR: expected cs_info.orig_size=" << cs_info.orig_size
+           << ", actual read size=" << ent.meta.accounted_size << dendl;
+       return -EIO;
+     }
+     // Insert the decompressor in front of the response callback.
+     decompress.emplace(s->cct, &cs_info, partial_content, filter);
+     filter = &*decompress;
+   }
+   else
+   {
+     if (obj_size != ent.meta.size) {
+       // hmm.. something wrong, object not as expected, abort!
+       ldpp_dout(this, 0) << "ERROR: expected obj_size=" << obj_size
+           << ", actual read size=" << ent.meta.size << dendl;
+       return -EIO;
+     }
+   }
+
+   op_ret = rgw_policy_from_attrset(s->cct, attrs, &obj_policy);
+   if (op_ret < 0)
+     return op_ret;
+
+   /* We can use global user_acl because LOs cannot have segments
+    * stored inside different accounts. */
+   if (s->system_request) {
+     ldpp_dout(this, 2) << "overriding permissions due to system operation" << dendl;
+   } else if (s->auth.identity->is_admin_of(s->user->user_id)) {
+     ldpp_dout(this, 2) << "overriding permissions due to admin operation" << dendl;
+   } else if (!verify_object_permission(this, s, part, s->user_acl.get(), bucket_acl,
+                                        &obj_policy, bucket_policy, s->iam_user_policies, action)) {
+     return -EPERM;
+   }
+   // Nothing to stream for an empty segment.
+   if (ent.meta.size == 0) {
+     return 0;
+   }
+
+   perfcounter->inc(l_rgw_get_b, cur_end - cur_ofs);
+   filter->fixup_range(cur_ofs, cur_end);
+   op_ret = read_op.iterate(cur_ofs, cur_end, filter);
+   if (op_ret >= 0)
+     op_ret = filter->flush();
+   return op_ret;
+ }
+
+ // Walk all objects whose name starts with `obj_prefix` in the given bucket
+ // (the segments of a user-manifest large object), in listing order.
+ //
+ // Depending on which out-pointers / callback are supplied this computes:
+ //   *ptotal_len - number of bytes of the [ofs, end] range covered by segments
+ //   *pobj_size  - sum of accounted_size over all listed segments
+ //   *pobj_sum   - etag derived from the concatenated segment etags
+ // and, if `cb` is non-null, invokes it once per segment that overlaps the
+ // requested range (always with swift_slo == false).
+ //
+ // Returns 0 on success, or the first negative value returned by the
+ // listing or by `cb`. `cct` is not used in this function body.
+ static int iterate_user_manifest_parts(CephContext * const cct,
+ RGWRados * const store,
+ const off_t ofs,
+ const off_t end,
+ RGWBucketInfo *pbucket_info,
+ const string& obj_prefix,
+ RGWAccessControlPolicy * const bucket_acl,
+ const boost::optional<Policy>& bucket_policy,
+ uint64_t * const ptotal_len,
+ uint64_t * const pobj_size,
+ string * const pobj_sum,
+ int (*cb)(rgw_bucket& bucket,
+ const rgw_bucket_dir_entry& ent,
+ RGWAccessControlPolicy * const bucket_acl,
+ const boost::optional<Policy>& bucket_policy,
+ off_t start_ofs,
+ off_t end_ofs,
+ void *param,
+ bool swift_slo),
+ void * const cb_param)
+ {
+ rgw_bucket& bucket = pbucket_info->bucket;
+ // obj_ofs: running offset of the end of the segments seen so far;
+ // len_count: bytes of the requested range accounted for so far.
+ uint64_t obj_ofs = 0, len_count = 0;
+ bool found_start = false, found_end = false, handled_end = false;
+ string delim;
+ bool is_truncated;
+ vector<rgw_bucket_dir_entry> objs;
+
+ utime_t start_time = ceph_clock_now();
+
+ RGWRados::Bucket target(store, *pbucket_info);
+ RGWRados::Bucket::List list_op(&target);
+
+ list_op.params.prefix = obj_prefix;
+ list_op.params.delim = delim;
+
+ MD5 etag_sum;
+ do {
+ // Listing page size requested from list_objects() on each round.
+ #define MAX_LIST_OBJS 100
+ int r = list_op.list_objects(MAX_LIST_OBJS, &objs, NULL, &is_truncated);
+ if (r < 0) {
+ return r;
+ }
+
+ for (rgw_bucket_dir_entry& ent : objs) {
+ const uint64_t cur_total_len = obj_ofs;
+ const uint64_t obj_size = ent.meta.accounted_size;
+ uint64_t start_ofs = 0, end_ofs = obj_size;
+
+ // First segment that reaches past `ofs`: the range starts inside it.
+ if ((ptotal_len || cb) && !found_start && cur_total_len + obj_size > (uint64_t)ofs) {
+ start_ofs = ofs - obj_ofs;
+ found_start = true;
+ }
+
+ obj_ofs += obj_size;
+ if (pobj_sum) {
+ etag_sum.Update((const unsigned char *)ent.meta.etag.c_str(),
+ ent.meta.etag.length());
+ }
+
+ // First segment that reaches past `end`: the range stops inside it.
+ if ((ptotal_len || cb) && !found_end && obj_ofs > (uint64_t)end) {
+ end_ofs = end - cur_total_len + 1;
+ found_end = true;
+ }
+
+ perfcounter->tinc(l_rgw_get_lat,
+ (ceph_clock_now() - start_time));
+
+ // Segments after the one containing `end` are still listed (for size
+ // and etag) but no longer counted or passed to the callback.
+ if (found_start && !handled_end) {
+ len_count += end_ofs - start_ofs;
+
+ if (cb) {
+ r = cb(bucket, ent, bucket_acl, bucket_policy, start_ofs, end_ofs,
+ cb_param, false /* swift_slo */);
+ if (r < 0) {
+ return r;
+ }
+ }
+ }
+
+ handled_end = found_end;
+ start_time = ceph_clock_now();
+ }
+ } while (is_truncated);
+
+ if (ptotal_len) {
+ *ptotal_len = len_count;
+ }
+ if (pobj_size) {
+ *pobj_size = obj_ofs;
+ }
+ if (pobj_sum) {
+ complete_etag(etag_sum, pobj_sum);
+ }
+
+ return 0;
+ }
+
+ // One segment of a static large object as needed to read it back.
+ // The two pointers are non-owning; whoever fills them in must keep the
+ // pointed-to objects alive for as long as the part is used.
+ struct rgw_slo_part {
+ RGWAccessControlPolicy *bucket_acl = nullptr; // ACL of the segment's bucket
+ Policy* bucket_policy = nullptr; // IAM policy of the segment's bucket, may be null
+ rgw_bucket bucket; // bucket holding the segment
+ string obj_name; // segment object name
+ uint64_t size = 0; // segment size in bytes
+ string etag; // segment etag
+ };
+
+ // Invoke `cb` for every SLO segment that overlaps the byte range
+ // [ofs, end] of the assembled object.
+ //
+ // `slo_parts` is keyed by each segment's starting offset within the
+ // assembled object. For every overlapping segment the callback receives a
+ // synthesized directory entry (name, size, etag taken from the part) and
+ // the inclusive start/end offsets within that segment, with
+ // swift_slo == true.
+ //
+ // Returns 0 on success (also when `slo_parts` is empty) or the first
+ // negative value returned by `cb`. `cct` and `store` are not used in this
+ // function body.
+ static int iterate_slo_parts(CephContext *cct,
+ RGWRados *store,
+ off_t ofs,
+ off_t end,
+ map<uint64_t, rgw_slo_part>& slo_parts,
+ int (*cb)(rgw_bucket& bucket,
+ const rgw_bucket_dir_entry& ent,
+ RGWAccessControlPolicy *bucket_acl,
+ const boost::optional<Policy>& bucket_policy,
+ off_t start_ofs,
+ off_t end_ofs,
+ void *param,
+ bool swift_slo),
+ void *cb_param)
+ {
+ bool found_start = false, found_end = false;
+
+ if (slo_parts.empty()) {
+ return 0;
+ }
+
+ utime_t start_time = ceph_clock_now();
+
+ // upper_bound() gives the first part starting after `ofs`; stepping back
+ // one lands on the part that contains `ofs`.
+ map<uint64_t, rgw_slo_part>::iterator iter = slo_parts.upper_bound(ofs);
+ if (iter != slo_parts.begin()) {
+ --iter;
+ }
+
+ uint64_t obj_ofs = iter->first;
+
+ for (; iter != slo_parts.end() && !found_end; ++iter) {
+ rgw_slo_part& part = iter->second;
+ rgw_bucket_dir_entry ent;
+
+ ent.key.name = part.obj_name;
+ ent.meta.accounted_size = ent.meta.size = part.size;
+ ent.meta.etag = part.etag;
+
+ uint64_t cur_total_len = obj_ofs;
+ // NOTE(review): for a zero-sized part this unsigned subtraction wraps
+ // around; confirm whether empty segments can reach this point.
+ uint64_t start_ofs = 0, end_ofs = ent.meta.size - 1;
+
+ if (!found_start && cur_total_len + ent.meta.size > (uint64_t)ofs) {
+ start_ofs = ofs - obj_ofs;
+ found_start = true;
+ }
+
+ obj_ofs += ent.meta.size;
+
+ if (!found_end && obj_ofs > (uint64_t)end) {
+ end_ofs = end - cur_total_len;
+ found_end = true;
+ }
+
+ perfcounter->tinc(l_rgw_get_lat,
+ (ceph_clock_now() - start_time));
+
+ if (found_start) {
+ if (cb) {
+ dout(20) << "iterate_slo_parts()"
+ << " obj=" << part.obj_name
+ << " start_ofs=" << start_ofs
+ << " end_ofs=" << end_ofs
+ << dendl;
+
+ // SLO is a Swift thing, and Swift has no knowledge of S3 Policies.
+ // The policy pointer is dereferenced and copied here, so it must
+ // still point at a live Policy.
+ int r = cb(part.bucket, ent, part.bucket_acl,
+ (part.bucket_policy ?
+ boost::optional<Policy>(*part.bucket_policy) : none),
+ start_ofs, end_ofs, cb_param, true /* swift_slo */);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ start_time = ceph_clock_now();
+ }
+
+ return 0;
+ }
+
+ // Adapter with the plain function-pointer signature expected by the
+ // large-object part iterators: `param` is the RGWGetObj that issued the
+ // iteration, and the call is forwarded to its read_user_manifest_part().
+ static int get_obj_user_manifest_iterate_cb(rgw_bucket& bucket,
+                                             const rgw_bucket_dir_entry& ent,
+                                             RGWAccessControlPolicy * const bucket_acl,
+                                             const boost::optional<Policy>& bucket_policy,
+                                             const off_t start_ofs,
+                                             const off_t end_ofs,
+                                             void * const param,
+                                             bool swift_slo = false)
+ {
+   RGWGetObj * const get_op = static_cast<RGWGetObj *>(param);
+   const int ret = get_op->read_user_manifest_part(bucket, ent,
+                                                   bucket_acl, bucket_policy,
+                                                   start_ofs, end_ofs,
+                                                   swift_slo);
+   return ret;
+ }
+
+ // Serve a GET for an object carrying a user manifest attribute.
+ //
+ // `prefix` has the form "<bucket>/<object-prefix>" (both URL-encoded); the
+ // object's content is the concatenation of all objects matching that
+ // prefix. The segments are iterated three times: once to learn the overall
+ // size and etag, once to compute the length of the requested range, and
+ // (only when data is wanted) once more to actually stream the segments.
+ //
+ // Returns 0 on success, -EINVAL if `prefix` contains no '/', or a negative
+ // error from bucket lookup, policy loading, range handling or iteration.
+ int RGWGetObj::handle_user_manifest(const char *prefix)
+ {
+ const boost::string_view prefix_view(prefix);
+ ldpp_dout(this, 2) << "RGWGetObj::handle_user_manifest() prefix="
+ << prefix_view << dendl;
+
+ const size_t pos = prefix_view.find('/');
+ if (pos == string::npos) {
+ return -EINVAL;
+ }
+
+ const std::string bucket_name = url_decode(prefix_view.substr(0, pos));
+ const std::string obj_prefix = url_decode(prefix_view.substr(pos + 1));
+
+ rgw_bucket bucket;
+
+ // The underscore-prefixed locals provide storage when the segments live in
+ // a bucket other than the request's; the pointers select whichever applies.
+ RGWAccessControlPolicy _bucket_acl(s->cct);
+ RGWAccessControlPolicy *bucket_acl;
+ boost::optional<Policy> _bucket_policy;
+ boost::optional<Policy>* bucket_policy;
+ RGWBucketInfo bucket_info;
+ RGWBucketInfo *pbucket_info;
+
+ if (bucket_name.compare(s->bucket.name) != 0) {
+ // Segments are in a different bucket: load its info, ACL and IAM policy.
+ map<string, bufferlist> bucket_attrs;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ int r = store->get_bucket_info(obj_ctx, s->user->user_id.tenant,
+ bucket_name, bucket_info, NULL,
+ &bucket_attrs);
+ if (r < 0) {
+ ldpp_dout(this, 0) << "could not get bucket info for bucket="
+ << bucket_name << dendl;
+ return r;
+ }
+ bucket = bucket_info.bucket;
+ pbucket_info = &bucket_info;
+ bucket_acl = &_bucket_acl;
+ r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl, bucket);
+ if (r < 0) {
+ ldpp_dout(this, 0) << "failed to read bucket policy" << dendl;
+ return r;
+ }
+ _bucket_policy = get_iam_policy_from_attr(s->cct, store, bucket_attrs,
+ bucket_info.bucket.tenant);
+ bucket_policy = &_bucket_policy;
+ } else {
+ // Same bucket as the request: reuse what the request state already holds.
+ bucket = s->bucket;
+ pbucket_info = &s->bucket_info;
+ bucket_acl = s->bucket_acl.get();
+ bucket_policy = &s->iam_policy;
+ }
+
+ /* dry run to find out:
+ * - total length (of the parts we are going to send to client),
+ * - overall DLO's content size,
+ * - md5 sum of overall DLO's content (for etag of Swift API). */
+ int r = iterate_user_manifest_parts(s->cct, store, ofs, end,
+ pbucket_info, obj_prefix, bucket_acl, *bucket_policy,
+ nullptr, &s->obj_size, &lo_etag,
+ nullptr /* cb */, nullptr /* cb arg */);
+ if (r < 0) {
+ return r;
+ }
+
+ // With the overall size known, resolve the requested range into ofs/end.
+ r = RGWRados::Object::Read::range_to_ofs(s->obj_size, ofs, end);
+ if (r < 0) {
+ return r;
+ }
+
+ // Second pass: length of the data that falls inside the resolved range.
+ r = iterate_user_manifest_parts(s->cct, store, ofs, end,
+ pbucket_info, obj_prefix, bucket_acl, *bucket_policy,
+ &total_len, nullptr, nullptr,
+ nullptr, nullptr);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!get_data) {
+ bufferlist bl;
+ send_response_data(bl, 0, 0);
+ return 0;
+ }
+
+ // Third pass: stream every overlapping segment through the callback.
+ r = iterate_user_manifest_parts(s->cct, store, ofs, end,
+ pbucket_info, obj_prefix, bucket_acl, *bucket_policy,
+ nullptr, nullptr, nullptr,
+ get_obj_user_manifest_iterate_cb, (void *)this);
+ if (r < 0) {
+ return r;
+ }
+
+ // Nothing was streamed; still send an (empty) response.
+ if (!total_len) {
+ bufferlist bl;
+ send_response_data(bl, 0, 0);
+ }
+
+ return 0;
+ }
+
+ // Serve a GET for a static large object (SLO).
+ //
+ // `bl` holds the encoded SLO manifest. Every manifest entry names a
+ // "<bucket>/<object>" segment; for each one the owning bucket's ACL and IAM
+ // policy are resolved (cached per bucket name), the segment is recorded at
+ // its starting offset, and finally the requested byte range is streamed
+ // segment by segment.
+ //
+ // Returns 0 on success, -EIO if the manifest cannot be decoded, -EINVAL
+ // for a malformed segment path, or a negative error from bucket lookup,
+ // policy loading, range handling or segment iteration.
+ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
+ {
+   RGWSLOInfo slo_info;
+   auto bliter = bl.cbegin();
+   try {
+     decode(slo_info, bliter);
+   } catch (buffer::error& err) {
+     ldpp_dout(this, 0) << "ERROR: failed to decode slo manifest" << dendl;
+     return -EIO;
+   }
+   ldpp_dout(this, 2) << "RGWGetObj::handle_slo_manifest()" << dendl;
+
+   // Raw pointers to the ACLs and policies stored below are handed to the
+   // slo_parts entries and dereferenced later in iterate_slo_parts(), so the
+   // storage must never relocate its elements. A list (unlike a vector)
+   // keeps element addresses stable across push_back(), and map nodes are
+   // likewise stable across insertions.
+   list<RGWAccessControlPolicy> allocated_acls;
+   map<string, pair<RGWAccessControlPolicy *, boost::optional<Policy>>> policies;
+   map<string, rgw_bucket> buckets;
+
+   map<uint64_t, rgw_slo_part> slo_parts;
+
+   MD5 etag_sum;
+   total_len = 0;
+
+   for (const auto& entry : slo_info.entries) {
+     const string& path = entry.path;
+
+     /* If the path starts with slashes, strip them all. */
+     const size_t pos_init = path.find_first_not_of('/');
+     /* According to the documentation of std::string::find following check
+      * is not necessary as we should get the std::string::npos propagation
+      * here. This might be true with the accuracy to implementation's bugs.
+      * See following question on SO:
+      * http://stackoverflow.com/questions/1011790/why-does-stdstring-findtext-stdstringnpos-not-return-npos
+      */
+     if (pos_init == string::npos) {
+       return -EINVAL;
+     }
+
+     const size_t pos_sep = path.find('/', pos_init);
+     if (pos_sep == string::npos) {
+       return -EINVAL;
+     }
+
+     string bucket_name = path.substr(pos_init, pos_sep - pos_init);
+     string obj_name = path.substr(pos_sep + 1);
+
+     rgw_bucket bucket;
+     RGWAccessControlPolicy *bucket_acl;
+     Policy* bucket_policy;
+
+     if (bucket_name.compare(s->bucket.name) != 0) {
+       const auto& piter = policies.find(bucket_name);
+       if (piter != policies.end()) {
+         // Bucket already resolved for an earlier segment.
+         bucket_acl = piter->second.first;
+         bucket_policy = piter->second.second.get_ptr();
+         bucket = buckets[bucket_name];
+       } else {
+         allocated_acls.push_back(RGWAccessControlPolicy(s->cct));
+         RGWAccessControlPolicy& _bucket_acl = allocated_acls.back();
+
+         RGWBucketInfo bucket_info;
+         map<string, bufferlist> bucket_attrs;
+         auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+         int r = store->get_bucket_info(obj_ctx, s->user->user_id.tenant,
+                                        bucket_name, bucket_info, nullptr,
+                                        &bucket_attrs);
+         if (r < 0) {
+           ldpp_dout(this, 0) << "could not get bucket info for bucket="
+               << bucket_name << dendl;
+           return r;
+         }
+         bucket = bucket_info.bucket;
+         bucket_acl = &_bucket_acl;
+         r = read_bucket_policy(store, s, bucket_info, bucket_attrs, bucket_acl,
+                                bucket);
+         if (r < 0) {
+           ldpp_dout(this, 0) << "failed to read bucket ACL for bucket "
+               << bucket << dendl;
+           return r;
+         }
+
+         // Store the policy in the cache first and take the pointer from the
+         // cached copy: a pointer into a block-local optional would dangle
+         // as soon as this scope ends.
+         auto& cached = policies[bucket_name];
+         cached.first = bucket_acl;
+         cached.second = get_iam_policy_from_attr(
+           s->cct, store, bucket_attrs, bucket_info.bucket.tenant);
+         bucket_policy = cached.second.get_ptr();
+         buckets[bucket_name] = bucket;
+       }
+     } else {
+       bucket = s->bucket;
+       bucket_acl = s->bucket_acl.get();
+       bucket_policy = s->iam_policy.get_ptr();
+     }
+
+     rgw_slo_part part;
+     part.bucket_acl = bucket_acl;
+     part.bucket_policy = bucket_policy;
+     part.bucket = bucket;
+     part.obj_name = obj_name;
+     part.size = entry.size_bytes;
+     part.etag = entry.etag;
+     ldpp_dout(this, 20) << "slo_part: bucket=" << part.bucket
+         << " obj=" << part.obj_name
+         << " size=" << part.size
+         << " etag=" << part.etag
+         << dendl;
+
+     etag_sum.Update((const unsigned char *)entry.etag.c_str(),
+                     entry.etag.length());
+
+     // Key each part by its starting offset in the assembled object.
+     slo_parts[total_len] = part;
+     total_len += part.size;
+   } /* foreach entry */
+
+   complete_etag(etag_sum, &lo_etag);
+
+   s->obj_size = slo_info.total_size;
+   ldpp_dout(this, 20) << "s->obj_size=" << s->obj_size << dendl;
+
+   int r = RGWRados::Object::Read::range_to_ofs(total_len, ofs, end);
+   if (r < 0) {
+     return r;
+   }
+
+   total_len = end - ofs + 1;
+   ldpp_dout(this, 20) << "Requested: ofs=" << ofs
+       << " end=" << end
+       << " total=" << total_len
+       << dendl;
+
+   r = iterate_slo_parts(s->cct, store, ofs, end, slo_parts,
+                         get_obj_user_manifest_iterate_cb, (void *)this);
+   if (r < 0) {
+     return r;
+   }
+
+   return 0;
+ }
+
+ // Data callback of the read path: forwards the chunk to the client.
+ //
+ // Before forwarding, once the current time has passed gc_invalidate_time,
+ // garbage collection of the object is deferred and the deadline is pushed
+ // out by half of rgw_gc_obj_min_wait. A failed deferral is only logged.
+ int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len)
+ {
+   /* garbage collection related handling */
+   const utime_t now = ceph_clock_now();
+   if (now > gc_invalidate_time) {
+     const int defer_ret = store->defer_gc(s->obj_ctx, s->bucket_info, obj);
+     if (defer_ret < 0) {
+       ldpp_dout(this, 0) << "WARNING: could not defer gc entry for obj" << dendl;
+     }
+     gc_invalidate_time = now;
+     gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2);
+   }
+
+   return send_response_data(bl, bl_ofs, bl_len);
+ }
+
+ // Decide whether object data should be prefetched for this request.
+ //
+ // No prefetch when no data is wanted or when the request carries a Range
+ // header; in the latter case the range is parsed here as a side effect.
+ bool RGWGetObj::prefetch_data()
+ {
+   /* HEAD request, stop prefetch*/
+   if (!get_data) {
+     return false;
+   }
+
+   range_str = s->info.env->get("HTTP_RANGE");
+   // TODO: add range prefetch
+   if (range_str == nullptr) {
+     return get_data;
+   }
+
+   parse_range();
+   return false;
+ }
+
+ // Pre-execution hook for the get-object op: delegates to the
+ // rgw_bucket_object_pre_exec() helper with the current request state.
+ void RGWGetObj::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ // Tell whether the object's delete-at attribute marks it as expired.
+ //
+ // Returns true only when the attribute is present, decodes, is non-zero
+ // and is not later than the current time. A missing or undecodable
+ // attribute counts as "not expired".
+ static bool object_is_expired(map<string, bufferlist>& attrs) {
+   const auto delete_at_attr = attrs.find(RGW_ATTR_DELETE_AT);
+   if (delete_at_attr == attrs.end()) {
+     return false;
+   }
+
+   utime_t delete_at;
+   try {
+     decode(delete_at, delete_at_attr->second);
+   } catch (buffer::error& err) {
+     dout(0) << "ERROR: " << __func__ << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl;
+     return false;
+   }
+
+   return delete_at <= ceph_clock_now() && !delete_at.is_zero();
+ }
+
+ // If `attrs` carries a tagging attribute, decode it into s->tagset.
+ // A decode failure is logged and otherwise ignored.
+ static inline void rgw_cond_decode_objtags(
+   struct req_state *s,
+   const std::map<std::string, buffer::list> &attrs)
+ {
+   const auto tag_attr = attrs.find(RGW_ATTR_TAGS);
+   if (tag_attr == attrs.end()) {
+     return;
+   }
+
+   try {
+     bufferlist::const_iterator iter{&tag_attr->second};
+     s->tagset.decode(iter);
+   } catch (buffer::error& err) {
+     ldout(s->cct, 0)
+       << "ERROR: caught buffer::error, couldn't decode TagSet" << dendl;
+   }
+ }
+
+ // Main body of the get-object op.
+ //
+ // Prepares the read (conditionals, attrs, size), then branches:
+ //   - stat-only ops return right after prepare();
+ //   - torrent requests send the torrent file instead of the object;
+ //   - objects with a user or SLO manifest are handed to the large-object
+ //     handlers (unless skip_manifest is set);
+ //   - otherwise the requested range is streamed through the filter chain
+ //     (response callback, optional decompression, optional decryption).
+ // Every failure path jumps to done_err, which sends the error response;
+ // the error code is left in op_ret.
+ void RGWGetObj::execute()
+ {
+ bufferlist bl;
+ gc_invalidate_time = ceph_clock_now();
+ gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2);
+
+ bool need_decompress;
+ int64_t ofs_x, end_x;
+
+ // `filter` always points at the head of the filter chain; each optional
+ // stage below wraps the previous head.
+ RGWGetObj_CB cb(this);
+ RGWGetObj_Filter* filter = (RGWGetObj_Filter *)&cb;
+ boost::optional<RGWGetObj_Decompress> decompress;
+ std::unique_ptr<RGWGetObj_Filter> decrypt;
+ map<string, bufferlist>::iterator attr_iter;
+
+ perfcounter->inc(l_rgw_get);
+
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+ RGWRados::Object::Read read_op(&op_target);
+
+ op_ret = get_params();
+ if (op_ret < 0)
+ goto done_err;
+
+ op_ret = init_common();
+ if (op_ret < 0)
+ goto done_err;
+
+ read_op.conds.mod_ptr = mod_ptr;
+ read_op.conds.unmod_ptr = unmod_ptr;
+ read_op.conds.high_precision_time = s->system_request; /* system request need to use high precision time */
+ read_op.conds.mod_zone_id = mod_zone_id;
+ read_op.conds.mod_pg_ver = mod_pg_ver;
+ read_op.conds.if_match = if_match;
+ read_op.conds.if_nomatch = if_nomatch;
+ read_op.params.attrs = &attrs;
+ read_op.params.lastmod = &lastmod;
+ read_op.params.obj_size = &s->obj_size;
+
+ op_ret = read_op.prepare();
+ if (op_ret < 0)
+ goto done_err;
+ version_id = read_op.state.obj.key.instance;
+
+ /* STAT ops don't need data, and do no i/o */
+ if (get_type() == RGW_OP_STAT_OBJ) {
+ return;
+ }
+
+ /* start gettorrent */
+ if (torrent.get_flag())
+ {
+ attr_iter = attrs.find(RGW_ATTR_CRYPT_MODE);
+ if (attr_iter != attrs.end() && attr_iter->second.to_str() == "SSE-C-AES256") {
+ ldpp_dout(this, 0) << "ERROR: torrents are not supported for objects "
+ "encrypted with SSE-C" << dendl;
+ op_ret = -EINVAL;
+ goto done_err;
+ }
+ torrent.init(s, store);
+ op_ret = torrent.get_torrent_file(read_op, total_len, bl, obj);
+ if (op_ret < 0)
+ {
+ ldpp_dout(this, 0) << "ERROR: failed to get_torrent_file ret= " << op_ret
+ << dendl;
+ goto done_err;
+ }
+ op_ret = send_response_data(bl, 0, total_len);
+ if (op_ret < 0)
+ {
+ ldpp_dout(this, 0) << "ERROR: failed to send_response_data ret= " << op_ret << dendl;
+ goto done_err;
+ }
+ return;
+ }
+ /* end gettorrent */
+
+ op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info);
+ if (op_ret < 0) {
+ ldpp_dout(s, 0) << "ERROR: failed to decode compression info, cannot decompress" << dendl;
+ goto done_err;
+ }
+ if (need_decompress) {
+ // Report the original (uncompressed) size and decompress on the way out.
+ s->obj_size = cs_info.orig_size;
+ decompress.emplace(s->cct, &cs_info, partial_content, filter);
+ filter = &*decompress;
+ }
+
+ attr_iter = attrs.find(RGW_ATTR_USER_MANIFEST);
+ if (attr_iter != attrs.end() && !skip_manifest) {
+ op_ret = handle_user_manifest(attr_iter->second.c_str());
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "ERROR: failed to handle user manifest ret="
+ << op_ret << dendl;
+ goto done_err;
+ }
+ return;
+ }
+
+ attr_iter = attrs.find(RGW_ATTR_SLO_MANIFEST);
+ if (attr_iter != attrs.end() && !skip_manifest) {
+ is_slo = true;
+ op_ret = handle_slo_manifest(attr_iter->second);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret
+ << dendl;
+ goto done_err;
+ }
+ return;
+ }
+
+ // for range requests with obj size 0
+ if (range_str && !(s->obj_size)) {
+ total_len = 0;
+ op_ret = -ERANGE;
+ goto done_err;
+ }
+
+ op_ret = read_op.range_to_ofs(s->obj_size, ofs, end);
+ if (op_ret < 0)
+ goto done_err;
+ total_len = (ofs <= end ? end + 1 - ofs : 0);
+
+ /* Check whether the object has expired. Swift API documentation
+ * stands that we should return 404 Not Found in such case. */
+ if (need_object_expiration() && object_is_expired(attrs)) {
+ op_ret = -ENOENT;
+ goto done_err;
+ }
+
+ /* Decode S3 objtags, if any */
+ rgw_cond_decode_objtags(s, attrs);
+
+ start = ofs;
+
+ attr_iter = attrs.find(RGW_ATTR_MANIFEST);
+ op_ret = this->get_decrypt_filter(&decrypt, filter,
+ attr_iter != attrs.end() ? &(attr_iter->second) : nullptr);
+ // A decrypt filter, when one was produced, becomes the new chain head
+ // before the return code is examined.
+ if (decrypt != nullptr) {
+ filter = decrypt.get();
+ }
+ if (op_ret < 0) {
+ goto done_err;
+ }
+
+ // Headers-only request or empty range: respond without reading data.
+ if (!get_data || ofs > end) {
+ send_response_data(bl, 0, 0);
+ return;
+ }
+
+ perfcounter->inc(l_rgw_get_b, end - ofs);
+
+ // Let the filter chain adjust the range on copies, so ofs/end keep the
+ // client-visible values.
+ ofs_x = ofs;
+ end_x = end;
+ filter->fixup_range(ofs_x, end_x);
+ op_ret = read_op.iterate(ofs_x, end_x, filter);
+
+ if (op_ret >= 0)
+ op_ret = filter->flush();
+
+ perfcounter->tinc(l_rgw_get_lat, s->time_elapsed());
+ if (op_ret < 0) {
+ goto done_err;
+ }
+
+ op_ret = send_response_data(bl, 0, 0);
+ if (op_ret < 0) {
+ goto done_err;
+ }
+ return;
+
+ done_err:
+ send_response_data_error();
+ }
+
+ // Parse the request's range and conditional time parameters.
+ //
+ // The range is parsed here only if it was supplied and has not been
+ // parsed yet. if_mod / if_unmod, when present, are parsed into mod_time /
+ // unmod_time and the matching pointers are set.
+ // Returns 0 on success, the range parser's error, or -EINVAL for an
+ // unparsable time.
+ int RGWGetObj::init_common()
+ {
+   /* range parsed error when prefetch */
+   if (range_str && !range_parsed) {
+     const int r = parse_range();
+     if (r < 0) {
+       return r;
+     }
+   }
+
+   if (if_mod) {
+     if (parse_time(if_mod, &mod_time) < 0) {
+       return -EINVAL;
+     }
+     mod_ptr = &mod_time;
+   }
+
+   if (if_unmod) {
+     if (parse_time(if_unmod, &unmod_time) < 0) {
+       return -EINVAL;
+     }
+     unmod_ptr = &unmod_time;
+   }
+
+   return 0;
+ }
+
+ // Authorize listing the caller's buckets: checks s3ListAllMyBuckets
+ // against the wildcard S3 resource ARN of the user's tenant.
+ // Returns 0 when allowed, -EACCES otherwise.
+ int RGWListBuckets::verify_permission()
+ {
+   const rgw::Partition partition = rgw::Partition::aws;
+   const rgw::Service service = rgw::Service::s3;
+
+   const bool allowed = verify_user_permission(
+     this, s,
+     ARN(partition, service, "", s->user->user_id.tenant, "*"),
+     rgw::IAM::s3ListAllMyBuckets);
+
+   return allowed ? 0 : -EACCES;
+ }
+
+ // Usage information is available to any authenticated identity; anonymous
+ // requests are refused with -EACCES.
+ int RGWGetUsage::verify_permission()
+ {
+   const bool anonymous = s->auth.identity->is_anonymous();
+   return anonymous ? -EACCES : 0;
+ }
+
+ // List the caller's buckets in chunks and stream them to the client.
+ //
+ // Buckets are read in pages of at most rgw_list_buckets_max_chunk entries
+ // (fewer when `limit` leaves less to fetch). Per-page, global and
+ // per-placement-policy usage statistics are accumulated, the response is
+ // started on the first page, and each non-empty page is handed to
+ // handle_listing_chunk(). The response is always begun and ended, even
+ // when an error occurs before the first page; the error is left in op_ret.
+ void RGWListBuckets::execute()
+ {
+ bool done;
+ bool started = false;
+ uint64_t total_count = 0;
+
+ const uint64_t max_buckets = s->cct->_conf->rgw_list_buckets_max_chunk;
+
+ op_ret = get_params();
+ if (op_ret < 0) {
+ goto send_end;
+ }
+
+ if (supports_account_metadata()) {
+ op_ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, attrs);
+ if (op_ret < 0) {
+ goto send_end;
+ }
+ }
+
+ is_truncated = false;
+ do {
+ RGWUserBuckets buckets;
+ uint64_t read_count;
+ // A non-negative limit caps how many more buckets are fetched.
+ if (limit >= 0) {
+ read_count = min(limit - total_count, max_buckets);
+ } else {
+ read_count = max_buckets;
+ }
+
+ op_ret = rgw_read_user_buckets(store, s->user->user_id, buckets,
+ marker, end_marker, read_count,
+ should_get_stats(), &is_truncated,
+ get_default_max());
+ if (op_ret < 0) {
+ /* hmm.. something wrong here.. the user was authenticated, so it
+ should exist */
+ ldpp_dout(this, 10) << "WARNING: failed on rgw_get_user_buckets uid="
+ << s->user->user_id << dendl;
+ break;
+ }
+
+ /* We need to have stats for all our policies - even if a given policy
+ * isn't actually used in a given account. In such situation its usage
+ * stats would be simply full of zeros. */
+ for (const auto& policy : store->svc.zone->get_zonegroup().placement_targets) {
+ policies_stats.emplace(policy.second.name,
+ decltype(policies_stats)::mapped_type());
+ }
+
+ std::map<std::string, RGWBucketEnt>& m = buckets.get_buckets();
+ for (const auto& kv : m) {
+ const auto& bucket = kv.second;
+
+ global_stats.bytes_used += bucket.size;
+ global_stats.bytes_used_rounded += bucket.size_rounded;
+ global_stats.objects_count += bucket.count;
+
+ /* operator[] still can create a new entry for storage policy seen
+ * for first time. */
+ auto& policy_stats = policies_stats[bucket.placement_rule.to_str()];
+ policy_stats.bytes_used += bucket.size;
+ policy_stats.bytes_used_rounded += bucket.size_rounded;
+ policy_stats.buckets_count++;
+ policy_stats.objects_count += bucket.count;
+ }
+ global_stats.buckets_count += m.size();
+ total_count += m.size();
+
+ // Stop on a short page or once the requested limit has been reached.
+ done = (m.size() < read_count || (limit >= 0 && total_count >= (uint64_t)limit));
+
+ if (!started) {
+ send_response_begin(buckets.count() > 0);
+ started = true;
+ }
+
+ if (!m.empty()) {
+ // The last key of this page is the marker for the next read.
+ map<string, RGWBucketEnt>::reverse_iterator riter = m.rbegin();
+ marker = riter->first;
+
+ handle_listing_chunk(std::move(buckets));
+ }
+ } while (is_truncated && !done);
+
+ send_end:
+ if (!started) {
+ send_response_begin(false);
+ }
+ send_response_end();
+ }
+
+ /**
+  * Collect usage data for the requesting user.
+  *
+  * Reads the usage log for the optional [start_date, end_date] window page
+  * by page, then syncs the user's stats, fetches the per-bucket stats and
+  * finally reads the user header. The first failing step leaves its error
+  * code in op_ret and ends the operation.
+  */
+ void RGWGetUsage::execute()
+ {
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   /* A missing date leaves the window unbounded on that side. */
+   uint64_t start_epoch = 0;
+   uint64_t end_epoch = static_cast<uint64_t>(-1);
+
+   if (!start_date.empty()) {
+     op_ret = utime_t::parse_date(start_date, &start_epoch, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "ERROR: failed to parse start date" << dendl;
+       return;
+     }
+   }
+
+   if (!end_date.empty()) {
+     op_ret = utime_t::parse_date(end_date, &end_epoch, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "ERROR: failed to parse end date" << dendl;
+       return;
+     }
+   }
+
+   const uint32_t max_entries = 1000;
+   RGWUsageIter usage_iter;
+   bool is_truncated = true;
+
+   /* Keep reading pages until the store reports there is nothing left. */
+   do {
+     op_ret = store->read_usage(s->user->user_id, s->bucket_name,
+                                start_epoch, end_epoch, max_entries,
+                                &is_truncated, usage_iter, usage);
+     if (op_ret == -ENOENT) {
+       /* -ENOENT is treated as "nothing more to read", not as a failure. */
+       op_ret = 0;
+       is_truncated = false;
+     } else if (op_ret < 0) {
+       return;
+     }
+   } while (is_truncated);
+
+   op_ret = rgw_user_sync_all_stats(store, s->user->user_id);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "ERROR: failed to sync user stats" << dendl;
+     return;
+   }
+
+   op_ret = rgw_user_get_all_buckets_stats(store, s->user->user_id,
+                                           buckets_usage);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "ERROR: failed to get user's buckets stats" << dendl;
+     return;
+   }
+
+   const string user_str = s->user->user_id.to_str();
+   op_ret = store->cls_user_get_header(user_str, &header);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "ERROR: can't read user header" << dendl;
+   }
+ }
+
+ /**
+  * Account stat needs plain (non-policy) READ permission on the user.
+  *
+  * @return 0 when permitted, -EACCES otherwise.
+  */
+ int RGWStatAccount::verify_permission()
+ {
+   const bool can_read =
+     verify_user_permission_no_policy(this, s, RGW_PERM_READ);
+   return can_read ? 0 : -EACCES;
+ }
+
+ /**
+  * Accumulate account-wide and per-placement-policy usage statistics.
+  *
+  * Walks the user's buckets in chunks of rgw_list_buckets_max_chunk entries,
+  * adding each bucket's size/object counters to global_stats and to the
+  * policies_stats entry keyed by the bucket's placement rule. A failing read
+  * leaves its error code in op_ret and stops the walk.
+  */
+ void RGWStatAccount::execute()
+ {
+   string marker;
+   bool is_truncated = false;
+   uint64_t max_buckets = s->cct->_conf->rgw_list_buckets_max_chunk;
+   const string *lastmarker;
+
+   do {
+     RGWUserBuckets buckets;
+
+     lastmarker = nullptr;
+     op_ret = rgw_read_user_buckets(store, s->user->user_id, buckets, marker,
+                                    string(), max_buckets, true, &is_truncated);
+     if (op_ret < 0) {
+       /* hmm.. something wrong here.. the user was authenticated, so it
+          should exist */
+       ldpp_dout(this, 10) << "WARNING: failed on rgw_read_user_buckets uid="
+                           << s->user->user_id << " ret=" << op_ret << dendl;
+       break;
+     }
+
+     /* We need to have stats for all our policies - even if a given policy
+      * isn't actually used in a given account. In such situation its usage
+      * stats would be simply full of zeros. */
+     for (const auto& policy : store->svc.zone->get_zonegroup().placement_targets) {
+       policies_stats.emplace(policy.second.name,
+                              decltype(policies_stats)::mapped_type());
+     }
+
+     std::map<std::string, RGWBucketEnt>& m = buckets.get_buckets();
+     for (const auto& kv : m) {
+       const auto& bucket = kv.second;
+       lastmarker = &kv.first;
+
+       global_stats.bytes_used += bucket.size;
+       global_stats.bytes_used_rounded += bucket.size_rounded;
+       global_stats.objects_count += bucket.count;
+
+       /* operator[] still can create a new entry for storage policy seen
+        * for first time. */
+       auto& policy_stats = policies_stats[bucket.placement_rule.to_str()];
+       policy_stats.bytes_used += bucket.size;
+       policy_stats.bytes_used_rounded += bucket.size_rounded;
+       policy_stats.buckets_count++;
+       policy_stats.objects_count += bucket.count;
+     }
+     global_stats.buckets_count += m.size();
+
+     if (!lastmarker) {
+       /* An empty chunk gives us no marker to advance to. That is only a
+        * problem when the listing still claims to be truncated; an account
+        * that simply has no (more) buckets must not be reported as an
+        * error. */
+       if (is_truncated) {
+         lderr(s->cct) << "ERROR: rgw_read_user_buckets, stasis at marker="
+                       << marker << " uid=" << s->user->user_id << dendl;
+       }
+       break;
+     }
+     marker = *lastmarker;
+   } while (is_truncated);
+ }
+
+ /* Permission check for reading a bucket's versioning state: returns the
+  * result of verify_bucket_owner_or_policy() for the s3GetBucketVersioning
+  * action. */
+ int RGWGetBucketVersioning::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketVersioning);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWGetBucketVersioning::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * Copy the bucket's versioning-related state (versioned, versioning
+  * enabled, MFA enabled) from the cached bucket info into the op.
+  * Sets op_ret to -ERR_NO_SUCH_BUCKET when the bucket does not exist.
+  */
+ void RGWGetBucketVersioning::execute()
+ {
+   if (!s->bucket_exists) {
+     op_ret = -ERR_NO_SUCH_BUCKET;
+     return;
+   }
+
+   auto& info = s->bucket_info;
+   versioned = info.versioned();
+   versioning_enabled = info.versioning_enabled();
+   mfa_enabled = info.mfa_enabled();
+ }
+
+ /* Permission check for changing a bucket's versioning state: returns the
+  * result of verify_bucket_owner_or_policy() for the s3PutBucketVersioning
+  * action. */
+ int RGWSetBucketVersioning::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketVersioning);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWSetBucketVersioning::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Apply a requested versioning and/or MFA status change to the bucket.
+  *
+  * Validation happens first (bucket existence, object-lock constraint, MFA
+  * verification), then the request is forwarded to the metadata master when
+  * this zone is not the master, and finally the bucket flags are updated and
+  * written through retry_raced_bucket_write(). Errors are reported through
+  * op_ret. */
+ void RGWSetBucketVersioning::execute()
+ {
+ op_ret = get_params();
+ if (op_ret < 0)
+ return;
+
+ if (! s->bucket_exists) {
+ op_ret = -ERR_NO_SUCH_BUCKET;
+ return;
+ }
+
+ /* With object lock enabled, any status other than VersioningEnabled is
+  * rejected. */
+ if (s->bucket_info.obj_lock_enabled() && versioning_status != VersioningEnabled) {
+ op_ret = -ERR_INVALID_BUCKET_STATE;
+ return;
+ }
+
+ bool cur_mfa_status = (s->bucket_info.flags & BUCKET_MFA_ENABLED) != 0;
+
+ /* Only treat the MFA status as "to be set" when it differs from the
+  * bucket's current one. */
+ mfa_set_status &= (mfa_status != cur_mfa_status);
+
+ if (mfa_set_status &&
+ !s->mfa_verified) {
+ op_ret = -ERR_MFA_REQUIRED;
+ return;
+ }
+ //if mfa is enabled for bucket, make sure mfa code is validated in case versioned status gets changed
+ if (cur_mfa_status) {
+ /* Despite its name, this flag ends up true when the request would flip
+  * the bucket's suspended flag, i.e. when the status really changes. */
+ bool req_versioning_status = false;
+ //if requested versioning status is not the same as the one set for the bucket, return error
+ if (versioning_status == VersioningEnabled) {
+ req_versioning_status = (s->bucket_info.flags & BUCKET_VERSIONS_SUSPENDED) != 0;
+ } else if (versioning_status == VersioningSuspended) {
+ req_versioning_status = (s->bucket_info.flags & BUCKET_VERSIONS_SUSPENDED) == 0;
+ }
+ if (req_versioning_status && !s->mfa_verified) {
+ op_ret = -ERR_MFA_REQUIRED;
+ return;
+ }
+ }
+
+ if (!store->svc.zone->is_meta_master()) {
+ op_ret = forward_request_to_master(s, NULL, store, in_data, nullptr);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+ return;
+ }
+ }
+
+ bool modified = mfa_set_status;
+
+ /* The lambda captures by reference: it updates s->bucket_info.flags and
+  * `modified`, and reads op_ret in its early-return branch. */
+ op_ret = retry_raced_bucket_write(store, s, [&] {
+ if (mfa_set_status) {
+ if (mfa_status) {
+ s->bucket_info.flags |= BUCKET_MFA_ENABLED;
+ } else {
+ s->bucket_info.flags &= ~BUCKET_MFA_ENABLED;
+ }
+ }
+
+ if (versioning_status == VersioningEnabled) {
+ s->bucket_info.flags |= BUCKET_VERSIONED;
+ s->bucket_info.flags &= ~BUCKET_VERSIONS_SUSPENDED;
+ modified = true;
+ } else if (versioning_status == VersioningSuspended) {
+ s->bucket_info.flags |= (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED);
+ modified = true;
+ } else {
+ /* NOTE(review): this branch returns without calling
+  * put_bucket_instance_info(), even if the MFA flag was just changed in
+  * memory above - confirm that an MFA-only change is meant to skip the
+  * write. */
+ return op_ret;
+ }
+ return store->put_bucket_instance_info(s->bucket_info, false, real_time(),
+ &s->bucket_attrs);
+ });
+
+ if (!modified) {
+ return;
+ }
+
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+ << " returned err=" << op_ret << dendl;
+ return;
+ }
+ }
+
+ /* Permission check for reading a bucket's website configuration: returns
+  * the result of verify_bucket_owner_or_policy() for the s3GetBucketWebsite
+  * action. */
+ int RGWGetBucketWebsite::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketWebsite);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWGetBucketWebsite::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Only validates that the bucket has a website configuration; sets op_ret
+  * to -ERR_NO_SUCH_WEBSITE_CONFIGURATION when it does not. Nothing else is
+  * computed here. */
+ void RGWGetBucketWebsite::execute()
+ {
+ if (!s->bucket_info.has_website) {
+ op_ret = -ERR_NO_SUCH_WEBSITE_CONFIGURATION;
+ }
+ }
+
+ /* Permission check for setting a bucket's website configuration: returns
+  * the result of verify_bucket_owner_or_policy() for the s3PutBucketWebsite
+  * action. */
+ int RGWSetBucketWebsite::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketWebsite);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWSetBucketWebsite::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * Store the website configuration parsed by get_params() on the bucket.
+  *
+  * On a non-master zone the request is first forwarded to the metadata
+  * master. The bucket instance info is then rewritten through
+  * retry_raced_bucket_write(). Errors are reported through op_ret.
+  */
+ void RGWSetBucketWebsite::execute()
+ {
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   if (!store->svc.zone->is_meta_master()) {
+     op_ret = forward_request_to_master(s, nullptr, store, in_data, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << " forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   op_ret = retry_raced_bucket_write(store, s, [this] {
+     auto& binfo = s->bucket_info;
+     binfo.has_website = true;
+     binfo.website_conf = website_conf;
+     op_ret = store->put_bucket_instance_info(binfo, false, real_time(),
+                                              &s->bucket_attrs);
+     return op_ret;
+   });
+
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+                        << " returned err=" << op_ret << dendl;
+   }
+ }
+
+ /* Permission check for removing a bucket's website configuration: returns
+  * the result of verify_bucket_owner_or_policy() for the
+  * s3DeleteBucketWebsite action. */
+ int RGWDeleteBucketWebsite::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3DeleteBucketWebsite);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWDeleteBucketWebsite::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * Remove the website configuration from the bucket.
+  *
+  * On a non-master zone the request is first forwarded (with an empty
+  * body) to the metadata master. The bucket instance info is then rewritten
+  * with has_website cleared and a default-constructed website_conf, using
+  * retry_raced_bucket_write(). Errors are reported through op_ret.
+  */
+ void RGWDeleteBucketWebsite::execute()
+ {
+
+   if (!store->svc.zone->is_meta_master()) {
+     bufferlist in_data;
+     op_ret = forward_request_to_master(s, nullptr, store, in_data, nullptr);
+     if (op_ret < 0) {
+       /* The leading space keeps the bucket name and "returned" apart in
+        * the log line. */
+       ldpp_dout(this, 0) << "NOTICE: forward_to_master failed on bucket=" << s->bucket.name
+                          << " returned err=" << op_ret << dendl;
+       return;
+     }
+   }
+   op_ret = retry_raced_bucket_write(store, s, [this] {
+     s->bucket_info.has_website = false;
+     s->bucket_info.website_conf = RGWBucketWebsiteConf();
+     op_ret = store->put_bucket_instance_info(s->bucket_info, false,
+                                              real_time(), &s->bucket_attrs);
+     return op_ret;
+   });
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+                        << " returned err=" << op_ret << dendl;
+     return;
+   }
+ }
+
+ /**
+  * A HEAD request on a bucket is governed by the s3:ListBucket permission.
+  *
+  * @return 0 when permitted, -EACCES otherwise.
+  */
+ int RGWStatBucket::verify_permission()
+ {
+   const bool may_list = verify_bucket_permission(this, s, rgw::IAM::s3ListBucket);
+   return may_list ? 0 : -EACCES;
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWStatBucket::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * Load the stats of the request's bucket into the `bucket` member.
+  *
+  * op_ret outcomes:
+  *  - -ERR_NO_SUCH_BUCKET: the bucket does not exist;
+  *  - -EEXIST: update_containers_stats() returned 0;
+  *  - negative value: error passed through from update_containers_stats();
+  *  - -EINVAL: stats were updated but the bucket's entry cannot be found;
+  *  - 0: success.
+  */
+ void RGWStatBucket::execute()
+ {
+   if (!s->bucket_exists) {
+     op_ret = -ERR_NO_SUCH_BUCKET;
+     return;
+   }
+
+   RGWUserBuckets buckets;
+   bucket.bucket = s->bucket;
+   buckets.add(bucket);
+   map<string, RGWBucketEnt>& entries = buckets.get_buckets();
+
+   const int updated = store->update_containers_stats(entries);
+   if (updated == 0) {
+     op_ret = -EEXIST;
+     return;
+   }
+   if (updated < 0) {
+     op_ret = updated;
+     return;
+   }
+
+   const auto found = entries.find(bucket.bucket.name);
+   if (found == entries.end()) {
+     op_ret = -EINVAL;
+     return;
+   }
+   bucket = found->second;
+   op_ret = 0;
+ }
+
+ /**
+  * Parse the listing parameters, expose them to policy evaluation and check
+  * the caller's permission to list the bucket.
+  *
+  * The s3:prefix and s3:delimiter keys are only added to the environment
+  * when non-empty; s3:max-keys is always added. Listing versions is checked
+  * against s3ListBucketVersions, a plain listing against s3ListBucket.
+  *
+  * @return the get_params() error if negative, -EACCES when not permitted,
+  *         0 otherwise.
+  */
+ int RGWListBucket::verify_permission()
+ {
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return op_ret;
+   }
+
+   if (!prefix.empty()) {
+     s->env.emplace("s3:prefix", prefix);
+   }
+   if (!delimiter.empty()) {
+     s->env.emplace("s3:delimiter", delimiter);
+   }
+   s->env.emplace("s3:max-keys", std::to_string(max));
+
+   const auto action = list_versions ? rgw::IAM::s3ListBucketVersions
+                                     : rgw::IAM::s3ListBucket;
+   return verify_bucket_permission(this, s, action) ? 0 : -EACCES;
+ }
+
+ /**
+  * Parse max_keys into `max`, bounded to [0, rgw_max_listing_results] and
+  * falling back to default_max.
+  *
+  * The upper bound comes from configuration for security. The lower bound
+  * is 0 because some S3 clients explicitly send max-keys=0 to detect whether
+  * the bucket is empty without listing any items.
+  *
+  * @return whatever parse_value_and_bound() returns.
+  */
+ int RGWListBucket::parse_max_keys()
+ {
+   const auto upper_bound =
+     g_conf().get_val<uint64_t>("rgw_max_listing_results");
+   return parse_value_and_bound(max_keys, max, 0, upper_bound, default_max);
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWListBucket::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * List the objects of the request's bucket.
+  *
+  * Rejects a missing bucket (-ERR_NO_SUCH_BUCKET) and the combination of an
+  * unordered listing with a delimiter (-EINVAL). When container stats are
+  * needed they are refreshed into `bucket` first. The listing result lands
+  * in objs, common_prefixes, is_truncated and, on success, next_marker.
+  */
+ void RGWListBucket::execute()
+ {
+   if (!s->bucket_exists) {
+     op_ret = -ERR_NO_SUCH_BUCKET;
+     return;
+   }
+
+   if (allow_unordered && !delimiter.empty()) {
+     ldpp_dout(this, 0) <<
+       "ERROR: unordered bucket listing requested with a delimiter" << dendl;
+     op_ret = -EINVAL;
+     return;
+   }
+
+   if (need_container_stats()) {
+     /* Single-entry map holding just this bucket. */
+     map<string, RGWBucketEnt> stats;
+     RGWBucketEnt& ent = stats[s->bucket.name];
+     ent.bucket = s->bucket;
+     op_ret = store->update_containers_stats(stats);
+     if (op_ret > 0) {
+       bucket = ent;
+     }
+   }
+
+   RGWRados::Bucket target(store, s->bucket_info);
+   if (shard_id >= 0) {
+     target.set_shard_id(shard_id);
+   }
+
+   RGWRados::Bucket::List list_op(&target);
+   auto& params = list_op.params;
+   params.prefix = prefix;
+   params.delim = delimiter;
+   params.marker = marker;
+   params.end_marker = end_marker;
+   params.list_versions = list_versions;
+   params.allow_unordered = allow_unordered;
+
+   op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
+   if (op_ret < 0) {
+     return;
+   }
+   next_marker = list_op.get_next_marker();
+ }
+
+ /* Permission check for reading a bucket's logging configuration: returns
+  * the result of verify_bucket_owner_or_policy() for the s3GetBucketLogging
+  * action. */
+ int RGWGetBucketLogging::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketLogging);
+ }
+
+ /* Permission check for reading a bucket's location: returns the result of
+  * verify_bucket_owner_or_policy() for the s3GetBucketLocation action. */
+ int RGWGetBucketLocation::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketLocation);
+ }
+
+ /**
+  * Decide whether the caller may create the requested bucket.
+  *
+  * @return -EACCES for anonymous callers, for callers lacking the
+  *         s3CreateBucket permission and for cross-tenant creation;
+  *         -EPERM when the user's max_buckets is negative;
+  *         -ERR_TOO_MANY_BUCKETS when the user already has max_buckets
+  *         buckets; a negative error from reading the user's buckets;
+  *         0 otherwise.
+  */
+ int RGWCreateBucket::verify_permission()
+ {
+   /* This check is mostly needed for S3, which doesn't support account
+    * ACLs. Swift never delegates permissions to an anonymous user, so for
+    * Swift this is simply an early exit. */
+   if (s->auth.identity->is_anonymous()) {
+     return -EACCES;
+   }
+
+   rgw_bucket bucket;
+   bucket.name = s->bucket_name;
+   bucket.tenant = s->bucket_tenant;
+   const ARN arn(bucket);
+   if (!verify_user_permission(this, s, arn, rgw::IAM::s3CreateBucket)) {
+     return -EACCES;
+   }
+
+   if (s->user->user_id.tenant != s->bucket_tenant) {
+     ldpp_dout(this, 10) << "user cannot create a bucket in a different tenant"
+                         << " (user_id.tenant=" << s->user->user_id.tenant
+                         << " requested=" << s->bucket_tenant << ")"
+                         << dendl;
+     return -EACCES;
+   }
+
+   const auto max_buckets = s->user->max_buckets;
+   if (max_buckets < 0) {
+     return -EPERM;
+   }
+   if (max_buckets == 0) {
+     /* Zero means no limit is enforced here. */
+     return 0;
+   }
+
+   /* Reading up to max_buckets entries is enough to tell whether the limit
+    * has been reached. */
+   RGWUserBuckets buckets;
+   string marker;
+   bool is_truncated = false;
+   op_ret = rgw_read_user_buckets(store, s->user->user_id, buckets,
+                                  marker, string(), max_buckets,
+                                  false, &is_truncated);
+   if (op_ret < 0) {
+     return op_ret;
+   }
+
+   if (static_cast<int>(buckets.count()) >= max_buckets) {
+     return -ERR_TOO_MANY_BUCKETS;
+   }
+   return 0;
+ }
+
+ /**
+  * Forward the current request to the master zonegroup's REST connection.
+  *
+  * @param s            request state; its info is forwarded unless
+  *                     forward_info is given
+  * @param objv         object version passed along to forward()
+  * @param store        store providing the master connection
+  * @param in_data      request body to send
+  * @param jp           optional parser that receives the response body
+  * @param forward_info optional request info to forward instead of s->info
+  * @return -EINVAL when there is no master connection or the response
+  *         fails to parse, the forward() error if negative, 0 otherwise.
+  */
+ static int forward_request_to_master(struct req_state *s, obj_version *objv,
+                                      RGWRados *store, bufferlist& in_data,
+                                      JSONParser *jp, req_info *forward_info)
+ {
+   auto * const master_conn = store->svc.zone->get_master_conn();
+   if (!master_conn) {
+     ldpp_dout(s, 0) << "rest connection is invalid" << dendl;
+     return -EINVAL;
+   }
+   ldpp_dout(s, 0) << "sending request to master zonegroup" << dendl;
+
+   bufferlist response;
+   string uid_str = s->user->user_id.to_str();
+   req_info& info = forward_info ? *forward_info : s->info;
+ #define MAX_REST_RESPONSE (128 * 1024) // we expect a very small response
+   const int ret = master_conn->forward(uid_str, info, objv, MAX_REST_RESPONSE,
+                                        &in_data, &response);
+   if (ret < 0) {
+     return ret;
+   }
+
+   ldpp_dout(s, 20) << "response: " << response.c_str() << dendl;
+   const bool parsed = !jp || jp->parse(response.c_str(), response.length());
+   if (!parsed) {
+     ldpp_dout(s, 0) << "failed parsing response from master zonegroup" << dendl;
+     return -EINVAL;
+   }
+
+   return 0;
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWCreateBucket::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Sort the attributes of @orig_attrs into "to remove" and "to keep".
+  *
+  * Every attribute whose name starts with RGW_ATTR_META_PREFIX (user-defined
+  * metadata) is copied into @out_rmattrs, overwriting any entry of the same
+  * name. Every other attribute is copied into @out_attrs unless @out_attrs
+  * already has an entry of that name. */
+ static void prepare_add_del_attrs(const map<string, bufferlist>& orig_attrs,
+                                   map<string, bufferlist>& out_attrs,
+                                   map<string, bufferlist>& out_rmattrs)
+ {
+   for (const auto& [name, value] : orig_attrs) {
+     const bool is_user_meta =
+       name.compare(0, sizeof(RGW_ATTR_META_PREFIX) - 1,
+                    RGW_ATTR_META_PREFIX) == 0;
+
+     if (is_user_meta) {
+       /* For objects, all existing meta attrs have to be removed. */
+       out_rmattrs[name] = value;
+     } else {
+       /* emplace() leaves an already present entry untouched. */
+       out_attrs.emplace(name, value);
+     }
+   }
+ }
+
+ /* Fuse resource metadata: combine the original attributes in @orig_attrs,
+  * the set of attribute names to drop in @rmattr_names and the attributes
+  * already in @out_attrs. The result is placed in @out_attrs.
+  *
+  * Rules, per attribute of @orig_attrs:
+  *  - name starts with RGW_ATTR_META_PREFIX (user-defined metadata) and is
+  *    listed in @rmattr_names: any entry of that name is erased from
+  *    @out_attrs;
+  *  - anything else: copied into @out_attrs only when @out_attrs has no
+  *    entry of that name yet, so values already present always win. */
+ static void prepare_add_del_attrs(const map<string, bufferlist>& orig_attrs,
+                                   const set<string>& rmattr_names,
+                                   map<string, bufferlist>& out_attrs)
+ {
+   for (const auto& kv : orig_attrs) {
+     const string& name = kv.first;
+     const bool is_user_meta =
+       name.compare(0, strlen(RGW_ATTR_META_PREFIX),
+                    RGW_ATTR_META_PREFIX) == 0;
+
+     /* For buckets, existing meta attrs are preserved except those listed
+      * in rmattr_names. */
+     if (is_user_meta && rmattr_names.count(name) != 0) {
+       out_attrs.erase(name);
+       continue;
+     }
+
+     /* emplace() won't alter the map if the key is already present.
+      * This behaviour is fully intentional here. */
+     out_attrs.emplace(kv);
+   }
+ }
+
+
+ /* Copy every entry of s->generic_attrs into @out_attrs, replacing any
+  * existing value of the same name. Each value is stored together with its
+  * terminating NUL byte. */
+ static void populate_with_generic_attrs(const req_state * const s,
+                                         map<string, bufferlist>& out_attrs)
+ {
+   for (const auto& [name, val] : s->generic_attrs) {
+     bufferlist& dest = out_attrs[name];
+     dest.clear();
+     /* size() + 1 so that the trailing '\0' is part of the attribute. */
+     dest.append(val.c_str(), val.size() + 1);
+   }
+ }
+
+
+ /* Move quota settings out of the attribute set and into @quota.
+  *
+  * RGW_ATTR_QUOTA_NOBJS / RGW_ATTR_QUOTA_MSIZE entries found in @add_attrs
+  * are parsed as base-10 integers, stored in quota.max_objects /
+  * quota.max_size and erased from @add_attrs. The same names appearing in
+  * @rmattr_names reset the corresponding limit to -1.
+  *
+  * @param quota_extracted optional; on success receives whether any quota
+  *                        value was taken from either input
+  * @return 0 on success, -EINVAL when a limit fails to parse (in which case
+  *         @quota_extracted is not written). */
+ static int filter_out_quota_info(std::map<std::string, bufferlist>& add_attrs,
+                                  const std::set<std::string>& rmattr_names,
+                                  RGWQuotaInfo& quota,
+                                  bool * quota_extracted = nullptr)
+ {
+   bool extracted = false;
+
+   /* Parse one limit attribute into @limit and drop it from add_attrs. */
+   auto take_limit = [&add_attrs, &extracted](const char * const attr_name,
+                                              int64_t& limit) -> int {
+     const auto iter = add_attrs.find(attr_name);
+     if (iter == std::end(add_attrs)) {
+       return 0;
+     }
+
+     std::string err;
+     limit = static_cast<int64_t>(strict_strtoll(iter->second.c_str(), 10, &err));
+     if (!err.empty()) {
+       return -EINVAL;
+     }
+     add_attrs.erase(iter);
+     extracted = true;
+     return 0;
+   };
+
+   /* Put new limit on max objects. */
+   if (take_limit(RGW_ATTR_QUOTA_NOBJS, quota.max_objects) < 0) {
+     return -EINVAL;
+   }
+
+   /* Put new limit on bucket (container) size. */
+   if (take_limit(RGW_ATTR_QUOTA_MSIZE, quota.max_size) < 0) {
+     return -EINVAL;
+   }
+
+   for (const auto& name : rmattr_names) {
+     /* Remove limit on max objects. */
+     if (name == RGW_ATTR_QUOTA_NOBJS) {
+       quota.max_objects = -1;
+       extracted = true;
+     }
+
+     /* Remove limit on max bucket size. */
+     if (name == RGW_ATTR_QUOTA_MSIZE) {
+       quota.max_size = -1;
+       extracted = true;
+     }
+   }
+
+   /* Swift requires checking on raw usage instead of the 4 KiB rounded one. */
+   quota.check_on_raw = true;
+   quota.enabled = quota.max_size > 0 || quota.max_objects > 0;
+
+   if (quota_extracted) {
+     *quota_extracted = extracted;
+   }
+
+   return 0;
+ }
+
+
+ /* Move website-related attributes out of @add_attrs and into @ws_conf.
+  *
+  * Each known attribute found in @add_attrs is copied into its field and
+  * erased from the map; each one named in @rmattr_names has its field reset
+  * to an empty string (removal is applied after addition). A non-empty
+  * RGW_ATTR_WEB_LISTINGS value enables listings when it equals "true"
+  * (case-insensitively) and disables them otherwise. */
+ static void filter_out_website(std::map<std::string, ceph::bufferlist>& add_attrs,
+                                const std::set<std::string>& rmattr_names,
+                                RGWBucketWebsiteConf& ws_conf)
+ {
+   /* Transfer a single attribute into the string it is bound to. */
+   auto transfer = [&add_attrs, &rmattr_names](const char * const key,
+                                               std::string& target) {
+     const auto iter = add_attrs.find(key);
+     if (iter != std::end(add_attrs)) {
+       target = iter->second.c_str();
+       add_attrs.erase(iter);
+     }
+
+     if (rmattr_names.count(key)) {
+       target = std::string();
+     }
+   };
+
+   /* The listings flag is carried as text first and interpreted below. */
+   std::string lstval;
+
+   transfer(RGW_ATTR_WEB_INDEX, ws_conf.index_doc_suffix);
+   transfer(RGW_ATTR_WEB_ERROR, ws_conf.error_doc);
+   transfer(RGW_ATTR_WEB_LISTINGS, lstval);
+   transfer(RGW_ATTR_WEB_LIST_CSS, ws_conf.listing_css_doc);
+   transfer(RGW_ATTR_SUBDIR_MARKER, ws_conf.subdir_marker);
+
+   if (!lstval.empty()) {
+     ws_conf.listing_enabled = boost::algorithm::iequals(lstval, "true");
+   }
+ }
+
+
+ /* Create the requested bucket (or re-apply metadata to an existing bucket
+  * owned by the same user).
+  *
+  * Rough flow: validate location constraint and placement target, read any
+  * existing bucket info, forward to the metadata master when this zone is
+  * not the master, build the attribute set, call store->create_bucket(),
+  * link the bucket to the user, and - when metadata upload is needed and
+  * the bucket already existed - merge and write the attributes in a retry
+  * loop. The outcome is reported through op_ret. */
+ void RGWCreateBucket::execute()
+ {
+ RGWAccessControlPolicy old_policy(s->cct);
+ buffer::list aclbl;
+ buffer::list corsbl;
+ bool existed;
+ string bucket_name;
+ rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name, bucket_name);
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().domain_root, bucket_name);
+ obj_version objv, *pobjv = NULL;
+
+ op_ret = get_params();
+ if (op_ret < 0)
+ return;
+
+ /* Unless region enforcement is relaxed, a given location constraint must
+  * name a known zonegroup API... */
+ if (!relaxed_region_enforcement &&
+ !location_constraint.empty() &&
+ !store->svc.zone->has_zonegroup_api(location_constraint)) {
+ ldpp_dout(this, 0) << "location constraint (" << location_constraint << ")"
+ << " can't be found." << dendl;
+ op_ret = -ERR_INVALID_LOCATION_CONSTRAINT;
+ s->err.message = "The specified location-constraint is not valid";
+ return;
+ }
+
+ /* ...and, on a non-master zonegroup, must match this zonegroup's API
+  * name. */
+ if (!relaxed_region_enforcement && !store->svc.zone->get_zonegroup().is_master_zonegroup() && !location_constraint.empty() &&
+ store->svc.zone->get_zonegroup().api_name != location_constraint) {
+ ldpp_dout(this, 0) << "location constraint (" << location_constraint << ")"
+ << " doesn't match zonegroup" << " (" << store->svc.zone->get_zonegroup().api_name << ")"
+ << dendl;
+ op_ret = -ERR_INVALID_LOCATION_CONSTRAINT;
+ s->err.message = "The specified location-constraint is not valid";
+ return;
+ }
+
+ const auto& zonegroup = store->svc.zone->get_zonegroup();
+ if (!placement_rule.name.empty() &&
+ !zonegroup.placement_targets.count(placement_rule.name)) {
+ ldpp_dout(this, 0) << "placement target (" << placement_rule.name << ")"
+ << " doesn't exist in the placement targets of zonegroup"
+ << " (" << store->svc.zone->get_zonegroup().api_name << ")" << dendl;
+ op_ret = -ERR_INVALID_LOCATION_CONSTRAINT;
+ s->err.message = "The specified placement target does not exist";
+ return;
+ }
+
+ /* we need to make sure we read bucket info, it's not read before for this
+ * specific request */
+ op_ret = store->get_bucket_info(*s->sysobj_ctx, s->bucket_tenant, s->bucket_name,
+ s->bucket_info, nullptr, &s->bucket_attrs);
+ if (op_ret < 0 && op_ret != -ENOENT)
+ return;
+ s->bucket_exists = (op_ret != -ENOENT);
+
+ s->bucket_owner.set_id(s->user->user_id);
+ s->bucket_owner.set_name(s->user->display_name);
+ if (s->bucket_exists) {
+ /* An existing bucket owned by somebody else is a name conflict. A
+  * failure to read the old policy (r < 0) is not treated as an error
+  * here. */
+ int r = rgw_op_get_bucket_policy_from_attr(s->cct, store, s->bucket_info,
+ s->bucket_attrs, &old_policy);
+ if (r >= 0) {
+ if (old_policy.get_owner().get_id().compare(s->user->user_id) != 0) {
+ op_ret = -EEXIST;
+ return;
+ }
+ }
+ }
+
+ RGWBucketInfo master_info;
+ rgw_bucket *pmaster_bucket;
+ uint32_t *pmaster_num_shards;
+ real_time creation_time;
+
+ if (!store->svc.zone->is_meta_master()) {
+ /* Not the metadata master: forward the request and take versions,
+  * bucket info, creation time and shard count from the master's reply. */
+ JSONParser jp;
+ op_ret = forward_request_to_master(s, NULL, store, in_data, &jp);
+ if (op_ret < 0) {
+ return;
+ }
+
+ JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp);
+ JSONDecoder::decode_json("object_ver", objv, &jp);
+ JSONDecoder::decode_json("bucket_info", master_info, &jp);
+ ldpp_dout(this, 20) << "parsed: objv.tag=" << objv.tag << " objv.ver=" << objv.ver << dendl;
+ ldpp_dout(this, 20) << "got creation time: << " << master_info.creation_time << dendl;
+ pmaster_bucket= &master_info.bucket;
+ creation_time = master_info.creation_time;
+ pmaster_num_shards = &master_info.num_shards;
+ pobjv = &objv;
+ obj_lock_enabled = master_info.obj_lock_enabled();
+ } else {
+ pmaster_bucket = NULL;
+ pmaster_num_shards = NULL;
+ }
+
+ string zonegroup_id;
+
+ /* System requests may name the zonegroup explicitly; everything else
+  * uses the local zonegroup. */
+ if (s->system_request) {
+ zonegroup_id = s->info.args.get(RGW_SYS_PARAM_PREFIX "zonegroup");
+ if (zonegroup_id.empty()) {
+ zonegroup_id = store->svc.zone->get_zonegroup().get_id();
+ }
+ } else {
+ zonegroup_id = store->svc.zone->get_zonegroup().get_id();
+ }
+
+ if (s->bucket_exists) {
+ rgw_placement_rule selected_placement_rule;
+ /* NOTE(review): this local `bucket` is filled in but not used by the
+  * code visible in this block - confirm whether it can be dropped. */
+ rgw_bucket bucket;
+ bucket.tenant = s->bucket_tenant;
+ bucket.name = s->bucket_name;
+ /* NOTE(review): op_ret from select_bucket_placement() is not checked
+  * before selected_placement_rule is compared - confirm a failure here
+  * cannot leave the rule unset. */
+ op_ret = store->svc.zone->select_bucket_placement(*(s->user), zonegroup_id,
+ placement_rule,
+ &selected_placement_rule, nullptr);
+ if (selected_placement_rule != s->bucket_info.placement_rule) {
+ op_ret = -EEXIST;
+ return;
+ }
+ }
+
+ /* Encode special metadata first as we're using std::map::emplace under
+ * the hood. This method will add the new items only if the map doesn't
+ * contain such keys yet. */
+ policy.encode(aclbl);
+ emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+
+ if (has_cors) {
+ cors_config.encode(corsbl);
+ emplace_attr(RGW_ATTR_CORS, std::move(corsbl));
+ }
+
+ RGWQuotaInfo quota_info;
+ const RGWQuotaInfo * pquota_info = nullptr;
+ if (need_metadata_upload()) {
+ /* It's supposed that following functions WILL NOT change any special
+ * attributes (like RGW_ATTR_ACL) if they are already present in attrs. */
+ op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, false);
+ if (op_ret < 0) {
+ return;
+ }
+ prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
+ populate_with_generic_attrs(s, attrs);
+
+ op_ret = filter_out_quota_info(attrs, rmattr_names, quota_info);
+ if (op_ret < 0) {
+ return;
+ } else {
+ pquota_info = &quota_info;
+ }
+
+ /* Web site of Swift API. */
+ filter_out_website(attrs, rmattr_names, s->bucket_info.website_conf);
+ s->bucket_info.has_website = !s->bucket_info.website_conf.is_empty();
+ }
+
+ s->bucket.tenant = s->bucket_tenant; /* ignored if bucket exists */
+ s->bucket.name = s->bucket_name;
+
+ /* Handle updates of the metadata for Swift's object versioning. */
+ if (swift_ver_location) {
+ s->bucket_info.swift_ver_location = *swift_ver_location;
+ s->bucket_info.swift_versioning = (! swift_ver_location->empty());
+ }
+ /* Object lock is requested together with the versioned flag. */
+ if (obj_lock_enabled) {
+ info.flags = BUCKET_VERSIONED | BUCKET_OBJ_LOCK_ENABLED;
+ }
+
+
+ op_ret = store->create_bucket(*(s->user), s->bucket, zonegroup_id,
+ placement_rule, s->bucket_info.swift_ver_location,
+ pquota_info, attrs,
+ info, pobjv, &ep_objv, creation_time,
+ pmaster_bucket, pmaster_num_shards, true);
+ /* continue if EEXIST and create_bucket will fail below. this way we can
+ * recover from a partial create by retrying it. */
+ ldpp_dout(this, 20) << "rgw_create_bucket returned ret=" << op_ret << " bucket=" << s->bucket << dendl;
+
+ if (op_ret && op_ret != -EEXIST)
+ return;
+
+ existed = (op_ret == -EEXIST);
+
+ if (existed) {
+ /* bucket already existed, might have raced with another bucket creation, or
+ * might be partial bucket creation that never completed. Read existing bucket
+ * info, verify that the reported bucket owner is the current user.
+ * If all is ok then update the user's list of buckets.
+ * Otherwise inform client about a name conflict.
+ */
+ if (info.owner.compare(s->user->user_id) != 0) {
+ op_ret = -EEXIST;
+ return;
+ }
+ s->bucket = info.bucket;
+ }
+
+ op_ret = rgw_link_bucket(store, s->user->user_id, s->bucket,
+ info.creation_time, false);
+ if (op_ret && !existed && op_ret != -EEXIST) {
+ /* if it exists (or previously existed), don't remove it! */
+ /* NOTE(review): op_ret is overwritten with the unlink result, so the
+  * original rgw_link_bucket() error is no longer what gets reported -
+  * confirm this is intended. */
+ op_ret = rgw_unlink_bucket(store, s->user->user_id, s->bucket.tenant,
+ s->bucket.name);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret
+ << dendl;
+ }
+ } else if (op_ret == -EEXIST || (op_ret == 0 && existed)) {
+ op_ret = -ERR_BUCKET_EXISTS;
+ }
+
+ if (need_metadata_upload() && existed) {
+ /* OK, it looks we lost race with another request. As it's required to
+ * handle metadata fusion and upload, the whole operation becomes very
+ * similar in nature to PutMetadataBucket. However, as the attrs may
+ * changed in the meantime, we have to refresh. */
+ short tries = 0;
+ /* Each pass re-reads the bucket, rebuilds the attribute set from the
+  * request and tries to write it; -ECANCELED triggers another pass, for
+  * up to 20 retries. */
+ do {
+ RGWBucketInfo binfo;
+ map<string, bufferlist> battrs;
+
+ op_ret = store->get_bucket_info(*s->sysobj_ctx, s->bucket_tenant, s->bucket_name,
+ binfo, nullptr, &battrs);
+ if (op_ret < 0) {
+ return;
+ } else if (binfo.owner.compare(s->user->user_id) != 0) {
+ /* New bucket doesn't belong to the account we're operating on. */
+ op_ret = -EEXIST;
+ return;
+ } else {
+ s->bucket_info = binfo;
+ s->bucket_attrs = battrs;
+ }
+
+ attrs.clear();
+
+ op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, false);
+ if (op_ret < 0) {
+ return;
+ }
+ prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
+ populate_with_generic_attrs(s, attrs);
+ op_ret = filter_out_quota_info(attrs, rmattr_names, s->bucket_info.quota);
+ if (op_ret < 0) {
+ return;
+ }
+
+ /* Handle updates of the metadata for Swift's object versioning. */
+ if (swift_ver_location) {
+ s->bucket_info.swift_ver_location = *swift_ver_location;
+ s->bucket_info.swift_versioning = (! swift_ver_location->empty());
+ }
+
+ /* Web site of Swift API. */
+ filter_out_website(attrs, rmattr_names, s->bucket_info.website_conf);
+ s->bucket_info.has_website = !s->bucket_info.website_conf.is_empty();
+
+ /* This will also set the quota on the bucket. */
+ op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
+ &s->bucket_info.objv_tracker);
+ } while (op_ret == -ECANCELED && tries++ < 20);
+
+ /* Restore the proper return code. */
+ if (op_ret >= 0) {
+ op_ret = -ERR_BUCKET_EXISTS;
+ }
+ }
+ }
+
+ /**
+  * Deleting a bucket requires the s3DeleteBucket permission on it.
+  *
+  * @return 0 when permitted, -EACCES otherwise.
+  */
+ int RGWDeleteBucket::verify_permission()
+ {
+   const bool may_delete =
+     verify_bucket_permission(this, s, rgw::IAM::s3DeleteBucket);
+   return may_delete ? 0 : -EACCES;
+ }
+
+ /* Pre-execution hook: runs rgw_bucket_object_pre_exec() on the request
+  * state. */
+ void RGWDeleteBucket::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /**
+  * Delete the request's bucket.
+  *
+  * Steps: validate the request, pick the entry-point version to delete
+  * against (system requests may override it), sync user stats (best
+  * effort), make sure the bucket is empty, forward to the metadata master
+  * when this zone is not the master, abort pending multipart uploads,
+  * delete the bucket and finally unlink it from its owner. The outcome is
+  * reported through op_ret.
+  */
+ void RGWDeleteBucket::execute()
+ {
+   if (s->bucket_name.empty()) {
+     op_ret = -EINVAL;
+     return;
+   }
+
+   if (!s->bucket_exists) {
+     ldpp_dout(this, 0) << "ERROR: bucket " << s->bucket_name << " not found" << dendl;
+     op_ret = -ERR_NO_SUCH_BUCKET;
+     return;
+   }
+
+   RGWObjVersionTracker ot;
+   ot.read_version = s->bucket_info.ep_objv;
+
+   if (s->system_request) {
+     /* A system request may carry the exact version (tag + ver) to use. */
+     const string tag = s->info.args.get(RGW_SYS_PARAM_PREFIX "tag");
+     const string ver_str = s->info.args.get(RGW_SYS_PARAM_PREFIX "ver");
+     if (!tag.empty()) {
+       ot.read_version.tag = tag;
+
+       string err;
+       const uint64_t ver = strict_strtol(ver_str.c_str(), 10, &err);
+       if (!err.empty()) {
+         ldpp_dout(this, 0) << "failed to parse ver param" << dendl;
+         op_ret = -EINVAL;
+         return;
+       }
+       ot.read_version.ver = ver;
+     }
+   }
+
+   /* A stats sync failure is only logged; deletion proceeds regardless. */
+   op_ret = rgw_bucket_sync_user_stats(store, s->user->user_id, s->bucket_info);
+   if (op_ret < 0) {
+     ldpp_dout(this, 1) << "WARNING: failed to sync user stats before bucket delete: op_ret= " << op_ret << dendl;
+   }
+
+   op_ret = store->check_bucket_empty(s->bucket_info);
+   if (op_ret < 0) {
+     return;
+   }
+
+   if (!store->svc.zone->is_meta_master()) {
+     bufferlist in_data;
+     op_ret = forward_request_to_master(s, &ot.read_version, store, in_data,
+                                        nullptr);
+     if (op_ret < 0) {
+       if (op_ret == -ENOENT) {
+         /* adjust error, we want to return with NoSuchBucket and not
+          * NoSuchKey */
+         op_ret = -ERR_NO_SUCH_BUCKET;
+       }
+       return;
+     }
+   }
+
+   /* Both stay empty unless a Swift request supplies a "path" argument. */
+   string prefix, delimiter;
+
+   if (s->prot_flags & RGW_REST_SWIFT) {
+     const string path_args = s->info.args.get("path");
+     if (!path_args.empty()) {
+       prefix = path_args;
+       delimiter = "/";
+     }
+   }
+
+   op_ret = abort_bucket_multiparts(store, s->cct, s->bucket_info, prefix, delimiter);
+   if (op_ret < 0) {
+     return;
+   }
+
+   op_ret = store->delete_bucket(s->bucket_info, ot, false);
+
+   if (op_ret == -ECANCELED) {
+     // lost a race, either with mdlog sync or another delete bucket operation.
+     // in either case, we've already called rgw_unlink_bucket()
+     op_ret = 0;
+     return;
+   }
+   if (op_ret != 0) {
+     return;
+   }
+
+   op_ret = rgw_unlink_bucket(store, s->bucket_info.owner, s->bucket.tenant,
+                              s->bucket.name, false);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret
+                        << dendl;
+   }
+ }
+
+ /* Authorize a PUT of s->object into s->bucket.
+  *
+  * When the request names a copy source, read access to the source object is
+  * checked first (IAM user policies, then the source's bucket policy, then
+  * its ACL). Afterwards the request parameters are parsed and write access to
+  * the destination is evaluated: IAM user policies and the bucket policy are
+  * consulted with the PUT-specific condition keys added to s->env, falling
+  * back to the bucket ACL when no policy gives a verdict.
+  *
+  * Returns 0 when permitted, -EACCES when denied, or the negative error
+  * returned by get_params(). */
+ int RGWPutObj::verify_permission()
+ {
+   if (! copy_source.empty()) {
+     RGWAccessControlPolicy src_acl(s->cct);
+     boost::optional<Policy> src_policy;
+     map<string, bufferlist> src_attrs;
+     rgw_bucket src_bucket(copy_source_bucket_info.bucket);
+     rgw_obj_key src_key(copy_source_object_name, copy_source_version_id);
+
+     rgw_obj src_obj(src_bucket, src_key);
+     store->set_atomic(s->obj_ctx, src_obj);
+     store->set_prefetch_data(s->obj_ctx, src_obj);
+
+     /* check source object permissions */
+     if (read_obj_policy(store, s, copy_source_bucket_info, src_attrs, &src_acl, nullptr,
+                         src_policy, src_bucket, src_key) < 0) {
+       return -EACCES;
+     }
+
+     /* admin request overrides permission checks */
+     if (! s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) {
+       // reading a specific version needs s3:GetObjectVersion
+       const auto read_action = src_key.instance.empty() ?
+                                rgw::IAM::s3GetObject :
+                                rgw::IAM::s3GetObjectVersion;
+
+       // without any policy the ACL alone decides
+       bool acl_decides = true;
+
+       if (src_policy || ! s->iam_user_policies.empty()) {
+         auto usr_policy_res = Effect::Pass;
+         for (auto& user_policy : s->iam_user_policies) {
+           usr_policy_res = user_policy.eval(s->env, *s->auth.identity,
+                                             read_action, rgw::ARN(src_obj));
+           if (usr_policy_res == Effect::Deny) {
+             return -EACCES;
+           }
+           if (usr_policy_res == Effect::Allow) {
+             break;
+           }
+         }
+
+         rgw::IAM::Effect e = Effect::Pass;
+         if (src_policy) {
+           e = src_policy->eval(s->env, *s->auth.identity,
+                                read_action, rgw::ARN(src_obj));
+         }
+         if (e == Effect::Deny) {
+           return -EACCES;
+         }
+         // the ACL is only consulted when neither policy layer had an opinion
+         acl_decides = (usr_policy_res == Effect::Pass && e == Effect::Pass);
+       }
+
+       if (acl_decides &&
+           !src_acl.verify_permission(this, *s->auth.identity, s->perm_mask,
+                                      RGW_PERM_READ)) {
+         return -EACCES;
+       }
+     }
+   }
+
+   const auto params_ret = get_params();
+   if (params_ret < 0) {
+     ldpp_dout(this, 20) << "get_params() returned ret=" << params_ret << dendl;
+     return params_ret;
+   }
+
+   if (s->iam_policy || ! s->iam_user_policies.empty()) {
+     /* expose request properties as IAM condition keys */
+     rgw_add_grant_to_iam_environment(s->env, s);
+
+     rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl);
+
+     if (obj_tags != nullptr && obj_tags->count() > 0) {
+       auto tags = obj_tags->get_tags();
+       for (const auto& kv : tags) {
+         rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second);
+       }
+     }
+
+     constexpr auto encrypt_attr = "x-amz-server-side-encryption";
+     constexpr auto s3_encrypt_attr = "s3:x-amz-server-side-encryption";
+     const auto enc_header = s->info.x_meta_map.find(encrypt_attr);
+     if (enc_header != s->info.x_meta_map.end()) {
+       rgw_add_to_iam_environment(s->env, s3_encrypt_attr, enc_header->second);
+     }
+
+     constexpr auto kms_attr = "x-amz-server-side-encryption-aws-kms-key-id";
+     constexpr auto s3_kms_attr = "s3:x-amz-server-side-encryption-aws-kms-key-id";
+     const auto kms_header = s->info.x_meta_map.find(kms_attr);
+     if (kms_header != s->info.x_meta_map.end()) {
+       rgw_add_to_iam_environment(s->env, s3_kms_attr, kms_header->second);
+     }
+
+     const auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env,
+                                                    boost::none,
+                                                    rgw::IAM::s3PutObject,
+                                                    rgw_obj(s->bucket, s->object));
+     if (usr_policy_res == Effect::Deny) {
+       return -EACCES;
+     }
+
+     rgw::IAM::Effect e = Effect::Pass;
+     if (s->iam_policy) {
+       e = s->iam_policy->eval(s->env, *s->auth.identity,
+                               rgw::IAM::s3PutObject,
+                               rgw_obj(s->bucket, s->object));
+     }
+     if (e == Effect::Deny) {
+       return -EACCES;
+     }
+     if (e == Effect::Allow || usr_policy_res == Effect::Allow) {
+       return 0;
+     }
+     /* both layers passed: fall through to the bucket ACL */
+   }
+
+   if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+     return -EACCES;
+   }
+
+   return 0;
+ }
+
+
+ // Pre-execution hook for PUT object: delegates to the shared
+ // rgw_bucket_object_pre_exec() helper with the request state.
+ void RGWPutObj::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Terminal read filter used while copying from a source object: every chunk
+  * handed to handle_data() is forwarded to RGWPutObj::get_data_cb() of the
+  * operation this filter was created for. */
+ class RGWPutObj_CB : public RGWGetObj_Filter
+ {
+   RGWPutObj *put_op;
+ public:
+   explicit RGWPutObj_CB(RGWPutObj *_op) : put_op(_op) {}
+   ~RGWPutObj_CB() override = default;
+
+   int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override {
+     return put_op->get_data_cb(bl, bl_ofs, bl_len);
+   }
+ };
+
+ /* Data callback for copy-source reads: copies bl_len bytes starting at
+  * bl_ofs out of bl and appends them to the bl_aux staging buffer.
+  * Returns bl_len. */
+ int RGWPutObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len)
+ {
+   bufferlist chunk;
+   bl.copy(bl_ofs, bl_len, chunk);
+   bl_aux.append(chunk);
+   return bl_len;
+ }
+
+ /* Read the byte range [fst, lst] of the copy-source object into bl.
+  *
+  * The data is pulled through a filter chain: the innermost filter
+  * (RGWPutObj_CB) collects bytes in bl_aux, optionally wrapped by a
+  * decompression filter and a decryption filter depending on the source
+  * object's attributes. On completion whatever was collected in bl_aux is
+  * moved into bl.
+  *
+  * Returns a non-negative value on success and a negative error code
+  * otherwise (-EIO when the compression info cannot be decoded). */
+ int RGWPutObj::get_data(const off_t fst, const off_t lst, bufferlist& bl)
+ {
+   RGWPutObj_CB cb(this);
+   RGWGetObj_Filter* filter = &cb;
+   boost::optional<RGWGetObj_Decompress> decompress;
+   std::unique_ptr<RGWGetObj_Filter> decrypt;
+   RGWCompressionInfo cs_info;
+   map<string, bufferlist> attrs;
+   map<string, bufferlist>::iterator attr_iter;
+   int ret = 0;
+
+   uint64_t obj_size;
+   int64_t new_ofs, new_end;
+
+   new_ofs = fst;
+   new_end = lst;
+
+   rgw_obj_key obj_key(copy_source_object_name, copy_source_version_id);
+   rgw_obj obj(copy_source_bucket_info.bucket, obj_key);
+
+   RGWRados::Object op_target(store, copy_source_bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+   RGWRados::Object::Read read_op(&op_target);
+   read_op.params.obj_size = &obj_size;
+   read_op.params.attrs = &attrs;
+
+   ret = read_op.prepare();
+   if (ret < 0)
+     return ret;
+
+   bool need_decompress;
+   op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info);
+   if (op_ret < 0) {
+     ldpp_dout(s, 0) << "ERROR: failed to decode compression info" << dendl;
+     return -EIO;
+   }
+
+   bool partial_content = true;
+   if (need_decompress)
+   {
+     // ranges are expressed against the uncompressed size
+     obj_size = cs_info.orig_size;
+     decompress.emplace(s->cct, &cs_info, partial_content, filter);
+     filter = &*decompress;
+   }
+
+   attr_iter = attrs.find(RGW_ATTR_MANIFEST);
+   op_ret = this->get_decrypt_filter(&decrypt,
+                                     filter,
+                                     attrs,
+                                     attr_iter != attrs.end() ? &(attr_iter->second) : nullptr);
+   if (decrypt != nullptr) {
+     filter = decrypt.get();
+   }
+   if (op_ret < 0) {
+     // propagate the decrypt-filter error; `ret` still holds the
+     // (non-negative) result of prepare() at this point and would
+     // report success to the caller
+     return op_ret;
+   }
+
+   ret = read_op.range_to_ofs(obj_size, new_ofs, new_end);
+   if (ret < 0)
+     return ret;
+
+   filter->fixup_range(new_ofs, new_end);
+   ret = read_op.iterate(new_ofs, new_end, filter);
+
+   if (ret >= 0)
+     ret = filter->flush();
+
+   // hand over everything the callback accumulated
+   bl.claim_append(bl_aux);
+
+   return ret;
+ }
+
+ // Pick the compressor plugin for a request.
+ //
+ // For every compression type except "random" this is simply
+ // Compressor::create(cct, type). "random" on a multipart upload (request
+ // carries an "uploadId" argument) is special: the algorithm is derived from
+ // a hash of the upload id so all parts use the same plugin. Returns nullptr
+ // when that hash selects COMP_ALG_NONE.
+ static CompressorRef get_compressor_plugin(const req_state *s,
+                                            const std::string& compression_type)
+ {
+   if (compression_type == "random") {
+     bool has_upload_id{false};
+     const auto& upload_id = s->info.args.get("uploadId", &has_upload_id);
+
+     if (has_upload_id) {
+       const auto alg = std::hash<std::string>{}(upload_id) % Compressor::COMP_ALG_LAST;
+       if (alg == Compressor::COMP_ALG_NONE) {
+         return nullptr;
+       }
+       return Compressor::create(s->cct, alg);
+     }
+   }
+
+   return Compressor::create(s->cct, compression_type);
+ }
+
+ /* Execute a PUT object request (plain upload, multipart part, append, or
+  * server-side copy of a source range).
+  *
+  * Outline:
+  *   1. validate the request (object name, bucket, versioning params,
+  *      supplied MD5 / ETag) and run the up-front quota checks;
+  *   2. pick and prepare the object processor (multipart / append / atomic);
+  *   3. stream the data through the optional encryption or compression
+  *      filter into the processor, hashing it on the way;
+  *   4. re-check quota with the real size, verify digests, build the
+  *      attribute set and complete the write;
+  *   5. optionally produce the torrent and publish a notification.
+  *
+  * The outcome is left in op_ret. */
+ void RGWPutObj::execute()
+ {
+   char supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1];
+   char supplied_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+   char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+   unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+   MD5 hash;
+   bufferlist bl, aclbl, bs;
+   int len;
+
+   off_t fst;
+   off_t lst;
+
+   bool need_calc_md5 = (dlo_manifest == NULL) && (slo_info == NULL);
+   perfcounter->inc(l_rgw_put);
+   // report latency on return
+   auto put_lat = make_scope_guard([&] {
+     perfcounter->tinc(l_rgw_put_lat, s->time_elapsed());
+   });
+
+   op_ret = -EINVAL;
+   if (s->object.empty()) {
+     return;
+   }
+
+   if (!s->bucket_exists) {
+     op_ret = -ERR_NO_SUCH_BUCKET;
+     return;
+   }
+
+
+   op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+   if (op_ret < 0) {
+     ldpp_dout(this, 20) << "get_system_versioning_params() returned ret="
+                         << op_ret << dendl;
+     return;
+   }
+
+   if (supplied_md5_b64) {
+     need_calc_md5 = true;
+
+     ldpp_dout(this, 15) << "supplied_md5_b64=" << supplied_md5_b64 << dendl;
+     op_ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1],
+                           supplied_md5_b64, supplied_md5_b64 + strlen(supplied_md5_b64));
+     ldpp_dout(this, 15) << "ceph_armor ret=" << op_ret << dendl;
+     if (op_ret != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+       op_ret = -ERR_INVALID_DIGEST;
+       return;
+     }
+
+     buf_to_hex((const unsigned char *)supplied_md5_bin, CEPH_CRYPTO_MD5_DIGESTSIZE, supplied_md5);
+     ldpp_dout(this, 15) << "supplied_md5=" << supplied_md5 << dendl;
+   }
+
+   if (!chunked_upload) { /* with chunked upload we don't know how big is the upload.
+                             we also check sizes at the end anyway */
+     op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+                                 user_quota, bucket_quota, s->content_length);
+     if (op_ret < 0) {
+       ldpp_dout(this, 20) << "check_quota() returned ret=" << op_ret << dendl;
+       return;
+     }
+     op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+     if (op_ret < 0) {
+       ldpp_dout(this, 20) << "check_bucket_shards() returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   if (supplied_etag) {
+     strncpy(supplied_md5, supplied_etag, sizeof(supplied_md5) - 1);
+     supplied_md5[sizeof(supplied_md5) - 1] = '\0';
+   }
+
+   const bool multipart = !multipart_upload_id.empty();
+   auto& obj_ctx = *static_cast<RGWObjectCtx*>(s->obj_ctx);
+   rgw_obj obj{s->bucket, s->object};
+
+   /* Handle object versioning of Swift API. */
+   if (! multipart) {
+     op_ret = store->swift_versioning_copy(obj_ctx,
+                                           s->bucket_owner.get_id(),
+                                           s->bucket_info,
+                                           obj);
+     if (op_ret < 0) {
+       return;
+     }
+   }
+
+   // create the object processor
+   rgw::AioThrottle aio(store->ctx()->_conf->rgw_put_obj_min_window_size);
+   using namespace rgw::putobj;
+   constexpr auto max_processor_size = std::max({sizeof(MultipartObjectProcessor),
+                                                 sizeof(AtomicObjectProcessor),
+                                                 sizeof(AppendObjectProcessor)});
+   ceph::static_ptr<ObjectProcessor, max_processor_size> processor;
+
+   rgw_placement_rule *pdest_placement;
+
+   multipart_upload_info upload_info;
+   if (multipart) {
+     RGWMPObj mp(s->object.name, multipart_upload_id);
+
+     op_ret = get_multipart_info(store, s, mp.get_meta(), nullptr, nullptr, &upload_info);
+     if (op_ret < 0) {
+       if (op_ret != -ENOENT) {
+         ldpp_dout(this, 0) << "ERROR: get_multipart_info returned " << op_ret << ": " << cpp_strerror(-op_ret) << dendl;
+       } else {// -ENOENT: raced with upload complete/cancel, no need to spam log
+         ldpp_dout(this, 20) << "failed to get multipart info (returned " << op_ret << ": " << cpp_strerror(-op_ret) << "): probably raced with upload complete / cancel" << dendl;
+       }
+       return;
+     }
+     pdest_placement = &upload_info.dest_placement;
+     ldpp_dout(this, 20) << "dest_placement for part=" << upload_info.dest_placement << dendl;
+     processor.emplace<MultipartObjectProcessor>(
+         &aio, store, s->bucket_info, pdest_placement,
+         s->owner.get_id(), obj_ctx, obj,
+         multipart_upload_id, multipart_part_num, multipart_part_str);
+   } else if(append) {
+     if (s->bucket_info.versioned()) {
+       op_ret = -ERR_INVALID_BUCKET_STATE;
+       return;
+     }
+     pdest_placement = &s->dest_placement;
+     processor.emplace<AppendObjectProcessor>(
+         &aio, store, s->bucket_info, pdest_placement, s->bucket_owner.get_id(),obj_ctx, obj,
+         s->req_id, position, &cur_accounted_size);
+   } else {
+     if (s->bucket_info.versioning_enabled()) {
+       if (!version_id.empty()) {
+         obj.key.set_instance(version_id);
+       } else {
+         store->gen_rand_obj_instance_name(&obj);
+         version_id = obj.key.instance;
+       }
+     }
+     pdest_placement = &s->dest_placement;
+     processor.emplace<AtomicObjectProcessor>(
+         &aio, store, s->bucket_info, pdest_placement,
+         s->bucket_owner.get_id(), obj_ctx, obj, olh_epoch, s->req_id);
+   }
+
+   op_ret = processor->prepare();
+   if (op_ret < 0) {
+     ldpp_dout(this, 20) << "processor->prepare() returned ret=" << op_ret
+                         << dendl;
+     return;
+   }
+
+   /* Work out the source range [fst, lst]: a copy without an explicit range
+    * covers the whole source object, otherwise the parsed range is used. */
+   if ((! copy_source.empty()) && !copy_source_range) {
+     rgw_obj_key obj_key(copy_source_object_name, copy_source_version_id);
+     rgw_obj obj(copy_source_bucket_info.bucket, obj_key.name);
+
+     RGWObjState *astate;
+     op_ret = store->get_obj_state(&obj_ctx, copy_source_bucket_info, obj,
+                                   &astate, true, false);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "ERROR: get copy source obj state returned with error" << op_ret << dendl;
+       return;
+     }
+     if (!astate->exists){
+       op_ret = -ENOENT;
+       return;
+     }
+     lst = astate->accounted_size - 1;
+   } else {
+     lst = copy_source_range_lst;
+   }
+
+   fst = copy_source_range_fst;
+
+   // no filters by default
+   DataProcessor *filter = processor.get();
+
+   const auto& compression_type = store->svc.zone->get_zone_params().get_compression_type(*pdest_placement);
+   CompressorRef plugin;
+   boost::optional<RGWPutObj_Compress> compressor;
+
+   std::unique_ptr<DataProcessor> encrypt;
+
+   if (!append) { // compression and encryption only apply to full object uploads
+     op_ret = get_encrypt_filter(&encrypt, filter);
+     if (op_ret < 0) {
+       return;
+     }
+     if (encrypt != nullptr) {
+       filter = &*encrypt;
+     } else if (compression_type != "none") {
+       plugin = get_compressor_plugin(s, compression_type);
+       if (!plugin) {
+         ldpp_dout(this, 1) << "Cannot load plugin for compression type "
+                            << compression_type << dendl;
+       } else {
+         compressor.emplace(s->cct, plugin, filter);
+         filter = &*compressor;
+       }
+     }
+   }
+   tracepoint(rgw_op, before_data_transfer, s->req_id.c_str());
+   do {
+     bufferlist data;
+     if (fst > lst)
+       break;
+     if (copy_source.empty()) {
+       len = get_data(data);
+     } else {
+       // pull the next chunk of the source range
+       uint64_t cur_lst = min(fst + s->cct->_conf->rgw_max_chunk_size - 1, lst);
+       op_ret = get_data(fst, cur_lst, data);
+       if (op_ret < 0)
+         return;
+       len = data.length();
+       s->content_length += len;
+       fst += len;
+     }
+     if (len < 0) {
+       op_ret = len;
+       ldpp_dout(this, 20) << "get_data() returned ret=" << op_ret << dendl;
+       return;
+     } else if (len == 0) {
+       break;
+     }
+
+     if (need_calc_md5) {
+       hash.Update((const unsigned char *)data.c_str(), data.length());
+     }
+
+     /* update torrrent */
+     torrent.update(data);
+
+     op_ret = filter->process(std::move(data), ofs);
+     if (op_ret < 0) {
+       ldpp_dout(this, 20) << "processor->process() returned ret="
+                           << op_ret << dendl;
+       return;
+     }
+
+     ofs += len;
+   } while (len > 0);
+   tracepoint(rgw_op, after_data_transfer, s->req_id.c_str(), ofs);
+
+   // flush any data in filters
+   op_ret = filter->process({}, ofs);
+   if (op_ret < 0) {
+     return;
+   }
+
+   if (!chunked_upload && ofs != s->content_length) {
+     op_ret = -ERR_REQUEST_TIMEOUT;
+     return;
+   }
+   s->obj_size = ofs;
+
+   perfcounter->inc(l_rgw_put_b, s->obj_size);
+
+   op_ret = do_aws4_auth_completion();
+   if (op_ret < 0) {
+     return;
+   }
+
+   op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+                               user_quota, bucket_quota, s->obj_size);
+   if (op_ret < 0) {
+     ldpp_dout(this, 20) << "second check_quota() returned op_ret=" << op_ret << dendl;
+     return;
+   }
+
+   op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+   if (op_ret < 0) {
+     ldpp_dout(this, 20) << "check_bucket_shards() returned ret=" << op_ret << dendl;
+     return;
+   }
+
+   hash.Final(m);
+
+   if (compressor && compressor->is_compressed()) {
+     bufferlist tmp;
+     RGWCompressionInfo cs_info;
+     cs_info.compression_type = plugin->get_type_name();
+     cs_info.orig_size = s->obj_size;
+     cs_info.blocks = move(compressor->get_compression_blocks());
+     encode(cs_info, tmp);
+     attrs[RGW_ATTR_COMPRESSION] = tmp;
+     ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION
+                         << " with type=" << cs_info.compression_type
+                         << ", orig_size=" << cs_info.orig_size
+                         << ", blocks=" << cs_info.blocks.size() << dendl;
+   }
+
+   buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+
+   etag = calc_md5;
+
+   if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
+     op_ret = -ERR_BAD_DIGEST;
+     return;
+   }
+
+   policy.encode(aclbl);
+   emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+
+   if (dlo_manifest) {
+     op_ret = encode_dlo_manifest_attr(dlo_manifest, attrs);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "bad user manifest: " << dlo_manifest << dendl;
+       return;
+     }
+   }
+
+   if (slo_info) {
+     bufferlist manifest_bl;
+     encode(*slo_info, manifest_bl);
+     emplace_attr(RGW_ATTR_SLO_MANIFEST, std::move(manifest_bl));
+   }
+
+   if (supplied_etag && etag.compare(supplied_etag) != 0) {
+     op_ret = -ERR_UNPROCESSABLE_ENTITY;
+     return;
+   }
+   bl.append(etag.c_str(), etag.size());
+   emplace_attr(RGW_ATTR_ETAG, std::move(bl));
+
+   populate_with_generic_attrs(s, attrs);
+   op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
+   if (op_ret < 0) {
+     return;
+   }
+   encode_delete_at_attr(delete_at, attrs);
+   encode_obj_tags_attr(obj_tags.get(), attrs);
+   rgw_cond_decode_objtags(s, attrs);
+
+   /* Add a custom metadata to expose the information whether an object
+    * is an SLO or not. Appending the attribute must be performed AFTER
+    * processing any input from user in order to prohibit overwriting. */
+   if (slo_info) {
+     bufferlist slo_userindicator_bl;
+     slo_userindicator_bl.append("True", 4);
+     emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
+   }
+   if (obj_legal_hold) {
+     bufferlist obj_legal_hold_bl;
+     obj_legal_hold->encode(obj_legal_hold_bl);
+     emplace_attr(RGW_ATTR_OBJECT_LEGAL_HOLD, std::move(obj_legal_hold_bl));
+   }
+   if (obj_retention) {
+     bufferlist obj_retention_bl;
+     obj_retention->encode(obj_retention_bl);
+     emplace_attr(RGW_ATTR_OBJECT_RETENTION, std::move(obj_retention_bl));
+   }
+
+   tracepoint(rgw_op, processor_complete_enter, s->req_id.c_str());
+   op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
+                                (delete_at ? *delete_at : real_time()), if_match, if_nomatch,
+                                (user_data.empty() ? nullptr : &user_data), nullptr, nullptr);
+   tracepoint(rgw_op, processor_complete_exit, s->req_id.c_str());
+   if (op_ret < 0) {
+     /* The write was not committed. Stop here so that the error is neither
+      * overwritten by a successful torrent.complete() below nor followed by
+      * an ObjectCreated notification for an object that does not exist. */
+     ldpp_dout(this, 20) << "processor->complete() returned ret=" << op_ret
+                         << dendl;
+     return;
+   }
+
+   /* produce torrent */
+   if (s->cct->_conf->rgw_torrent_flag && (ofs == torrent.get_data_len()))
+   {
+     torrent.init(s, store);
+     torrent.set_create_date(mtime);
+     op_ret = torrent.complete();
+     if (0 != op_ret)
+     {
+       ldpp_dout(this, 0) << "ERROR: torrent.handle_data() returned " << op_ret << dendl;
+       return;
+     }
+   }
+
+   // send request to notification manager
+   const auto ret = rgw::notify::publish(s, obj.key, s->obj_size, mtime, etag, rgw::notify::ObjectCreatedPut, store);
+   if (ret < 0) {
+     ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+     // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+     // this should be global conf (probably returnign a different handler)
+     // so we don't need to read the configured values before we perform it
+   }
+ }
+
+ // Always returns 0: no check is performed at this stage. The policy/ACL
+ // evaluation for POST uploads is done inside RGWPostObj::execute(), after
+ // get_params() and verify_params() have run.
+ int RGWPostObj::verify_permission()
+ {
+ return 0;
+ }
+
+ // Pre-execution hook for POST object: delegates to the shared
+ // rgw_bucket_object_pre_exec() helper with the request state.
+ void RGWPostObj::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Execute a browser-style POST upload (S3 POST object / Swift FormPost).
+  *
+  * The form is parsed and validated, write permission is evaluated (IAM user
+  * policies, bucket policy, then bucket ACL), and then every file part of
+  * the form is written as its own object through an AtomicObjectProcessor,
+  * optionally encrypted or compressed. Size limits (min_len / max_len),
+  * quota and the optional Content-MD5 are enforced per file. A notification
+  * is published once all files have been stored.
+  *
+  * The outcome is left in op_ret. */
+ void RGWPostObj::execute()
+ {
+   boost::optional<RGWPutObj_Compress> compressor;
+   CompressorRef plugin;
+   char supplied_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+
+   /* Read in the data from the POST form. */
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   op_ret = verify_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   if (s->iam_policy || ! s->iam_user_policies.empty()) {
+     auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env,
+                                              boost::none,
+                                              rgw::IAM::s3PutObject,
+                                              rgw_obj(s->bucket, s->object));
+     if (usr_policy_res == Effect::Deny) {
+       op_ret = -EACCES;
+       return;
+     }
+
+     rgw::IAM::Effect e = Effect::Pass;
+     if (s->iam_policy) {
+       e = s->iam_policy->eval(s->env, *s->auth.identity,
+                               rgw::IAM::s3PutObject,
+                               rgw_obj(s->bucket, s->object));
+     }
+     if (e == Effect::Deny) {
+       op_ret = -EACCES;
+       return;
+     } else if (usr_policy_res == Effect::Pass && e == Effect::Pass && !verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+       op_ret = -EACCES;
+       return;
+     }
+   } else if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+     op_ret = -EACCES;
+     return;
+   }
+
+   /* Start iteration over data fields. It's necessary as Swift's FormPost
+    * is capable to handle multiple files in single form. */
+   do {
+     char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+     unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+     MD5 hash;
+     ceph::buffer::list bl, aclbl;
+     int len = 0;
+
+     op_ret = store->check_quota(s->bucket_owner.get_id(),
+                                 s->bucket,
+                                 user_quota,
+                                 bucket_quota,
+                                 s->content_length);
+     if (op_ret < 0) {
+       return;
+     }
+
+     op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+     if (op_ret < 0) {
+       return;
+     }
+
+     if (supplied_md5_b64) {
+       char supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1];
+       ldpp_dout(this, 15) << "supplied_md5_b64=" << supplied_md5_b64 << dendl;
+       op_ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1],
+                             supplied_md5_b64, supplied_md5_b64 + strlen(supplied_md5_b64));
+       ldpp_dout(this, 15) << "ceph_armor ret=" << op_ret << dendl;
+       if (op_ret != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+         op_ret = -ERR_INVALID_DIGEST;
+         return;
+       }
+
+       buf_to_hex((const unsigned char *)supplied_md5_bin, CEPH_CRYPTO_MD5_DIGESTSIZE, supplied_md5);
+       ldpp_dout(this, 15) << "supplied_md5=" << supplied_md5 << dendl;
+     }
+
+     rgw_obj obj(s->bucket, get_current_filename());
+     if (s->bucket_info.versioning_enabled()) {
+       store->gen_rand_obj_instance_name(&obj);
+     }
+
+     rgw::AioThrottle aio(s->cct->_conf->rgw_put_obj_min_window_size);
+
+     using namespace rgw::putobj;
+     AtomicObjectProcessor processor(&aio, store, s->bucket_info,
+                                     &s->dest_placement,
+                                     s->bucket_owner.get_id(),
+                                     *static_cast<RGWObjectCtx*>(s->obj_ctx),
+                                     obj, 0, s->req_id);
+     op_ret = processor.prepare();
+     if (op_ret < 0) {
+       return;
+     }
+
+     /* No filters by default. */
+     DataProcessor *filter = &processor;
+
+     std::unique_ptr<DataProcessor> encrypt;
+     op_ret = get_encrypt_filter(&encrypt, filter);
+     if (op_ret < 0) {
+       return;
+     }
+     if (encrypt != nullptr) {
+       filter = encrypt.get();
+     } else {
+       const auto& compression_type = store->svc.zone->get_zone_params().get_compression_type(
+           s->dest_placement);
+       if (compression_type != "none") {
+         plugin = Compressor::create(s->cct, compression_type);
+         if (!plugin) {
+           ldpp_dout(this, 1) << "Cannot load plugin for compression type "
+                              << compression_type << dendl;
+         } else {
+           compressor.emplace(s->cct, plugin, filter);
+           filter = &*compressor;
+         }
+       }
+     }
+
+     bool again;
+     do {
+       ceph::bufferlist data;
+       len = get_data(data, again);
+
+       if (len < 0) {
+         op_ret = len;
+         return;
+       }
+
+       if (!len) {
+         break;
+       }
+
+       hash.Update((const unsigned char *)data.c_str(), data.length());
+       op_ret = filter->process(std::move(data), ofs);
+       if (op_ret < 0) {
+         // a failed write must abort the upload instead of being
+         // silently overwritten by the next chunk's result
+         return;
+       }
+
+       ofs += len;
+
+       if (ofs > max_len) {
+         op_ret = -ERR_TOO_LARGE;
+         return;
+       }
+     } while (again);
+
+     // flush
+     op_ret = filter->process({}, ofs);
+     if (op_ret < 0) {
+       return;
+     }
+
+     /* The lower bound applies to the amount of data written (ofs), just
+      * like the upper bound above; `len` only holds the size of the last
+      * chunk read. */
+     if (ofs < min_len) {
+       op_ret = -ERR_TOO_SMALL;
+       return;
+     }
+
+     s->obj_size = ofs;
+
+
+     op_ret = store->check_quota(s->bucket_owner.get_id(), s->bucket,
+                                 user_quota, bucket_quota, s->obj_size);
+     if (op_ret < 0) {
+       return;
+     }
+
+     op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+     if (op_ret < 0) {
+       return;
+     }
+
+     hash.Final(m);
+     buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+
+     etag = calc_md5;
+
+     if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
+       op_ret = -ERR_BAD_DIGEST;
+       return;
+     }
+
+     bl.append(etag.c_str(), etag.size());
+     emplace_attr(RGW_ATTR_ETAG, std::move(bl));
+
+     policy.encode(aclbl);
+     emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
+
+     const std::string content_type = get_current_content_type();
+     if (! content_type.empty()) {
+       ceph::bufferlist ct_bl;
+       // size() + 1: the terminating NUL is stored with the value
+       ct_bl.append(content_type.c_str(), content_type.size() + 1);
+       emplace_attr(RGW_ATTR_CONTENT_TYPE, std::move(ct_bl));
+     }
+
+     if (compressor && compressor->is_compressed()) {
+       ceph::bufferlist tmp;
+       RGWCompressionInfo cs_info;
+       cs_info.compression_type = plugin->get_type_name();
+       cs_info.orig_size = s->obj_size;
+       cs_info.blocks = move(compressor->get_compression_blocks());
+       encode(cs_info, tmp);
+       emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp));
+     }
+
+     op_ret = processor.complete(s->obj_size, etag, nullptr, real_time(), attrs,
+                                 (delete_at ? *delete_at : real_time()),
+                                 nullptr, nullptr, nullptr, nullptr, nullptr);
+     if (op_ret < 0) {
+       return;
+     }
+   } while (is_next_file_to_upload());
+
+   const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), etag, rgw::notify::ObjectCreatedPost, store);
+   if (ret < 0) {
+     ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+     // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+     // this should be global conf (probably returnign a different handler)
+     // so we don't need to read the configured values before we perform it
+   }
+ }
+
+
+ /* Pull the TempURL key attributes out of the generic attribute sets.
+  *
+  * A key present in add_attrs is moved into temp_url_keys (slot 0 for
+  * RGW_ATTR_TEMPURL_KEY1, slot 1 for RGW_ATTR_TEMPURL_KEY2) and erased from
+  * add_attrs. A key listed in rmattr_names sets its slot to an empty
+  * string. */
+ void RGWPutMetadataAccount::filter_out_temp_url(map<string, bufferlist>& add_attrs,
+                                                 const set<string>& rmattr_names,
+                                                 map<int, string>& temp_url_keys)
+ {
+   const auto extract_key = [&add_attrs, &temp_url_keys](const string& attr_name,
+                                                         const int slot) {
+     const auto found = add_attrs.find(attr_name);
+     if (found == add_attrs.end()) {
+       return;
+     }
+     temp_url_keys[slot] = found->second.c_str();
+     add_attrs.erase(found);
+   };
+
+   extract_key(RGW_ATTR_TEMPURL_KEY1, 0);
+   extract_key(RGW_ATTR_TEMPURL_KEY2, 1);
+
+   for (const string& removed : rmattr_names) {
+     if (removed == RGW_ATTR_TEMPURL_KEY1) {
+       temp_url_keys[0] = string();
+     }
+     if (removed == RGW_ATTR_TEMPURL_KEY2) {
+       temp_url_keys[1] = string();
+     }
+   }
+ }
+
+ /* Prepare an account-metadata update before permissions are verified.
+  *
+  * Runs the base-class initialisation, parses the request parameters, loads
+  * the user's current attributes and builds the new attribute set. TempURL
+  * keys and quota settings are extracted from that set here so that
+  * verify_permission() can decide which privileges the request needs.
+  *
+  * Returns 0 on success or the first negative error encountered. */
+ int RGWPutMetadataAccount::init_processing()
+ {
+   /* First, go to the base class. At the time of writing the method was
+    * responsible only for initializing the quota. This isn't necessary
+    * here as we are touching metadata only. I'm putting this call only
+    * for the future. */
+   if (op_ret = RGWOp::init_processing(); op_ret < 0) {
+     return op_ret;
+   }
+
+   if (op_ret = get_params(); op_ret < 0) {
+     return op_ret;
+   }
+
+   if (op_ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, orig_attrs,
+                                          &acct_op_tracker);
+       op_ret < 0) {
+     return op_ret;
+   }
+
+   if (has_policy) {
+     bufferlist encoded_acl;
+     policy.encode(encoded_acl);
+     attrs.emplace(RGW_ATTR_ACL, std::move(encoded_acl));
+   }
+
+   if (op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, false);
+       op_ret < 0) {
+     return op_ret;
+   }
+   prepare_add_del_attrs(orig_attrs, rmattr_names, attrs);
+   populate_with_generic_attrs(s, attrs);
+
+   /* Try extract the TempURL-related stuff now to allow verify_permission
+    * evaluate whether we need FULL_CONTROL or not. */
+   filter_out_temp_url(attrs, rmattr_names, temp_url_keys);
+
+   /* The same with quota except a client needs to be reseller admin. */
+   op_ret = filter_out_quota_info(attrs, rmattr_names, new_quota,
+                                  &new_quota_extracted);
+   return op_ret < 0 ? op_ret : 0;
+ }
+
+ /* Permission check for account-metadata updates.
+  *
+  * Returns -EACCES for anonymous callers or callers without user-level
+  * write permission, -EPERM when TempURL keys are being changed without
+  * FULL_CONTROL, -EACCES when a quota change was requested (see the comment
+  * in the body), and 0 otherwise. */
+ int RGWPutMetadataAccount::verify_permission()
+ {
+   const bool anonymous = s->auth.identity->is_anonymous();
+   if (anonymous || !verify_user_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+     return -EACCES;
+   }
+
+   /* Altering TempURL keys requires FULL_CONTROL. */
+   const bool touches_temp_url = !temp_url_keys.empty();
+   if (touches_temp_url && s->perm_mask != RGW_PERM_FULL_CONTROL) {
+     return -EPERM;
+   }
+
+   /* We are failing this intensionally to allow system user/reseller admin
+    * override in rgw_process.cc. This is the way to specify a given RGWOp
+    * expect extra privileges. */
+   return new_quota_extracted ? -EACCES : 0;
+ }
+
+ /* Apply the account-metadata update prepared by init_processing(): reload
+  * the user info, merge in the extracted TempURL keys and quota, and store
+  * the result together with the new attribute set. The outcome is left in
+  * op_ret. */
+ void RGWPutMetadataAccount::execute()
+ {
+   /* Params have been extracted earlier. See init_processing(). */
+   RGWUserInfo updated_info;
+   op_ret = rgw_get_user_info_by_uid(store, s->user->user_id, updated_info,
+                                     &acct_op_tracker);
+   if (op_ret < 0) {
+     return;
+   }
+
+   /* Handle the TempURL-related stuff. */
+   for (auto& [slot, key] : temp_url_keys) {
+     updated_info.temp_url_keys[slot] = std::move(key);
+   }
+
+   /* Handle the quota extracted at the verify_permission step. */
+   if (new_quota_extracted) {
+     updated_info.user_quota = std::move(new_quota);
+   }
+
+   /* We are passing here the current (old) user info to allow the function
+    * optimize-out some operations. */
+   op_ret = rgw_store_user_info(store, updated_info, s->user,
+                                &acct_op_tracker, real_time(), false, &attrs);
+ }
+
+ /* Bucket-metadata updates require write permission on the bucket, checked
+  * without IAM policies. Returns 0 when granted, -EACCES otherwise. */
+ int RGWPutMetadataBucket::verify_permission()
+ {
+   const bool may_write = verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE);
+   return may_write ? 0 : -EACCES;
+ }
+
+ // Pre-execution hook for bucket-metadata updates: delegates to the shared
+ // rgw_bucket_object_pre_exec() helper with the request state.
+ void RGWPutMetadataBucket::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Update bucket metadata (ACL, CORS, quota, Swift versioning location,
+  * website configuration and generic attributes).
+  *
+  * A request naming a placement rule different from the bucket's is
+  * rejected with -EEXIST. The actual update runs inside
+  * retry_raced_bucket_write(). The outcome is left in op_ret. */
+ void RGWPutMetadataBucket::execute()
+ {
+   if (op_ret = get_params(); op_ret < 0) {
+     return;
+   }
+
+   if (op_ret = rgw_get_request_metadata(s->cct, s->info, attrs, false);
+       op_ret < 0) {
+     return;
+   }
+
+   const bool placement_mismatch =
+       !placement_rule.empty() &&
+       placement_rule != s->bucket_info.placement_rule;
+   if (placement_mismatch) {
+     op_ret = -EEXIST;
+     return;
+   }
+
+   const auto apply_update = [this] {
+     /* Encode special metadata first as we're using std::map::emplace under
+      * the hood. This method will add the new items only if the map doesn't
+      * contain such keys yet. */
+     if (has_policy) {
+       if (s->dialect.compare("swift") == 0) {
+         auto old_policy =
+             static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get());
+         auto new_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(&policy);
+         new_policy->filter_merge(policy_rw_mask, old_policy);
+         policy = *new_policy;
+       }
+       buffer::list acl_bl;
+       policy.encode(acl_bl);
+       emplace_attr(RGW_ATTR_ACL, std::move(acl_bl));
+     }
+
+     if (has_cors) {
+       buffer::list cors_bl;
+       cors_config.encode(cors_bl);
+       emplace_attr(RGW_ATTR_CORS, std::move(cors_bl));
+     }
+
+     /* It's supposed that following functions WILL NOT change any
+      * special attributes (like RGW_ATTR_ACL) if they are already
+      * present in attrs. */
+     prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
+     populate_with_generic_attrs(s, attrs);
+
+     /* According to the Swift's behaviour and its container_quota
+      * WSGI middleware implementation: anyone with write permissions
+      * is able to set the bucket quota. This stays in contrast to
+      * account quotas that can be set only by clients holding
+      * reseller admin privileges. */
+     op_ret = filter_out_quota_info(attrs, rmattr_names, s->bucket_info.quota);
+     if (op_ret < 0) {
+       return op_ret;
+     }
+
+     if (swift_ver_location) {
+       s->bucket_info.swift_ver_location = *swift_ver_location;
+       s->bucket_info.swift_versioning = (!swift_ver_location->empty());
+     }
+
+     /* Web site of Swift API. */
+     filter_out_website(attrs, rmattr_names, s->bucket_info.website_conf);
+     s->bucket_info.has_website = !s->bucket_info.website_conf.is_empty();
+
+     /* Setting attributes also stores the provided bucket info. Due
+      * to this fact, the new quota settings can be serialized with
+      * the same call. */
+     op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
+                                   &s->bucket_info.objv_tracker);
+     return op_ret;
+   };
+
+   op_ret = retry_raced_bucket_write(store, s, apply_update);
+ }
+
+ /* Object-metadata updates require write permission on the object, checked
+  * without IAM policies. Returns 0 when granted, -EACCES otherwise. */
+ int RGWPutMetadataObject::verify_permission()
+ {
+   // This looks to be something specific to Swift. We could add
+   // operations like swift:PutMetadataObject to the Policy Engine.
+   const bool may_write = verify_object_permission_no_policy(this, s, RGW_PERM_WRITE);
+   return may_write ? 0 : -EACCES;
+ }
+
+ // Pre-execution hook for object-metadata updates: delegates to the shared
+ // rgw_bucket_object_pre_exec() helper with the request state.
+ void RGWPutMetadataObject::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Replace the metadata of an existing object.
+  *
+  * The request metadata is merged with the object's current attributes
+  * (yielding a set to write and a set to remove), generic attributes, the
+  * delete-at marker and an optional DLO manifest are added, and the result
+  * is written with set_attrs(). An expired object is reported as -ENOENT.
+  * The outcome is left in op_ret. */
+ void RGWPutMetadataObject::execute()
+ {
+   rgw_obj obj(s->bucket, s->object);
+   rgw_obj target_obj;
+   map<string, bufferlist> new_attrs, current_attrs, removed_attrs;
+
+   store->set_atomic(s->obj_ctx, obj);
+
+   if (op_ret = get_params(); op_ret < 0) {
+     return;
+   }
+
+   if (op_ret = rgw_get_request_metadata(s->cct, s->info, new_attrs);
+       op_ret < 0) {
+     return;
+   }
+
+   /* check if obj exists, read orig attrs */
+   if (op_ret = get_obj_attrs(store, s, obj, current_attrs, &target_obj);
+       op_ret < 0) {
+     return;
+   }
+
+   /* Check whether the object has expired. Swift API documentation
+    * stands that we should return 404 Not Found in such case. */
+   if (need_object_expiration() && object_is_expired(current_attrs)) {
+     op_ret = -ENOENT;
+     return;
+   }
+
+   /* Filter currently existing attributes. */
+   prepare_add_del_attrs(current_attrs, new_attrs, removed_attrs);
+   populate_with_generic_attrs(s, new_attrs);
+   encode_delete_at_attr(delete_at, new_attrs);
+
+   if (dlo_manifest) {
+     op_ret = encode_dlo_manifest_attr(dlo_manifest, new_attrs);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "bad user manifest: " << dlo_manifest << dendl;
+       return;
+     }
+   }
+
+   op_ret = store->set_attrs(s->obj_ctx, s->bucket_info, target_obj, new_attrs, &removed_attrs);
+ }
+
+ /* Delete a Static Large Object: every segment listed in the encoded SLO
+  * manifest `bl`, followed by the manifest object itself.
+  *
+  * Each manifest entry path is split at the first '/' found from index 1
+  * onwards into a bucket name and an object key, both URL-decoded.
+  *
+  * Returns 0 on success, -EIO when the manifest cannot be decoded, -ENOMEM
+  * when the deleter cannot be allocated, -EINVAL for an entry path without
+  * that separator, or the negative result of the bulk delete. */
+ int RGWDeleteObj::handle_slo_manifest(bufferlist& bl)
+ {
+   RGWSLOInfo slo_info;
+   auto bliter = bl.cbegin();
+   try {
+     decode(slo_info, bliter);
+   } catch (buffer::error&) {
+     ldpp_dout(this, 0) << "ERROR: failed to decode slo manifest" << dendl;
+     return -EIO;
+   }
+
+   try {
+     deleter = std::make_unique<RGWBulkDelete::Deleter>(this, store, s);
+   } catch (const std::bad_alloc&) {
+     return -ENOMEM;
+   }
+
+   list<RGWBulkDelete::acct_path_t> items;
+   for (const auto& entry : slo_info.entries) {
+     const string& entry_path = entry.path;
+
+     const size_t slash = entry_path.find('/', 1 /* skip first slash */);
+     if (slash == boost::string_view::npos) {
+       return -EINVAL;
+     }
+
+     RGWBulkDelete::acct_path_t segment;
+     segment.bucket_name = url_decode(entry_path.substr(1, slash - 1));
+     segment.obj_key = url_decode(entry_path.substr(slash + 1));
+
+     items.push_back(segment);
+   }
+
+   /* Request removal of the manifest object itself. */
+   RGWBulkDelete::acct_path_t manifest;
+   manifest.bucket_name = s->bucket_name;
+   manifest.obj_key = s->object;
+   items.push_back(manifest);
+
+   const int ret = deleter->delete_chunk(items);
+   return ret < 0 ? ret : 0;
+ }
+
+ /*
+  * Authorize an object delete.
+  *
+  * Reads the request parameters first. When IAM policies are present it
+  * (a) clears bypass_perm if s3:BypassGovernanceRetention is denied on an
+  * object-lock bucket and (b) evaluates s3:DeleteObject or
+  * s3:DeleteObjectVersion against the user and bucket policies. An explicit
+  * Deny returns -EACCES and an Allow returns 0; otherwise the bucket ACL
+  * (RGW_PERM_WRITE) decides, followed by the MFA check for versioned deletes.
+  */
+ int RGWDeleteObj::verify_permission()
+ {
+   const int params_ret = get_params();
+   if (params_ret) {
+     return params_ret;
+   }
+
+   if (s->iam_policy || ! s->iam_user_policies.empty()) {
+     const ARN obj_arn(s->bucket, s->object.name);
+
+     if (s->bucket_info.obj_lock_enabled() && bypass_governance_mode) {
+       auto bypass_res = eval_user_policies(s->iam_user_policies, s->env, boost::none,
+                                            rgw::IAM::s3BypassGovernanceRetention, obj_arn);
+       if (bypass_res == Effect::Pass && s->iam_policy) {
+         /* User policies were silent: let the bucket policy decide. */
+         bypass_res = s->iam_policy->eval(s->env, *s->auth.identity,
+                                          rgw::IAM::s3BypassGovernanceRetention, obj_arn);
+       }
+       if (bypass_res == Effect::Deny) {
+         bypass_perm = false;
+       }
+     }
+
+     const auto delete_action = s->object.instance.empty() ?
+                                rgw::IAM::s3DeleteObject :
+                                rgw::IAM::s3DeleteObjectVersion;
+
+     const auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env,
+                                                    boost::none, delete_action, obj_arn);
+     if (usr_policy_res == Effect::Deny) {
+       return -EACCES;
+     }
+
+     rgw::IAM::Effect bucket_policy_res = Effect::Pass;
+     if (s->iam_policy) {
+       bucket_policy_res = s->iam_policy->eval(s->env, *s->auth.identity,
+                                               delete_action, obj_arn);
+     }
+     if (bucket_policy_res == Effect::Deny) {
+       return -EACCES;
+     }
+     if (bucket_policy_res == Effect::Allow || usr_policy_res == Effect::Allow) {
+       return 0;
+     }
+   }
+
+   if (!verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+     return -EACCES;
+   }
+
+   const bool versioned_delete_without_mfa = s->bucket_info.mfa_enabled() &&
+                                             !s->object.instance.empty() &&
+                                             !s->mfa_verified;
+   if (versioned_delete_without_mfa) {
+     ldpp_dout(this, 5) << "NOTICE: object delete request with a versioned object, mfa auth not provided" << dendl;
+     return -ERR_MFA_REQUIRED;
+   }
+
+   return 0;
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWDeleteObj::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Carry out the object delete request; the outcome is left in op_ret.
+  *
+  * Steps, in order: read the object's attributes; for versioned deletes on an
+  * object-lock bucket enforce retention and legal hold; for multipart_delete
+  * requests hand the SLO manifest to handle_slo_manifest() and stop; otherwise
+  * try a Swift versioning restore and, if nothing was restored, perform a
+  * regular delete. A notification is published at the end.
+  */
+ void RGWDeleteObj::execute()
+ {
+ if (!s->bucket_exists) {
+ op_ret = -ERR_NO_SUCH_BUCKET;
+ return;
+ }
+
+ rgw_obj obj(s->bucket, s->object);
+ map<string, bufferlist> attrs;
+
+ // Lock checks apply only when a specific version is targeted.
+ bool check_obj_lock = obj.key.have_instance() && s->bucket_info.obj_lock_enabled();
+
+ if (!s->object.empty()) {
+ /* check if obj exists, read orig attrs */
+ op_ret = get_obj_attrs(store, s, obj, attrs);
+
+ // A failed attribute read is fatal only for the callers that need the
+ // attributes: expiration handling and SLO (multipart) delete.
+ if (need_object_expiration() || multipart_delete) {
+ if (op_ret < 0) {
+ // failed to get attributes
+ return;
+ }
+ }
+
+ if (check_obj_lock) {
+ if (op_ret < 0) {
+ if (op_ret == -ENOENT) {
+ /* object may be a delete marker, skip check_obj_lock */
+ check_obj_lock = false;
+ } else {
+ // failed to get attributes and check_obj_lock is needed
+ return;
+ }
+ }
+ }
+
+ if (check_obj_lock) {
+ auto aiter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+ if (aiter != attrs.end()) {
+ RGWObjectRetention obj_retention;
+ try {
+ decode(obj_retention, aiter->second);
+ } catch (buffer::error& err) {
+ ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl;
+ op_ret = -EIO;
+ return;
+ }
+ // Retention still in force: only a GOVERNANCE-mode lock may be bypassed,
+ // and only when the request asked for it and policy did not deny it.
+ if (ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) > ceph_clock_now()) {
+ if (obj_retention.get_mode().compare("GOVERNANCE") != 0 || !bypass_perm || !bypass_governance_mode) {
+ op_ret = -EACCES;
+ return;
+ }
+ }
+ }
+ aiter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD);
+ if (aiter != attrs.end()) {
+ RGWObjectLegalHold obj_legal_hold;
+ try {
+ decode(obj_legal_hold, aiter->second);
+ } catch (buffer::error& err) {
+ ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl;
+ op_ret = -EIO;
+ return;
+ }
+ // An enabled legal hold blocks the delete unconditionally.
+ if (obj_legal_hold.is_enabled()) {
+ op_ret = -EACCES;
+ return;
+ }
+ }
+ }
+
+ if (multipart_delete) {
+ const auto slo_attr = attrs.find(RGW_ATTR_SLO_MANIFEST);
+
+ if (slo_attr != attrs.end()) {
+ op_ret = handle_slo_manifest(slo_attr->second);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "ERROR: failed to handle slo manifest ret=" << op_ret << dendl;
+ }
+ } else {
+ op_ret = -ERR_NOT_SLO_MANIFEST;
+ }
+
+ // The SLO path returns here, so no notification is published for it.
+ return;
+ }
+
+ RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
+ obj_ctx->set_atomic(obj);
+
+ bool ver_restored = false;
+ op_ret = store->swift_versioning_restore(*s->sysobj_ctx, *obj_ctx, s->bucket_owner.get_id(),
+ s->bucket_info, obj, ver_restored);
+ if (op_ret < 0) {
+ return;
+ }
+
+ if (!ver_restored) {
+ /* Swift's versioning mechanism hasn't found any previous version of
+ * the object that could be restored. This means we should proceed
+ * with the regular delete path. */
+ RGWRados::Object del_target(store, s->bucket_info, *obj_ctx, obj);
+ RGWRados::Object::Delete del_op(&del_target);
+
+ op_ret = get_system_versioning_params(s, &del_op.params.olh_epoch,
+ &del_op.params.marker_version_id);
+ if (op_ret < 0) {
+ return;
+ }
+
+ del_op.params.bucket_owner = s->bucket_owner.get_id();
+ del_op.params.versioning_status = s->bucket_info.versioning_status();
+ del_op.params.obj_owner = s->owner;
+ del_op.params.unmod_since = unmod_since;
+ del_op.params.high_precision_time = s->system_request; /* system request uses high precision time */
+
+ op_ret = del_op.delete_obj();
+ if (op_ret >= 0) {
+ delete_marker = del_op.result.delete_marker;
+ version_id = del_op.result.version_id;
+ }
+
+ /* Check whether the object has expired. Swift API documentation
+ * states that we should return 404 Not Found in such case.
+ * NOTE(review): this check runs after delete_obj() has already been
+ * attempted, so it only changes the reported result -- confirm that
+ * ordering is intended. */
+ if (need_object_expiration() && object_is_expired(attrs)) {
+ op_ret = -ENOENT;
+ return;
+ }
+ }
+
+ // -ECANCELED is reported to the client as success.
+ if (op_ret == -ECANCELED) {
+ op_ret = 0;
+ }
+ if (op_ret == -ERR_PRECONDITION_FAILED && no_precondition_error) {
+ op_ret = 0;
+ }
+
+ // cache the object's tags and metadata in the request
+ // so they can be used by the notification mechanism
+ try {
+ populate_tags_in_request(s, attrs);
+ } catch (buffer::error& err) {
+ ldpp_dout(this, 5) << "WARNING: failed to populate delete request with object tags: " << err.what() << dendl;
+ }
+ populate_metadata_in_request(s, attrs);
+ } else {
+ op_ret = -EINVAL;
+ }
+
+ // NOTE(review): the notification is published whatever op_ret holds,
+ // including the -EINVAL branch above. attrs[RGW_ATTR_ETAG] uses
+ // map::operator[], so a missing ETag yields an empty value.
+ const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), attrs[RGW_ATTR_ETAG].to_str(),
+ delete_marker && s->object.instance.empty() ? rgw::notify::ObjectRemovedDeleteMarkerCreated : rgw::notify::ObjectRemovedDelete,
+ store);
+ if (ret < 0) {
+ ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+ // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+ // this should be global conf (probably returning a different handler)
+ // so we don't need to read the configured values before we perform it
+ }
+ }
+
+ /*
+  * Split a copy-source location of the form "[/]bucket/object[?query]" into
+  * its bucket name and object key.
+  *
+  * @param url_src      raw (still url-encoded) copy-source string
+  * @param bucket_name  receives the url-decoded bucket name
+  * @param key          receives the url-decoded object name and, when the
+  *                     query part is non-empty, the value of "versionId"
+  *                     as the instance
+  * @return false when there is no '/' separating bucket and object or the
+  *         object name is empty; true otherwise. bucket_name and key.name
+  *         may already have been written when false is returned for an
+  *         empty object name.
+  */
+ bool RGWCopyObj::parse_copy_location(const boost::string_view& url_src,
+                                      string& bucket_name,
+                                      rgw_obj_key& key)
+ {
+   boost::string_view name_str;
+   boost::string_view params_str;
+
+   // search for ? before url-decoding so we don't accidentally match %3F
+   const size_t query_pos = url_src.find('?');
+   if (query_pos == boost::string_view::npos) {
+     name_str = url_src;
+   } else {
+     name_str = url_src.substr(0, query_pos);
+     params_str = url_src.substr(query_pos + 1);
+   }
+
+   // Trim one leading slash. The emptiness check matters: an input such as
+   // "" or "?versionId=x" leaves name_str empty, and indexing an empty
+   // string_view is out of bounds.
+   if (!name_str.empty() && name_str[0] == '/') {
+     name_str.remove_prefix(1);
+   }
+
+   const size_t sep_pos = name_str.find('/');
+   if (sep_pos == boost::string_view::npos) {
+     return false;
+   }
+
+   bucket_name = url_decode(name_str.substr(0, sep_pos));
+   key.name = url_decode(name_str.substr(sep_pos + 1));
+
+   if (key.name.empty()) {
+     return false;
+   }
+
+   if (! params_str.empty()) {
+     RGWHTTPArgs args;
+     args.set(params_str.to_string());
+     args.parse();
+
+     key.instance = args.get("versionId", nullptr);
+   }
+
+   return true;
+ }
+
+ /*
+  * Authorize a copy: the caller must be able to read the source object and
+  * write to the destination bucket.
+  *
+  * Loads the source bucket info, checks the source object's policy/ACL when
+  * the source is local, loads the destination bucket info (reusing the
+  * source's when both bucket names match), checks the destination bucket
+  * policy/ACL and finally calls init_dest_policy().
+  *
+  * Returns 0 when permitted, -EACCES when denied, -ERR_NO_SUCH_BUCKET when a
+  * bucket lookup reports -ENOENT, or another negative error from a helper.
+  */
+ int RGWCopyObj::verify_permission()
+ {
+ RGWAccessControlPolicy src_acl(s->cct);
+ boost::optional<Policy> src_policy;
+ op_ret = get_params();
+ if (op_ret < 0)
+ return op_ret;
+
+ op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+ if (op_ret < 0) {
+ return op_ret;
+ }
+ map<string, bufferlist> src_attrs;
+
+ if (s->bucket_instance_id.empty()) {
+ op_ret = store->get_bucket_info(*s->sysobj_ctx, src_tenant_name, src_bucket_name, src_bucket_info, NULL, &src_attrs);
+ } else {
+ /* will only happen in intra region sync where the source and dest bucket is the same */
+ op_ret = store->get_bucket_instance_info(*s->sysobj_ctx, s->bucket_instance_id, src_bucket_info, NULL, &src_attrs);
+ }
+ if (op_ret < 0) {
+ if (op_ret == -ENOENT) {
+ op_ret = -ERR_NO_SUCH_BUCKET;
+ }
+ return op_ret;
+ }
+
+ src_bucket = src_bucket_info.bucket;
+
+ /* get buckets info (source and dest) */
+ // Source-side checks run only for a local source with no source zone set.
+ if (s->local_source && source_zone.empty()) {
+ rgw_obj src_obj(src_bucket, src_object);
+ store->set_atomic(s->obj_ctx, src_obj);
+ store->set_prefetch_data(s->obj_ctx, src_obj);
+
+ rgw_placement_rule src_placement;
+
+ /* check source object permissions */
+ op_ret = read_obj_policy(store, s, src_bucket_info, src_attrs, &src_acl, &src_placement.storage_class,
+ src_policy, src_bucket, src_object);
+ if (op_ret < 0) {
+ return op_ret;
+ }
+
+ /* follow up on previous checks that required reading source object head */
+ if (need_to_check_storage_class) {
+ src_placement.inherit_from(src_bucket_info.placement_rule);
+
+ op_ret = check_storage_class(src_placement);
+ if (op_ret < 0) {
+ return op_ret;
+ }
+ }
+
+ /* admin request overrides permission checks */
+ if (!s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) {
+ if (src_policy) {
+ auto e = src_policy->eval(s->env, *s->auth.identity,
+ src_object.instance.empty() ?
+ rgw::IAM::s3GetObject :
+ rgw::IAM::s3GetObjectVersion,
+ ARN(src_obj));
+ // Deny is final; Pass falls back to the ACL; Allow needs no ACL check.
+ if (e == Effect::Deny) {
+ return -EACCES;
+ } else if (e == Effect::Pass &&
+ !src_acl.verify_permission(this, *s->auth.identity, s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
+ } else if (!src_acl.verify_permission(this, *s->auth.identity,
+ s->perm_mask,
+ RGW_PERM_READ)) {
+ return -EACCES;
+ }
+ }
+ }
+
+ RGWAccessControlPolicy dest_bucket_policy(s->cct);
+ map<string, bufferlist> dest_attrs;
+
+ if (src_bucket_name.compare(dest_bucket_name) == 0) { /* will only happen if s->local_source
+ or intra region sync */
+ dest_bucket_info = src_bucket_info;
+ dest_attrs = src_attrs;
+ } else {
+ op_ret = store->get_bucket_info(*s->sysobj_ctx, dest_tenant_name, dest_bucket_name,
+ dest_bucket_info, nullptr, &dest_attrs);
+ if (op_ret < 0) {
+ if (op_ret == -ENOENT) {
+ op_ret = -ERR_NO_SUCH_BUCKET;
+ }
+ return op_ret;
+ }
+ }
+
+ dest_bucket = dest_bucket_info.bucket;
+
+ rgw_obj dest_obj(dest_bucket, dest_object);
+ store->set_atomic(s->obj_ctx, dest_obj);
+
+ /* check dest bucket permissions */
+ op_ret = read_bucket_policy(store, s, dest_bucket_info, dest_attrs,
+ &dest_bucket_policy, dest_bucket);
+ if (op_ret < 0) {
+ return op_ret;
+ }
+ auto dest_iam_policy = get_iam_policy_from_attr(s->cct, store, dest_attrs, dest_bucket.tenant);
+ /* admin request overrides permission checks */
+ // NOTE(review): the owner below is taken from dest_policy, which this
+ // function only initializes later via init_dest_policy(); the policy read
+ // just above is dest_bucket_policy. Confirm which owner is intended.
+ if (! s->auth.identity->is_admin_of(dest_policy.get_owner().get_id())){
+ if (dest_iam_policy != boost::none) {
+ // Expose the copy-specific request headers to policy conditions.
+ rgw_add_to_iam_environment(s->env, "s3:x-amz-copy-source", copy_source);
+ if (md_directive)
+ rgw_add_to_iam_environment(s->env, "s3:x-amz-metadata-directive",
+ *md_directive);
+
+ auto e = dest_iam_policy->eval(s->env, *s->auth.identity,
+ rgw::IAM::s3PutObject,
+ ARN(dest_obj));
+ if (e == Effect::Deny) {
+ return -EACCES;
+ } else if (e == Effect::Pass &&
+ ! dest_bucket_policy.verify_permission(this,
+ *s->auth.identity,
+ s->perm_mask,
+ RGW_PERM_WRITE)){
+ return -EACCES;
+ }
+ } else if (! dest_bucket_policy.verify_permission(this, *s->auth.identity, s->perm_mask,
+ RGW_PERM_WRITE)) {
+ return -EACCES;
+ }
+
+ }
+
+ op_ret = init_dest_policy();
+ if (op_ret < 0) {
+ return op_ret;
+ }
+
+ return 0;
+ }
+
+
+ /*
+  * Prepare the state shared by copy requests: parse the optional
+  * if-modified / if-unmodified timestamps, store the encoded destination
+  * ACL as RGW_ATTR_ACL, and collect request metadata and generic attributes
+  * into attrs.
+  *
+  * Returns 0 on success; -EINVAL when a timestamp does not parse, or the
+  * negative result of rgw_get_request_metadata(). op_ret mirrors the error.
+  */
+ int RGWCopyObj::init_common()
+ {
+   if (if_mod) {
+     if (parse_time(if_mod, &mod_time) < 0) {
+       return op_ret = -EINVAL;
+     }
+     mod_ptr = &mod_time;
+   }
+
+   if (if_unmod) {
+     if (parse_time(if_unmod, &unmod_time) < 0) {
+       return op_ret = -EINVAL;
+     }
+     unmod_ptr = &unmod_time;
+   }
+
+   bufferlist encoded_acl;
+   dest_policy.encode(encoded_acl);
+   emplace_attr(RGW_ATTR_ACL, std::move(encoded_acl));
+
+   op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
+   if (op_ret >= 0) {
+     populate_with_generic_attrs(s, attrs);
+     return 0;
+   }
+   return op_ret;
+ }
+
+ /* C-style progress callback: `param` is the RGWCopyObj that registered it
+  * (execute() passes `this`); forwards the offset to its progress_cb(). */
+ static void copy_obj_progress_cb(off_t ofs, void *param)
+ {
+   static_cast<RGWCopyObj *>(param)->progress_cb(ofs);
+ }
+
+ /*
+  * Report copy progress to the client. Does nothing unless
+  * rgw_copy_obj_progress is enabled and at least
+  * rgw_copy_obj_progress_every_bytes have been copied since the last report.
+  */
+ void RGWCopyObj::progress_cb(off_t ofs)
+ {
+   if (!s->cct->_conf->rgw_copy_obj_progress ||
+       ofs - last_ofs < s->cct->_conf->rgw_copy_obj_progress_every_bytes) {
+     return;
+   }
+
+   send_partial_response(ofs);
+   last_ofs = ofs;
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWCopyObj::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Perform the copy; the outcome is left in op_ret.
+  *
+  * Runs init_common(), picks the destination version instance, enforces
+  * quota for non-system requests, lets Swift versioning run against the
+  * destination, calls store->copy_obj() and finally publishes an
+  * ObjectCreatedCopy notification.
+  */
+ void RGWCopyObj::execute()
+ {
+ if (init_common() < 0)
+ return;
+
+ rgw_obj src_obj(src_bucket, src_object);
+ rgw_obj dst_obj(dest_bucket, dest_object);
+
+ RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+ // An explicit version id wins; otherwise a versioning-enabled destination
+ // bucket gets a freshly generated instance name.
+ if ( ! version_id.empty()) {
+ dst_obj.key.set_instance(version_id);
+ } else if (dest_bucket_info.versioning_enabled()) {
+ store->gen_rand_obj_instance_name(&dst_obj);
+ }
+
+ obj_ctx.set_atomic(src_obj);
+ obj_ctx.set_atomic(dst_obj);
+
+ encode_delete_at_attr(delete_at, attrs);
+
+ if (!s->system_request) { // no quota enforcement for system requests
+ // get src object size (cached in obj_ctx from verify_permission())
+ RGWObjState* astate = nullptr;
+ op_ret = store->get_obj_state(s->obj_ctx, src_bucket_info, src_obj,
+ &astate, true, false);
+ if (op_ret < 0) {
+ return;
+ }
+ // enforce quota against the destination bucket owner
+ op_ret = store->check_quota(dest_bucket_info.owner,
+ dest_bucket_info.bucket,
+ user_quota, bucket_quota,
+ astate->accounted_size);
+ if (op_ret < 0) {
+ return;
+ }
+ }
+
+ bool high_precision_time = (s->system_request);
+
+ /* Handle object versioning of Swift API. In case of copying to remote this
+ * should fail gently (op_ret == 0) as the dst_obj will not exist here. */
+ op_ret = store->swift_versioning_copy(obj_ctx,
+ dest_bucket_info.owner,
+ dest_bucket_info,
+ dst_obj);
+ if (op_ret < 0) {
+ return;
+ }
+
+ // The arguments below are positional; keep their order when editing.
+ op_ret = store->copy_obj(obj_ctx,
+ s->user->user_id,
+ &s->info,
+ source_zone,
+ dst_obj,
+ src_obj,
+ dest_bucket_info,
+ src_bucket_info,
+ s->dest_placement,
+ &src_mtime,
+ &mtime,
+ mod_ptr,
+ unmod_ptr,
+ high_precision_time,
+ if_match,
+ if_nomatch,
+ attrs_mod,
+ copy_if_newer,
+ attrs, RGWObjCategory::Main,
+ olh_epoch,
+ (delete_at ? *delete_at : real_time()),
+ (version_id.empty() ? NULL : &version_id),
+ &s->req_id, /* use req_id as tag */
+ &etag,
+ copy_obj_progress_cb, (void *)this
+ );
+
+ // NOTE(review): the notification is published without checking the
+ // result of copy_obj() above -- confirm that is intended.
+ const auto ret = rgw::notify::publish(s, s->object, s->obj_size, mtime, etag, rgw::notify::ObjectCreatedCopy, store);
+ if (ret < 0) {
+ ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+ // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+ // this should be global conf (probably returning a different handler)
+ // so we don't need to read the configured values before we perform it
+ }
+ }
+
+ /*
+  * Authorize reading an ACL.
+  *
+  * Bucket request (no object in the request): the bucket must exist and the
+  * caller needs s3:GetBucketAcl. Object request: the caller needs
+  * s3:GetObjectAcl, or s3:GetObjectVersionAcl when a version is named;
+  * rgw_iam_add_existing_objtags() is called first for every policy that
+  * conditions on S3_EXISTING_OBJTAG.
+  *
+  * Returns 0 when permitted, -ERR_NO_SUCH_BUCKET or -EACCES otherwise.
+  */
+ int RGWGetACLs::verify_permission()
+ {
+   if (s->object.empty()) {
+     if (!s->bucket_exists) {
+       return -ERR_NO_SUCH_BUCKET;
+     }
+     return verify_bucket_permission(this, s, rgw::IAM::s3GetBucketAcl) ? 0 : -EACCES;
+   }
+
+   auto iam_action = s->object.instance.empty() ?
+                     rgw::IAM::s3GetObjectAcl :
+                     rgw::IAM::s3GetObjectVersionAcl;
+
+   if (s->iam_policy && s->iam_policy->has_partial_conditional(S3_EXISTING_OBJTAG)) {
+     rgw_obj tagged_obj = rgw_obj(s->bucket, s->object);
+     rgw_iam_add_existing_objtags(store, s, tagged_obj, iam_action);
+   }
+   for (auto& user_policy : s->iam_user_policies) {
+     if (!user_policy.has_partial_conditional(S3_EXISTING_OBJTAG)) {
+       continue;
+     }
+     rgw_obj tagged_obj = rgw_obj(s->bucket, s->object);
+     rgw_iam_add_existing_objtags(store, s, tagged_obj, iam_action);
+   }
+
+   return verify_object_permission(this, s, iam_action) ? 0 : -EACCES;
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWGetACLs::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Serialize the ACL of the requested object, or of the bucket when the
+  * request names no object, to XML and store the text in `acls`.
+  */
+ void RGWGetACLs::execute()
+ {
+   RGWAccessControlPolicy* const selected_acl =
+     s->object.empty() ? s->bucket_acl.get() : s->object_acl.get();
+
+   stringstream xml_out;
+   static_cast<RGWAccessControlPolicy_S3*>(selected_acl)->to_xml(xml_out);
+   acls = xml_out.str();
+ }
+
+
+
+ /*
+  * Authorize writing an ACL.
+  *
+  * Adds the canned ACL and grant headers to the IAM environment, then checks
+  * s3:PutBucketAcl for bucket requests, or s3:PutObjectAcl /
+  * s3:PutObjectVersionAcl for object requests. For objects, the result of
+  * rgw_iam_add_existing_objtags() is stored in op_ret but not acted upon.
+  *
+  * Returns 0 when permitted, -EACCES otherwise.
+  */
+ int RGWPutACLs::verify_permission()
+ {
+   rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl);
+   rgw_add_grant_to_iam_environment(s->env, s);
+
+   if (s->object.empty()) {
+     return verify_bucket_permission(this, s, rgw::IAM::s3PutBucketAcl) ? 0 : -EACCES;
+   }
+
+   auto iam_action = s->object.instance.empty() ? rgw::IAM::s3PutObjectAcl
+                                                : rgw::IAM::s3PutObjectVersionAcl;
+   auto target = rgw_obj(s->bucket, s->object);
+   op_ret = rgw_iam_add_existing_objtags(store, s, target, iam_action);
+
+   return verify_object_permission(this, s, iam_action) ? 0 : -EACCES;
+ }
+
+ /* Returns 0 when verify_bucket_permission() grants
+  * s3:GetLifecycleConfiguration on the bucket, -EACCES otherwise. */
+ int RGWGetLC::verify_permission()
+ {
+   return verify_bucket_permission(this, s, rgw::IAM::s3GetLifecycleConfiguration)
+            ? 0
+            : -EACCES;
+ }
+
+ /* Returns 0 when verify_bucket_permission() grants
+  * s3:PutLifecycleConfiguration on the bucket, -EACCES otherwise. */
+ int RGWPutLC::verify_permission()
+ {
+   return verify_bucket_permission(this, s, rgw::IAM::s3PutLifecycleConfiguration)
+            ? 0
+            : -EACCES;
+ }
+
+ /* Deleting a lifecycle configuration is checked against the same action as
+  * writing one (s3:PutLifecycleConfiguration). Returns 0 when granted,
+  * -EACCES otherwise. */
+ int RGWDeleteLC::verify_permission()
+ {
+   return verify_bucket_permission(this, s, rgw::IAM::s3PutLifecycleConfiguration)
+            ? 0
+            : -EACCES;
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWPutACLs::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWGetLC::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWPutLC::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWDeleteLC::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Replace the ACL of the requested object, or of the bucket when the
+  * request names no object. The outcome is left in op_ret.
+  *
+  * The new ACL comes either from the XML request body or, when a canned ACL
+  * or ACL header is present, from get_policy_from_state(). It is parsed,
+  * checked against the grant limit, rebuilt against the existing owner and
+  * then written as RGW_ATTR_ACL. Bucket ACL changes are first forwarded to
+  * the metadata master when this zone is not the master.
+  */
+ void RGWPutACLs::execute()
+ {
+ bufferlist bl;
+
+ RGWAccessControlPolicy_S3 *policy = NULL;
+ RGWACLXMLParser_S3 parser(s->cct);
+ RGWAccessControlPolicy_S3 new_policy(s->cct);
+ stringstream ss;
+ rgw_obj obj;
+
+ op_ret = 0; /* XXX redundant? */
+
+ if (!parser.init()) {
+ op_ret = -EINVAL;
+ return;
+ }
+
+
+ RGWAccessControlPolicy* const existing_policy = \
+ (s->object.empty() ? s->bucket_acl.get() : s->object_acl.get());
+
+ // The owner is taken from the existing ACL, not from the request.
+ owner = existing_policy->get_owner();
+
+ op_ret = get_params();
+ if (op_ret < 0) {
+ if (op_ret == -ERANGE) {
+ ldpp_dout(this, 4) << "The size of request xml data is larger than the max limitation, data size = "
+ << s->length << dendl;
+ op_ret = -ERR_MALFORMED_XML;
+ s->err.message = "The XML you provided was larger than the maximum " +
+ std::to_string(s->cct->_conf->rgw_max_put_param_size) +
+ " bytes allowed.";
+ }
+ return;
+ }
+
+ char* buf = data.c_str();
+ ldpp_dout(this, 15) << "read len=" << data.length() << " data=" << (buf ? buf : "") << dendl;
+
+ // A canned ACL and an explicit XML body are mutually exclusive.
+ if (!s->canned_acl.empty() && data.length() > 0) {
+ op_ret = -EINVAL;
+ return;
+ }
+
+ // For canned ACLs / ACL headers, replace the body with the generated policy
+ // text so that both paths go through the same parser below.
+ if (!s->canned_acl.empty() || s->has_acl_header) {
+ op_ret = get_policy_from_state(store, s, ss);
+ if (op_ret < 0)
+ return;
+
+ data.clear();
+ data.append(ss.str());
+ }
+
+ if (!parser.parse(data.c_str(), data.length(), 1)) {
+ op_ret = -EINVAL;
+ return;
+ }
+ policy = static_cast<RGWAccessControlPolicy_S3 *>(parser.find_first("AccessControlPolicy"));
+ if (!policy) {
+ op_ret = -EINVAL;
+ return;
+ }
+
+ const RGWAccessControlList& req_acl = policy->get_acl();
+ const multimap<string, ACLGrant>& req_grant_map = req_acl.get_grant_map();
+ // Fallback limit used when rgw_acl_grants_max_num is negative.
+ #define ACL_GRANTS_MAX_NUM 100
+ int max_num = s->cct->_conf->rgw_acl_grants_max_num;
+ if (max_num < 0) {
+ max_num = ACL_GRANTS_MAX_NUM;
+ }
+
+ int grants_num = req_grant_map.size();
+ if (grants_num > max_num) {
+ ldpp_dout(this, 4) << "An acl can have up to " << max_num
+ << " grants, request acl grants num: " << grants_num << dendl;
+ op_ret = -ERR_MALFORMED_ACL_ERROR;
+ s->err.message = "The request is rejected, because the acl grants number you requested is larger than the maximum "
+ + std::to_string(max_num)
+ + " grants allowed in an acl.";
+ return;
+ }
+
+ // forward bucket acl requests to meta master zone
+ if (s->object.empty() && !store->svc.zone->is_meta_master()) {
+ bufferlist in_data;
+ // include acl data unless it was generated from a canned_acl
+ if (s->canned_acl.empty()) {
+ in_data.append(data);
+ }
+ op_ret = forward_request_to_master(s, NULL, store, in_data, NULL);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+ return;
+ }
+ }
+
+ if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+ ldpp_dout(this, 15) << "Old AccessControlPolicy";
+ policy->to_xml(*_dout);
+ *_dout << dendl;
+ }
+
+ op_ret = policy->rebuild(store, &owner, new_policy);
+ if (op_ret < 0)
+ return;
+
+ if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+ ldpp_dout(this, 15) << "New AccessControlPolicy:";
+ new_policy.to_xml(*_dout);
+ *_dout << dendl;
+ }
+
+ new_policy.encode(bl);
+ map<string, bufferlist> attrs;
+
+ if (!s->object.empty()) {
+ obj = rgw_obj(s->bucket, s->object);
+ store->set_atomic(s->obj_ctx, obj);
+ // if instance is empty, we should modify the latest object
+ op_ret = modify_obj_attr(store, s, obj, RGW_ATTR_ACL, bl);
+ } else {
+ // Bucket ACL: copy the current bucket attrs and overwrite only the ACL.
+ attrs = s->bucket_attrs;
+ attrs[RGW_ATTR_ACL] = bl;
+ op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs, &s->bucket_info.objv_tracker);
+ }
+ if (op_ret == -ECANCELED) {
+ op_ret = 0; /* lost a race, but it's ok because acls are immutable */
+ }
+ }
+
+ /*
+  * Store a new lifecycle configuration on the bucket. The outcome is left in
+  * op_ret (and s->err.message for client-facing errors).
+  *
+  * Requires a Content-MD5 header; the base64-decoded value must equal the
+  * MD5 of the request body. The body is then parsed as a
+  * LifecycleConfiguration, rebuilt, forwarded to the metadata master when
+  * this zone is not the master, and finally saved through the LC subsystem.
+  */
+ void RGWPutLC::execute()
+ {
+   RGWLifecycleConfiguration_S3 config(s->cct);
+   RGWXMLParser parser;
+   RGWLifecycleConfiguration_S3 new_config(s->cct);
+
+   content_md5 = s->info.env->get("HTTP_CONTENT_MD5");
+   if (content_md5 == nullptr) {
+     op_ret = -ERR_INVALID_REQUEST;
+     s->err.message = "Missing required header for this request: Content-MD5";
+     ldpp_dout(this, 5) << s->err.message << dendl;
+     return;
+   }
+
+   std::string content_md5_bin;
+   try {
+     content_md5_bin = rgw::from_base64(boost::string_view(content_md5));
+   } catch (...) {
+     s->err.message = "Request header Content-MD5 contains character "
+                      "that is not base64 encoded.";
+     ldpp_dout(this, 5) << s->err.message << dendl;
+     op_ret = -ERR_BAD_DIGEST;
+     return;
+   }
+
+   if (!parser.init()) {
+     op_ret = -EINVAL;
+     return;
+   }
+
+   op_ret = get_params();
+   if (op_ret < 0)
+     return;
+
+   char* buf = data.c_str();
+   ldpp_dout(this, 15) << "read len=" << data.length() << " data=" << (buf ? buf : "") << dendl;
+
+   MD5 data_hash;
+   unsigned char data_hash_res[CEPH_CRYPTO_MD5_DIGESTSIZE];
+   data_hash.Update(reinterpret_cast<const unsigned char*>(buf), data.length());
+   data_hash.Final(data_hash_res);
+
+   // Check the decoded length before comparing: memcmp() always reads a full
+   // digest, which would run past a shorter client-supplied value.
+   const bool digest_matches =
+     content_md5_bin.size() == static_cast<size_t>(CEPH_CRYPTO_MD5_DIGESTSIZE) &&
+     memcmp(data_hash_res, content_md5_bin.c_str(), CEPH_CRYPTO_MD5_DIGESTSIZE) == 0;
+   if (!digest_matches) {
+     // Hex-encode the computed digest for the log. Streaming the raw
+     // unsigned char array would be treated as a C string even though it is
+     // binary and not NUL-terminated.
+     static const char hex_digits[] = "0123456789abcdef";
+     char data_hash_hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+     for (size_t i = 0; i < sizeof(data_hash_res); ++i) {
+       data_hash_hex[2 * i] = hex_digits[data_hash_res[i] >> 4];
+       data_hash_hex[2 * i + 1] = hex_digits[data_hash_res[i] & 0x0f];
+     }
+     data_hash_hex[sizeof(data_hash_hex) - 1] = '\0';
+
+     op_ret = -ERR_BAD_DIGEST;
+     s->err.message = "The Content-MD5 you specified did not match what we received.";
+     ldpp_dout(this, 5) << s->err.message
+                        << " Specified content md5: " << content_md5
+                        << ", calculated content md5: " << data_hash_hex
+                        << dendl;
+     return;
+   }
+
+   if (!parser.parse(buf, data.length(), 1)) {
+     op_ret = -ERR_MALFORMED_XML;
+     return;
+   }
+
+   try {
+     RGWXMLDecoder::decode_xml("LifecycleConfiguration", config, &parser);
+   } catch (RGWXMLDecoder::err& err) {
+     ldpp_dout(this, 5) << "Bad lifecycle configuration: " << err << dendl;
+     op_ret = -ERR_MALFORMED_XML;
+     return;
+   }
+
+   op_ret = config.rebuild(store, new_config);
+   if (op_ret < 0)
+     return;
+
+   if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+     XMLFormatter xf;
+     new_config.dump_xml(&xf);
+     stringstream ss;
+     xf.flush(ss);
+     ldpp_dout(this, 15) << "New LifecycleConfiguration:" << ss.str() << dendl;
+   }
+
+   if (!store->svc.zone->is_meta_master()) {
+     op_ret = forward_request_to_master(s, nullptr, store, data, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   op_ret = store->get_lc()->set_bucket_config(s->bucket_info, s->bucket_attrs, &new_config);
+ }
+
+ /*
+  * Remove the bucket's lifecycle configuration. When this zone is not the
+  * metadata master the request is forwarded there first (with an empty
+  * body), and a forwarding failure aborts the operation. The outcome is left
+  * in op_ret.
+  */
+ void RGWDeleteLC::execute()
+ {
+   if (!store->svc.zone->is_meta_master()) {
+     bufferlist empty_body;
+     op_ret = forward_request_to_master(s, nullptr, store, empty_body, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   op_ret = store->get_lc()->remove_bucket_config(s->bucket_info, s->bucket_attrs);
+ }
+
+ /* Returns the result of verify_bucket_owner_or_policy() for the
+  * s3:GetBucketCORS action. */
+ int RGWGetCORS::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketCORS);
+ }
+
+ /*
+  * Load the bucket's CORS configuration. op_ret carries the error from
+  * read_bucket_cors(), or -ERR_NO_CORS_FOUND when the read succeeded but the
+  * bucket has no CORS configuration.
+  */
+ void RGWGetCORS::execute()
+ {
+   op_ret = read_bucket_cors();
+   if (op_ret >= 0 && !cors_exist) {
+     ldpp_dout(this, 2) << "No CORS configuration set yet for this bucket" << dendl;
+     op_ret = -ERR_NO_CORS_FOUND;
+   }
+ }
+
+ /* Returns the result of verify_bucket_owner_or_policy() for the
+  * s3:PutBucketCORS action. */
+ int RGWPutCORS::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketCORS);
+ }
+
+ /*
+  * Store a CORS configuration on the bucket: read the request parameters,
+  * forward the request body to the metadata master when this zone is not the
+  * master, then write cors_bl as RGW_ATTR_CORS through
+  * retry_raced_bucket_write(). The outcome is left in op_ret.
+  */
+ void RGWPutCORS::execute()
+ {
+   rgw_raw_obj obj;
+
+   op_ret = get_params();
+   if (op_ret < 0) {
+     return;
+   }
+
+   if (!store->svc.zone->is_meta_master()) {
+     op_ret = forward_request_to_master(s, NULL, store, in_data, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   /* Each attempt starts from the current bucket attrs and replaces only the
+    * CORS attribute. */
+   const auto write_cors_attr = [this] {
+     map<string, bufferlist> updated_attrs = s->bucket_attrs;
+     updated_attrs[RGW_ATTR_CORS] = cors_bl;
+     return rgw_bucket_set_attrs(store, s->bucket_info, updated_attrs, &s->bucket_info.objv_tracker);
+   };
+   op_ret = retry_raced_bucket_write(store, s, write_cors_attr);
+ }
+
+ /* Deleting the CORS configuration is authorized with the same action as
+  * writing it; returns the result of verify_bucket_owner_or_policy(). */
+ int RGWDeleteCORS::verify_permission()
+ {
+ // No separate delete permission
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketCORS);
+ }
+
+ /*
+  * Remove the bucket's CORS configuration. When this zone is not the
+  * metadata master the request is forwarded there first with an empty body.
+  * The removal itself runs inside retry_raced_bucket_write(); op_ret is
+  * -ENOENT when the bucket has no CORS configuration.
+  */
+ void RGWDeleteCORS::execute()
+ {
+   if (!store->svc.zone->is_meta_master()) {
+     bufferlist empty_body;
+     op_ret = forward_request_to_master(s, nullptr, store, empty_body, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   const auto erase_cors_attr = [this] {
+     op_ret = read_bucket_cors();
+     if (op_ret < 0) {
+       return op_ret;
+     }
+
+     if (!cors_exist) {
+       ldpp_dout(this, 2) << "No CORS configuration set yet for this bucket" << dendl;
+       op_ret = -ENOENT;
+       return op_ret;
+     }
+
+     map<string, bufferlist> remaining_attrs = s->bucket_attrs;
+     remaining_attrs.erase(RGW_ATTR_CORS);
+     op_ret = rgw_bucket_set_attrs(store, s->bucket_info, remaining_attrs,
+                                   &s->bucket_info.objv_tracker);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "RGWLC::RGWDeleteCORS() failed to set attrs on bucket=" << s->bucket.name
+                          << " returned err=" << op_ret << dendl;
+     }
+     return op_ret;
+   };
+   op_ret = retry_raced_bucket_write(store, s, erase_cors_attr);
+ }
+
+ /* Fills hdrs, exp_hdrs and *max_age by passing the matched rule and the
+  * requested headers to get_cors_response_headers(). */
+ void RGWOptionsCORS::get_response_params(string& hdrs, string& exp_hdrs, unsigned *max_age) {
+ get_cors_response_headers(rule, req_hdrs, hdrs, exp_hdrs, max_age);
+ }
+
+ /*
+  * Find the CORS rule in `cc` that matches the request origin (stored in
+  * `rule`) and check the requested method and headers against it.
+  * Returns 0 when everything matches, -ENOENT otherwise.
+  */
+ int RGWOptionsCORS::validate_cors_request(RGWCORSConfiguration *cc) {
+   rule = cc->host_name_rule(origin);
+   if (!rule) {
+     ldpp_dout(this, 10) << "There is no cors rule present for " << origin << dendl;
+     return -ENOENT;
+   }
+
+   const bool request_allowed = validate_cors_rule_method(rule, req_meth) &&
+                                validate_cors_rule_header(rule, req_hdrs);
+   return request_allowed ? 0 : -ENOENT;
+ }
+
+ /*
+  * Handle a CORS preflight request. Requires the Origin and
+  * Access-Control-Request-Method headers (-EINVAL when missing) and an
+  * existing bucket CORS configuration (-ENOENT when absent), then validates
+  * the request against that configuration. origin and req_meth are cleared
+  * when no rule matched. The outcome is left in op_ret.
+  */
+ void RGWOptionsCORS::execute()
+ {
+   op_ret = read_bucket_cors();
+   if (op_ret < 0) {
+     return;
+   }
+
+   origin = s->info.env->get("HTTP_ORIGIN");
+   if (origin == nullptr) {
+     ldpp_dout(this, 0) << "Missing mandatory Origin header" << dendl;
+     op_ret = -EINVAL;
+     return;
+   }
+
+   req_meth = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_METHOD");
+   if (req_meth == nullptr) {
+     ldpp_dout(this, 0) << "Missing mandatory Access-control-request-method header" << dendl;
+     op_ret = -EINVAL;
+     return;
+   }
+
+   if (!cors_exist) {
+     ldpp_dout(this, 2) << "No CORS configuration set yet for this bucket" << dendl;
+     op_ret = -ENOENT;
+     return;
+   }
+
+   req_hdrs = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS");
+   op_ret = validate_cors_request(&bucket_cors);
+   if (!rule) {
+     req_meth = nullptr;
+     origin = nullptr;
+   }
+ }
+
+ /* Returns the result of verify_bucket_owner_or_policy() for the
+  * s3:GetBucketRequestPayment action. */
+ int RGWGetRequestPayment::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketRequestPayment);
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWGetRequestPayment::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /* Copies the bucket's requester_pays flag from the bucket info into this
+  * op's requester_pays member. */
+ void RGWGetRequestPayment::execute()
+ {
+ requester_pays = s->bucket_info.requester_pays;
+ }
+
+ /* Returns the result of verify_bucket_owner_or_policy() for the
+  * s3:PutBucketRequestPayment action. */
+ int RGWSetRequestPayment::verify_permission()
+ {
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketRequestPayment);
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWSetRequestPayment::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Update the bucket's requester_pays flag. The outcome is left in op_ret.
+  *
+  * The request parameters are read before anything is forwarded, matching
+  * RGWPutCORS::execute(): an invalid request is rejected locally instead of
+  * first being sent to the metadata master, and in_data is forwarded only
+  * after get_params() has run.
+  * NOTE(review): this assumes get_params() is what fills in_data, as the
+  * ordering in RGWPutCORS::execute() suggests -- confirm against the REST
+  * handler.
+  */
+ void RGWSetRequestPayment::execute()
+ {
+   op_ret = get_params();
+   if (op_ret < 0)
+     return;
+
+   if (!store->svc.zone->is_meta_master()) {
+     op_ret = forward_request_to_master(s, nullptr, store, in_data, nullptr);
+     if (op_ret < 0) {
+       ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
+       return;
+     }
+   }
+
+   s->bucket_info.requester_pays = requester_pays;
+   op_ret = store->put_bucket_instance_info(s->bucket_info, false, real_time(),
+                                            &s->bucket_attrs);
+   if (op_ret < 0) {
+     ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+                        << " returned err=" << op_ret << dendl;
+     return;
+   }
+ }
+
+ /*
+  * Authorize starting a multipart upload: the caller needs s3:PutObject on
+  * the target object. An explicit Deny from the user or bucket policy
+  * returns -EACCES and an Allow from either returns 0; when the policies are
+  * silent or absent, the bucket ACL (RGW_PERM_WRITE) decides.
+  */
+ int RGWInitMultipart::verify_permission()
+ {
+   if (s->iam_policy || ! s->iam_user_policies.empty()) {
+     const auto user_effect = eval_user_policies(s->iam_user_policies, s->env,
+                                                 boost::none,
+                                                 rgw::IAM::s3PutObject,
+                                                 rgw_obj(s->bucket, s->object));
+     if (user_effect == Effect::Deny) {
+       return -EACCES;
+     }
+
+     rgw::IAM::Effect bucket_effect = Effect::Pass;
+     if (s->iam_policy) {
+       bucket_effect = s->iam_policy->eval(s->env, *s->auth.identity,
+                                           rgw::IAM::s3PutObject,
+                                           rgw_obj(s->bucket, s->object));
+     }
+     if (bucket_effect == Effect::Deny) {
+       return -EACCES;
+     }
+     if (bucket_effect == Effect::Allow || user_effect == Effect::Allow) {
+       return 0;
+     }
+   }
+
+   return verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE) ? 0 : -EACCES;
+ }
+
+ /* Pre-execution hook: delegates to rgw_bucket_object_pre_exec() with the
+  * request state. */
+ void RGWInitMultipart::pre_exec()
+ {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ /*
+  * Start a multipart upload: build the attributes for the upload (ACL,
+  * generic attrs, encryption, request metadata), then create the upload's
+  * meta object under a freshly generated upload id, retrying with a new id
+  * while the write reports -EEXIST. A notification is published at the end.
+  */
+ void RGWInitMultipart::execute()
+ {
+ bufferlist aclbl;
+ map<string, bufferlist> attrs;
+ rgw_obj obj;
+
+ // NOTE(review): a failing get_params() returns here without storing its
+ // result in op_ret.
+ if (get_params() < 0)
+ return;
+
+ if (s->object.empty())
+ return;
+
+ policy.encode(aclbl);
+ attrs[RGW_ATTR_ACL] = aclbl;
+
+ populate_with_generic_attrs(s, attrs);
+
+ /* select encryption mode */
+ op_ret = prepare_encryption(attrs);
+ if (op_ret != 0)
+ return;
+
+ op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
+ if (op_ret < 0) {
+ return;
+ }
+
+ do {
+ // 32 random alphanumeric characters; the buffer size leaves room for the
+ // terminator.
+ char buf[33];
+ gen_rand_alphanumeric(s->cct, buf, sizeof(buf) - 1);
+ upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */
+ upload_id.append(buf);
+
+ string tmp_obj_name;
+ RGWMPObj mp(s->object.name, upload_id);
+ tmp_obj_name = mp.get_meta();
+
+ obj.init_ns(s->bucket, tmp_obj_name, mp_ns);
+ // the meta object will be indexed with 0 size (the original comment is
+ // truncated at this point)
+ obj.set_in_extra_data(true);
+ obj.index_hash_source = s->object.name;
+
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
+ op_target.set_versioning_disabled(true); /* no versioning for multipart meta */
+
+ RGWRados::Object::Write obj_op(&op_target);
+
+ obj_op.meta.owner = s->owner.get_id();
+ obj_op.meta.category = RGWObjCategory::MultiMeta;
+ // Exclusive create: an id collision surfaces as -EEXIST and triggers
+ // another iteration.
+ obj_op.meta.flags = PUT_OBJ_CREATE_EXCL;
+
+ multipart_upload_info upload_info;
+ upload_info.dest_placement = s->dest_placement;
+
+ bufferlist bl;
+ encode(upload_info, bl);
+ obj_op.meta.data = &bl;
+
+ op_ret = obj_op.write_meta(bl.length(), 0, attrs);
+ } while (op_ret == -EEXIST);
+
+ // NOTE(review): the notification is published whatever op_ret holds.
+ // attrs[RGW_ATTR_ETAG] uses map::operator[], so a missing ETag yields an
+ // empty value.
+ const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), attrs[RGW_ATTR_ETAG].to_str(), rgw::notify::ObjectCreatedPost, store);
+ if (ret < 0) {
+ ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+ // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+ // this should be global conf (probably returning a different handler)
+ // so we don't need to read the configured values before we perform it
+ }
+ }
+
+ /*
+  * Authorize completing a multipart upload: the caller needs s3:PutObject on
+  * the target object. An explicit Deny from the user or bucket policy
+  * returns -EACCES and an Allow from either returns 0; when the policies are
+  * silent or absent, the bucket ACL (RGW_PERM_WRITE) decides.
+  */
+ int RGWCompleteMultipart::verify_permission()
+ {
+   if (s->iam_policy || ! s->iam_user_policies.empty()) {
+     const auto user_effect = eval_user_policies(s->iam_user_policies, s->env,
+                                                 boost::none,
+                                                 rgw::IAM::s3PutObject,
+                                                 rgw_obj(s->bucket, s->object));
+     if (user_effect == Effect::Deny) {
+       return -EACCES;
+     }
+
+     rgw::IAM::Effect bucket_effect = Effect::Pass;
+     if (s->iam_policy) {
+       bucket_effect = s->iam_policy->eval(s->env, *s->auth.identity,
+                                           rgw::IAM::s3PutObject,
+                                           rgw_obj(s->bucket, s->object));
+     }
+     if (bucket_effect == Effect::Deny) {
+       return -EACCES;
+     }
+     if (bucket_effect == Effect::Allow || user_effect == Effect::Allow) {
+       return 0;
+     }
+   }
+
+   return verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE) ? 0 : -EACCES;
+ }
+
+void RGWCompleteMultipart::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+void RGWCompleteMultipart::execute()
+{ /*
+ * Complete a multipart upload.
+ *
+ * Parses the CompleteMultipartUpload XML held in `data`, takes an exclusive
+ * lock on the upload's meta object, and checks every requested part (part
+ * number, etag, minimum size, compression type) against the parts listed
+ * for `upload_id`. The part manifests are appended into one manifest, the
+ * combined etag is computed, the target object's metadata is written with
+ * write_meta(), the meta object is deleted and a notification is published.
+ * The outcome is left in op_ret; nothing is returned.
+ */
+ RGWMultiCompleteUpload *parts;
+ map<int, string>::iterator iter;
+ RGWMultiXMLParser parser;
+ string meta_oid;
+ map<uint32_t, RGWUploadPartInfo> obj_parts;
+ map<uint32_t, RGWUploadPartInfo>::iterator obj_iter;
+ map<string, bufferlist> attrs;
+ off_t ofs = 0; // running sum of obj_part.size; first argument to write_meta() below
+ MD5 hash;
+ char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; // hex digest plus room for the "-<part count>" suffix
+ bufferlist etag_bl;
+ rgw_obj meta_obj;
+ rgw_obj target_obj;
+ RGWMPObj mp;
+ RGWObjManifest manifest;
+ uint64_t olh_epoch = 0;
+
+ op_ret = get_params();
+ if (op_ret < 0)
+ return;
+ op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
+ if (op_ret < 0) {
+ return;
+ }
+ // An empty request body cannot carry a part list.
+ if (!data.length()) {
+ op_ret = -ERR_MALFORMED_XML;
+ return;
+ }
+
+ if (!parser.init()) {
+ op_ret = -EIO;
+ return;
+ }
+
+ if (!parser.parse(data.c_str(), data.length(), 1)) {
+ op_ret = -ERR_MALFORMED_XML;
+ return;
+ }
+
+ parts = static_cast<RGWMultiCompleteUpload *>(parser.find_first("CompleteMultipartUpload"));
+ if (!parts || parts->parts.empty()) {
+ op_ret = -ERR_MALFORMED_XML;
+ return;
+ }
+ // Reject requests that name more parts than rgw_multipart_part_upload_limit.
+ if ((int)parts->parts.size() >
+ s->cct->_conf->rgw_multipart_part_upload_limit) {
+ op_ret = -ERANGE;
+ return;
+ }
+
+ mp.init(s->object.name, upload_id);
+ meta_oid = mp.get_meta();
+
+ int total_parts = 0;
+ int handled_parts = 0;
+ int max_parts = 1000; // page size handed to list_multipart_parts()
+ int marker = 0;
+ bool truncated;
+ RGWCompressionInfo cs_info;
+ bool compressed = false;
+ uint64_t accounted_size = 0;
+
+ uint64_t min_part_size = s->cct->_conf->rgw_multipart_min_part_size;
+
+ list<rgw_obj_index_key> remove_objs; /* objects to be removed from index listing */
+
+ bool versioned_object = s->bucket_info.versioning_enabled();
+
+ iter = parts->parts.begin();
+
+ meta_obj.init_ns(s->bucket, meta_oid, mp_ns);
+ meta_obj.set_in_extra_data(true);
+ meta_obj.index_hash_source = s->object.name;
+
+ /*take a cls lock on meta_obj to prevent racing completions (or retries)
+ from deleting the parts*/
+ rgw_pool meta_pool;
+ rgw_raw_obj raw_obj;
+ int max_lock_secs_mp =
+ s->cct->_conf.get_val<int64_t>("rgw_mp_lock_max_time");
+ utime_t dur(max_lock_secs_mp, 0);
+ // NOTE(review): the return values of the three store calls below are not checked - confirm they cannot fail here.
+ store->obj_to_raw((s->bucket_info).placement_rule, meta_obj, &raw_obj);
+ store->get_obj_data_pool((s->bucket_info).placement_rule,
+ meta_obj,&meta_pool);
+ store->open_pool_ctx(meta_pool, serializer.ioctx, true);
+
+ op_ret = serializer.try_lock(raw_obj.oid, dur);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "failed to acquire lock" << dendl;
+ op_ret = -ERR_INTERNAL_ERROR;
+ s->err.message = "This multipart completion is already in progress";
+ return;
+ }
+
+ op_ret = get_obj_attrs(store, s, meta_obj, attrs);
+
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "ERROR: failed to get obj attrs, obj=" << meta_obj
+ << " ret=" << op_ret << dendl;
+ return;
+ }
+ // Walk the listed parts one listing at a time, in step with the client's part list (iter).
+ do {
+ op_ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts,
+ marker, obj_parts, &marker, &truncated);
+ if (op_ret == -ENOENT) {
+ op_ret = -ERR_NO_SUCH_UPLOAD;
+ }
+ if (op_ret < 0)
+ return;
+
+ total_parts += obj_parts.size();
+ if (!truncated && total_parts != (int)parts->parts.size()) {
+ ldpp_dout(this, 0) << "NOTICE: total parts mismatch: have: " << total_parts
+ << " expected: " << parts->parts.size() << dendl;
+ op_ret = -ERR_INVALID_PART;
+ return;
+ }
+
+ for (obj_iter = obj_parts.begin(); iter != parts->parts.end() && obj_iter != obj_parts.end(); ++iter, ++obj_iter, ++handled_parts) {
+ uint64_t part_size = obj_iter->second.accounted_size;
+ if (handled_parts < (int)parts->parts.size() - 1 &&
+ part_size < min_part_size) { // every part except the last requested one must reach min_part_size
+ op_ret = -ERR_TOO_SMALL;
+ return;
+ }
+
+ char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ if (iter->first != (int)obj_iter->first) {
+ ldpp_dout(this, 0) << "NOTICE: parts num mismatch: next requested: "
+ << iter->first << " next uploaded: "
+ << obj_iter->first << dendl;
+ op_ret = -ERR_INVALID_PART;
+ return;
+ }
+ string part_etag = rgw_string_unquote(iter->second);
+ if (part_etag.compare(obj_iter->second.etag) != 0) {
+ ldpp_dout(this, 0) << "NOTICE: etag mismatch: part: " << iter->first
+ << " etag: " << iter->second << dendl;
+ op_ret = -ERR_INVALID_PART;
+ return;
+ }
+ // Fold this part's binary digest into the hash that becomes the final etag.
+ hex_to_buf(obj_iter->second.etag.c_str(), petag,
+ CEPH_CRYPTO_MD5_DIGESTSIZE);
+ hash.Update((const unsigned char *)petag, sizeof(petag));
+
+ RGWUploadPartInfo& obj_part = obj_iter->second;
+
+ /* update manifest for part */
+ string oid = mp.get_part(obj_iter->second.num);
+ rgw_obj src_obj;
+ src_obj.init_ns(s->bucket, oid, mp_ns);
+
+ if (obj_part.manifest.empty()) {
+ ldpp_dout(this, 0) << "ERROR: empty manifest for object part: obj="
+ << src_obj << dendl;
+ op_ret = -ERR_INVALID_PART;
+ return;
+ } else {
+ manifest.append(obj_part.manifest, store->svc.zone);
+ }
+ // Every part after the first of a listing must match the compression state accumulated so far.
+ bool part_compressed = (obj_part.cs_info.compression_type != "none");
+ if ((obj_iter != obj_parts.begin()) &&
+ ((part_compressed != compressed) ||
+ (cs_info.compression_type != obj_part.cs_info.compression_type))) {
+ ldpp_dout(this, 0) << "ERROR: compression type was changed during multipart upload ("
+ << cs_info.compression_type << ">>" << obj_part.cs_info.compression_type << ")" << dendl;
+ op_ret = -ERR_INVALID_PART;
+ return;
+ }
+ // Append this part's compression blocks, rebased onto the offsets accumulated in cs_info.
+ if (part_compressed) {
+ int64_t new_ofs; // offset in compression data for new part
+ if (cs_info.blocks.size() > 0)
+ new_ofs = cs_info.blocks.back().new_ofs + cs_info.blocks.back().len;
+ else
+ new_ofs = 0;
+ for (const auto& block : obj_part.cs_info.blocks) {
+ compression_block cb;
+ cb.old_ofs = block.old_ofs + cs_info.orig_size;
+ cb.new_ofs = new_ofs;
+ cb.len = block.len;
+ cs_info.blocks.push_back(cb);
+ new_ofs = cb.new_ofs + cb.len;
+ }
+ if (!compressed)
+ cs_info.compression_type = obj_part.cs_info.compression_type;
+ cs_info.orig_size += obj_part.cs_info.orig_size;
+ compressed = true;
+ }
+ // Queue the part's index key; the list is handed to obj_op.meta.remove_objs below.
+ rgw_obj_index_key remove_key;
+ src_obj.key.get_index_key(&remove_key);
+
+ remove_objs.push_back(remove_key);
+
+ ofs += obj_part.size;
+ accounted_size += obj_part.accounted_size;
+ }
+ } while (truncated);
+ hash.Final((unsigned char *)final_etag);
+ // Final etag: hex of the combined digest followed by "-<number of requested parts>".
+ buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str);
+ snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2,
+ "-%lld", (long long)parts->parts.size());
+ etag = final_etag_str;
+ ldpp_dout(this, 10) << "calculated etag: " << final_etag_str << dendl;
+
+ etag_bl.append(final_etag_str, strlen(final_etag_str));
+
+ attrs[RGW_ATTR_ETAG] = etag_bl;
+
+ if (compressed) {
+ // write compression attribute to full object
+ bufferlist tmp;
+ encode(cs_info, tmp);
+ attrs[RGW_ATTR_COMPRESSION] = tmp;
+ }
+ // For versioned buckets, use the supplied version id or generate a fresh instance name.
+ target_obj.init(s->bucket, s->object.name);
+ if (versioned_object) {
+ if (!version_id.empty()) {
+ target_obj.key.set_instance(version_id);
+ } else {
+ store->gen_rand_obj_instance_name(&target_obj);
+ version_id = target_obj.key.get_instance();
+ }
+ }
+
+ RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+
+ obj_ctx.set_atomic(target_obj);
+
+ RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), target_obj);
+ RGWRados::Object::Write obj_op(&op_target);
+
+ obj_op.meta.manifest = &manifest;
+ obj_op.meta.remove_objs = &remove_objs;
+
+ obj_op.meta.ptag = &s->req_id; /* use req_id as operation tag */
+ obj_op.meta.owner = s->owner.get_id();
+ obj_op.meta.flags = PUT_OBJ_CREATE;
+ obj_op.meta.modify_tail = true;
+ obj_op.meta.completeMultipart = true;
+ obj_op.meta.olh_epoch = olh_epoch;
+ op_ret = obj_op.write_meta(ofs, accounted_size, attrs);
+ if (op_ret < 0)
+ return;
+
+ // remove the upload obj
+ int r = store->delete_obj(*static_cast<RGWObjectCtx *>(s->obj_ctx),
+ s->bucket_info, meta_obj, 0);
+ if (r >= 0) {
+ /* serializer's exclusive lock is released */
+ serializer.clear_locked();
+ } else {
+ ldpp_dout(this, 0) << "WARNING: failed to remove object " << meta_obj << dendl;
+ }
+ // NOTE(review): s->obj_size is reported as the event size; whether it holds the assembled object's size is not visible here - confirm.
+ const auto ret = rgw::notify::publish(s, s->object, s->obj_size, ceph::real_clock::now(), etag, rgw::notify::ObjectCreatedCompleteMultipartUpload, store);
+ if (ret < 0) {
+ ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+ // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+ // this should be global conf (probably returning a different handler)
+ // so we don't need to read the configured values before we perform it
+ }
+}
+
+/**
+ * Try to take the exclusive lock on a multipart meta object.
+ *
+ * Remembers `_oid`, then submits one operation through `ioctx` that first
+ * asserts the object exists and then requests an exclusive lock lasting
+ * `dur`.
+ *
+ * @param _oid  id of the raw object to lock
+ * @param dur   lock duration
+ * @return the result of ioctx.operate(); on 0 the `locked` flag is set.
+ */
+int RGWCompleteMultipart::MPSerializer::try_lock(
+  const std::string& _oid,
+  utime_t dur)
+{
+  oid = _oid;
+
+  // Assemble the compound operation: existence check, then the lock itself.
+  op.assert_exists();
+  lock.set_duration(dur);
+  lock.lock_exclusive(&op);
+
+  const int rc = ioctx.operate(oid, &op);
+  if (rc == 0) {
+    locked = true;
+  }
+  return rc;
+}
+
+/**
+ * Finish the request: drop the serializer lock if it is still flagged as
+ * held (logging a warning when the unlock fails), then send the response.
+ */
+void RGWCompleteMultipart::complete()
+{
+  /* release exclusive lock iff not already */
+  if (unlikely(serializer.locked) && serializer.unlock() < 0) {
+    ldpp_dout(this, 0) << "WARNING: failed to unlock " << serializer.oid << dendl;
+  }
+
+  send_response();
+}
+
+/**
+ * Authorize aborting a multipart upload.
+ *
+ * Evaluates s3AbortMultipartUpload on the target object against the
+ * identity policies and, when set, the bucket policy. Any Deny yields
+ * -EACCES and any Allow yields 0. With no verdict the decision is left to
+ * verify_bucket_permission_no_policy() with RGW_PERM_WRITE.
+ *
+ * @return 0 when permitted, -EACCES otherwise.
+ */
+int RGWAbortMultipart::verify_permission()
+{
+  if (s->iam_policy || !s->iam_user_policies.empty()) {
+    const auto user_effect = eval_user_policies(s->iam_user_policies, s->env,
+                                                boost::none,
+                                                rgw::IAM::s3AbortMultipartUpload,
+                                                rgw_obj(s->bucket, s->object));
+    if (user_effect == Effect::Deny) {
+      return -EACCES;
+    }
+
+    rgw::IAM::Effect bucket_effect = Effect::Pass;
+    if (s->iam_policy) {
+      bucket_effect = s->iam_policy->eval(s->env, *s->auth.identity,
+                                          rgw::IAM::s3AbortMultipartUpload,
+                                          rgw_obj(s->bucket, s->object));
+    }
+
+    if (bucket_effect == Effect::Deny) {
+      return -EACCES;
+    }
+    if (bucket_effect == Effect::Allow || user_effect == Effect::Allow) {
+      return 0;
+    }
+  }
+
+  const bool acl_ok = verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE);
+  return acl_ok ? 0 : -EACCES;
+}
+
+void RGWAbortMultipart::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Abort the multipart upload named by the "uploadId" request argument.
+ *
+ * op_ret is -EINVAL when the upload id or the object name is missing.
+ * Otherwise it carries the result of looking up the upload's meta object
+ * and, if that succeeded, of abort_multipart_upload().
+ */
+void RGWAbortMultipart::execute()
+{
+  op_ret = -EINVAL;
+
+  string upload_id = s->info.args.get("uploadId");
+  if (upload_id.empty() || s->object.empty()) {
+    return;
+  }
+
+  RGWMPObj mp;
+  mp.init(s->object.name, upload_id);
+  string meta_oid = mp.get_meta();
+
+  // The lookup only confirms the upload exists; no policy, attrs or info are requested.
+  op_ret = get_multipart_info(store, s, meta_oid, nullptr, nullptr, nullptr);
+  if (op_ret < 0) {
+    return;
+  }
+
+  RGWObjectCtx *ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
+  op_ret = abort_multipart_upload(store, s->cct, ctx, s->bucket_info, mp);
+}
+
+/**
+ * Authorize listing the parts of a multipart upload
+ * (s3ListMultipartUploadParts on the object).
+ *
+ * @return 0 when permitted, -EACCES otherwise.
+ */
+int RGWListMultipart::verify_permission()
+{
+  const bool permitted =
+    verify_object_permission(this, s, rgw::IAM::s3ListMultipartUploadParts);
+  return permitted ? 0 : -EACCES;
+}
+
+void RGWListMultipart::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * List the parts uploaded so far for `upload_id`.
+ *
+ * Reads the request parameters, loads the upload's ACL policy from its
+ * meta object into `policy`, and fills `parts` / `truncated` from
+ * list_multipart_parts(). The first failing step leaves its error in
+ * op_ret.
+ */
+void RGWListMultipart::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  RGWMPObj mp;
+  mp.init(s->object.name, upload_id);
+  string meta_oid = mp.get_meta();
+
+  op_ret = get_multipart_info(store, s, meta_oid, &policy, nullptr, nullptr);
+  if (op_ret < 0) {
+    return;
+  }
+
+  // No next-marker output is requested from the listing.
+  op_ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts,
+                                marker, parts, nullptr, &truncated);
+}
+
+/**
+ * Authorize listing a bucket's in-progress multipart uploads
+ * (s3ListBucketMultipartUploads on the bucket).
+ *
+ * @return 0 when permitted, -EACCES otherwise.
+ */
+int RGWListBucketMultiparts::verify_permission()
+{
+  const bool permitted =
+    verify_bucket_permission(this, s, rgw::IAM::s3ListBucketMultipartUploads);
+  return permitted ? 0 : -EACCES;
+}
+
+void RGWListBucketMultiparts::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * List the multipart uploads in progress in the bucket.
+ *
+ * For Swift requests a non-empty "path" argument is translated into
+ * prefix = path, delimiter = "/"; combining it with an explicit prefix or
+ * delimiter is rejected with -EINVAL. Listed entries whose name parses as
+ * a multipart meta name are appended to `uploads`. When the listing
+ * returned anything, `next_marker` is set from the reused entry object.
+ */
+void RGWListBucketMultiparts::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  if (s->prot_flags & RGW_REST_SWIFT) {
+    string path_args = s->info.args.get("path");
+    if (!path_args.empty()) {
+      const bool has_explicit_filter = !delimiter.empty() || !prefix.empty();
+      if (has_explicit_filter) {
+        op_ret = -EINVAL;
+        return;
+      }
+      prefix = path_args;
+      delimiter="/";
+    }
+  }
+
+  string marker_meta = marker.get_meta();
+  vector<rgw_bucket_dir_entry> listed;
+
+  op_ret = list_bucket_multiparts(store, s->bucket_info, prefix, marker_meta, delimiter,
+                                  max_uploads, &listed, &common_prefixes, &is_truncated);
+  if (op_ret < 0) {
+    return;
+  }
+
+  if (listed.empty()) {
+    return;
+  }
+
+  // One entry object is reused for every listed item, as in the iterator form.
+  RGWMultipartUploadEntry entry;
+  for (auto& dirent : listed) {
+    rgw_obj_key key(dirent.key);
+    if (entry.mp.from_meta(key.name)) {
+      entry.obj = dirent;
+      uploads.push_back(entry);
+    }
+  }
+  next_marker = entry;
+}
+
+/**
+ * Health check: report 503 when rgw_healthcheck_disabling_path is
+ * configured and that path exists on the filesystem, 200 otherwise.
+ */
+void RGWGetHealthCheck::execute()
+{
+  /* Disabling path specified & existent in the filesystem. */
+  const bool disabled =
+    !g_conf()->rgw_healthcheck_disabling_path.empty() &&
+    (::access(g_conf()->rgw_healthcheck_disabling_path.c_str(), F_OK) == 0);
+
+  if (disabled) {
+    op_ret = -ERR_SERVICE_UNAVAILABLE; /* 503 */
+    return;
+  }
+  op_ret = 0; /* 200 OK */
+}
+
+/**
+ * Bucket-level authorization for a multi-object delete.
+ *
+ * The action is s3DeleteObject, or s3DeleteObjectVersion when the request's
+ * object carries an instance; it is evaluated against the bucket ARN using
+ * the identity policies and, when set, the bucket policy. A Deny fails
+ * with -EACCES and an Allow succeeds. Only when no policy decides is
+ * `acl_allowed` computed from verify_bucket_permission_no_policy() with
+ * RGW_PERM_WRITE; that value then decides the result.
+ *
+ * @return 0 when permitted, -EACCES otherwise.
+ */
+int RGWDeleteMultiObj::verify_permission()
+{
+  if (s->iam_policy || !s->iam_user_policies.empty()) {
+    const auto action = s->object.instance.empty() ?
+      rgw::IAM::s3DeleteObject :
+      rgw::IAM::s3DeleteObjectVersion;
+
+    const auto user_effect = eval_user_policies(s->iam_user_policies, s->env,
+                                                boost::none,
+                                                action,
+                                                ARN(s->bucket));
+    if (user_effect == Effect::Deny) {
+      return -EACCES;
+    }
+
+    rgw::IAM::Effect bucket_effect = Effect::Pass;
+    if (s->iam_policy) {
+      bucket_effect = s->iam_policy->eval(s->env, *s->auth.identity,
+                                          action,
+                                          ARN(s->bucket));
+    }
+
+    if (bucket_effect == Effect::Deny) {
+      return -EACCES;
+    }
+    if (bucket_effect == Effect::Allow || user_effect == Effect::Allow) {
+      return 0;
+    }
+  }
+
+  // Kept in a member: execute() reads acl_allowed during its per-object checks.
+  acl_allowed = verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE);
+  return acl_allowed ? 0 : -EACCES;
+}
+
+void RGWDeleteMultiObj::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Delete every object named in a multi-object delete request.
+ *
+ * Parses the <Delete> XML document from the request body and enforces the
+ * rgw_delete_multi_obj_max_num limit (DELETE_MULTI_OBJ_MAX_NUM when the
+ * option is negative). If the bucket has MFA enabled and any key names a
+ * version, the request must be MFA-verified. Keys are then deleted one at
+ * a time; each gets its own entry in the response through
+ * send_partial_response(), and a notification is published per key.
+ *
+ * Failures detected before the response has begun leave their error in
+ * op_ret and are reported via send_status(). Once begin_response() has
+ * run, per-key errors only appear in the response body and op_ret is
+ * reset to 0.
+ */
+void RGWDeleteMultiObj::execute()
+{
+  // Declared up front without initializers so the gotos below never jump
+  // over an initialization.
+  RGWMultiDelDelete *multi_delete;
+  vector<rgw_obj_key>::iterator iter;
+  RGWMultiDelXMLParser parser;
+  RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
+  char* buf;
+
+  op_ret = get_params();
+  if (op_ret < 0) {
+    goto error;
+  }
+
+  buf = data.c_str();
+  if (!buf) {
+    op_ret = -EINVAL;
+    goto error;
+  }
+
+  if (!parser.init()) {
+    op_ret = -EINVAL;
+    goto error;
+  }
+
+  if (!parser.parse(buf, data.length(), 1)) {
+    op_ret = -EINVAL;
+    goto error;
+  }
+
+  multi_delete = static_cast<RGWMultiDelDelete *>(parser.find_first("Delete"));
+  if (!multi_delete) {
+    op_ret = -EINVAL;
+    goto error;
+  } else {
+#define DELETE_MULTI_OBJ_MAX_NUM 1000
+    int max_num = s->cct->_conf->rgw_delete_multi_obj_max_num;
+    if (max_num < 0) {
+      max_num = DELETE_MULTI_OBJ_MAX_NUM;
+    }
+    int multi_delete_object_num = multi_delete->objects.size();
+    if (multi_delete_object_num > max_num) {
+      op_ret = -ERR_MALFORMED_XML;
+      goto error;
+    }
+  }
+
+  if (multi_delete->is_quiet())
+    quiet = true;
+
+  if (s->bucket_info.mfa_enabled()) {
+    bool has_versioned = false;
+    // Iterate by const reference: the keys are only inspected, so copying
+    // each rgw_obj_key would be wasted work.
+    for (const auto& key : multi_delete->objects) {
+      if (!key.instance.empty()) {
+        has_versioned = true;
+        break;
+      }
+    }
+    if (has_versioned && !s->mfa_verified) {
+      ldpp_dout(this, 5) << "NOTICE: multi-object delete request with a versioned object, mfa auth not provided" << dendl;
+      op_ret = -ERR_MFA_REQUIRED;
+      goto error;
+    }
+  }
+
+  begin_response();
+  if (multi_delete->objects.empty()) {
+    goto done;
+  }
+
+  for (iter = multi_delete->objects.begin();
+       iter != multi_delete->objects.end();
+       ++iter) {
+    rgw_obj obj(bucket, *iter);
+
+    // Per-object policy check: the bucket-level check in
+    // verify_permission() was evaluated against the bucket ARN only.
+    if (s->iam_policy || ! s->iam_user_policies.empty()) {
+      auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env,
+                                               boost::none,
+                                               iter->instance.empty() ?
+                                               rgw::IAM::s3DeleteObject :
+                                               rgw::IAM::s3DeleteObjectVersion,
+                                               ARN(obj));
+      if (usr_policy_res == Effect::Deny) {
+        send_partial_response(*iter, false, "", -EACCES);
+        continue;
+      }
+
+      rgw::IAM::Effect e = Effect::Pass;
+      if (s->iam_policy) {
+        e = s->iam_policy->eval(s->env,
+                                *s->auth.identity,
+                                iter->instance.empty() ?
+                                rgw::IAM::s3DeleteObject :
+                                rgw::IAM::s3DeleteObjectVersion,
+                                ARN(obj));
+      }
+      // Denied explicitly, or nobody allowed it and the ACL does not either.
+      if ((e == Effect::Deny) ||
+          (usr_policy_res == Effect::Pass && e == Effect::Pass && !acl_allowed)) {
+        send_partial_response(*iter, false, "", -EACCES);
+        continue;
+      }
+    }
+
+    obj_ctx->set_atomic(obj);
+
+    RGWRados::Object del_target(store, s->bucket_info, *obj_ctx, obj);
+    RGWRados::Object::Delete del_op(&del_target);
+
+    del_op.params.bucket_owner = s->bucket_owner.get_id();
+    del_op.params.versioning_status = s->bucket_info.versioning_status();
+    del_op.params.obj_owner = s->owner;
+
+    op_ret = del_op.delete_obj();
+    // Deleting a key that does not exist is reported as success.
+    if (op_ret == -ENOENT) {
+      op_ret = 0;
+    }
+
+    send_partial_response(*iter, del_op.result.delete_marker,
+                          del_op.result.version_id, op_ret);
+
+    const auto obj_state = obj_ctx->get_state(obj);
+    bufferlist etag_bl;
+    const auto etag = obj_state->get_attr(RGW_ATTR_ETAG, etag_bl) ? etag_bl.to_str() : "";
+
+    const auto ret = rgw::notify::publish(s, obj.key, obj_state->size, ceph::real_clock::now(), etag,
+                                          del_op.result.delete_marker && s->object.instance.empty() ? rgw::notify::ObjectRemovedDeleteMarkerCreated : rgw::notify::ObjectRemovedDelete,
+                                          store);
+    if (ret < 0) {
+      ldpp_dout(this, 5) << "WARNING: publishing notification failed, with error: " << ret << dendl;
+      // TODO: we should have conf to make send a blocking coroutine and reply with error in case sending failed
+      // this should be global conf (probably returning a different handler)
+      // so we don't need to read the configured values before we perform it
+    }
+  }
+
+  /* set the return code to zero, errors at this point will be
+     dumped to the response */
+  op_ret = 0;
+
+done:
+  // will likely segfault if begin_response() has not been called
+  end_response();
+  return;
+
+error:
+  send_status();
+  return;
+
+}
+
+/**
+ * Check whether the requester may delete from the given bucket.
+ *
+ * Reads the bucket ACL from `battrs`, extracts the bucket's IAM policy (if
+ * any) and evaluates s3DeleteBucket with verify_bucket_permission().
+ *
+ * @param binfo         bucket being operated on
+ * @param battrs        that bucket's attributes
+ * @param bucket_owner  [out] owner taken from the bucket ACL; only written
+ *                      when the ACL could be read
+ * @return true when permitted; false when the ACL cannot be read or the
+ *         check fails.
+ */
+bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
+                                               map<string, bufferlist>& battrs,
+                                               ACLOwner& bucket_owner /* out */)
+{
+  RGWAccessControlPolicy bucket_acl(store->ctx());
+  if (read_bucket_policy(store, s, binfo, battrs, &bucket_acl, binfo.bucket) < 0) {
+    return false;
+  }
+
+  auto bucket_iam_policy =
+    get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant);
+
+  bucket_owner = bucket_acl.get_owner();
+
+  /* We can use global user_acl because each BulkDelete request is allowed
+   * to work on entities from a single account only. */
+  // NOTE(review): s3DeleteBucket is the action checked even when the caller
+  // is deleting an object - confirm this is intended.
+  const bool permitted = verify_bucket_permission(dpp, s, binfo.bucket,
+                                                  s->user_acl.get(),
+                                                  &bucket_acl, bucket_iam_policy,
+                                                  s->iam_user_policies,
+                                                  rgw::IAM::s3DeleteBucket);
+  return permitted;
+}
+
+bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path)
+{ /*
+ * Delete one bulk-delete entry: the object path.obj_key inside
+ * path.bucket_name, or the bucket itself when the object key is empty.
+ *
+ * Returns true and increments num_deleted on success. On failure returns
+ * false after either incrementing num_unfound (for -ENOENT on the bucket
+ * lookup or on the delete) or appending the error and path to `failures`.
+ */
+ auto& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+
+ RGWBucketInfo binfo;
+ map<string, bufferlist> battrs;
+ ACLOwner bowner;
+
+ int ret = store->get_bucket_info(*s->sysobj_ctx, s->user->user_id.tenant,
+ path.bucket_name, binfo, nullptr,
+ &battrs);
+ if (ret < 0) {
+ goto binfo_fail;
+ }
+
+ if (!verify_permission(binfo, battrs, bowner)) {
+ ret = -EACCES;
+ goto auth_fail;
+ }
+ // A non-empty object key selects object deletion; otherwise the bucket is deleted.
+ if (!path.obj_key.empty()) {
+ rgw_obj obj(binfo.bucket, path.obj_key);
+ obj_ctx.set_atomic(obj);
+
+ RGWRados::Object del_target(store, binfo, obj_ctx, obj);
+ RGWRados::Object::Delete del_op(&del_target);
+
+ del_op.params.bucket_owner = binfo.owner;
+ del_op.params.versioning_status = binfo.versioning_status();
+ del_op.params.obj_owner = bowner; // owner reported by verify_permission() above
+
+ ret = del_op.delete_obj();
+ if (ret < 0) {
+ goto delop_fail;
+ }
+ } else {
+ RGWObjVersionTracker ot;
+ ot.read_version = binfo.ep_objv;
+
+ ret = store->delete_bucket(binfo, ot);
+ if (0 == ret) {
+ ret = rgw_unlink_bucket(store, binfo.owner, binfo.bucket.tenant,
+ binfo.bucket.name, false);
+ if (ret < 0) {
+ ldpp_dout(s, 0) << "WARNING: failed to unlink bucket: ret=" << ret << dendl;
+ }
+ }
+ if (ret < 0) {
+ goto delop_fail;
+ }
+ // On a zone that is not the metadata master, forward the request to the master as well.
+ if (!store->svc.zone->is_meta_master()) {
+ bufferlist in_data;
+ ret = forward_request_to_master(s, &ot.read_version, store, in_data,
+ nullptr);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ /* adjust error, we want to return with NoSuchBucket and not
+ * NoSuchKey */
+ ret = -ERR_NO_SUCH_BUCKET;
+ }
+ goto delop_fail;
+ }
+ }
+ }
+
+ num_deleted++;
+ return true;
+
+ // Failure paths follow; each one returns false.
+binfo_fail:
+ if (-ENOENT == ret) {
+ ldpp_dout(s, 20) << "cannot find bucket = " << path.bucket_name << dendl;
+ num_unfound++;
+ } else {
+ ldpp_dout(s, 20) << "cannot get bucket info, ret = " << ret << dendl;
+
+ fail_desc_t failed_item = {
+ .err = ret,
+ .path = path
+ };
+ failures.push_back(failed_item);
+ }
+ return false;
+
+auth_fail:
+ ldpp_dout(s, 20) << "wrong auth for " << path << dendl;
+ {
+ fail_desc_t failed_item = {
+ .err = ret,
+ .path = path
+ };
+ failures.push_back(failed_item);
+ }
+ return false;
+
+delop_fail:
+ if (-ENOENT == ret) {
+ ldpp_dout(s, 20) << "cannot find entry " << path << dendl;
+ num_unfound++;
+ } else {
+ fail_desc_t failed_item = {
+ .err = ret,
+ .path = path
+ };
+ failures.push_back(failed_item);
+ }
+ return false;
+}
+
+/**
+ * Delete every path in one chunk of a bulk-delete request.
+ *
+ * Each path is handed to delete_single(), whose return value is ignored
+ * here; it keeps its own success and failure bookkeeping.
+ *
+ * @param paths  entries to delete
+ * @return always true.
+ */
+bool RGWBulkDelete::Deleter::delete_chunk(const std::list<acct_path_t>& paths)
+{
+  ldpp_dout(s, 20) << "in delete_chunk" << dendl;
+  // Iterate by const reference: delete_single() takes a const reference, so
+  // copying each acct_path_t would be wasted work.
+  for (const auto& path : paths) {
+    ldpp_dout(s, 20) << "bulk deleting path: " << path << dendl;
+    delete_single(path);
+  }
+
+  return true;
+}
+
+int RGWBulkDelete::verify_permission()
+{ // Always succeeds; delete_single() runs Deleter::verify_permission() for each path instead.
+ return 0;
+}
+
+void RGWBulkDelete::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Run a bulk delete: repeatedly fetch a batch of paths with get_data() and
+ * pass it to the Deleter. The loop stops when get_data() fails, when
+ * op_ret becomes non-zero, or when the input is no longer truncated.
+ */
+void RGWBulkDelete::execute()
+{
+  deleter = std::unique_ptr<Deleter>(new Deleter(this, store, s));
+
+  bool more_input = false;
+  for (;;) {
+    list<RGWBulkDelete::acct_path_t> batch;
+
+    // NOTE(review): a get_data() failure is not copied into op_ret here -
+    // presumably get_data() sets it itself; confirm.
+    if (get_data(batch, &more_input) < 0) {
+      return;
+    }
+
+    // The chunk result is not consulted.
+    deleter->delete_chunk(batch);
+
+    if (op_ret || !more_input) {
+      break;
+    }
+  }
+}
+
+
+constexpr std::array<int, 2> RGWBulkUploadOp::terminal_errors; // namespace-scope definition of the static constexpr member; its initializer is not in this file section
+
+/**
+ * Request-level authorization for a bulk upload.
+ *
+ * @return -EACCES for anonymous callers, callers failing
+ *         verify_user_permission_no_policy() with RGW_PERM_WRITE, or
+ *         callers whose tenant differs from the requested bucket tenant;
+ *         -EPERM when the user's max_buckets is negative; 0 otherwise.
+ */
+int RGWBulkUploadOp::verify_permission()
+{
+  if (s->auth.identity->is_anonymous() ||
+      !verify_user_permission_no_policy(this, s, RGW_PERM_WRITE)) {
+    return -EACCES;
+  }
+
+  const auto& user_tenant = s->user->user_id.tenant;
+  if (user_tenant != s->bucket_tenant) {
+    ldpp_dout(this, 10) << "user cannot create a bucket in a different tenant"
+                        << " (user_id.tenant=" << user_tenant
+                        << " requested=" << s->bucket_tenant << ")" << dendl;
+    return -EACCES;
+  }
+
+  return (s->user->max_buckets < 0) ? -EPERM : 0;
+}
+
+void RGWBulkUploadOp::pre_exec()
+{ // No op-specific preparation: the request state is handed to rgw_bucket_object_pre_exec().
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Split an archive entry path into a bucket name and an object key.
+ *
+ * Leading slashes are skipped. The bucket name runs up to the first slash
+ * after them and everything behind that slash is the object name. A path
+ * without such a slash yields the bucket name and an empty object key.
+ *
+ * @param path  path to split, e.g. "/bucket/dir/obj"
+ * @return (bucket name, object key), or none when the path is empty or
+ *         consists only of slashes.
+ */
+boost::optional<std::pair<std::string, rgw_obj_key>>
+RGWBulkUploadOp::parse_path(const boost::string_ref& path)
+{
+  /* We need to skip all slashes at the beginning in order to preserve
+   * compliance with Swift. */
+  const size_t start_pos = path.find_first_not_of('/');
+
+  if (boost::string_ref::npos != start_pos) {
+    /* Separator is the first slash after the leading ones. The search runs
+     * on the remainder of the path, so the result is an offset relative to
+     * start_pos; treating it as an absolute index would cut the bucket name
+     * short and misplace the object name whenever leading slashes exist. */
+    const size_t sep_offset = path.substr(start_pos).find('/');
+
+    if (boost::string_ref::npos != sep_offset) {
+      const auto bucket_name = path.substr(start_pos, sep_offset);
+      const auto obj_name = path.substr(start_pos + sep_offset + 1);
+
+      return std::make_pair(bucket_name.to_string(),
+                            rgw_obj_key(obj_name.to_string()));
+    } else {
+      /* It's guaranteed here that bucket name is at least one character
+       * long and is different than slash. */
+      return std::make_pair(path.substr(start_pos).to_string(),
+                            rgw_obj_key());
+    }
+  }
+
+  return none;
+}
+
+/**
+ * Derive the path prefixes implied by the request URL.
+ *
+ * @param s  request state
+ * @return (bucket_path, file_prefix). Both are empty when the URL names no
+ *         bucket. Otherwise bucket_path is "<url_bucket>/" and file_prefix
+ *         is bucket_path followed by the object name, terminated with
+ *         exactly one '/' if an object name was given.
+ */
+std::pair<std::string, std::string>
+RGWBulkUploadOp::handle_upload_path(struct req_state *s)
+{
+  std::string bucket_path;
+  std::string file_prefix;
+
+  const std::string& url_bucket = s->init_state.url_bucket;
+  if (url_bucket.empty()) {
+    return std::make_pair(bucket_path, file_prefix);
+  }
+
+  bucket_path = url_bucket + "/";
+  file_prefix = bucket_path;
+
+  if (!s->object.empty()) {
+    /* As rgw_obj_key::empty() already verified emptiness of s->object.name,
+     * we can safely examine its last element. */
+    const std::string& object_name = s->object.name;
+    file_prefix.append(object_name);
+    if (object_name.back() != '/') {
+      file_prefix.append("/");
+    }
+  }
+
+  return std::make_pair(bucket_path, file_prefix);
+}
+
+/**
+ * Check the user's bucket quota before a directory entry creates a bucket.
+ *
+ * Nothing is checked when max_buckets is zero or negative. Otherwise the
+ * user's buckets are read, up to max_buckets of them.
+ *
+ * @return 0 when another bucket may be created, -ERR_TOO_MANY_BUCKETS when
+ *         the limit is reached, or the error from rgw_read_user_buckets()
+ *         (also left in op_ret).
+ */
+int RGWBulkUploadOp::handle_dir_verify_permission()
+{
+  const auto bucket_limit = s->user->max_buckets;
+  if (bucket_limit <= 0) {
+    return 0;
+  }
+
+  RGWUserBuckets owned;
+  std::string marker;
+  bool is_truncated = false;
+  op_ret = rgw_read_user_buckets(store, s->user->user_id, owned,
+                                 marker, std::string(), bucket_limit,
+                                 false, &is_truncated);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  const bool limit_reached = owned.count() >= static_cast<size_t>(bucket_limit);
+  return limit_reached ? -ERR_TOO_MANY_BUCKETS : 0;
+}
+
+/**
+ * Make sure the request info that is about to be forwarded names the bucket.
+ *
+ * @param cct          context used for logging
+ * @param info         request info, modified in place
+ * @param bucket_name  bucket to append when missing
+ */
+static void forward_req_info(CephContext *cct, req_info& info, const std::string& bucket_name)
+{
+  /* the request of container or object level will contain bucket name.
+   * only at account level need to append the bucket name */
+  // NOTE(review): this is a plain substring search, so a bucket name that
+  // happens to occur elsewhere in script_uri also counts as present.
+  const bool already_named = info.script_uri.find(bucket_name) != std::string::npos;
+  if (already_named) {
+    return;
+  }
+
+  ldout(cct, 20) << "append the bucket: "<< bucket_name << " to req_info" << dendl;
+  info.script_uri.append("/").append(bucket_name);
+  info.request_uri = info.script_uri;
+  info.request_uri_aws4 = info.request_uri;
+  info.effective_uri = "/" + bucket_name;
+}
+
+void RGWBulkUploadOp::init(RGWRados* const store,
+ struct req_state* const s,
+ RGWHandler* const h)
+{ // Runs the base RGWOp::init(), then constructs dir_ctx (dereferenced by handle_dir()) from a fresh sysobj context.
+ RGWOp::init(store, s, h);
+ dir_ctx.emplace(store->svc.sysobj->init_obj_ctx());
+}
+
+int RGWBulkUploadOp::handle_dir(const boost::string_ref path)
+{ /*
+ * Handle a directory entry of the uploaded archive by creating the bucket
+ * named by the first component of `path`.
+ *
+ * Checks the user's bucket quota, reads any existing bucket info, forwards
+ * the request to the metadata master when this zone is not the master,
+ * creates the bucket with a default ACL owned by the requesting user and
+ * links it to that user.
+ *
+ * Returns op_ret: 0 on success, -ERR_BUCKET_EXISTS when the bucket was
+ * already present for this user, -EEXIST on an owner or placement-rule
+ * conflict, or the error of the failing step.
+ */
+ ldpp_dout(this, 20) << "got directory=" << path << dendl;
+
+ op_ret = handle_dir_verify_permission();
+ if (op_ret < 0) {
+ return op_ret;
+ }
+ // NOTE(review): parse_path() returns an optional that is dereferenced unchecked below - assumes `path` always contains a bucket name; confirm.
+ std::string bucket_name;
+ rgw_obj_key object_junk;
+ std::tie(bucket_name, object_junk) = *parse_path(path);
+
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().domain_root,
+ rgw_make_bucket_entry_name(s->bucket_tenant, bucket_name)); // NOTE(review): obj is not referenced again in this function
+
+ /* we need to make sure we read bucket info, it's not read before for this
+ * specific request */
+ RGWBucketInfo binfo;
+ std::map<std::string, ceph::bufferlist> battrs;
+ op_ret = store->get_bucket_info(*dir_ctx, s->bucket_tenant, bucket_name,
+ binfo, nullptr, &battrs);
+ if (op_ret < 0 && op_ret != -ENOENT) {
+ return op_ret;
+ }
+ const bool bucket_exists = (op_ret != -ENOENT);
+ // An existing bucket whose ACL owner is another user is a name conflict.
+ if (bucket_exists) {
+ RGWAccessControlPolicy old_policy(s->cct);
+ int r = rgw_op_get_bucket_policy_from_attr(s->cct, store, binfo,
+ battrs, &old_policy);
+ if (r >= 0) {
+ if (old_policy.get_owner().get_id().compare(s->user->user_id) != 0) {
+ op_ret = -EEXIST;
+ return op_ret;
+ }
+ }
+ }
+
+ RGWBucketInfo master_info;
+ rgw_bucket *pmaster_bucket = nullptr;
+ uint32_t *pmaster_num_shards = nullptr;
+ real_time creation_time;
+ obj_version objv, ep_objv, *pobjv = nullptr;
+ // When this zone is not the metadata master, forward the request and take bucket info and versions from the reply.
+ if (! store->svc.zone->is_meta_master()) {
+ JSONParser jp;
+ ceph::bufferlist in_data;
+ req_info info = s->info;
+ forward_req_info(s->cct, info, bucket_name);
+ op_ret = forward_request_to_master(s, nullptr, store, in_data, &jp, &info);
+ if (op_ret < 0) {
+ return op_ret;
+ }
+
+ JSONDecoder::decode_json("entry_point_object_ver", ep_objv, &jp);
+ JSONDecoder::decode_json("object_ver", objv, &jp);
+ JSONDecoder::decode_json("bucket_info", master_info, &jp);
+
+ ldpp_dout(this, 20) << "parsed: objv.tag=" << objv.tag << " objv.ver=" << objv.ver << dendl;
+ ldpp_dout(this, 20) << "got creation_time="<< master_info.creation_time << dendl;
+
+ pmaster_bucket= &master_info.bucket;
+ creation_time = master_info.creation_time;
+ pmaster_num_shards = &master_info.num_shards;
+ pobjv = &objv;
+ } else {
+ pmaster_bucket = nullptr;
+ pmaster_num_shards = nullptr;
+ }
+
+ rgw_placement_rule placement_rule(binfo.placement_rule, s->info.storage_class);
+ // NOTE(review): op_ret from select_bucket_placement() is not inspected, and the local `bucket` below is filled (from s->bucket_name, not bucket_name) but never used - confirm intent.
+ if (bucket_exists) {
+ rgw_placement_rule selected_placement_rule;
+ rgw_bucket bucket;
+ bucket.tenant = s->bucket_tenant;
+ bucket.name = s->bucket_name;
+ op_ret = store->svc.zone->select_bucket_placement(*(s->user),
+ store->svc.zone->get_zonegroup().get_id(),
+ placement_rule,
+ &selected_placement_rule,
+ nullptr);
+ if (selected_placement_rule != binfo.placement_rule) {
+ op_ret = -EEXIST;
+ ldpp_dout(this, 20) << "non-coherent placement rule" << dendl;
+ return op_ret;
+ }
+ }
+
+ /* Create metadata: ACLs. */
+ std::map<std::string, ceph::bufferlist> attrs;
+ RGWAccessControlPolicy policy;
+ policy.create_default(s->user->user_id, s->user->display_name);
+ ceph::bufferlist aclbl;
+ policy.encode(aclbl);
+ attrs.emplace(RGW_ATTR_ACL, std::move(aclbl));
+
+ RGWQuotaInfo quota_info;
+ const RGWQuotaInfo * pquota_info = nullptr;
+
+ rgw_bucket bucket;
+ bucket.tenant = s->bucket_tenant; /* ignored if bucket exists */
+ bucket.name = bucket_name;
+
+
+ RGWBucketInfo out_info;
+ op_ret = store->create_bucket(*(s->user),
+ bucket,
+ store->svc.zone->get_zonegroup().get_id(),
+ placement_rule, binfo.swift_ver_location,
+ pquota_info, attrs,
+ out_info, pobjv, &ep_objv, creation_time,
+ pmaster_bucket, pmaster_num_shards, true);
+ /* continue if EEXIST and create_bucket will fail below. this way we can
+ * recover from a partial create by retrying it. */
+ ldpp_dout(this, 20) << "rgw_create_bucket returned ret=" << op_ret
+ << ", bucket=" << bucket << dendl;
+
+ if (op_ret && op_ret != -EEXIST) {
+ return op_ret;
+ }
+
+ const bool existed = (op_ret == -EEXIST);
+ if (existed) {
+ /* bucket already existed, might have raced with another bucket creation, or
+ * might be partial bucket creation that never completed. Read existing bucket
+ * info, verify that the reported bucket owner is the current user.
+ * If all is ok then update the user's list of buckets.
+ * Otherwise inform client about a name conflict.
+ */
+ if (out_info.owner.compare(s->user->user_id) != 0) {
+ op_ret = -EEXIST;
+ ldpp_dout(this, 20) << "conflicting bucket name" << dendl;
+ return op_ret;
+ }
+ bucket = out_info.bucket;
+ }
+ // NOTE(review): when linking fails, op_ret is overwritten with rgw_unlink_bucket()'s result, so a successful unlink makes this function return 0 - confirm.
+ op_ret = rgw_link_bucket(store, s->user->user_id, bucket,
+ out_info.creation_time, false);
+ if (op_ret && !existed && op_ret != -EEXIST) {
+ /* if it exists (or previously existed), don't remove it! */
+ op_ret = rgw_unlink_bucket(store, s->user->user_id,
+ bucket.tenant, bucket.name);
+ if (op_ret < 0) {
+ ldpp_dout(this, 0) << "WARNING: failed to unlink bucket: ret=" << op_ret << dendl;
+ }
+ } else if (op_ret == -EEXIST || (op_ret == 0 && existed)) {
+ ldpp_dout(this, 20) << "containers already exists" << dendl;
+ op_ret = -ERR_BUCKET_EXISTS;
+ }
+
+ return op_ret;
+}
+
+
+/**
+ * Check whether the requester may create `obj` in the given bucket.
+ *
+ * Reads the bucket ACL and the bucket's IAM policy (if any) from `battrs`,
+ * then evaluates s3PutObject against the identity policies and the bucket
+ * policy. A Deny refuses and an Allow grants. With no verdict the decision
+ * is left to verify_bucket_permission_no_policy() with RGW_PERM_WRITE.
+ *
+ * @param binfo         destination bucket
+ * @param obj           object about to be written
+ * @param battrs        attributes of the destination bucket
+ * @param bucket_owner  [out] owner taken from the bucket ACL; only written
+ *                      when the ACL could be read
+ * @return true when the write is permitted; false otherwise. When the ACL
+ *         cannot be read, op_ret holds the error from read_bucket_policy().
+ */
+bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo,
+                                                    const rgw_obj& obj,
+                                                    std::map<std::string, ceph::bufferlist>& battrs,
+                                                    ACLOwner& bucket_owner /* out */)
+{
+  RGWAccessControlPolicy bacl(store->ctx());
+  op_ret = read_bucket_policy(store, s, binfo, battrs, &bacl, binfo.bucket);
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "cannot read_policy() for bucket" << dendl;
+    return false;
+  }
+
+  auto policy = get_iam_policy_from_attr(s->cct, store, battrs, binfo.bucket.tenant);
+
+  bucket_owner = bacl.get_owner();
+  if (policy || ! s->iam_user_policies.empty()) {
+    auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env,
+                                             boost::none,
+                                             rgw::IAM::s3PutObject, obj);
+    if (usr_policy_res == Effect::Deny) {
+      return false;
+    }
+
+    // This branch is also entered when only user policies exist, so the
+    // bucket policy may be absent: evaluate it only when present instead of
+    // dereferencing an empty optional.
+    rgw::IAM::Effect e = Effect::Pass;
+    if (policy) {
+      e = policy->eval(s->env, *s->auth.identity,
+                       rgw::IAM::s3PutObject, obj);
+    }
+    if (e == Effect::Allow) {
+      return true;
+    } else if (e == Effect::Deny) {
+      return false;
+    } else if (usr_policy_res == Effect::Allow) {
+      return true;
+    }
+  }
+
+  return verify_bucket_permission_no_policy(this, s, s->user_acl.get(),
+                                            &bacl, RGW_PERM_WRITE);
+}
+
+/**
+ * Store one regular file extracted from the uploaded archive as an object.
+ *
+ * Steps: size check against rgw_max_put_size, bucket lookup, permission and
+ * quota checks, streaming the body through an AtomicObjectProcessor (with an
+ * optional compression filter), verifying the streamed size against the
+ * declared one, and finally completing the write with ETAG, ACL and (when
+ * applicable) compression attributes.
+ *
+ * @param path  "<bucket>/<object>" style path of the entry, split by
+ *              parse_path()
+ * @param size  file size declared for the entry
+ * @param body  stream positioned at the first byte of the file content
+ * @return 0 on success, negative error code otherwise (also left in op_ret)
+ */
+int RGWBulkUploadOp::handle_file(const boost::string_ref path,
+                                 const size_t size,
+                                 AlignedStreamGetter& body)
+{
+
+  ldpp_dout(this, 20) << "got file=" << path << ", size=" << size << dendl;
+
+  if (size > static_cast<size_t>(s->cct->_conf->rgw_max_put_size)) {
+    op_ret = -ERR_TOO_LARGE;
+    return op_ret;
+  }
+
+  std::string bucket_name;
+  rgw_obj_key object;
+  /* NOTE(review): the result of parse_path() is dereferenced unchecked;
+   * confirm it cannot come back empty for paths reaching this point. */
+  std::tie(bucket_name, object) = *parse_path(path);
+
+  auto& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+  RGWBucketInfo binfo;
+  std::map<std::string, ceph::bufferlist> battrs;
+  ACLOwner bowner;
+  op_ret = store->get_bucket_info(*s->sysobj_ctx, s->user->user_id.tenant,
+                                  bucket_name, binfo, nullptr, &battrs);
+  if (op_ret == -ENOENT) {
+    /* Deliberately not fatal here; processing continues with an empty binfo. */
+    ldpp_dout(this, 20) << "non existent directory=" << bucket_name << dendl;
+  } else if (op_ret < 0) {
+    return op_ret;
+  }
+
+  if (! handle_file_verify_permission(binfo,
+                                      rgw_obj(binfo.bucket, object),
+                                      battrs, bowner)) {
+    ldpp_dout(this, 20) << "object creation unauthorized" << dendl;
+    op_ret = -EACCES;
+    return op_ret;
+  }
+
+  /* First quota check: refuse early, before any data is read. */
+  op_ret = store->check_quota(bowner.get_id(), binfo.bucket,
+                              user_quota, bucket_quota, size);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  /* NOTE(review): this and the versioning check below consult the
+   * request-level s->bucket_info / s->bucket rather than binfo; confirm that
+   * is intended for bulk uploads. */
+  op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  rgw_obj obj(binfo.bucket, object);
+  if (s->bucket_info.versioning_enabled()) {
+    store->gen_rand_obj_instance_name(&obj);
+  }
+
+  rgw_placement_rule dest_placement = s->dest_placement;
+  dest_placement.inherit_from(binfo.placement_rule);
+
+  rgw::AioThrottle aio(store->ctx()->_conf->rgw_put_obj_min_window_size);
+
+  using namespace rgw::putobj;
+
+  /* NOTE(review): the processor receives &s->dest_placement, not the local
+   * dest_placement computed above (which only drives the compression
+   * lookup); confirm this is intended. */
+  AtomicObjectProcessor processor(&aio, store, binfo, &s->dest_placement, bowner.get_id(),
+                                  obj_ctx, obj, 0, s->req_id);
+
+  op_ret = processor.prepare();
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "cannot prepare processor due to ret=" << op_ret << dendl;
+    return op_ret;
+  }
+
+  /* No filters by default. */
+  DataProcessor *filter = &processor;
+
+  const auto& compression_type = store->svc.zone->get_zone_params().get_compression_type(
+      dest_placement);
+  CompressorRef plugin;
+  boost::optional<RGWPutObj_Compress> compressor;
+  if (compression_type != "none") {
+    plugin = Compressor::create(s->cct, compression_type);
+    if (! plugin) {
+      /* A missing plugin is not fatal: the object is stored uncompressed. */
+      ldpp_dout(this, 1) << "Cannot load plugin for rgw_compression_type "
+                         << compression_type << dendl;
+    } else {
+      compressor.emplace(s->cct, plugin, filter);
+      filter = &*compressor;
+    }
+  }
+
+  /* Upload file content. */
+  ssize_t len = 0;
+  size_t ofs = 0;
+  MD5 hash;
+  do {
+    ceph::bufferlist data;
+    len = body.get_at_most(s->cct->_conf->rgw_max_chunk_size, data);
+
+    /* Log only the chunk size: the payload is arbitrary binary data without
+     * a guaranteed NUL terminator, so it must not be streamed as a C string. */
+    ldpp_dout(this, 20) << "body chunk len=" << len << dendl;
+    if (len < 0) {
+      op_ret = len;
+      return op_ret;
+    } else if (len > 0) {
+      hash.Update((const unsigned char *)data.c_str(), data.length());
+      op_ret = filter->process(std::move(data), ofs);
+      if (op_ret < 0) {
+        ldpp_dout(this, 20) << "filter->process() returned ret=" << op_ret << dendl;
+        return op_ret;
+      }
+
+      ofs += len;
+    }
+
+  } while (len > 0);
+
+  // flush
+  op_ret = filter->process({}, ofs);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  if (ofs != size) {
+    ldpp_dout(this, 10) << "real file size different from declared" << dendl;
+    op_ret = -EINVAL;
+    return op_ret;
+  }
+
+  /* Second quota check, repeated after the data has been streamed. */
+  op_ret = store->check_quota(bowner.get_id(), binfo.bucket,
+                              user_quota, bucket_quota, size);
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "quota exceeded for path=" << path << dendl;
+    return op_ret;
+  }
+
+  op_ret = store->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+  unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  hash.Final(m);
+  buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+
+  /* Create metadata: ETAG. */
+  std::map<std::string, ceph::bufferlist> attrs;
+  std::string etag = calc_md5;
+  ceph::bufferlist etag_bl;
+  etag_bl.append(etag.c_str(), etag.size() + 1);
+  attrs.emplace(RGW_ATTR_ETAG, std::move(etag_bl));
+
+  /* Create metadata: ACLs. */
+  RGWAccessControlPolicy policy;
+  policy.create_default(s->user->user_id, s->user->display_name);
+  ceph::bufferlist aclbl;
+  policy.encode(aclbl);
+  attrs.emplace(RGW_ATTR_ACL, std::move(aclbl));
+
+  /* Create metadata: compression info. */
+  if (compressor && compressor->is_compressed()) {
+    ceph::bufferlist tmp;
+    RGWCompressionInfo cs_info;
+    cs_info.compression_type = plugin->get_type_name();
+    /* The uncompressed size of *this* file; s->obj_size belongs to the
+     * enclosing request and is never set from this entry. `size` was
+     * verified against the streamed byte count above. */
+    cs_info.orig_size = size;
+    cs_info.blocks = std::move(compressor->get_compression_blocks());
+    encode(cs_info, tmp);
+    attrs.emplace(RGW_ATTR_COMPRESSION, std::move(tmp));
+  }
+
+  /* Complete the transaction. */
+  op_ret = processor.complete(size, etag, nullptr, ceph::real_time(),
+                              attrs, ceph::real_time() /* delete_at */,
+                              nullptr, nullptr, nullptr, nullptr, nullptr);
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "processor::complete returned op_ret=" << op_ret << dendl;
+  }
+
+  return op_ret;
+}
+
+/**
+ * Drive the bulk upload: read the request body as a sequence of TAR blocks,
+ * create a bucket for every directory entry and an object for every regular
+ * file entry. Per-entry errors are collected in `failures`; successfully
+ * stored files are counted in `num_created`. An error listed in
+ * `terminal_errors` stops the whole upload.
+ */
+void RGWBulkUploadOp::execute()
+{
+  ceph::bufferlist buffer(64 * 1024);
+
+  ldpp_dout(this, 20) << "start" << dendl;
+
+  /* Create an instance of stream-abstracting class. Having this indirection
+   * allows for easy introduction of decompressors like gzip and bzip2. */
+  auto stream = create_stream();
+  if (! stream) {
+    return;
+  }
+
+  /* Handling the $UPLOAD_PATH accordingly to the Swift's Bulk middleware. See:
+   * https://github.com/openstack/swift/blob/2.13.0/swift/common/middleware/bulk.py#L31-L41 */
+  std::string bucket_path, file_prefix;
+  std::tie(bucket_path, file_prefix) = handle_upload_path(s);
+
+  auto status = rgw::tar::StatusIndicator::create();
+  do {
+    op_ret = stream->get_exactly(rgw::tar::BLOCK_SIZE, buffer);
+    if (op_ret < 0) {
+      ldpp_dout(this, 2) << "cannot read header" << dendl;
+      return;
+    }
+
+    /* We need to re-interpret the buffer as a TAR block. Exactly two blocks
+     * must be tracked to detect the end-of-archive. It occurs when both of
+     * them are empty (zeroed). Tracing this particular inter-block dependency
+     * is responsibility of the rgw::tar::StatusIndicator class. */
+    boost::optional<rgw::tar::HeaderView> header;
+    std::tie(status, header) = rgw::tar::interpret_block(status, buffer);
+
+    if (! status.empty() && header) {
+      /* This specific block isn't empty (entirely zeroed), so we can parse
+       * it as a TAR header and dispatch. At the moment we do support only
+       * regular files and directories. Everything else (symlinks, devices)
+       * will be ignored but won't cease the whole upload. */
+      switch (header->get_filetype()) {
+        case rgw::tar::FileType::NORMAL_FILE: {
+          ldpp_dout(this, 2) << "handling regular file" << dendl;
+
+          /* Own the name in a std::string: building a boost::string_ref from
+           * the temporary `file_prefix + ...` would leave it dangling as
+           * soon as the full expression ends. */
+          const std::string filename = bucket_path.empty()
+            ? header->get_filename().to_string()
+            : file_prefix + header->get_filename().to_string();
+          auto body = AlignedStreamGetter(0, header->get_filesize(),
+                                          rgw::tar::BLOCK_SIZE, *stream);
+          op_ret = handle_file(filename,
+                               header->get_filesize(),
+                               body);
+          if (! op_ret) {
+            /* Only regular files counts. */
+            num_created++;
+          } else {
+            failures.emplace_back(op_ret, filename);
+          }
+          break;
+        }
+        case rgw::tar::FileType::DIRECTORY: {
+          ldpp_dout(this, 2) << "handling regular directory" << dendl;
+
+          /* Both alternatives outlive dirname (header buffer / bucket_path),
+           * so a non-owning view is safe here. */
+          boost::string_ref dirname = bucket_path.empty() ? header->get_filename() : bucket_path;
+          op_ret = handle_dir(dirname);
+          if (op_ret < 0 && op_ret != -ERR_BUCKET_EXISTS) {
+            failures.emplace_back(op_ret, dirname.to_string());
+          }
+          break;
+        }
+        default: {
+          /* Not recognized. Skip. */
+          op_ret = 0;
+          break;
+        }
+      }
+
+      /* In case of any problems with sub-request authorization Swift simply
+       * terminates whole upload immediately. This is a membership test:
+       * boost::algorithm::contains(Input, Test) looks for the whole Test
+       * sequence inside Input, so a one-element input could never match a
+       * multi-element list of terminal errors. */
+      if (std::find(std::begin(terminal_errors), std::end(terminal_errors),
+                    op_ret) != std::end(terminal_errors)) {
+        ldpp_dout(this, 2) << "terminating due to ret=" << op_ret << dendl;
+        break;
+      }
+    } else {
+      ldpp_dout(this, 2) << "an empty block" << dendl;
+      op_ret = 0;
+    }
+
+    buffer.clear();
+  } while (! status.eof());
+
+  return;
+}
+
+/**
+ * Consume whatever is left of the current entry, including the padding up to
+ * the next multiple of `alignment`, so the underlying stream is positioned at
+ * the start of the following block.
+ */
+RGWBulkUploadOp::AlignedStreamGetter::~AlignedStreamGetter()
+{
+  /* Bytes needed to round `length` up to a multiple of `alignment`. Written
+   * without unsigned negation so it is also correct for alignments that are
+   * not a power of two. */
+  const size_t padding = (alignment - length % alignment) % alignment;
+  const size_t aligned_length = length + padding;
+  ceph::bufferlist junk;
+
+  DecoratedStreamGetter::get_exactly(aligned_length - position, junk);
+}
+
+/**
+ * Read up to @p want bytes into @p dst, capped at `length - position`.
+ * A positive result advances `position`; the result of the underlying getter
+ * is returned unchanged.
+ */
+ssize_t RGWBulkUploadOp::AlignedStreamGetter::get_at_most(const size_t want,
+                                                          ceph::bufferlist& dst)
+{
+  const size_t capped = std::min(want, length - position);
+  const auto got = DecoratedStreamGetter::get_at_most(capped, dst);
+  if (got <= 0) {
+    return got;
+  }
+  position += got;
+  return got;
+}
+
+/**
+ * Read @p want bytes into @p dst through the decorated getter. A positive
+ * result advances `position`; the result is returned unchanged.
+ */
+ssize_t RGWBulkUploadOp::AlignedStreamGetter::get_exactly(const size_t want,
+                                                          ceph::bufferlist& dst)
+{
+  const auto got = DecoratedStreamGetter::get_exactly(want, dst);
+  if (got <= 0) {
+    return got;
+  }
+  position += got;
+  return got;
+}
+
+/**
+ * Require RGW_PERM_WRITE (ACL-only check, no IAM policy) on the object when
+ * the request names one, otherwise on the bucket.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWSetAttrs::verify_permission()
+{
+  // This looks to be part of the RGW-NFS machinery and has no S3 or
+  // Swift equivalent.
+  const bool allowed = s->object.empty()
+    ? verify_bucket_permission_no_policy(this, s, RGW_PERM_WRITE)
+    : verify_object_permission_no_policy(this, s, RGW_PERM_WRITE);
+
+  return allowed ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWSetAttrs::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Apply the attributes gathered by get_params(): onto the object when the
+ * request names one, otherwise merged into the bucket attributes. The result
+ * is left in op_ret.
+ */
+void RGWSetAttrs::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  rgw_obj obj(s->bucket, s->object);
+
+  const bool bucket_level = s->object.empty();
+  if (bucket_level) {
+    /* Overlay the new values on the cached bucket attributes, then persist. */
+    for (auto& kv : attrs) {
+      s->bucket_attrs[kv.first] = std::move(kv.second);
+    }
+    op_ret = rgw_bucket_set_attrs(store, s->bucket_info, s->bucket_attrs,
+                                  &s->bucket_info.objv_tracker);
+    return;
+  }
+
+  store->set_atomic(s->obj_ctx, obj);
+  op_ret = store->set_attrs(s->obj_ctx, s->bucket_info, obj, attrs, nullptr);
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWGetObjLayout::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Look up the requested object and record its head object (`head_obj`) and
+ * manifest (`manifest`). The outcome is left in op_ret.
+ */
+void RGWGetObjLayout::execute()
+{
+  /* The former local `rgw_obj obj` was never used; the target is built
+   * directly from the request's bucket and object key. */
+  RGWRados::Object target(store,
+                          s->bucket_info,
+                          *static_cast<RGWObjectCtx *>(s->obj_ctx),
+                          rgw_obj(s->bucket, s->object));
+  RGWRados::Object::Read stat_op(&target);
+
+  op_ret = stat_op.prepare();
+  if (op_ret < 0) {
+    return;
+  }
+
+  head_obj = stat_op.state.head_obj;
+
+  op_ret = target.get_manifest(&manifest);
+}
+
+
+/**
+ * Only the bucket owner may configure metadata search.
+ *
+ * @return 0 for the owner, -EACCES for anyone else
+ */
+int RGWConfigBucketMetaSearch::verify_permission()
+{
+  const bool is_owner = s->auth.identity->is_owner_of(s->bucket_owner.get_id());
+  return is_owner ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWConfigBucketMetaSearch::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Store the metadata-search configuration parsed by get_params() in the
+ * bucket instance info. The outcome is left in op_ret.
+ */
+void RGWConfigBucketMetaSearch::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    ldpp_dout(this, 20) << "NOTICE: get_params() returned ret=" << op_ret << dendl;
+    return;
+  }
+
+  s->bucket_info.mdsearch_config = mdsearch_config;
+
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, real_time(), &s->bucket_attrs);
+  if (op_ret >= 0) {
+    return;
+  }
+
+  ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+                     << " returned err=" << op_ret << dendl;
+}
+
+/**
+ * Only the bucket owner may read the metadata-search configuration.
+ *
+ * @return 0 for the owner, -EACCES for anyone else
+ */
+int RGWGetBucketMetaSearch::verify_permission()
+{
+  const bool is_owner = s->auth.identity->is_owner_of(s->bucket_owner.get_id());
+  return is_owner ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWGetBucketMetaSearch::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Only the bucket owner may remove the metadata-search configuration.
+ *
+ * @return 0 for the owner, -EACCES for anyone else
+ */
+int RGWDelBucketMetaSearch::verify_permission()
+{
+  const bool is_owner = s->auth.identity->is_owner_of(s->bucket_owner.get_id());
+  return is_owner ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWDelBucketMetaSearch::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Clear the bucket's metadata-search configuration and persist the bucket
+ * instance info. The outcome is left in op_ret.
+ */
+void RGWDelBucketMetaSearch::execute()
+{
+  s->bucket_info.mdsearch_config.clear();
+
+  op_ret = store->put_bucket_instance_info(s->bucket_info, false, real_time(), &s->bucket_attrs);
+  if (op_ret >= 0) {
+    return;
+  }
+
+  ldpp_dout(this, 0) << "NOTICE: put_bucket_info on bucket=" << s->bucket.name
+                     << " returned err=" << op_ret << dendl;
+}
+
+
+// Out-of-line (empty) definition of the destructor.
+RGWHandler::~RGWHandler()
+{
+}
+
+/**
+ * Remember the store and request state this handler works on.
+ *
+ * @param _store  storage backend, saved in `store`
+ * @param _s      request state, saved in `s`
+ * @param cio     not used by this implementation
+ * @return always 0
+ */
+int RGWHandler::init(RGWRados *_store,
+ struct req_state *_s,
+ rgw::io::BasicClient *cio)
+{
+ store = _store;
+ s = _s;
+
+ return 0;
+}
+
+/**
+ * Build the bucket policies for the request and, on success, the IAM
+ * environment.
+ *
+ * @return the result of rgw_build_bucket_policies(); on failure -ENODATA is
+ *         reported as -EACCES, any other error is passed through
+ */
+int RGWHandler::do_init_permissions()
+{
+  const int ret = rgw_build_bucket_policies(store, s);
+  if (ret >= 0) {
+    rgw_build_iam_environment(store, s);
+    return ret;
+  }
+
+  ldpp_dout(s, 10) << "init_permissions on " << s->bucket
+                   << " failed, ret=" << ret << dendl;
+  if (ret == -ENODATA) {
+    return -EACCES;
+  }
+  return ret;
+}
+
+/**
+ * Build the object policies for the request unless only bucket-level
+ * information is needed.
+ *
+ * @param op           operation being served; supplies prefetch_data() and
+ *                     the log prefix
+ * @param only_bucket  when true nothing is read and 0 is returned
+ * @return 0 / non-negative on success. On failure -ENODATA becomes -EACCES,
+ *         and -EACCES becomes -EPERM for anonymous requesters.
+ */
+int RGWHandler::do_read_permissions(RGWOp *op, bool only_bucket)
+{
+  if (only_bucket) {
+    /* already read bucket info */
+    return 0;
+  }
+
+  int ret = rgw_build_object_policies(store, s, op->prefetch_data());
+  if (ret >= 0) {
+    return ret;
+  }
+
+  ldpp_dout(op, 10) << "read_permissions on " << s->bucket << ":"
+                    << s->object << " only_bucket=" << only_bucket
+                    << " ret=" << ret << dendl;
+  if (ret == -ENODATA) {
+    ret = -EACCES;
+  }
+  if (s->auth.identity->is_anonymous() && ret == -EACCES) {
+    ret = -EPERM;
+  }
+  return ret;
+}
+
+// Delegates error handling to the dialect handler this op was initialised with.
+int RGWOp::error_handler(int err_no, string *error_content) {
+ return dialect_handler->error_handler(err_no, error_content);
+}
+
+// Default implementation: returns err_no unchanged and leaves error_content
+// untouched.
+int RGWHandler::error_handler(int err_no, string *error_content) {
+ // This is the do-nothing error handler
+ return err_no;
+}
+
+// Writes the request's own log prefix followed by "<dialect>:<op name> " to
+// out and returns the stream.
+std::ostream& RGWOp::gen_prefix(std::ostream& out) const
+{
+ // append <dialect>:<op name> to the prefix
+ return s->gen_prefix(out) << s->dialect << ':' << name() << ' ';
+}
+
+/**
+ * Minimal response: record op_ret as the request error when it is non-zero,
+ * then emit the status and finish the headers.
+ */
+void RGWDefaultResponseOp::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s);
+}
+
+/**
+ * Send the response for Put Bucket Policy. A successful operation is turned
+ * into STATUS_NO_CONTENT before the status is reported.
+ */
+void RGWPutBucketPolicy::send_response()
+{
+  /* A successful Put Bucket Policy should return a 204 on success */
+  if (op_ret == 0) {
+    op_ret = STATUS_NO_CONTENT;
+  }
+
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s);
+}
+
+/**
+ * Require the s3PutBucketPolicy permission on the bucket.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWPutBucketPolicy::verify_permission()
+{
+  const bool allowed = verify_bucket_permission(this, s, rgw::IAM::s3PutBucketPolicy);
+  return allowed ? 0 : -EACCES;
+}
+
+/**
+ * Read the request body (at most rgw_max_put_param_size bytes) into `data`.
+ *
+ * @return the status reported by rgw_rest_read_all_input(), also stored in
+ *         op_ret
+ */
+int RGWPutBucketPolicy::get_params()
+{
+ const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+ // TODO: at some point, write a version of rgw_rest_read_all_input that
+ // doesn't use malloc...
+ std::tie(op_ret, data) = rgw_rest_read_all_input(s, max_size, false);
+
+ // ...and that throws exceptions.
+ return op_ret;
+}
+
+/**
+ * Read the policy document from the request, forward the request to the
+ * metadata master when this zone is not the master, then parse the policy
+ * and store its text in the bucket's RGW_ATTR_IAM_POLICY attribute.
+ * A policy that fails to parse yields -EINVAL in op_ret.
+ */
+void RGWPutBucketPolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  const bool is_master = store->svc.zone->is_meta_master();
+  if (!is_master) {
+    op_ret = forward_request_to_master(s, nullptr, store, data, nullptr);
+    if (op_ret < 0) {
+      ldpp_dout(this, 20) << "forward_request_to_master returned ret=" << op_ret << dendl;
+      return;
+    }
+  }
+
+  try {
+    const Policy p(s->cct, s->bucket_tenant, data);
+    /* Work on a copy of the cached attributes for every (re)try. */
+    const auto store_policy = [&p, this] {
+      auto new_attrs = s->bucket_attrs;
+      auto& slot = new_attrs[RGW_ATTR_IAM_POLICY];
+      slot.clear();
+      slot.append(p.text);
+      op_ret = rgw_bucket_set_attrs(store, s->bucket_info, new_attrs,
+                                    &s->bucket_info.objv_tracker);
+      return op_ret;
+    };
+    op_ret = retry_raced_bucket_write(store, s, store_policy);
+  } catch (const rgw::IAM::PolicyParseException& e) {
+    ldpp_dout(this, 20) << "failed to parse policy: " << e.what() << dendl;
+    op_ret = -EINVAL;
+  }
+}
+
+/**
+ * Report the status, finish the headers with an "application/json" content
+ * type and send the policy buffer as the body.
+ */
+void RGWGetBucketPolicy::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s, this, "application/json");
+  dump_body(s, policy);
+}
+
+/**
+ * Require the s3GetBucketPolicy permission on the bucket.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWGetBucketPolicy::verify_permission()
+{
+  const bool allowed = verify_bucket_permission(this, s, rgw::IAM::s3GetBucketPolicy);
+  return allowed ? 0 : -EACCES;
+}
+
+/**
+ * Load the bucket's IAM policy text from the cached bucket attributes into
+ * `policy`. A missing or empty RGW_ATTR_IAM_POLICY attribute yields
+ * -ERR_NO_SUCH_BUCKET_POLICY in op_ret together with an error message.
+ */
+void RGWGetBucketPolicy::execute()
+{
+  /* Read-only access: bind by reference rather than copying the whole
+   * attribute map, and reuse the iterator instead of a second lookup. */
+  const auto& attrs = s->bucket_attrs;
+  const auto aiter = attrs.find(RGW_ATTR_IAM_POLICY);
+  if (aiter == attrs.end()) {
+    ldpp_dout(this, 0) << "can't find bucket IAM POLICY attr bucket_name = "
+                       << s->bucket_name << dendl;
+    op_ret = -ERR_NO_SUCH_BUCKET_POLICY;
+    s->err.message = "The bucket policy does not exist";
+    return;
+  }
+
+  policy = aiter->second;
+
+  if (policy.length() == 0) {
+    ldpp_dout(this, 10) << "The bucket policy does not exist, bucket: "
+                        << s->bucket_name << dendl;
+    op_ret = -ERR_NO_SUCH_BUCKET_POLICY;
+    s->err.message = "The bucket policy does not exist";
+    return;
+  }
+}
+
+/**
+ * Record op_ret as the request error when it is non-zero, then emit the
+ * status and finish the headers.
+ */
+void RGWDeleteBucketPolicy::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s);
+}
+
+/**
+ * Require the s3DeleteBucketPolicy permission on the bucket.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWDeleteBucketPolicy::verify_permission()
+{
+  const bool allowed = verify_bucket_permission(this, s, rgw::IAM::s3DeleteBucketPolicy);
+  return allowed ? 0 : -EACCES;
+}
+
+/**
+ * Remove RGW_ATTR_IAM_POLICY from the bucket attributes and persist the
+ * result through retry_raced_bucket_write(). The outcome is left in op_ret.
+ */
+void RGWDeleteBucketPolicy::execute()
+{
+  /* Each attempt starts from a fresh copy of the cached attributes. */
+  const auto drop_policy = [this] {
+    auto remaining = s->bucket_attrs;
+    remaining.erase(RGW_ATTR_IAM_POLICY);
+    op_ret = rgw_bucket_set_attrs(store, s->bucket_info, remaining,
+                                  &s->bucket_info.objv_tracker);
+    return op_ret;
+  };
+
+  op_ret = retry_raced_bucket_write(store, s, drop_policy);
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWPutBucketObjectLock::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+// Returns the verdict of verify_bucket_owner_or_policy() for the
+// s3PutBucketObjectLockConfiguration action.
+int RGWPutBucketObjectLock::verify_permission()
+{
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3PutBucketObjectLockConfiguration);
+}
+
+/**
+ * Replace the object-lock configuration of a bucket that already has object
+ * lock enabled.
+ *
+ * The request body is parsed as an "ObjectLockConfiguration" XML document,
+ * validated, forwarded to the metadata master when this zone is not the
+ * master, and finally written into the bucket instance info. The outcome is
+ * left in op_ret.
+ */
+void RGWPutBucketObjectLock::execute()
+{
+  if (!s->bucket_info.obj_lock_enabled()) {
+    ldpp_dout(this, 0) << "ERROR: object Lock configuration cannot be enabled on existing buckets" << dendl;
+    op_ret = -ERR_INVALID_BUCKET_STATE;
+    return;
+  }
+
+  RGWXMLDecoder::XMLParser parser;
+  if (!parser.init()) {
+    ldpp_dout(this, 0) << "ERROR: failed to initialize parser" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  if (!parser.parse(data.c_str(), data.length(), 1)) {
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+
+  try {
+    RGWXMLDecoder::decode_xml("ObjectLockConfiguration", obj_lock, &parser, true);
+  } catch (RGWXMLDecoder::err& err) {
+    /* Use the op-aware logger like the rest of this function. */
+    ldpp_dout(this, 5) << "unexpected xml:" << err << dendl;
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+  if (obj_lock.has_rule() && !obj_lock.retention_period_valid()) {
+    ldpp_dout(this, 0) << "ERROR: retention period must be a positive integer value" << dendl;
+    op_ret = -ERR_INVALID_RETENTION_PERIOD;
+    return;
+  }
+
+  if (!store->svc.zone->is_meta_master()) {
+    op_ret = forward_request_to_master(s, nullptr, store, data, nullptr);
+    if (op_ret < 0) {
+      /* Previously `__func__` was glued to the message without a separator;
+       * the op-aware prefix already identifies the operation. */
+      ldpp_dout(this, 20) << "forward_request_to_master returned ret=" << op_ret << dendl;
+      return;
+    }
+  }
+
+  op_ret = retry_raced_bucket_write(store, s, [this] {
+    s->bucket_info.obj_lock = obj_lock;
+    op_ret = store->put_bucket_instance_info(s->bucket_info, false,
+                                             real_time(), &s->bucket_attrs);
+    return op_ret;
+  });
+  return;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWGetBucketObjectLock::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+// Returns the verdict of verify_bucket_owner_or_policy() for the
+// s3GetBucketObjectLockConfiguration action.
+int RGWGetBucketObjectLock::verify_permission()
+{
+ return verify_bucket_owner_or_policy(s, rgw::IAM::s3GetBucketObjectLockConfiguration);
+}
+
+/**
+ * Nothing to fetch here: the only work is to fail with
+ * -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION when object lock is not enabled on
+ * the bucket.
+ */
+void RGWGetBucketObjectLock::execute()
+{
+  const bool lock_enabled = s->bucket_info.obj_lock_enabled();
+  if (lock_enabled) {
+    return;
+  }
+  op_ret = -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION;
+}
+
+/**
+ * Require s3PutObjectRetention on the object, read the request parameters,
+ * and - when governance bypass was requested - record in `bypass_perm`
+ * whether the requester holds s3BypassGovernanceRetention.
+ *
+ * @return 0 on success, -EACCES when the basic permission is missing, or the
+ *         non-zero result of get_params()
+ */
+int RGWPutObjRetention::verify_permission()
+{
+  const bool may_put = verify_object_permission(this, s, rgw::IAM::s3PutObjectRetention);
+  if (!may_put) {
+    return -EACCES;
+  }
+
+  op_ret = get_params();
+  if (op_ret != 0) {
+    return op_ret;
+  }
+
+  if (bypass_governance_mode) {
+    bypass_perm = verify_object_permission(this, s, rgw::IAM::s3BypassGovernanceRetention);
+  }
+  return 0;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWPutObjRetention::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Set the retention of an object in a bucket with object lock enabled.
+ *
+ * The body (already read into `data`) is parsed as a "Retention" XML
+ * document. The new retain-until date must lie in the future. When the
+ * object already carries a retention, shortening it is only allowed if the
+ * existing mode is "GOVERNANCE" and the requester both asked for and is
+ * permitted to bypass governance. The outcome is left in op_ret.
+ */
+void RGWPutObjRetention::execute()
+{
+  if (!s->bucket_info.obj_lock_enabled()) {
+    ldpp_dout(this, 0) << "ERROR: object retention can't be set if bucket object lock not configured" << dendl;
+    op_ret = -ERR_INVALID_REQUEST;
+    return;
+  }
+
+  RGWXMLDecoder::XMLParser parser;
+  if (!parser.init()) {
+    ldpp_dout(this, 0) << "ERROR: failed to initialize parser" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
+  const bool parsed = parser.parse(data.c_str(), data.length(), 1);
+  if (!parsed) {
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+
+  try {
+    RGWXMLDecoder::decode_xml("Retention", obj_retention, &parser, true);
+  } catch (RGWXMLDecoder::err& err) {
+    ldpp_dout(this, 5) << "unexpected xml:" << err << dendl;
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+
+  if (ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) < ceph_clock_now()) {
+    ldpp_dout(this, 0) << "ERROR: the retain until date must be in the future" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
+  bufferlist encoded;
+  obj_retention.encode(encoded);
+  rgw_obj obj(s->bucket, s->object);
+
+  // Compare against the retention currently stored on the object, if any.
+  map<string, bufferlist> attrs;
+  op_ret = get_obj_attrs(store, s, obj, attrs);
+  if (op_ret < 0) {
+    ldpp_dout(this, 0) << "ERROR: get obj attr error"<< dendl;
+    return;
+  }
+
+  const auto existing = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+  if (existing != attrs.end()) {
+    RGWObjectRetention old_obj_retention;
+    try {
+      decode(old_obj_retention, existing->second);
+    } catch (buffer::error& err) {
+      ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl;
+      op_ret = -EIO;
+      return;
+    }
+
+    const bool shortens =
+      ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) <
+      ceph::real_clock::to_time_t(old_obj_retention.get_retain_until_date());
+    if (shortens &&
+        (old_obj_retention.get_mode().compare("GOVERNANCE") != 0 ||
+         !bypass_perm || !bypass_governance_mode)) {
+      op_ret = -EACCES;
+      return;
+    }
+  }
+
+  op_ret = modify_obj_attr(store, s, obj, RGW_ATTR_OBJECT_RETENTION, encoded);
+}
+
+/**
+ * Require the s3GetObjectRetention permission on the object.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWGetObjRetention::verify_permission()
+{
+  const bool allowed = verify_object_permission(this, s, rgw::IAM::s3GetObjectRetention);
+  return allowed ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWGetObjRetention::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Load the object's retention settings into `obj_retention`.
+ *
+ * Fails with -ERR_INVALID_REQUEST when the bucket has no object lock, with
+ * -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION when the object carries no
+ * retention attribute, and with -EIO when the attribute cannot be decoded.
+ * The outcome is left in op_ret.
+ */
+void RGWGetObjRetention::execute()
+{
+  if (!s->bucket_info.obj_lock_enabled()) {
+    ldpp_dout(this, 0) << "ERROR: bucket object lock not configured" << dendl;
+    op_ret = -ERR_INVALID_REQUEST;
+    return;
+  }
+  rgw_obj obj(s->bucket, s->object);
+  map<string, bufferlist> attrs;
+  op_ret = get_obj_attrs(store, s, obj, attrs);
+  if (op_ret < 0) {
+    ldpp_dout(this, 0) << "ERROR: failed to get obj attrs, obj=" << obj
+                       << " ret=" << op_ret << dendl;
+    return;
+  }
+  auto aiter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+  if (aiter == attrs.end()) {
+    op_ret = -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION;
+    return;
+  }
+
+  bufferlist::const_iterator iter{&aiter->second};
+  try {
+    obj_retention.decode(iter);
+  } catch (const buffer::error& e) {
+    /* Same logger as the rest of the function; `__func__` used to be glued
+     * to the message without any separator. */
+    ldpp_dout(this, 0) << __func__ << ": decode object retention config failed" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+  return;
+}
+
+/**
+ * Require the s3PutObjectLegalHold permission on the object.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWPutObjLegalHold::verify_permission()
+{
+  const bool allowed = verify_object_permission(this, s, rgw::IAM::s3PutObjectLegalHold);
+  return allowed ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWPutObjLegalHold::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Set the legal hold of an object in a bucket with object lock enabled.
+ * The request body is parsed as a "LegalHold" XML document, encoded and
+ * stored in the object's RGW_ATTR_OBJECT_LEGAL_HOLD attribute. The outcome
+ * is left in op_ret.
+ */
+void RGWPutObjLegalHold::execute()
+{
+  if (!s->bucket_info.obj_lock_enabled()) {
+    ldpp_dout(this, 0) << "ERROR: object legal hold can't be set if bucket object lock not configured" << dendl;
+    op_ret = -ERR_INVALID_REQUEST;
+    return;
+  }
+
+  RGWXMLDecoder::XMLParser parser;
+  if (!parser.init()) {
+    ldpp_dout(this, 0) << "ERROR: failed to initialize parser" << dendl;
+    op_ret = -EINVAL;
+    return;
+  }
+
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  const bool parsed = parser.parse(data.c_str(), data.length(), 1);
+  if (!parsed) {
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+
+  try {
+    RGWXMLDecoder::decode_xml("LegalHold", obj_legal_hold, &parser, true);
+  } catch (RGWXMLDecoder::err &err) {
+    ldout(s->cct, 5) << "unexpected xml:" << err << dendl;
+    op_ret = -ERR_MALFORMED_XML;
+    return;
+  }
+
+  bufferlist encoded;
+  obj_legal_hold.encode(encoded);
+  rgw_obj obj(s->bucket, s->object);
+  //if instance is empty, we should modify the latest object
+  op_ret = modify_obj_attr(store, s, obj, RGW_ATTR_OBJECT_LEGAL_HOLD, encoded);
+}
+
+/**
+ * Require the s3GetObjectLegalHold permission on the object.
+ *
+ * @return 0 when permitted, -EACCES otherwise
+ */
+int RGWGetObjLegalHold::verify_permission()
+{
+  const bool allowed = verify_object_permission(this, s, rgw::IAM::s3GetObjectLegalHold);
+  return allowed ? 0 : -EACCES;
+}
+
+// Forwards the request state to rgw_bucket_object_pre_exec().
+void RGWGetObjLegalHold::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/**
+ * Load the object's legal-hold setting into `obj_legal_hold`.
+ *
+ * Fails with -ERR_INVALID_REQUEST when the bucket has no object lock, with
+ * -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION when the object carries no
+ * legal-hold attribute, and with -EIO when the attribute cannot be decoded.
+ * The outcome is left in op_ret.
+ */
+void RGWGetObjLegalHold::execute()
+{
+  if (!s->bucket_info.obj_lock_enabled()) {
+    ldpp_dout(this, 0) << "ERROR: bucket object lock not configured" << dendl;
+    op_ret = -ERR_INVALID_REQUEST;
+    return;
+  }
+  rgw_obj obj(s->bucket, s->object);
+  map<string, bufferlist> attrs;
+  op_ret = get_obj_attrs(store, s, obj, attrs);
+  if (op_ret < 0) {
+    ldpp_dout(this, 0) << "ERROR: failed to get obj attrs, obj=" << obj
+                       << " ret=" << op_ret << dendl;
+    return;
+  }
+  auto aiter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD);
+  if (aiter == attrs.end()) {
+    op_ret = -ERR_NO_SUCH_OBJECT_LOCK_CONFIGURATION;
+    return;
+  }
+
+  bufferlist::const_iterator iter{&aiter->second};
+  try {
+    obj_legal_hold.decode(iter);
+  } catch (const buffer::error& e) {
+    /* Same logger as the rest of the function; `__func__` used to be glued
+     * to the message without any separator. */
+    ldpp_dout(this, 0) << __func__ << ": decode object legal hold config failed" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+  return;
+}
+
+// Asks the rados handle for cluster statistics; they are written to stats_op
+// and the call's result is stored in op_ret.
+void RGWGetClusterStat::execute()
+{
+ op_ret = this->store->get_rados_handle()->cluster_stat(stats_op);
+}
+
+
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
new file mode 100644
index 00000000..e76b1258
--- /dev/null
+++ b/src/rgw/rgw_op.h
@@ -0,0 +1,2346 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/**
+ * All operations via the rados gateway are carried out by
+ * small classes known as RGWOps. This class contains a req_state
+ * and each possible command is a subclass of this with a defined
+ * execute() method that does whatever the subclass name implies.
+ * These subclasses must be further subclassed (by interface type)
+ * to provide additional virtual methods such as send_response or get_params.
+ */
+
+#ifndef CEPH_RGW_OP_H
+#define CEPH_RGW_OP_H
+
+#include <limits.h>
+
+#include <array>
+#include <memory>
+#include <string>
+#include <set>
+#include <map>
+#include <vector>
+
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+#include <boost/function.hpp>
+
+#include "common/armor.h"
+#include "common/mime.h"
+#include "common/utf8.h"
+#include "common/ceph_json.h"
+#include "common/ceph_time.h"
+
+#include "rgw_common.h"
+#include "rgw_dmclock.h"
+#include "rgw_rados.h"
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_acl.h"
+#include "rgw_cors.h"
+#include "rgw_quota.h"
+#include "rgw_putobj.h"
+
+#include "rgw_lc.h"
+#include "rgw_torrent.h"
+#include "rgw_tag.h"
+#include "rgw_object_lock.h"
+#include "cls/lock/cls_lock_client.h"
+#include "cls/rgw/cls_rgw_client.h"
+
+#include "services/svc_sys_obj.h"
+
+#include "include/ceph_assert.h"
+
+using ceph::crypto::SHA1;
+
+struct req_state;
+class RGWOp;
+
+
+namespace rgw {
+namespace auth {
+namespace registry {
+
+class StrategyRegistry;
+
+}
+}
+}
+
+int rgw_op_get_bucket_policy_from_attr(CephContext *cct,
+ RGWRados *store,
+ RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& bucket_attrs,
+ RGWAccessControlPolicy *policy);
+
+/**
+ * Base class of the per-dialect request handlers. It keeps the store and
+ * the request state handed over in init() and declares the hooks a concrete
+ * handler has to provide (permission reading, authorization, post-auth
+ * initialisation).
+ */
+class RGWHandler {
+protected:
+  RGWRados* store{nullptr};
+  struct req_state* s{nullptr};
+
+  int do_init_permissions();
+  int do_read_permissions(RGWOp* op, bool only_bucket);
+
+public:
+  RGWHandler() {}
+  virtual ~RGWHandler();
+
+  virtual int init(RGWRados* store,
+                   struct req_state* _s,
+                   rgw::io::BasicClient* cio);
+
+  /* Default: nothing to initialise. */
+  virtual int init_permissions(RGWOp*) { return 0; }
+
+  /* Default: the operation is served as-is. */
+  virtual int retarget(RGWOp* op, RGWOp** new_op) {
+    *new_op = op;
+    return 0;
+  }
+
+  virtual int read_permissions(RGWOp* op) = 0;
+  virtual int authorize(const DoutPrefixProvider* dpp) = 0;
+  virtual int postauth_init() = 0;
+  virtual int error_handler(int err_no, std::string* error_content);
+  virtual void dump(const string& code, const string& message) const {}
+
+  /* Default: quota handling is supported. */
+  virtual bool supports_quota() { return true; }
+};
+
+
+
+void rgw_bucket_object_pre_exec(struct req_state *s);
+
+namespace dmc = rgw::dmclock;
+
+/**
+ * Provide the base class for all ops.
+ */
+class RGWOp : public DoutPrefixProvider {
+protected:
+ // Request state, dialect handler and store are supplied through init();
+ // they are null until then.
+ struct req_state *s;
+ RGWHandler *dialect_handler;
+ RGWRados *store;
+ RGWCORSConfiguration bucket_cors;
+ bool cors_exist;
+ RGWQuotaInfo bucket_quota;
+ RGWQuotaInfo user_quota;
+ // Result of the operation; exposed through get_ret().
+ int op_ret;
+ int do_aws4_auth_completion();
+
+ virtual int init_quota();
+
+public:
+ RGWOp()
+ : s(nullptr),
+ dialect_handler(nullptr),
+ store(nullptr),
+ cors_exist(false),
+ op_ret(0) {
+ }
+
+ virtual ~RGWOp() = default;
+
+ int get_ret() const { return op_ret; }
+
+ // Initialises quota information when the dialect handler supports quota.
+ // Returns a negative init_quota() result, otherwise 0.
+ virtual int init_processing() {
+ if (dialect_handler->supports_quota()) {
+ op_ret = init_quota();
+ if (op_ret < 0)
+ return op_ret;
+ }
+
+ return 0;
+ }
+
+ // Stores the collaborators this op works with.
+ virtual void init(RGWRados *store, struct req_state *s, RGWHandler *dialect_handler) {
+ this->store = store;
+ this->s = s;
+ this->dialect_handler = dialect_handler;
+ }
+ int read_bucket_cors();
+ bool generate_cors_headers(string& origin, string& method, string& headers, string& exp_headers, unsigned *max_age);
+
+ virtual int verify_params() { return 0; }
+ virtual bool prefetch_data() { return false; }
+
+ /* Authenticate requester -- verify its identity.
+ *
+ * NOTE: typically the procedure is common across all operations of the same
+ * dialect (S3, Swift API). However, there are significant exceptions in
+ * both APIs: browser uploads, /info and OPTIONS handlers. All of them use
+ * different, specific authentication schema driving the need for per-op
+ * authentication. The alternative is to duplicate parts of the method-
+ * dispatch logic in RGWHandler::authorize() and pollute it with a lot
+ * of special cases. */
+ virtual int verify_requester(const rgw::auth::StrategyRegistry& auth_registry) {
+ /* TODO(rzarzynski): rename RGWHandler::authorize to generic_authenticate. */
+ return dialect_handler->authorize(this);
+ }
+ virtual int verify_permission() = 0;
+ virtual int verify_op_mask();
+ virtual void pre_exec() {}
+ virtual void execute() = 0;
+ virtual void send_response() {}
+ // Default completion step: just send the response.
+ virtual void complete() {
+ send_response();
+ }
+ virtual const char* name() const = 0;
+ virtual RGWOpType get_type() { return RGW_OP_UNKNOWN; }
+
+ virtual uint32_t op_mask() { return 0; }
+
+ virtual int error_handler(int err_no, string *error_content);
+
+ // implements DoutPrefixProvider
+ std::ostream& gen_prefix(std::ostream& out) const override;
+ CephContext* get_cct() const override { return s->cct; }
+ unsigned get_subsys() const override { return ceph_subsys_rgw; }
+
+ // dmclock scheduling hints; the defaults are the metadata client class and
+ // a cost of 1.
+ virtual dmc::client_id dmclock_client() { return dmc::client_id::metadata; }
+ virtual dmc::Cost dmclock_cost() { return 1; }
+};
+
+// An RGWOp that overrides send_response(); the override is defined out of line.
+class RGWDefaultResponseOp : public RGWOp {
+public:
+ void send_response() override;
+};
+
+/**
+ * One link in a chain of data filters. Every operation is forwarded to
+ * `next` when it is set; without a successor each operation returns 0.
+ */
+class RGWGetObj_Filter : public RGWGetDataCB
+{
+protected:
+ RGWGetObj_Filter *next{nullptr};
+public:
+ RGWGetObj_Filter() {}
+ explicit RGWGetObj_Filter(RGWGetObj_Filter *next): next(next) {}
+ ~RGWGetObj_Filter() override {}
+ /**
+ * Passes data through filter.
+ * Filter can modify content of bl.
+ * When bl_len == 0, it means 'flush'.
+ */
+ int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override {
+ if (next) {
+ return next->handle_data(bl, bl_ofs, bl_len);
+ }
+ return 0;
+ }
+ /**
+ * Flushes any cached data. Used by RGWGetObj_Filter.
+ * Return logic same as handle_data.
+ */
+ virtual int flush() {
+ if (next) {
+ return next->flush();
+ }
+ return 0;
+ }
+ /**
+ * Allows filter to extend range required for successful filtering
+ */
+ virtual int fixup_range(off_t& ofs, off_t& end) {
+ if (next) {
+ return next->fixup_range(ofs, end);
+ }
+ return 0;
+ }
+};
+
+/**
+ * Base class of the "get object" operation. Dialect-specific subclasses
+ * supply get_params() and the send_response_data*() hooks.
+ */
+class RGWGetObj : public RGWOp {
+protected:
+  seed torrent; // get torrent
+  const char *range_str;
+  const char *if_mod;
+  const char *if_unmod;
+  const char *if_match;
+  const char *if_nomatch;
+  uint32_t mod_zone_id;
+  uint64_t mod_pg_ver;
+  off_t ofs;
+  uint64_t total_len;
+  off_t start;
+  off_t end;
+  ceph::real_time mod_time;
+  ceph::real_time lastmod;
+  ceph::real_time unmod_time;
+  ceph::real_time *mod_ptr;
+  ceph::real_time *unmod_ptr;
+  map<string, bufferlist> attrs;
+  bool get_data;
+  bool partial_content;
+  /* Not set by the constructor body, so give it a defined default instead
+   * of leaving it indeterminate. */
+  bool ignore_invalid_range{false};
+  bool range_parsed;
+  bool skip_manifest;
+  bool skip_decrypt{false};
+  rgw_obj obj;
+  utime_t gc_invalidate_time;
+  bool is_slo;
+  string lo_etag;
+  /* Same as ignore_invalid_range: defaulted here because the constructor
+   * body never assigns it. */
+  bool rgwx_stat{false}; /* extended rgw stat operation */
+  string version_id;
+
+  // compression attrs
+  RGWCompressionInfo cs_info;
+  off_t first_block, last_block;
+  off_t q_ofs, q_len;
+  bool first_data;
+  uint64_t cur_ofs;
+  bufferlist waiting;
+  uint64_t action = 0;
+
+  bool get_retention;
+  bool get_legal_hold;
+
+  int init_common();
+public:
+  RGWGetObj() {
+    range_str = NULL;
+    if_mod = NULL;
+    if_unmod = NULL;
+    if_match = NULL;
+    if_nomatch = NULL;
+    mod_zone_id = 0;
+    mod_pg_ver = 0;
+    start = 0;
+    ofs = 0;
+    total_len = 0;
+    end = -1;
+    mod_ptr = NULL;
+    unmod_ptr = NULL;
+    get_data = false;
+    partial_content = false;
+    range_parsed = false;
+    skip_manifest = false;
+    is_slo = false;
+    first_block = 0;
+    last_block = 0;
+    q_ofs = 0;
+    q_len = 0;
+    first_data = true;
+    cur_ofs = 0;
+    get_retention = false;
+    get_legal_hold = false;
+  }
+
+  bool prefetch_data() override;
+
+  void set_get_data(bool get_data) {
+    this->get_data = get_data;
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  int parse_range();
+  int read_user_manifest_part(
+    rgw_bucket& bucket,
+    const rgw_bucket_dir_entry& ent,
+    RGWAccessControlPolicy * const bucket_acl,
+    const boost::optional<rgw::IAM::Policy>& bucket_policy,
+    const off_t start_ofs,
+    const off_t end_ofs,
+    bool swift_slo);
+  int handle_user_manifest(const char *prefix);
+  int handle_slo_manifest(bufferlist& bl);
+
+  int get_data_cb(bufferlist& bl, off_t ofs, off_t len);
+
+  /* Hooks every dialect has to implement. */
+  virtual int get_params() = 0;
+  virtual int send_response_data_error() = 0;
+  virtual int send_response_data(bufferlist& bl, off_t ofs, off_t len) = 0;
+
+  const char* name() const override { return "get_obj"; }
+  RGWOpType get_type() override { return RGW_OP_GET_OBJ; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+  virtual bool need_object_expiration() { return false; }
+  /**
+   * calculates filter used to decrypt RGW objects data
+   *
+   * The default installs no filter: *filter is reset to null and 0 is
+   * returned.
+   */
+  virtual int get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter>* filter, RGWGetObj_Filter* cb, bufferlist* manifest_bl) {
+    *filter = nullptr;
+    return 0;
+  }
+  dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+/**
+ * Filter that hands every chunk to the RGWGetObj it was constructed with,
+ * by calling that op's get_data_cb().
+ */
+class RGWGetObj_CB : public RGWGetObj_Filter
+{
+ RGWGetObj *op;
+public:
+ explicit RGWGetObj_CB(RGWGetObj *owner) : op(owner) {}
+ ~RGWGetObj_CB() override = default;
+
+ int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override {
+ const int ret = op->get_data_cb(bl, bl_ofs, bl_len);
+ return ret;
+ }
+};
+
+/**
+ * Abstract base for the "get_obj_tags" operation (RGW_OP_GET_OBJ_TAGGING,
+ * read op mask). Subclasses implement send_response_data().
+ */
+class RGWGetObjTags : public RGWOp {
+protected:
+ bufferlist tags_bl;
+ bool has_tags = false;
+public:
+ int verify_permission() override;
+ void execute() override;
+ void pre_exec() override;
+
+ // Response hook supplied by the concrete subclass.
+ virtual void send_response_data(bufferlist& bl) = 0;
+
+ const char* name() const override { return "get_obj_tags"; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+ RGWOpType get_type() override { return RGW_OP_GET_OBJ_TAGGING; }
+};
+
+/**
+ * Abstract base for the "put_obj_tags" operation (RGW_OP_PUT_OBJ_TAGGING,
+ * write op mask). Subclasses implement get_params() and send_response().
+ */
+class RGWPutObjTags : public RGWOp {
+protected:
+ bufferlist tags_bl;
+public:
+ int verify_permission() override;
+ void execute() override;
+
+ // Hooks supplied by the concrete subclass.
+ void send_response() override = 0;
+ virtual int get_params() = 0;
+
+ const char* name() const override { return "put_obj_tags"; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+ RGWOpType get_type() override { return RGW_OP_PUT_OBJ_TAGGING; }
+};
+
+/**
+ * Base for the "delete_obj_tags" operation (RGW_OP_DELETE_OBJ_TAGGING,
+ * delete op mask). It declares no pure virtual function of its own.
+ */
+class RGWDeleteObjTags : public RGWOp {
+public:
+ void pre_exec() override;
+ int verify_permission() override;
+ void execute() override;
+
+ const char* name() const override { return "delete_obj_tags"; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+ RGWOpType get_type() override { return RGW_OP_DELETE_OBJ_TAGGING; }
+};
+
+class RGWBulkDelete : public RGWOp { /**
+ * Abstract base for the "bulk_delete" operation (RGW_OP_BULK_DELETE,
+ * delete op mask, dmclock client class "data"). Subclasses implement
+ * get_data() and send_response(). The op holds a Deleter through a
+ * unique_ptr that starts out null.
+ */
+public:
+ struct acct_path_t { // one target: a bucket name plus an object key
+ std::string bucket_name;
+ rgw_obj_key obj_key;
+ };
+
+ struct fail_desc_t { // pairs an int error value with a path
+ int err;
+ acct_path_t path;
+ };
+
+ class Deleter { /* keeps two counters and a list of fail_desc_t entries */
+ protected:
+ const DoutPrefixProvider * dpp;
+ unsigned int num_deleted;
+ unsigned int num_unfound;
+ std::list<fail_desc_t> failures;
+
+ RGWRados * const store;
+ req_state * const s;
+
+ public:
+ Deleter(const DoutPrefixProvider* dpp, RGWRados * const str, req_state * const s)
+ : dpp(dpp),
+ num_deleted(0),
+ num_unfound(0),
+ store(str),
+ s(s) {
+ }
+
+ unsigned int get_num_deleted() const {
+ return num_deleted;
+ }
+
+ unsigned int get_num_unfound() const {
+ return num_unfound;
+ }
+
+ const std::list<fail_desc_t> get_failures() const { // returns by value, i.e. a copy of the list
+ return failures;
+ }
+
+ bool verify_permission(RGWBucketInfo& binfo,
+ map<string, bufferlist>& battrs,
+ ACLOwner& bucket_owner /* out */);
+ bool delete_single(const acct_path_t& path);
+ bool delete_chunk(const std::list<acct_path_t>& paths);
+ };
+ /* End of Deleter subclass */
+
+ static const size_t MAX_CHUNK_ENTRIES = 1024; // NOTE(review): presumably the per-chunk cap for get_data(); confirm in the implementation
+
+protected:
+ std::unique_ptr<Deleter> deleter;
+
+public:
+ RGWBulkDelete()
+ : deleter(nullptr) {
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_data(std::list<acct_path_t>& items,
+ bool * is_truncated) = 0; // subclass hook; fills items and *is_truncated
+ void send_response() override = 0;
+
+ const char* name() const override { return "bulk_delete"; }
+ RGWOpType get_type() override { return RGW_OP_BULK_DELETE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+// Streams an acct_path_t as "<bucket_name>/<obj_key>".
+inline ostream& operator<<(ostream& out, const RGWBulkDelete::acct_path_t &o) {
+ out << o.bucket_name;
+ out << "/";
+ return out << o.obj_key;
+}
+
+
+class RGWBulkUploadOp : public RGWOp { /**
+ * Abstract base for the "bulk_upload" operation (RGW_OP_BULK_UPLOAD, write
+ * op mask, dmclock client class "data"). Subclasses implement
+ * create_stream() and send_response(). The StreamGetter family is only
+ * forward-declared inside this class.
+ */
+ boost::optional<RGWSysObjectCtx> dir_ctx;
+
+protected:
+ class fail_desc_t { // immutable (error value, path) pair
+ public:
+ fail_desc_t(const int err, std::string path)
+ : err(err),
+ path(std::move(path)) {
+ }
+
+ const int err;
+ const std::string path;
+ };
+
+ static constexpr std::array<int, 2> terminal_errors = { // NOTE(review): presumably errors that abort the whole upload; confirm in execute()
+ { -EACCES, -EPERM }
+ };
+
+ /* FIXME: boost::container::small_vector<fail_desc_t, 4> failures; */
+ std::vector<fail_desc_t> failures;
+ size_t num_created; // zero-initialised by the constructor
+
+ class StreamGetter;
+ class DecoratedStreamGetter;
+ class AlignedStreamGetter;
+
+ virtual std::unique_ptr<StreamGetter> create_stream() = 0; // subclass hook
+ virtual void send_response() override = 0;
+
+ boost::optional<std::pair<std::string, rgw_obj_key>>
+ parse_path(const boost::string_ref& path);
+
+ std::pair<std::string, std::string>
+ handle_upload_path(struct req_state *s);
+
+ bool handle_file_verify_permission(RGWBucketInfo& binfo,
+ const rgw_obj& obj,
+ std::map<std::string, ceph::bufferlist>& battrs,
+ ACLOwner& bucket_owner /* out */);
+ int handle_file(boost::string_ref path,
+ size_t size,
+ AlignedStreamGetter& body);
+
+ int handle_dir_verify_permission();
+ int handle_dir(boost::string_ref path);
+
+public:
+ RGWBulkUploadOp()
+ : num_created(0) {
+ }
+
+ void init(RGWRados* const store,
+ struct req_state* const s,
+ RGWHandler* const h) override;
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ const char* name() const override { return "bulk_upload"; }
+
+ RGWOpType get_type() override {
+ return RGW_OP_BULK_UPLOAD;
+ }
+
+ uint32_t op_mask() override {
+ return RGW_OP_TYPE_WRITE;
+ }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+}; /* RGWBulkUploadOp */
+
+
+class RGWBulkUploadOp::StreamGetter { /**
+ * Abstract reader interface with two pure virtual methods. Both take a
+ * requested byte count and a destination bufferlist and return ssize_t.
+ * NOTE(review): judging by the names, get_at_most() may deliver fewer
+ * bytes than requested while get_exactly() may not; confirm against the
+ * implementations.
+ */
+public:
+ StreamGetter() = default;
+ virtual ~StreamGetter() = default;
+
+ virtual ssize_t get_at_most(size_t want, ceph::bufferlist& dst) = 0;
+ virtual ssize_t get_exactly(size_t want, ceph::bufferlist& dst) = 0;
+}; /* End of nested subclass StreamGetter */
+
+
+/**
+ * Pass-through decorator: holds a reference to another StreamGetter and
+ * forwards both read calls to it unchanged.
+ */
+class RGWBulkUploadOp::DecoratedStreamGetter : public StreamGetter {
+ StreamGetter& decoratee;
+
+protected:
+ // Gives subclasses access to the wrapped getter.
+ StreamGetter& get_decoratee() {
+ return decoratee;
+ }
+
+public:
+ explicit DecoratedStreamGetter(StreamGetter& decoratee)
+ : decoratee(decoratee) {
+ }
+ ~DecoratedStreamGetter() override = default;
+
+ ssize_t get_at_most(const size_t want, ceph::bufferlist& dst) override {
+ StreamGetter& inner = get_decoratee();
+ return inner.get_at_most(want, dst);
+ }
+
+ ssize_t get_exactly(const size_t want, ceph::bufferlist& dst) override {
+ StreamGetter& inner = get_decoratee();
+ return inner.get_exactly(want, dst);
+ }
+}; /* RGWBulkUploadOp::DecoratedStreamGetter */
+
+
+class RGWBulkUploadOp::AlignedStreamGetter
+ : public RGWBulkUploadOp::DecoratedStreamGetter { /**
+ * Decorator that stores a position, a length and an alignment next to the
+ * wrapped StreamGetter. The destructor and both get_*() overrides are only
+ * declared here, so how the three values are used is not visible in this
+ * class definition.
+ */
+ size_t position;
+ size_t length;
+ size_t alignment;
+
+public:
+ template <typename U>
+ AlignedStreamGetter(const size_t position,
+ const size_t length,
+ const size_t alignment,
+ U&& decoratee) // forwarded as-is to the DecoratedStreamGetter constructor
+ : DecoratedStreamGetter(std::forward<U>(decoratee)),
+ position(position),
+ length(length),
+ alignment(alignment) {
+ }
+ virtual ~AlignedStreamGetter();
+ ssize_t get_at_most(size_t want, ceph::bufferlist& dst) override;
+ ssize_t get_exactly(size_t want, ceph::bufferlist& dst) override;
+}; /* RGWBulkUploadOp::AlignedStreamGetter */
+
+
+// Plain aggregate of four usage counters; each one starts at zero.
+struct RGWUsageStats {
+ uint64_t bytes_used{0};
+ uint64_t bytes_used_rounded{0};
+ uint64_t buckets_count{0};
+ uint64_t objects_count{0};
+};
+
+#define RGW_LIST_BUCKETS_LIMIT_MAX 10000
+
+class RGWListBuckets : public RGWOp { /**
+ * Abstract base for the "list_buckets" operation (RGW_OP_LIST_BUCKETS,
+ * read op mask). Subclasses implement get_params() and the three
+ * send_response_*() hooks; send_response() itself is an empty override.
+ */
+protected:
+ bool sent_data;
+ std::string marker;
+ std::string end_marker;
+ int64_t limit; // constructor sets this to RGW_LIST_BUCKETS_LIMIT_MAX
+ uint64_t limit_max; // constructor sets this to RGW_LIST_BUCKETS_LIMIT_MAX
+ std::map<std::string, ceph::bufferlist> attrs;
+ bool is_truncated;
+
+ RGWUsageStats global_stats;
+ std::map<std::string, RGWUsageStats> policies_stats;
+
+ virtual uint64_t get_default_max() const { // overridable; this base returns 1000
+ return 1000;
+ }
+
+public:
+ RGWListBuckets()
+ : sent_data(false),
+ limit(RGW_LIST_BUCKETS_LIMIT_MAX),
+ limit_max(RGW_LIST_BUCKETS_LIMIT_MAX),
+ is_truncated(false) {
+ }
+
+ int verify_permission() override;
+ void execute() override;
+
+ virtual int get_params() = 0;
+ virtual void handle_listing_chunk(RGWUserBuckets&& buckets) {
+ /* The default implementation, used by e.g. S3, just generates a new
+ * part of the listing and sends it to the client immediately. Swift can
+ * behave differently: when the reverse option is requested, all incoming
+ * instances of RGWUserBuckets are buffered and finally reversed. */
+ return send_response_data(buckets);
+ }
+ virtual void send_response_begin(bool has_buckets) = 0;
+ virtual void send_response_data(RGWUserBuckets& buckets) = 0;
+ virtual void send_response_end() = 0;
+ void send_response() override {}
+
+ virtual bool should_get_stats() { return false; }
+ virtual bool supports_account_metadata() { return false; }
+
+ const char* name() const override { return "list_buckets"; }
+ RGWOpType get_type() override { return RGW_OP_LIST_BUCKETS; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+}; // class RGWListBuckets
+
+class RGWGetUsage : public RGWOp { /**
+ * Abstract base for the "get_usage" operation (read op mask). Subclasses
+ * implement get_params(); send_response() is an empty override here.
+ * This class does not override get_type().
+ */
+protected:
+ bool sent_data;
+ string start_date;
+ string end_date;
+ int show_log_entries; // int, but initialised from `true` in the constructor
+ int show_log_sum; // int, but initialised from `true` in the constructor
+ map<string, bool> categories;
+ map<rgw_user_bucket, rgw_usage_log_entry> usage;
+ map<string, rgw_usage_log_entry> summary_map;
+ map<string, cls_user_bucket_entry> buckets_usage;
+ cls_user_header header;
+public:
+ RGWGetUsage() : sent_data(false), show_log_entries(true), show_log_sum(true){
+ }
+
+ int verify_permission() override;
+ void execute() override;
+
+ virtual int get_params() = 0;
+ void send_response() override {}
+
+ virtual bool should_get_stats() { return false; }
+
+ const char* name() const override { return "get_usage"; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWStatAccount : public RGWOp { /**
+ * Abstract base for the "stat_account" operation (RGW_OP_STAT_ACCOUNT,
+ * read op mask). Holds one RGWUsageStats plus a map of RGWUsageStats
+ * keyed by string. Subclasses implement send_response().
+ */
+protected:
+ RGWUsageStats global_stats;
+ std::map<std::string, RGWUsageStats> policies_stats;
+
+public:
+ RGWStatAccount() = default;
+
+ int verify_permission() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "stat_account"; }
+ RGWOpType get_type() override { return RGW_OP_STAT_ACCOUNT; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWListBucket : public RGWOp { /**
+ * Abstract base for the "list_bucket" operation (RGW_OP_LIST_BUCKET, read
+ * op mask). Subclasses implement get_params() and send_response().
+ */
+protected:
+ RGWBucketEnt bucket;
+ string prefix;
+ rgw_obj_key marker;
+ rgw_obj_key next_marker;
+ rgw_obj_key end_marker;
+ string max_keys; // string form; parse_max_keys() is declared below (NOTE(review): presumably fills `max`, confirm)
+ string delimiter;
+ string encoding_type;
+ bool list_versions;
+ int max;
+ vector<rgw_bucket_dir_entry> objs;
+ map<string, bool> common_prefixes;
+
+ int default_max;
+ bool is_truncated;
+ bool allow_unordered;
+
+ int shard_id; // constructor default is -1
+
+ int parse_max_keys();
+
+public:
+ RGWListBucket() : list_versions(false), max(0),
+ default_max(0), is_truncated(false),
+ allow_unordered(false), shard_id(-1) {}
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "list_bucket"; }
+ RGWOpType get_type() override { return RGW_OP_LIST_BUCKET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+ virtual bool need_container_stats() { return false; }
+};
+
+class RGWGetBucketLogging : public RGWOp { /**
+ * Abstract base for the "get_bucket_logging" operation
+ * (RGW_OP_GET_BUCKET_LOGGING, read op mask). execute() is an empty
+ * override; subclasses implement send_response().
+ */
+public:
+ RGWGetBucketLogging() {}
+ int verify_permission() override;
+ void execute() override { }
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_bucket_logging"; }
+ RGWOpType get_type() override { return RGW_OP_GET_BUCKET_LOGGING; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWGetBucketLocation : public RGWOp { /**
+ * Abstract base for the "get_bucket_location" operation
+ * (RGW_OP_GET_BUCKET_LOCATION, read op mask). execute() is an empty
+ * override; subclasses implement send_response().
+ */
+public:
+ RGWGetBucketLocation() {}
+ ~RGWGetBucketLocation() override {}
+ int verify_permission() override;
+ void execute() override { }
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_bucket_location"; }
+ RGWOpType get_type() override { return RGW_OP_GET_BUCKET_LOCATION; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWGetBucketVersioning : public RGWOp { /**
+ * Abstract base for the "get_bucket_versioning" operation
+ * (RGW_OP_GET_BUCKET_VERSIONING, read op mask). The three result flags
+ * default to false. Subclasses implement send_response().
+ */
+protected:
+ bool versioned{false};
+ bool versioning_enabled{false};
+ bool mfa_enabled{false};
+public:
+ RGWGetBucketVersioning() = default;
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_bucket_versioning"; }
+ RGWOpType get_type() override { return RGW_OP_GET_BUCKET_VERSIONING; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// Versioning states; the numeric values are spelled out explicitly.
+enum BucketVersionStatus {
+ VersioningStatusInvalid = -1,
+ VersioningNotChanged = 0,
+ VersioningEnabled = 1,
+ VersioningSuspended = 2,
+};
+
+class RGWSetBucketVersioning : public RGWOp { /**
+ * Abstract base for the "set_bucket_versioning" operation
+ * (RGW_OP_SET_BUCKET_VERSIONING, write op mask). Subclasses implement
+ * send_response(); get_params() has a default that returns 0.
+ */
+protected:
+ int versioning_status; // starts as VersioningNotChanged (see constructor)
+ bool mfa_set_status{false};
+ bool mfa_status{false};
+ bufferlist in_data;
+public:
+ RGWSetBucketVersioning() : versioning_status(VersioningNotChanged) {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_params() { return 0; }
+
+ void send_response() override = 0;
+ const char* name() const override { return "set_bucket_versioning"; }
+ RGWOpType get_type() override { return RGW_OP_SET_BUCKET_VERSIONING; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWGetBucketWebsite : public RGWOp { /**
+ * Abstract base for the "get_bucket_website" operation
+ * (RGW_OP_GET_BUCKET_WEBSITE, read op mask). Subclasses implement
+ * send_response().
+ */
+public:
+ RGWGetBucketWebsite() {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_bucket_website"; }
+ RGWOpType get_type() override { return RGW_OP_GET_BUCKET_WEBSITE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWSetBucketWebsite : public RGWOp { /**
+ * Abstract base for the "set_bucket_website" operation
+ * (RGW_OP_SET_BUCKET_WEBSITE, write op mask). Subclasses implement
+ * send_response(); get_params() has a default that returns 0.
+ */
+protected:
+ bufferlist in_data;
+ RGWBucketWebsiteConf website_conf;
+public:
+ RGWSetBucketWebsite() {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_params() { return 0; }
+
+ void send_response() override = 0;
+ const char* name() const override { return "set_bucket_website"; }
+ RGWOpType get_type() override { return RGW_OP_SET_BUCKET_WEBSITE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWDeleteBucketWebsite : public RGWOp { /**
+ * Abstract base for the "delete_bucket_website" operation (write op
+ * mask). Subclasses implement send_response().
+ *
+ * NOTE(review): get_type() reports RGW_OP_SET_BUCKET_WEBSITE, the same
+ * value as RGWSetBucketWebsite. This looks like a copy/paste leftover, but
+ * no delete-specific RGWOpType enumerator is visible from here; confirm
+ * before changing it.
+ */
+public:
+ RGWDeleteBucketWebsite() {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "delete_bucket_website"; }
+ RGWOpType get_type() override { return RGW_OP_SET_BUCKET_WEBSITE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWStatBucket : public RGWOp { /**
+ * Abstract base for the "stat_bucket" operation (RGW_OP_STAT_BUCKET, read
+ * op mask). Holds one RGWBucketEnt. Subclasses implement send_response().
+ */
+protected:
+ RGWBucketEnt bucket;
+
+public:
+ RGWStatBucket() {}
+ ~RGWStatBucket() override {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "stat_bucket"; }
+ RGWOpType get_type() override { return RGW_OP_STAT_BUCKET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWCreateBucket : public RGWOp { /**
+ * Abstract base for the "create_bucket" operation (RGW_OP_CREATE_BUCKET,
+ * write op mask). Subclasses implement send_response(); get_params() has
+ * a default that returns 0.
+ */
+protected:
+ RGWAccessControlPolicy policy;
+ string location_constraint;
+ rgw_placement_rule placement_rule;
+ RGWBucketInfo info;
+ obj_version ep_objv;
+ bool has_cors;
+ bool relaxed_region_enforcement; // false after construction; init() overwrites it from configuration
+ bool obj_lock_enabled;
+ RGWCORSConfiguration cors_config;
+ boost::optional<std::string> swift_ver_location;
+ map<string, buffer::list> attrs;
+ set<string> rmattr_names;
+
+ bufferlist in_data;
+
+ virtual bool need_metadata_upload() const { return false; }
+
+public:
+ RGWCreateBucket() : has_cors(false), relaxed_region_enforcement(false), obj_lock_enabled(false) {}
+
+ void emplace_attr(std::string&& key, buffer::list&& bl) { // moves both arguments into attrs
+ attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct and read the config flag
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ relaxed_region_enforcement =
+ s->cct->_conf.get_val<bool>("rgw_relaxed_region_enforcement");
+ }
+ virtual int get_params() { return 0; }
+ void send_response() override = 0;
+ const char* name() const override { return "create_bucket"; }
+ RGWOpType get_type() override { return RGW_OP_CREATE_BUCKET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWDeleteBucket : public RGWOp { /**
+ * Abstract base for the "delete_bucket" operation (RGW_OP_DELETE_BUCKET,
+ * delete op mask). Holds an RGWObjVersionTracker. Subclasses implement
+ * send_response().
+ */
+protected:
+ RGWObjVersionTracker objv_tracker;
+
+public:
+ RGWDeleteBucket() {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "delete_bucket"; }
+ RGWOpType get_type() override { return RGW_OP_DELETE_BUCKET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+struct rgw_slo_entry { /**
+ * One entry with a path, an etag and a size in bytes. encode() and
+ * decode() handle the three fields in the same order (path, etag,
+ * size_bytes); the order is part of the encoded format and must match on
+ * both sides. decode_json() is only declared here.
+ */
+ string path;
+ string etag;
+ uint64_t size_bytes;
+
+ rgw_slo_entry() : size_bytes(0) {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(path, bl);
+ encode(etag, bl);
+ encode(size_bytes, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(path, bl);
+ decode(etag, bl);
+ decode(size_bytes, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(rgw_slo_entry)
+
+struct RGWSLOInfo { /**
+ * A vector of rgw_slo_entry plus a total size. encode() and decode()
+ * cover entries and total_size, in that order; raw_data is not encoded.
+ */
+ vector<rgw_slo_entry> entries;
+ uint64_t total_size;
+
+ /* in memory only */
+ bufferlist raw_data;
+
+ RGWSLOInfo() : total_size(0) {}
+ ~RGWSLOInfo() {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(entries, bl);
+ encode(total_size, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(entries, bl);
+ decode(total_size, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(RGWSLOInfo)
+
+class RGWPutObj : public RGWOp { /**
+ * Abstract base for the "put_obj" operation (RGW_OP_PUT_OBJ, write op
+ * mask, dmclock client class "data"). Subclasses implement get_params(),
+ * get_data(bufferlist&) and send_response().
+ *
+ * slo_info, obj_retention and obj_legal_hold are raw pointers that the
+ * destructor deletes, so whatever is stored in them must be allocated
+ * with new and is owned by this object.
+ */
+protected:
+ seed torrent;
+ off_t ofs;
+ const char *supplied_md5_b64;
+ const char *supplied_etag;
+ const char *if_match;
+ const char *if_nomatch;
+ std::string copy_source;
+ const char *copy_source_range;
+ RGWBucketInfo copy_source_bucket_info;
+ string copy_source_tenant_name;
+ string copy_source_bucket_name;
+ string copy_source_object_name;
+ string copy_source_version_id;
+ off_t copy_source_range_fst;
+ off_t copy_source_range_lst;
+ string etag;
+ bool chunked_upload;
+ RGWAccessControlPolicy policy;
+ std::unique_ptr <RGWObjTags> obj_tags;
+ const char *dlo_manifest;
+ RGWSLOInfo *slo_info; // owned: deleted in ~RGWPutObj()
+ map<string, bufferlist> attrs;
+ ceph::real_time mtime;
+ uint64_t olh_epoch;
+ string version_id;
+ bufferlist bl_aux;
+ map<string, string> crypt_http_responses;
+ string user_data;
+
+ std::string multipart_upload_id;
+ std::string multipart_part_str;
+ int multipart_part_num = 0;
+
+ boost::optional<ceph::real_time> delete_at;
+ //append obj
+ bool append;
+ uint64_t position;
+ uint64_t cur_accounted_size;
+
+ //object lock
+ RGWObjectRetention *obj_retention; // owned: deleted in ~RGWPutObj()
+ RGWObjectLegalHold *obj_legal_hold; // owned: deleted in ~RGWPutObj()
+
+public:
+ RGWPutObj() : ofs(0),
+ supplied_md5_b64(NULL),
+ supplied_etag(NULL),
+ if_match(NULL),
+ if_nomatch(NULL),
+ copy_source_range(NULL),
+ copy_source_range_fst(0),
+ copy_source_range_lst(0),
+ chunked_upload(0),
+ dlo_manifest(NULL),
+ slo_info(NULL),
+ olh_epoch(0),
+ append(false),
+ position(0),
+ cur_accounted_size(0),
+ obj_retention(nullptr),
+ obj_legal_hold(nullptr) {}
+
+ ~RGWPutObj() override {
+ delete slo_info;
+ delete obj_retention;
+ delete obj_legal_hold;
+ }
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ }
+
+ void emplace_attr(std::string&& key, buffer::list&& bl) { // moves both arguments into attrs
+ attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ /* this is for cases when copying data from other object */
+ virtual int get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter>* filter,
+ RGWGetObj_Filter* cb,
+ map<string, bufferlist>& attrs,
+ bufferlist* manifest_bl) { // default: no filter, *filter reset to null, returns 0
+ *filter = nullptr;
+ return 0;
+ }
+ virtual int get_encrypt_filter(std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+ rgw::putobj::DataProcessor *cb) { // default: leaves *filter untouched and returns 0
+ return 0;
+ }
+
+ int get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len);
+ int get_data(const off_t fst, const off_t lst, bufferlist& bl);
+
+ virtual int get_params() = 0;
+ virtual int get_data(bufferlist& bl) = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "put_obj"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_OBJ; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+class RGWPostObj : public RGWOp { /**
+ * Abstract base for the "post_obj" operation (RGW_OP_POST_OBJ, write op
+ * mask, dmclock client class "data"). Subclasses implement get_params(),
+ * get_data(), send_response(), get_current_filename() and
+ * get_current_content_type().
+ */
+protected:
+ off_t min_len; // constructor default 0
+ off_t max_len; // constructor default LLONG_MAX
+ int len;
+ off_t ofs;
+ const char *supplied_md5_b64;
+ const char *supplied_etag;
+ string etag;
+ RGWAccessControlPolicy policy;
+ map<string, bufferlist> attrs;
+ boost::optional<ceph::real_time> delete_at;
+
+ /* Must be called after get_data() or the result is undefined. */
+ virtual std::string get_current_filename() const = 0;
+ virtual std::string get_current_content_type() const = 0;
+ virtual bool is_next_file_to_upload() { // default: false
+ return false;
+ }
+public:
+ RGWPostObj() : min_len(0),
+ max_len(LLONG_MAX),
+ len(0),
+ ofs(0),
+ supplied_md5_b64(nullptr),
+ supplied_etag(nullptr) {
+ }
+
+ void emplace_attr(std::string&& key, buffer::list&& bl) { // moves both arguments into attrs
+ attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
+ }
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_encrypt_filter(std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+ rgw::putobj::DataProcessor *cb) { // default: leaves *filter untouched and returns 0
+ return 0;
+ }
+ virtual int get_params() = 0;
+ virtual int get_data(ceph::bufferlist& bl, bool& again) = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "post_obj"; }
+ RGWOpType get_type() override { return RGW_OP_POST_OBJ; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+class RGWPutMetadataAccount : public RGWOp { /**
+ * Abstract base for the "put_account_metadata" operation
+ * (RGW_OP_PUT_METADATA_ACCOUNT, write op mask). Subclasses implement
+ * get_params() and send_response(). pre_exec() is an empty override.
+ */
+protected:
+ std::set<std::string> rmattr_names;
+ std::map<std::string, bufferlist> attrs, orig_attrs;
+ std::map<int, std::string> temp_url_keys;
+ RGWQuotaInfo new_quota;
+ bool new_quota_extracted;
+
+ RGWObjVersionTracker acct_op_tracker;
+
+ RGWAccessControlPolicy policy;
+ bool has_policy;
+
+public:
+ RGWPutMetadataAccount()
+ : new_quota_extracted(false),
+ has_policy(false) {
+ }
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ }
+ int init_processing() override;
+ int verify_permission() override;
+ void pre_exec() override { }
+ void execute() override;
+
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ virtual void filter_out_temp_url(map<string, bufferlist>& add_attrs,
+ const set<string>& rmattr_names,
+ map<int, string>& temp_url_keys); // overridable; body is not part of this class definition
+ const char* name() const override { return "put_account_metadata"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_METADATA_ACCOUNT; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWPutMetadataBucket : public RGWOp { /**
+ * Abstract base for the "put_bucket_metadata" operation
+ * (RGW_OP_PUT_METADATA_BUCKET, write op mask). Subclasses implement
+ * get_params() and send_response().
+ */
+protected:
+ map<string, buffer::list> attrs;
+ set<string> rmattr_names;
+ bool has_policy, has_cors;
+ uint32_t policy_rw_mask;
+ RGWAccessControlPolicy policy;
+ RGWCORSConfiguration cors_config;
+ rgw_placement_rule placement_rule;
+ boost::optional<std::string> swift_ver_location;
+
+public:
+ RGWPutMetadataBucket()
+ : has_policy(false), has_cors(false), policy_rw_mask(0)
+ {}
+
+ void emplace_attr(std::string&& key, buffer::list&& bl) { // moves both arguments into attrs
+ attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
+ }
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "put_bucket_metadata"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_METADATA_BUCKET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWPutMetadataObject : public RGWOp { /**
+ * Abstract base for the "put_obj_metadata" operation
+ * (RGW_OP_PUT_METADATA_OBJECT, write op mask). Subclasses implement
+ * get_params() and send_response().
+ */
+protected:
+ RGWAccessControlPolicy policy;
+ boost::optional<ceph::real_time> delete_at;
+ const char *dlo_manifest; // null after construction
+
+public:
+ RGWPutMetadataObject()
+ : dlo_manifest(NULL)
+ {}
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind policy to s->cct
+ RGWOp::init(store, s, h);
+ policy.set_ctx(s->cct);
+ }
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "put_obj_metadata"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_METADATA_OBJECT; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+ virtual bool need_object_expiration() { return false; }
+};
+
+class RGWDeleteObj : public RGWOp { /**
+ * Abstract base for the "delete_obj" operation (RGW_OP_DELETE_OBJ, delete
+ * op mask, dmclock client class "data"). Subclasses implement
+ * send_response(); get_params() has a default that returns 0.
+ */
+protected:
+ bool delete_marker;
+ bool multipart_delete;
+ string version_id;
+ ceph::real_time unmod_since; /* if unmodified since */
+ bool no_precondition_error;
+ std::unique_ptr<RGWBulkDelete::Deleter> deleter; // null after construction
+ bool bypass_perm; // note: the constructor default is true, unlike the other flags
+ bool bypass_governance_mode;
+
+public:
+ RGWDeleteObj()
+ : delete_marker(false),
+ multipart_delete(false),
+ no_precondition_error(false),
+ deleter(nullptr),
+ bypass_perm(true),
+ bypass_governance_mode(false) {
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+ int handle_slo_manifest(bufferlist& bl);
+
+ virtual int get_params() { return 0; }
+ void send_response() override = 0;
+ const char* name() const override { return "delete_obj"; }
+ RGWOpType get_type() override { return RGW_OP_DELETE_OBJ; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+ virtual bool need_object_expiration() { return false; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+class RGWCopyObj : public RGWOp { /**
+ * Abstract base for the "copy_obj" operation (RGW_OP_COPY_OBJ, write op
+ * mask, dmclock client class "data"). Subclasses implement get_params()
+ * and send_response(). check_storage_class(), init_dest_policy() and
+ * send_partial_response() have do-nothing defaults.
+ *
+ * copy_source and md_directive are string views: they do not own their
+ * characters, so the referenced storage has to outlive this op.
+ */
+protected:
+ RGWAccessControlPolicy dest_policy;
+ const char *if_mod;
+ const char *if_unmod;
+ const char *if_match;
+ const char *if_nomatch;
+ // Required or it is not a copy operation
+ std::string_view copy_source;
+ // Not actually required
+ std::optional<std::string_view> md_directive;
+
+ off_t ofs;
+ off_t len;
+ off_t end; // constructor default is -1
+ ceph::real_time mod_time;
+ ceph::real_time unmod_time;
+ ceph::real_time *mod_ptr;
+ ceph::real_time *unmod_ptr;
+ map<string, buffer::list> attrs;
+ string src_tenant_name, src_bucket_name;
+ rgw_bucket src_bucket;
+ rgw_obj_key src_object;
+ string dest_tenant_name, dest_bucket_name;
+ rgw_bucket dest_bucket;
+ string dest_object;
+ ceph::real_time src_mtime;
+ ceph::real_time mtime;
+ RGWRados::AttrsMod attrs_mod; // constructor default is RGWRados::ATTRSMOD_NONE
+ RGWBucketInfo src_bucket_info;
+ RGWBucketInfo dest_bucket_info;
+ string source_zone;
+ string etag;
+
+ off_t last_ofs;
+
+ string version_id;
+ uint64_t olh_epoch;
+
+ boost::optional<ceph::real_time> delete_at;
+ bool copy_if_newer;
+
+ bool need_to_check_storage_class = false;
+
+ int init_common();
+
+public:
+ RGWCopyObj() {
+ if_mod = NULL;
+ if_unmod = NULL;
+ if_match = NULL;
+ if_nomatch = NULL;
+ ofs = 0;
+ len = 0;
+ end = -1;
+ mod_ptr = NULL;
+ unmod_ptr = NULL;
+ attrs_mod = RGWRados::ATTRSMOD_NONE;
+ last_ofs = 0;
+ olh_epoch = 0;
+ copy_if_newer = false;
+ }
+
+ static bool parse_copy_location(const boost::string_view& src,
+ string& bucket_name,
+ rgw_obj_key& object); // bucket_name and object are out-parameters
+
+ void emplace_attr(std::string&& key, buffer::list&& bl) { // moves both arguments into attrs
+ attrs.emplace(std::move(key), std::move(bl));
+ }
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *h) override { // base init, then bind dest_policy to s->cct
+ RGWOp::init(store, s, h);
+ dest_policy.set_ctx(s->cct);
+ }
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+ void progress_cb(off_t ofs);
+
+ virtual int check_storage_class(const rgw_placement_rule& src_placement) { // default: accepts everything (returns 0)
+ return 0;
+ }
+
+ virtual int init_dest_policy() { return 0; }
+ virtual int get_params() = 0;
+ virtual void send_partial_response(off_t ofs) {}
+ void send_response() override = 0;
+ const char* name() const override { return "copy_obj"; }
+ RGWOpType get_type() override { return RGW_OP_COPY_OBJ; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::data; }
+};
+
+class RGWGetACLs : public RGWOp { /**
+ * Abstract base for the "get_acls" operation (RGW_OP_GET_ACLS, read op
+ * mask). Holds the result in the string member acls. Subclasses implement
+ * send_response().
+ */
+protected:
+ string acls;
+
+public:
+ RGWGetACLs() {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_acls"; }
+ RGWOpType get_type() override { return RGW_OP_GET_ACLS; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWPutACLs : public RGWOp { /**
+ * Abstract base for the "put_acls" operation (RGW_OP_PUT_ACLS, write op
+ * mask). Subclasses implement get_params() and send_response();
+ * get_policy_from_state() has a default that returns 0.
+ */
+protected:
+ bufferlist data;
+ ACLOwner owner;
+
+public:
+ RGWPutACLs() {}
+ ~RGWPutACLs() override {}
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+ virtual int get_policy_from_state(RGWRados *store, struct req_state *s, stringstream& ss) { return 0; }
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "put_acls"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_ACLS; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+class RGWGetLC : public RGWOp { /**
+ * Abstract base for the "get_lifecycle" operation (RGW_OP_GET_LC, read op
+ * mask). Unlike most sibling ops, execute() is pure virtual here, so
+ * subclasses implement both execute() and send_response().
+ */
+protected:
+
+public:
+ RGWGetLC() { }
+ ~RGWGetLC() override { }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override = 0;
+
+ void send_response() override = 0;
+ const char* name() const override { return "get_lifecycle"; }
+ RGWOpType get_type() override { return RGW_OP_GET_LC; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+class RGWPutLC : public RGWOp { /**
+ * Abstract base for the "put_lifecycle" operation (RGW_OP_PUT_LC, write op
+ * mask). Subclasses implement get_params() and send_response(). init()
+ * also generates a random alphanumeric cookie for the op.
+ */
+protected:
+ bufferlist data;
+ const char *content_md5;
+ string cookie;
+
+public:
+ RGWPutLC() {
+ content_md5 = nullptr;
+ }
+ ~RGWPutLC() override {}
+
+ void init(RGWRados *store, struct req_state *s, RGWHandler *dialect_handler) override { // note: the macro defined on the next line stays defined after this function
+#define COOKIE_LEN 16
+ char buf[COOKIE_LEN + 1];
+
+ RGWOp::init(store, s, dialect_handler);
+ gen_rand_alphanumeric(s->cct, buf, sizeof(buf) - 1); // NOTE(review): size passed is COOKIE_LEN; assumes the helper NUL-terminates within that size — confirm
+ cookie = buf;
+ }
+
+ int verify_permission() override;
+ void pre_exec() override;
+ void execute() override;
+
+// virtual int get_policy_from_state(RGWRados *store, struct req_state *s, stringstream& ss) { return 0; }
+ virtual int get_params() = 0;
+ void send_response() override = 0;
+ const char* name() const override { return "put_lifecycle"; }
+ RGWOpType get_type() override { return RGW_OP_PUT_LC; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "delete_lifecycle" operation (RGW_OP_DELETE_LC, write op).
+ *
+ * The op owns `data`, which the destructor releases with free(). Copying
+ * is therefore disabled: a copy would hold the same pointer and free it
+ * a second time.
+ * Subclasses must implement send_response().
+ */
+class RGWDeleteLC : public RGWOp {
+protected:
+  size_t len{0};
+  char *data{nullptr};  // released with free() in the destructor
+
+public:
+  RGWDeleteLC() = default;
+  RGWDeleteLC(const RGWDeleteLC&) = delete;
+  RGWDeleteLC& operator=(const RGWDeleteLC&) = delete;
+  ~RGWDeleteLC() override {
+    free(data);  // free(nullptr) is a no-op
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "delete_lifecycle"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_LC; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "get_cors" operation (RGW_OP_GET_CORS, read op).
+ * Subclasses must implement send_response().
+ */
+class RGWGetCORS : public RGWOp {
+public:
+  RGWGetCORS() = default;
+
+  int verify_permission() override;
+  void execute() override;
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "get_cors"; }
+  RGWOpType get_type() override { return RGW_OP_GET_CORS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "put_cors" operation (RGW_OP_PUT_CORS, write op).
+ * Subclasses must implement get_params() and send_response().
+ */
+class RGWPutCORS : public RGWOp {
+protected:
+  bufferlist cors_bl;
+  bufferlist in_data;
+
+public:
+  RGWPutCORS() = default;
+  ~RGWPutCORS() override = default;
+
+  int verify_permission() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "put_cors"; }
+  RGWOpType get_type() override { return RGW_OP_PUT_CORS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "delete_cors" operation (RGW_OP_DELETE_CORS, write op).
+ * Subclasses must implement send_response().
+ */
+class RGWDeleteCORS : public RGWOp {
+public:
+  RGWDeleteCORS() = default;
+
+  int verify_permission() override;
+  void execute() override;
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "delete_cors"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_CORS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "options_cors" operation (RGW_OP_OPTIONS_CORS, read op).
+ * verify_permission() always succeeds here. Subclasses must implement
+ * send_response().
+ */
+class RGWOptionsCORS : public RGWOp {
+protected:
+  RGWCORSRule *rule{nullptr};
+  const char *origin{nullptr}, *req_hdrs{nullptr}, *req_meth{nullptr};
+
+public:
+  RGWOptionsCORS() = default;
+
+  int verify_permission() override { return 0; }
+  int validate_cors_request(RGWCORSConfiguration *cc);
+  void execute() override;
+  void get_response_params(string& allowed_hdrs, string& exp_hdrs, unsigned *max_age);
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "options_cors"; }
+  RGWOpType get_type() override { return RGW_OP_OPTIONS_CORS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "get_request_payment" operation
+ * (RGW_OP_GET_REQUEST_PAYMENT, read op).
+ * Subclasses must implement send_response().
+ */
+class RGWGetRequestPayment : public RGWOp {
+protected:
+  bool requester_pays{false};
+
+public:
+  RGWGetRequestPayment() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "get_request_payment"; }
+  RGWOpType get_type() override { return RGW_OP_GET_REQUEST_PAYMENT; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "set_request_payment" operation
+ * (RGW_OP_SET_REQUEST_PAYMENT, write op).
+ * get_params() defaults to a no-op returning 0; subclasses must implement
+ * send_response().
+ */
+class RGWSetRequestPayment : public RGWOp {
+protected:
+  bool requester_pays{false};
+  bufferlist in_data;
+
+public:
+  RGWSetRequestPayment() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() { return 0; }
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "set_request_payment"; }
+  RGWOpType get_type() override { return RGW_OP_SET_REQUEST_PAYMENT; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "init_multipart" operation (RGW_OP_INIT_MULTIPART, write op).
+ * init() hands the request's CephContext to the embedded policy.
+ * prepare_encryption() defaults to a no-op returning 0; subclasses must
+ * implement get_params() and send_response().
+ */
+class RGWInitMultipart : public RGWOp {
+protected:
+  string upload_id;
+  RGWAccessControlPolicy policy;
+
+public:
+  RGWInitMultipart() = default;
+
+  void init(RGWRados *store, struct req_state *s, RGWHandler *h) override {
+    RGWOp::init(store, s, h);
+    policy.set_ctx(s->cct);
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "init_multipart"; }
+  RGWOpType get_type() override { return RGW_OP_INIT_MULTIPART; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+
+  virtual int prepare_encryption(map<string, bufferlist>& attrs) { return 0; }
+};
+
+/**
+ * Base of the "complete_multipart" operation
+ * (RGW_OP_COMPLETE_MULTIPART, write op).
+ * Subclasses must implement get_params() and send_response().
+ */
+class RGWCompleteMultipart : public RGWOp {
+protected:
+  string upload_id;
+  string etag;
+  string version_id;
+  bufferlist data;
+
+  /*
+   * Groups the cls lock named "RGWCompleteMultipart" with the io context
+   * and object id it is taken on. try_lock() is defined out of line.
+   */
+  struct MPSerializer {
+    librados::IoCtx ioctx;
+    rados::cls::lock::Lock lock;
+    librados::ObjectWriteOperation op;
+    std::string oid;
+    bool locked{false};
+
+    MPSerializer() : lock("RGWCompleteMultipart") {}
+
+    int try_lock(const std::string& oid, utime_t dur);
+
+    /* forwards to the lock; does not touch the `locked` flag */
+    int unlock() { return lock.unlock(&ioctx, oid); }
+
+    void clear_locked() { locked = false; }
+  } serializer;
+
+public:
+  RGWCompleteMultipart() = default;
+  ~RGWCompleteMultipart() override = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void complete() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "complete_multipart"; }
+  RGWOpType get_type() override { return RGW_OP_COMPLETE_MULTIPART; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "abort_multipart" operation
+ * (RGW_OP_ABORT_MULTIPART, delete op).
+ * Subclasses must implement send_response().
+ */
+class RGWAbortMultipart : public RGWOp {
+public:
+  RGWAbortMultipart() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "abort_multipart"; }
+  RGWOpType get_type() override { return RGW_OP_ABORT_MULTIPART; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+/**
+ * Base of the "list_multipart" operation (RGW_OP_LIST_MULTIPART, read op).
+ * Starts with max_parts = 1000, marker = 0 and truncated = false; init()
+ * replaces the policy with a fresh one bound to the request's CephContext.
+ * Subclasses must implement get_params() and send_response().
+ */
+class RGWListMultipart : public RGWOp {
+protected:
+  string upload_id;
+  map<uint32_t, RGWUploadPartInfo> parts;
+  int max_parts{1000};
+  int marker{0};
+  RGWAccessControlPolicy policy;
+  bool truncated{false};
+
+public:
+  RGWListMultipart() = default;
+
+  void init(RGWRados *store, struct req_state *s, RGWHandler *h) override {
+    RGWOp::init(store, s, h);
+    policy = RGWAccessControlPolicy(s->cct);
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "list_multipart"; }
+  RGWOpType get_type() override { return RGW_OP_LIST_MULTIPART; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Pairs a bucket directory entry with its multipart object descriptor.
+ * The stream operator prints the entry's key (in double quotes) and the
+ * descriptor.
+ */
+struct RGWMultipartUploadEntry {
+  rgw_bucket_dir_entry obj;
+  RGWMPObj mp;
+
+  friend std::ostream& operator<<(std::ostream& out,
+                                  const RGWMultipartUploadEntry& e) {
+    out << "RGWMultipartUploadEntry{ obj.key=" << '"' << e.obj.key << '"';
+    out << " mp=" << e.mp << " }";
+    return out;
+  }
+};
+
+/**
+ * Base of the "list_bucket_multiparts" operation
+ * (RGW_OP_LIST_BUCKET_MULTIPARTS, read op).
+ * init() resets max_uploads to default_max, which is 0 unless changed
+ * before init() runs. Subclasses must implement get_params() and
+ * send_response().
+ */
+class RGWListBucketMultiparts : public RGWOp {
+protected:
+  string prefix;
+  RGWMPObj marker;
+  RGWMultipartUploadEntry next_marker;
+  int max_uploads{0};
+  string delimiter;
+  vector<RGWMultipartUploadEntry> uploads;
+  map<string, bool> common_prefixes;
+  bool is_truncated{false};
+  int default_max{0};
+
+public:
+  RGWListBucketMultiparts() = default;
+
+  void init(RGWRados *store, struct req_state *s, RGWHandler *h) override {
+    RGWOp::init(store, s, h);
+    max_uploads = default_max;
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "list_bucket_multiparts"; }
+  RGWOpType get_type() override { return RGW_OP_LIST_BUCKET_MULTIPARTS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+
+/**
+ * "get_crossdomain_policy" operation (RGW_OP_GET_CROSS_DOMAIN_POLICY,
+ * read op). No permission check is performed and execute() only sets
+ * op_ret to 0.
+ */
+class RGWGetCrossDomainPolicy : public RGWOp {
+public:
+  RGWGetCrossDomainPolicy() = default;
+  ~RGWGetCrossDomainPolicy() override = default;
+
+  int verify_permission() override { return 0; }
+
+  void execute() override { op_ret = 0; }
+
+  /* op identity */
+  const char* name() const override { return "get_crossdomain_policy"; }
+  RGWOpType get_type() override { return RGW_OP_GET_CROSS_DOMAIN_POLICY; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+
+/**
+ * "get_health_check" operation (RGW_OP_GET_HEALTH_CHECK, read op).
+ * No permission check is performed; execute() is defined out of line.
+ */
+class RGWGetHealthCheck : public RGWOp {
+public:
+  RGWGetHealthCheck() = default;
+  ~RGWGetHealthCheck() override = default;
+
+  int verify_permission() override { return 0; }
+
+  void execute() override;
+
+  /* op identity */
+  const char* name() const override { return "get_health_check"; }
+  RGWOpType get_type() override { return RGW_OP_GET_HEALTH_CHECK; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+
+/**
+ * Base of the "multi_object_delete" operation
+ * (RGW_OP_DELETE_MULTI_OBJ, delete op).
+ * The response is produced through the pure-virtual hooks send_status(),
+ * begin_response(), send_partial_response() and end_response(), which
+ * subclasses must implement together with get_params().
+ */
+class RGWDeleteMultiObj : public RGWOp {
+protected:
+  bufferlist data;
+  rgw_bucket bucket;
+  bool quiet{false};
+  bool status_dumped{false};
+  bool acl_allowed = false;
+
+public:
+  RGWDeleteMultiObj() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  virtual void send_status() = 0;
+  virtual void begin_response() = 0;
+  virtual void send_partial_response(rgw_obj_key& key, bool delete_marker,
+                                     const string& marker_version_id, int ret) = 0;
+  virtual void end_response() = 0;
+
+  /* op identity */
+  const char* name() const override { return "multi_object_delete"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_MULTI_OBJ; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+/**
+ * Base of the "get info" operation (RGW_OP_GET_INFO, read op).
+ * No permission check is performed.
+ */
+class RGWInfo : public RGWOp {
+public:
+  RGWInfo() = default;
+  ~RGWInfo() override = default;
+
+  int verify_permission() override { return 0; }
+
+  /* op identity */
+  const char* name() const override { return "get info"; }
+  RGWOpType get_type() override { return RGW_OP_GET_INFO; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+extern int rgw_build_bucket_policies(RGWRados* store, struct req_state* s);
+extern int rgw_build_object_policies(RGWRados *store, struct req_state *s,
+ bool prefetch_data);
+extern void rgw_build_iam_environment(RGWRados* store,
+ struct req_state* s);
+extern vector<rgw::IAM::Policy> get_iam_user_policy_from_attr(CephContext* cct,
+ RGWRados* store,
+ map<string, bufferlist>& attrs,
+ const string& tenant);
+
+/**
+ * Read the system-request versioning parameters from the request args.
+ *
+ * s:          request state; nothing is read unless s->system_request is set
+ * olh_epoch:  if non-null, receives the parsed "versioned-epoch" argument
+ *             when that argument is present and non-empty
+ * version_id: if non-null, receives the "version-id" argument
+ *
+ * Returns 0 on success, -EINVAL if "versioned-epoch" cannot be parsed.
+ */
+static inline int get_system_versioning_params(req_state *s,
+                                               uint64_t *olh_epoch,
+                                               string *version_id)
+{
+  if (s->system_request) {
+    if (olh_epoch) {
+      string epoch_param = s->info.args.get(RGW_SYS_PARAM_PREFIX "versioned-epoch");
+      if (!epoch_param.empty()) {
+        string parse_err;
+        *olh_epoch = strict_strtol(epoch_param.c_str(), 10, &parse_err);
+        if (!parse_err.empty()) {
+          lsubdout(s->cct, rgw, 0) << "failed to parse versioned-epoch param"
+                                   << dendl;
+          return -EINVAL;
+        }
+      }
+    }
+
+    if (version_id) {
+      *version_id = s->info.args.get(RGW_SYS_PARAM_PREFIX "version-id");
+    }
+  }
+
+  return 0;
+} /* get_system_versioning_params */
+
+/**
+ * Make an extended-attribute value safe to store as text.
+ *
+ * If `xattr` fails check_utf8() or check_for_control_characters(), it is
+ * replaced in place by "=?UTF-8?Q?" + quoted-printable(xattr) + "?=".
+ * Otherwise it is left untouched.
+ *
+ * The scratch buffer is a std::string rather than new[]/delete[], so it is
+ * released even if the final assign() throws.
+ */
+static inline void format_xattr(std::string &xattr)
+{
+  const bool needs_no_encoding =
+    (check_utf8(xattr.c_str(), xattr.length()) == 0) &&
+    (check_for_control_characters(xattr.c_str(), xattr.length()) == 0);
+  if (needs_no_encoding) {
+    return;
+  }
+
+  static const char MIME_PREFIX_STR[] = "=?UTF-8?Q?";
+  static const int MIME_PREFIX_LEN = sizeof(MIME_PREFIX_STR) - 1;
+  static const char MIME_SUFFIX_STR[] = "?=";
+  static const int MIME_SUFFIX_LEN = sizeof(MIME_SUFFIX_STR) - 1;
+
+  /* first pass with a NULL output only asks for the encoded length */
+  const int mlen = mime_encode_as_qp(xattr.c_str(), NULL, 0);
+
+  std::string scratch(MIME_PREFIX_LEN + mlen + MIME_SUFFIX_LEN + 1, '\0');
+  char *mime = &scratch[0];
+
+  strcpy(mime, MIME_PREFIX_STR);
+  mime_encode_as_qp(xattr.c_str(), mime + MIME_PREFIX_LEN, mlen);
+  /* NOTE(review): the (mlen - 1) offset presumably overwrites the encoder's
+   * terminating NUL, i.e. mlen counts it -- confirm against
+   * mime_encode_as_qp(). */
+  strcpy(mime + MIME_PREFIX_LEN + (mlen - 1), MIME_SUFFIX_STR);
+
+  /* copy up to the first NUL, exactly as a C string would be assigned */
+  xattr.assign(scratch.c_str());
+} /* format_xattr */
+
+/**
+ * Turn the request's metadata headers (info.x_meta_map) into attributes.
+ *
+ * cct:   context providing the rgw_max_attr_* limits and the log sink
+ * info:  request info; values in info.x_meta_map are re-encoded in place
+ *        by format_xattr()
+ * attrs: receives <RGW_ATTR_PREFIX + header name, value plus trailing NUL>
+ * allow_empty_attrs: when false, headers with an empty value are skipped
+ *
+ * A fixed set of server-side-encryption / storage-class headers is always
+ * skipped.
+ *
+ * On success returns 0.
+ * On failure returns -ENAMETOOLONG, -EFBIG or -E2BIG when the matching
+ * configured limit (if non-zero) is exceeded.
+ */
+static inline int rgw_get_request_metadata(CephContext* const cct,
+                                           struct req_info& info,
+                                           std::map<std::string, ceph::bufferlist>& attrs,
+                                           const bool allow_empty_attrs = true)
+{
+  static const std::set<std::string> blacklisted_headers = {
+    "x-amz-server-side-encryption-customer-algorithm",
+    "x-amz-server-side-encryption-customer-key",
+    "x-amz-server-side-encryption-customer-key-md5",
+    "x-amz-storage-class"
+  };
+
+  size_t valid_meta_count = 0;
+  for (auto& kv : info.x_meta_map) {
+    const std::string& name = kv.first;
+    std::string& xattr = kv.second;
+
+    if (blacklisted_headers.count(name) == 1) {
+      lsubdout(cct, rgw, 10) << "skipping x>> " << name << dendl;
+      continue;
+    }
+    if (!allow_empty_attrs && xattr.empty()) {
+      continue;
+    }
+
+    lsubdout(cct, rgw, 10) << "x>> " << name << ":" << xattr << dendl;
+    format_xattr(xattr);
+
+    std::string attr_name(RGW_ATTR_PREFIX);
+    attr_name.append(name);
+
+    /* Rough check that the attribute name does not go beyond the limit.
+     * Passing here does not guarantee that an OSD will accept the name,
+     * as ObjectStore::get_max_attr_name_length() can set the limit even
+     * lower than the "osd_max_attr_name_len" configurable. */
+    const auto max_attr_name_len = cct->_conf->rgw_max_attr_name_len;
+    if (max_attr_name_len && attr_name.length() > max_attr_name_len) {
+      return -ENAMETOOLONG;
+    }
+
+    /* Similar remarks apply to the value size. It is verified early on
+     * the RGW side because the limit is advertised in /info. */
+    const auto max_attr_size = cct->_conf->rgw_max_attr_size;
+    if (max_attr_size && xattr.length() > max_attr_size) {
+      return -EFBIG;
+    }
+
+    /* Swift allows administrators to limit the number of metadata items
+     * sent _in a single request_. */
+    const auto max_attrs_num_in_req = cct->_conf->rgw_max_attrs_num_in_req;
+    if (max_attrs_num_in_req &&
+        ++valid_meta_count > max_attrs_num_in_req) {
+      return -E2BIG;
+    }
+
+    /* emplace() leaves an already-present entry in place; the value is
+     * appended to whatever bufferlist the key maps to afterwards. */
+    ceph::bufferlist& bl =
+      attrs.emplace(std::move(attr_name), ceph::bufferlist()).first->second;
+    bl.append(xattr.c_str(), xattr.size() + 1);
+  }
+
+  return 0;
+} /* rgw_get_request_metadata */
+
+/**
+ * Store an encoded expiry time under RGW_ATTR_DELETE_AT.
+ * Does nothing when `delete_at` holds no value.
+ */
+static inline void encode_delete_at_attr(boost::optional<ceph::real_time> delete_at,
+                                         map<string, bufferlist>& attrs)
+{
+  if (delete_at != boost::none) {
+    bufferlist encoded;
+    encode(*delete_at, encoded);
+    attrs[RGW_ATTR_DELETE_AT] = encoded;
+  }
+} /* encode_delete_at_attr */
+
+/**
+ * Store the encoded object tags under RGW_ATTR_TAGS.
+ * A null `obj_tags` leaves `attrs` unchanged; an existing RGW_ATTR_TAGS
+ * entry is not removed in that case.
+ */
+static inline void encode_obj_tags_attr(RGWObjTags* obj_tags, map<string, bufferlist>& attrs)
+{
+  if (obj_tags != nullptr) {
+    bufferlist encoded;
+    obj_tags->encode(encoded);
+    attrs[RGW_ATTR_TAGS] = encoded;
+  }
+}
+
+/**
+ * Store a DLO manifest string under RGW_ATTR_USER_MANIFEST, including its
+ * terminating NUL.
+ *
+ * Returns 0 on success, -EINVAL when the manifest contains no '/'.
+ */
+static inline int encode_dlo_manifest_attr(const char * const dlo_manifest,
+                                           map<string, bufferlist>& attrs)
+{
+  const string manifest_str = dlo_manifest;
+  const bool has_separator = manifest_str.find('/') != string::npos;
+  if (!has_separator) {
+    return -EINVAL;
+  }
+
+  bufferlist encoded;
+  encoded.append(dlo_manifest, strlen(dlo_manifest) + 1);
+  attrs[RGW_ATTR_USER_MANIFEST] = encoded;
+
+  return 0;
+} /* encode_dlo_manifest_attr */
+
+/**
+ * Finalize `hash` and write the digest, rendered by buf_to_hex(), to *etag.
+ */
+static inline void complete_etag(MD5& hash, string *etag)
+{
+  unsigned char digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  char digest_hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+
+  hash.Final(digest);
+  buf_to_hex(digest, CEPH_CRYPTO_MD5_DIGESTSIZE, digest_hex);
+
+  *etag = digest_hex;
+} /* complete_etag */
+
+/**
+ * Base of the "set_attrs" operation (RGW_OP_SET_ATTRS, write op).
+ * Attributes are collected through emplace_attr(). Subclasses must
+ * implement get_params() and send_response().
+ */
+class RGWSetAttrs : public RGWOp {
+protected:
+  map<string, buffer::list> attrs;
+
+public:
+  RGWSetAttrs() = default;
+  ~RGWSetAttrs() override = default;
+
+  /* moves key and value into attrs; an existing key is left untouched */
+  void emplace_attr(std::string&& key, buffer::list&& bl) {
+    attrs.emplace(std::move(key), std::move(bl));
+  }
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override { return "set_attrs"; }
+  RGWOpType get_type() override { return RGW_OP_SET_ATTRS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * "get_obj_layout" operation (RGW_OP_GET_OBJ_LAYOUT, read op).
+ * Permission is granted through the user's "admin" capability with
+ * RGW_CAP_READ.
+ */
+class RGWGetObjLayout : public RGWOp {
+protected:
+  RGWObjManifest *manifest{nullptr};
+  rgw_raw_obj head_obj;
+
+public:
+  RGWGetObjLayout() = default;
+
+  int check_caps(RGWUserCaps& caps) {
+    return caps.check_cap("admin", RGW_CAP_READ);
+  }
+  int verify_permission() override { return check_caps(s->user->caps); }
+  void pre_exec() override;
+  void execute() override;
+
+  /* op identity */
+  const char* name() const override { return "get_obj_layout"; }
+  RGWOpType get_type() override { return RGW_OP_GET_OBJ_LAYOUT; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * "put_bucket_policy" operation (RGW_OP_PUT_BUCKET_POLICY, write op).
+ * All handlers, including send_response(), are defined out of line.
+ */
+class RGWPutBucketPolicy : public RGWOp {
+  bufferlist data;
+
+public:
+  RGWPutBucketPolicy() = default;
+  ~RGWPutBucketPolicy() = default;
+
+  void send_response() override;
+  int verify_permission() override;
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+  void execute() override;
+  int get_params();
+
+  /* op identity */
+  const char* name() const override { return "put_bucket_policy"; }
+  RGWOpType get_type() override { return RGW_OP_PUT_BUCKET_POLICY; }
+};
+
+/**
+ * "get_bucket_policy" operation (RGW_OP_GET_BUCKET_POLICY, read op).
+ * All handlers, including send_response(), are defined out of line.
+ */
+class RGWGetBucketPolicy : public RGWOp {
+  buffer::list policy;
+
+public:
+  RGWGetBucketPolicy() = default;
+
+  void send_response() override;
+  int verify_permission() override;
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+  void execute() override;
+
+  /* op identity */
+  const char* name() const override { return "get_bucket_policy"; }
+  RGWOpType get_type() override { return RGW_OP_GET_BUCKET_POLICY; }
+};
+
+/**
+ * "delete_bucket_policy" operation (RGW_OP_DELETE_BUCKET_POLICY, write op).
+ * All handlers, including send_response(), are defined out of line.
+ */
+class RGWDeleteBucketPolicy : public RGWOp {
+public:
+  RGWDeleteBucketPolicy() = default;
+
+  void send_response() override;
+  int verify_permission() override;
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+  void execute() override;
+  int get_params();
+
+  /* op identity */
+  const char* name() const override { return "delete_bucket_policy"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_BUCKET_POLICY; }
+};
+
+/**
+ * Base of the "put_bucket_object_lock" operation
+ * (RGW_OP_PUT_BUCKET_OBJ_LOCK, write op).
+ * Subclasses must implement send_response() and get_params().
+ *
+ * send_response() and the destructor are marked `override`, like every
+ * other op in this header, so a signature drift in the base class becomes
+ * a compile error instead of silently adding a new virtual.
+ */
+class RGWPutBucketObjectLock : public RGWOp {
+protected:
+  bufferlist data;
+  bufferlist obj_lock_bl;
+  RGWObjectLock obj_lock;
+
+public:
+  RGWPutBucketObjectLock() = default;
+  ~RGWPutBucketObjectLock() override = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+  virtual int get_params() = 0;
+
+  /* op identity */
+  const char* name() const override { return "put_bucket_object_lock"; }
+  RGWOpType get_type() override { return RGW_OP_PUT_BUCKET_OBJ_LOCK; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "get_bucket_object_lock" operation
+ * (RGW_OP_GET_BUCKET_OBJ_LOCK, read op).
+ * Subclasses must implement send_response(), which is marked `override`
+ * for consistency with the other ops in this header.
+ */
+class RGWGetBucketObjectLock : public RGWOp {
+public:
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override {return "get_bucket_object_lock"; }
+  RGWOpType get_type() override { return RGW_OP_GET_BUCKET_OBJ_LOCK; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "put_obj_retention" operation
+ * (RGW_OP_PUT_OBJ_RETENTION, write op).
+ * Starts with bypass_perm = true and bypass_governance_mode = false.
+ * Subclasses must implement send_response() and get_params().
+ */
+class RGWPutObjRetention : public RGWOp {
+protected:
+  bufferlist data;
+  RGWObjectRetention obj_retention;
+  bool bypass_perm{true};
+  bool bypass_governance_mode{false};
+
+public:
+  RGWPutObjRetention() {}
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+  virtual int get_params() = 0;
+
+  /* op identity */
+  const char* name() const override { return "put_obj_retention"; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+  RGWOpType get_type() override { return RGW_OP_PUT_OBJ_RETENTION; }
+};
+
+/**
+ * Base of the "get_obj_retention" operation
+ * (RGW_OP_GET_OBJ_RETENTION, read op).
+ * Subclasses must implement send_response(), which is marked `override`
+ * for consistency with the other ops in this header.
+ */
+class RGWGetObjRetention : public RGWOp {
+protected:
+  RGWObjectRetention obj_retention;
+
+public:
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override {return "get_obj_retention"; }
+  RGWOpType get_type() override { return RGW_OP_GET_OBJ_RETENTION; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "put_obj_legal_hold" operation
+ * (RGW_OP_PUT_OBJ_LEGAL_HOLD, write op).
+ * Subclasses must implement send_response() and get_params().
+ */
+class RGWPutObjLegalHold : public RGWOp {
+protected:
+  bufferlist data;
+  RGWObjectLegalHold obj_legal_hold;
+
+public:
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+  virtual int get_params() = 0;
+
+  /* op identity */
+  const char* name() const override { return "put_obj_legal_hold"; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+  RGWOpType get_type() override { return RGW_OP_PUT_OBJ_LEGAL_HOLD; }
+};
+
+/**
+ * Base of the "get_obj_legal_hold" operation
+ * (RGW_OP_GET_OBJ_LEGAL_HOLD, read op).
+ * Subclasses must implement send_response(), which is marked `override`
+ * for consistency with the other ops in this header.
+ */
+class RGWGetObjLegalHold : public RGWOp {
+protected:
+  RGWObjectLegalHold obj_legal_hold;
+
+public:
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+  void send_response() override = 0;
+
+  /* op identity */
+  const char* name() const override {return "get_obj_legal_hold"; }
+  RGWOpType get_type() override { return RGW_OP_GET_OBJ_LEGAL_HOLD; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+
+/**
+ * Base of the "config_bucket_meta_search" operation
+ * (RGW_OP_CONFIG_BUCKET_META_SEARCH, write op).
+ * Subclasses must implement get_params().
+ */
+class RGWConfigBucketMetaSearch : public RGWOp {
+protected:
+  std::map<std::string, uint32_t> mdsearch_config;
+
+public:
+  RGWConfigBucketMetaSearch() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  virtual int get_params() = 0;
+
+  /* op identity */
+  const char* name() const override { return "config_bucket_meta_search"; }
+  RGWOpType get_type() override { return RGW_OP_CONFIG_BUCKET_META_SEARCH; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "get_bucket_meta_search" operation
+ * (RGW_OP_GET_BUCKET_META_SEARCH, read op).
+ * execute() is intentionally empty in this class.
+ */
+class RGWGetBucketMetaSearch : public RGWOp {
+public:
+  RGWGetBucketMetaSearch() = default;
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override {}
+
+  /* op identity */
+  const char* name() const override { return "get_bucket_meta_search"; }
+  RGWOpType get_type() override { return RGW_OP_GET_BUCKET_META_SEARCH; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+/**
+ * Base of the "delete_bucket_meta_search" operation
+ * (RGW_OP_DEL_BUCKET_META_SEARCH, write op).
+ *
+ * The op type used to be exposed only through a method named
+ * delete_type(), which overrides nothing, so get_type() fell through to
+ * the base class. get_type() is now overridden like in every sibling op;
+ * delete_type() is kept as a forwarding alias for existing callers.
+ */
+class RGWDelBucketMetaSearch : public RGWOp {
+public:
+  RGWDelBucketMetaSearch() {}
+
+  int verify_permission() override;
+  void pre_exec() override;
+  void execute() override;
+
+  /* op identity */
+  const char* name() const override { return "delete_bucket_meta_search"; }
+  RGWOpType get_type() override { return RGW_OP_DEL_BUCKET_META_SEARCH; }
+  /* historical misnomer; prefer get_type() */
+  virtual RGWOpType delete_type() { return RGW_OP_DEL_BUCKET_META_SEARCH; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+/**
+ * Base of the "get_cluster_stat" operation, scheduled under the dmclock
+ * admin client. No permission check is performed; get_type() is not
+ * overridden here. Subclasses must implement send_response() and
+ * get_params().
+ */
+class RGWGetClusterStat : public RGWOp {
+protected:
+  struct rados_cluster_stat_t stats_op;
+
+public:
+  RGWGetClusterStat() {}
+
+  /* plain forward to the base implementation */
+  void init(RGWRados *store, struct req_state *s, RGWHandler *h) override {
+    RGWOp::init(store, s, h);
+  }
+
+  int verify_permission() override { return 0; }
+  void send_response() override = 0;
+  virtual int get_params() = 0;
+  void execute() override;
+
+  /* op identity */
+  const char* name() const override { return "get_cluster_stat"; }
+  dmc::client_id dmclock_client() override { return dmc::client_id::admin; }
+};
+
+/**
+ * Parse a base-10 integer from `input` and clamp it to
+ * [lower_bound, upper_bound].
+ *
+ * input:       text to parse; trailing white space is tolerated
+ * output:      receives the clamped value, or default_val when `input` is
+ *              empty; left untouched when -EINVAL is returned
+ * lower_bound: smallest value stored in `output`
+ * upper_bound: largest value stored in `output`
+ * default_val: value stored when `input` is empty
+ *
+ * Returns 0 on success, -EINVAL when `input` holds no digits or has
+ * trailing non-space characters.
+ *
+ * The number is kept as a long until after clamping. Narrowing to int
+ * first would let a value such as 4294967297 wrap to 1 and slip past the
+ * upper bound.
+ */
+static inline int parse_value_and_bound(
+    const string &input,
+    int &output,
+    const long lower_bound,
+    const long upper_bound,
+    const long default_val)
+{
+  if (input.empty()) {
+    output = static_cast<int>(default_val);
+    return 0;
+  }
+
+  char *endptr = nullptr;
+  long value = strtol(input.c_str(), &endptr, 10);
+  if (endptr == input.c_str()) {
+    return -EINVAL;  // no digits consumed
+  }
+  /* isspace() needs an unsigned char value; a plain char may be negative */
+  while (*endptr && isspace(static_cast<unsigned char>(*endptr))) {
+    endptr++;
+  }
+  if (*endptr) {
+    return -EINVAL;  // trailing garbage
+  }
+
+  if (value > upper_bound) {
+    value = upper_bound;
+  }
+  if (value < lower_bound) {
+    value = lower_bound;
+  }
+
+  /* the bounds are longs; keep the result representable as an int */
+  if (value > INT_MAX) {
+    value = INT_MAX;
+  }
+  if (value < INT_MIN) {
+    value = INT_MIN;
+  }
+
+  output = static_cast<int>(value);
+  return 0;
+}
+
+
+#endif /* CEPH_RGW_OP_H */
diff --git a/src/rgw/rgw_opa.cc b/src/rgw/rgw_opa.cc
new file mode 100644
index 00000000..2331beb6
--- /dev/null
+++ b/src/rgw/rgw_opa.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_opa.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Ask the configured OPA endpoint whether the request may proceed.
+ *
+ * A JSON document describing the request (method, URIs, params, object
+ * name, subuser, user info, bucket info) is POSTed to rgw_opa_url, with
+ * rgw_opa_token sent in the X-Auth-Token header.
+ *
+ * Returns:
+ *   0                     OPA answered with a true "result"
+ *   -EPERM                "result" is false or missing
+ *   -ERR_INVALID_REQUEST  rgw_opa_url is empty
+ *   -EINVAL               the reply is not valid JSON or "result" cannot
+ *                         be decoded as a boolean
+ *   other negative value  the HTTP exchange itself failed
+ */
+int rgw_opa_authorize(RGWOp *& op,
+                      req_state * const s)
+{
+
+  ldpp_dout(op, 2) << "authorizing request using OPA" << dendl;
+
+  /* get OPA url */
+  const string& opa_url = s->cct->_conf->rgw_opa_url;
+  if (opa_url.empty()) {
+    ldpp_dout(op, 2) << "OPA_URL not provided" << dendl;
+    return -ERR_INVALID_REQUEST;
+  }
+  ldpp_dout(op, 2) << "OPA URL= " << opa_url.c_str() << dendl;
+
+  /* get authentication token for OPA */
+  const string& opa_token = s->cct->_conf->rgw_opa_token;
+
+  bufferlist bl;
+  RGWHTTPTransceiver req(s->cct, "POST", opa_url.c_str(), &bl);
+
+  /* set required headers for OPA request */
+  req.append_header("X-Auth-Token", opa_token);
+  req.append_header("Content-Type", "application/json");
+
+  /* check if we want to verify OPA server SSL certificate */
+  req.set_verify_ssl(s->cct->_conf->rgw_opa_verify_ssl);
+
+  /* create json request body */
+  JSONFormatter jf;
+  jf.open_object_section("");
+  jf.open_object_section("input");
+  /* fall back to "" so a missing REQUEST_METHOD never reaches dump_string
+   * as a null pointer */
+  jf.dump_string("method", s->info.env->get("REQUEST_METHOD", ""));
+  jf.dump_string("relative_uri", s->relative_uri.c_str());
+  jf.dump_string("decoded_uri", s->decoded_uri.c_str());
+  jf.dump_string("params", s->info.request_params.c_str());
+  jf.dump_string("request_uri_aws4", s->info.request_uri_aws4.c_str());
+  jf.dump_string("object_name", s->object.name.c_str());
+  jf.dump_string("subuser", s->auth.identity->get_subuser().c_str());
+  jf.dump_object("user_info", *s->user);
+  jf.dump_object("bucket_info", s->bucket_info);
+  jf.close_section();
+  jf.close_section();
+
+  std::stringstream ss;
+  jf.flush(ss);
+  const std::string body = ss.str();
+  req.set_post_data(body);
+  req.set_send_length(body.length());
+
+  /* send request */
+  const int ret = req.process();
+  if (ret < 0) {
+    /* to_str() copes with an empty or non-NUL-terminated reply buffer */
+    ldpp_dout(op, 2) << "OPA process error:" << bl.to_str() << dendl;
+    return ret;
+  }
+
+  /* check OPA response */
+  JSONParser parser;
+  if (!parser.parse(bl.c_str(), bl.length())) {
+    ldpp_dout(op, 2) << "OPA parse error: malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  /* fail closed: anything but an explicit true result denies the request */
+  bool opa_result = false;
+  try {
+    JSONDecoder::decode_json("result", opa_result, &parser);
+  } catch (JSONDecoder::err& e) {
+    ldpp_dout(op, 2) << "OPA parse error: malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  if (!opa_result) {
+    ldpp_dout(op, 2) << "OPA rejecting request" << dendl;
+    return -EPERM;
+  }
+
+  ldpp_dout(op, 2) << "OPA accepting request" << dendl;
+  return 0;
+}
diff --git a/src/rgw/rgw_opa.h b/src/rgw/rgw_opa.h
new file mode 100644
index 00000000..2f87e45e
--- /dev/null
+++ b/src/rgw/rgw_opa.h
@@ -0,0 +1,14 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_OPA_H
+#define RGW_OPA_H
+
+#include "rgw_common.h"
+#include "rgw_op.h"
+
+/*
+ * Authorize a request using OPA.
+ *
+ * Returns 0 when OPA accepts the request, -EPERM when it rejects it,
+ * -ERR_INVALID_REQUEST when no OPA URL is configured, -EINVAL when the
+ * reply is not valid JSON, or the negative error from the HTTP exchange.
+ */
+int rgw_opa_authorize(RGWOp*& op,
+ req_state* s);
+
+#endif /* RGW_OPA_H */
diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/rgw_orphan.cc
new file mode 100644
index 00000000..832076b7
--- /dev/null
+++ b/src/rgw/rgw_orphan.cc
@@ -0,0 +1,1523 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+
+
+#include "common/config.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "rgw_rados.h"
+#include "rgw_op.h"
+#include "rgw_multi.h"
+#include "rgw_orphan.h"
+#include "rgw_zone.h"
+#include "rgw_bucket.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#define DEFAULT_NUM_SHARDS 64
+
+/*
+ * Reduce a raw rados oid to the "fingerprint" used to match objects found
+ * in the pool against the objects referenced from bucket indexes.
+ *
+ * oid is expected to look like <bucket marker>_<obj>; a missing '_' is
+ * reported on stderr but processing continues.  Oids whose parsed key has
+ * an empty namespace are returned unchanged.  Otherwise, for names of at
+ * least ten characters, up to ten trailing characters drawn from [0-9._]
+ * are trimmed.
+ *
+ * force_ns is only tested for being non-NULL: when set, the oid is rebuilt
+ * from the parsed key before trimming.
+ */
+static string obj_fingerprint(const string& oid, const char *force_ns = NULL)
+{
+  ssize_t pos = oid.find('_');
+  if (pos < 0) {
+    cerr << "ERROR: object does not have a bucket marker: " << oid << std::endl;
+  }
+
+  string obj_marker = oid.substr(0, pos);
+
+  rgw_obj_key key;
+
+  rgw_obj_key::parse_raw_oid(oid.substr(pos + 1), &key);
+
+  if (key.ns.empty()) {
+    return oid;
+  }
+
+  string s = oid;
+
+  if (force_ns) {
+    rgw_bucket b;
+    rgw_obj new_obj(b, key);
+    s = obj_marker + "_" + new_obj.get_oid();
+  }
+
+  /*
+   * cut out suffix
+   *
+   * 'keep' is the length of the prefix that survives.  Working with a
+   * length instead of an index keeps the arithmetic away from zero: the
+   * previous index-based loop compared "i >= s.size() - 10" in unsigned
+   * math, which for a 10 character name is always true and let the index
+   * wrap around.  Names shorter than ten characters are left untouched,
+   * exactly as before.
+   */
+  size_t keep = s.size();
+  if (keep >= 10) {
+    const size_t lowest = keep - 10;
+    while (keep > lowest) {
+      const unsigned char c = static_cast<unsigned char>(s[keep - 1]);
+      if (!isdigit(c) && c != '.' && c != '_') {
+        break;
+      }
+      --keep;
+    }
+  }
+
+  return s.substr(0, keep);
+}
+
+/*
+ * Load the saved search state of job_name from the jobs omap.
+ * Returns 0 on success, -ENOENT when no such job is stored, -EIO when the
+ * stored value cannot be decoded, or the error from the omap read.
+ */
+int RGWOrphanStore::read_job(const string& job_name, RGWOrphanSearchState & state)
+{
+  /* ask only for the omap entry keyed by the job name */
+  set<string> wanted;
+  wanted.insert(job_name);
+
+  map<string, bufferlist> found;
+  const int rc = ioctx.omap_get_vals_by_keys(oid, wanted, &found);
+  if (rc < 0) {
+    return rc;
+  }
+
+  auto entry = found.find(job_name);
+  if (entry == found.end()) {
+    return -ENOENT;
+  }
+
+  try {
+    bufferlist& encoded = entry->second;
+    decode(state, encoded);
+  } catch (buffer::error& err) {
+    lderr(store->ctx()) << "ERROR: could not decode buffer" << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+/*
+ * Encode state and store it in the jobs omap under job_name.
+ * Returns 0 on success or the negative error from omap_set().
+ */
+int RGWOrphanStore::write_job(const string& job_name, const RGWOrphanSearchState& state)
+{
+  bufferlist encoded;
+  encode(state, encoded);
+
+  map<string, bufferlist> to_set;
+  to_set[job_name] = encoded;
+
+  const int rc = ioctx.omap_set(oid, to_set);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Drop the omap entry of job_name from the jobs object.
+ * Returns 0 on success or the negative error from omap_rm_keys().
+ */
+int RGWOrphanStore::remove_job(const string& job_name)
+{
+  set<string> doomed;
+  doomed.insert(job_name);
+
+  const int rc = ioctx.omap_rm_keys(oid, doomed);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Decode every job stored in the jobs omap into job_list, keyed by job name.
+ * Returns 0 on success, -EIO when an entry cannot be decoded, or the error
+ * from the omap read.
+ */
+int RGWOrphanStore::list_jobs(map <string,RGWOrphanSearchState>& job_list)
+{
+  int batch_size = 1024;
+  map <string,bufferlist> batch;
+  string resume_after = "";
+  bool more = true;
+
+  // page through the omap in batches; a batch smaller than batch_size
+  // means there is nothing left to read
+  while (more) {
+    int rc = ioctx.omap_get_vals(oid, resume_after, batch_size, &batch);
+    if (rc < 0) {
+      return rc;
+    }
+    const int fetched = batch.size();
+    more = (fetched == batch_size);
+
+    for (const auto &kv : batch) {
+      // remember the last key seen so the next read continues after it
+      resume_after = kv.first;
+
+      RGWOrphanSearchState decoded;
+      try {
+        bufferlist raw = kv.second;
+        decode(decoded, raw);
+      } catch (buffer::error& err) {
+        lderr(store->ctx()) << "ERROR: could not decode buffer" << dendl;
+        return -EIO;
+      }
+      job_list[kv.first] = decoded;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Open the IoCtx on the zone's log pool that the other RGWOrphanStore
+ * methods operate on.  Returns 0 on success or the negative error from
+ * rgw_init_ioctx().
+ */
+int RGWOrphanStore::init()
+{
+  const rgw_pool& log_pool = store->svc.zone->get_zone_params().log_pool;
+  int r = rgw_init_ioctx(store->get_rados_handle(), log_pool, ioctx);
+  if (r < 0) {
+    /* message used to be missing the closing parenthesis after the pool */
+    cerr << "ERROR: failed to open log pool (" << log_pool << ") ret=" << r << std::endl;
+    return r;
+  }
+
+  return 0;
+}
+
+/*
+ * Write entries into the omap of index object oid.
+ *
+ * Returns 0 on success or the negative error from the rados write.  The
+ * error used to be logged and then dropped, which let callers carry on
+ * with an incomplete index even though they check the return value.
+ */
+int RGWOrphanStore::store_entries(const string& oid, const map<string, bufferlist>& entries)
+{
+  librados::ObjectWriteOperation op;
+  op.omap_set(entries);
+  cout << "storing " << entries.size() << " entries at " << oid << std::endl;
+  ldout(store->ctx(), 20) << "storing " << entries.size() << " entries at " << oid << ": " << dendl;
+  for (map<string, bufferlist>::const_iterator iter = entries.begin(); iter != entries.end(); ++iter) {
+    ldout(store->ctx(), 20) << " > " << iter->first << dendl;
+  }
+  int ret = ioctx.operate(oid, &op);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: " << __func__ << "(" << oid << ") returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Read up to MAX_OMAP_GET omap entries of index object oid that sort after
+ * marker.
+ *
+ * On success returns 0 and sets *truncated when a full batch came back
+ * (i.e. there may be more to read).  On failure *truncated is cleared and
+ * the negative error is returned; -ENOENT is returned quietly so the
+ * caller can treat a missing index object as empty.  Errors used to be
+ * swallowed here, which made a failed read look like an empty shard.
+ */
+int RGWOrphanStore::read_entries(const string& oid, const string& marker, map<string, bufferlist> *entries, bool *truncated)
+{
+#define MAX_OMAP_GET 100
+  int ret = ioctx.omap_get_vals(oid, marker, MAX_OMAP_GET, entries);
+  if (ret < 0) {
+    if (ret != -ENOENT) {
+      cerr << "ERROR: " << __func__ << "(" << oid << ") returned ret=" << cpp_strerror(-ret) << std::endl;
+    }
+    *truncated = false;
+    return ret;
+  }
+
+  *truncated = (entries->size() == MAX_OMAP_GET);
+
+  return 0;
+}
+
+/*
+ * Prepare a search job: open the orphan store, then either resume the
+ * stored state of job_name or, when no state exists and info is given,
+ * start a new job from *info (num_shards defaults to DEFAULT_NUM_SHARDS).
+ * Finally compute the names of the per-shard index objects.
+ *
+ * Returns 0 on success, -ENOENT when the job does not exist and info is
+ * NULL, or the error from reading/writing the job state.
+ */
+int RGWOrphanSearch::init(const string& job_name, RGWOrphanSearchInfo *info, bool _detailed_mode)
+{
+  int r = orphan_store.init();
+  if (r < 0) {
+    return r;
+  }
+
+  constexpr int64_t MAX_LIST_OBJS_ENTRIES=100;
+
+  max_list_bucket_entries = std::max(store->ctx()->_conf->rgw_list_bucket_min_readahead,
+                                     MAX_LIST_OBJS_ENTRIES);
+
+  detailed_mode = _detailed_mode;
+  RGWOrphanSearchState state;
+  r = orphan_store.read_job(job_name, state);
+  if (r < 0 && r != -ENOENT) {
+    lderr(store->ctx()) << "ERROR: failed to read state ret=" << r << dendl;
+    return r;
+  }
+
+  if (r == 0) {
+    search_info = state.info;
+    search_stage = state.stage;
+  } else if (info) { /* r == -ENOENT, initiate a new job if info was provided */
+    search_info = *info;
+    search_info.job_name = job_name;
+    search_info.num_shards = (info->num_shards ? info->num_shards : DEFAULT_NUM_SHARDS);
+    search_info.start_time = ceph_clock_now();
+    search_stage = RGWOrphanSearchStage(ORPHAN_SEARCH_STAGE_INIT);
+
+    r = save_state();
+    if (r < 0) {
+      lderr(store->ctx()) << "ERROR: failed to write state ret=" << r << dendl;
+      return r;
+    }
+  } else {
+    lderr(store->ctx()) << "ERROR: job not found" << dendl;
+    return r;
+  }
+
+  index_objs_prefix = RGW_ORPHAN_INDEX_PREFIX + string(".");
+  index_objs_prefix += job_name;
+
+  /*
+   * Build the index object names with string concatenation rather than a
+   * fixed 128 byte snprintf() buffer: with a long job name the buffer was
+   * silently truncated and every shard of every index ended up with the
+   * same object name.
+   */
+  for (int i = 0; i < search_info.num_shards; i++) {
+    const string shard_suffix = "." + std::to_string(i);
+
+    all_objs_index[i] = index_objs_prefix + ".rados" + shard_suffix;
+    buckets_instance_index[i] = index_objs_prefix + ".buckets" + shard_suffix;
+    linked_objs_index[i] = index_objs_prefix + ".linked" + shard_suffix;
+  }
+  return 0;
+}
+
+/*
+ * Append the oids collected per shard to the matching index objects.
+ *
+ * log_shards maps shard number -> index object name, oids maps shard
+ * number -> entries to record.  Entries are written as omap keys with
+ * empty values, at most MAX_OMAP_SET_ENTRIES per write, visiting the
+ * shards in turn until every list has been drained.  Returns 0 or the
+ * first error reported by store_entries().
+ */
+int RGWOrphanSearch::log_oids(map<int, string>& log_shards, map<int, list<string> >& oids)
+{
+#define MAX_OMAP_SET_ENTRIES 100
+  /* one cursor per shard: target object plus the [cur, end) range left to write */
+  list<log_iter_info> cursors;
+  for (auto& shard : oids) {
+    log_iter_info info;
+    info.oid = log_shards[shard.first];
+    info.cur = shard.second.begin();
+    info.end = shard.second.end();
+    cursors.push_back(info);
+  }
+
+  while (!cursors.empty()) {
+    auto cursor = cursors.begin();
+
+    while (cursor != cursors.end()) {
+      map<string, bufferlist> entries;
+      for (int n = 0;
+           cursor->cur != cursor->end && n != MAX_OMAP_SET_ENTRIES;
+           ++cursor->cur, ++n) {
+        ldout(store->ctx(), 20) << "adding obj: " << *cursor->cur << dendl;
+        entries[*cursor->cur] = bufferlist();
+      }
+
+      int ret = orphan_store.store_entries(cursor->oid, entries);
+      if (ret < 0) {
+        return ret;
+      }
+
+      /* a shard whose list is exhausted leaves the rotation */
+      if (cursor->cur == cursor->end) {
+        cursor = cursors.erase(cursor);
+      } else {
+        ++cursor;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+ * List every object of the searched pool (all namespaces) and record the
+ * oids that carry a namespace in the "all objects" index, sharded by
+ * fingerprint.  Oids without a '_' separator, unparsable oids and head
+ * objects are reported on stdout and skipped.  Buffered oids are written
+ * out every COUNT_BEFORE_FLUSH objects and once more at the end.
+ */
+int RGWOrphanSearch::build_all_oids_index()
+{
+  librados::IoCtx pool_ctx;
+
+  int ret = rgw_init_ioctx(store->get_rados_handle(), search_info.pool, pool_ctx);
+  if (ret < 0) {
+    lderr(store->ctx()) << __func__ << ": rgw_init_ioctx() returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  pool_ctx.set_namespace(librados::all_nspaces);
+  librados::NObjectIterator obj_iter = pool_ctx.nobjects_begin();
+  librados::NObjectIterator obj_end = pool_ctx.nobjects_end();
+
+  map<int, list<string> > pending;
+
+  int since_flush = 0;
+  uint64_t seen = 0;
+
+  cout << "logging all objects in the pool" << std::endl;
+
+  while (obj_iter != obj_end) {
+    string nspace = obj_iter->get_nspace();
+    string oid = obj_iter->get_oid();
+    string locator = obj_iter->get_locator();
+    ++obj_iter;
+
+    const size_t sep = oid.find('_');
+    if (sep == string::npos) {
+      cout << "unidentified oid: " << oid << ", skipping" << std::endl;
+      /* what is this object, oids should be in the format of <bucket marker>_<obj>,
+       * skip this entry
+       */
+      continue;
+    }
+
+    rgw_obj_key key;
+    if (!rgw_obj_key::parse_raw_oid(oid.substr(sep + 1), &key)) {
+      cout << "cannot parse oid: " << oid << ", skipping" << std::endl;
+      continue;
+    }
+
+    if (key.ns.empty()) {
+      /* skipping head objects, we don't want to remove these as they are mutable and
+       * cleaning them up is racy (can race with object removal and a later recreation)
+       */
+      cout << "skipping head object: oid=" << oid << std::endl;
+      continue;
+    }
+
+    string oid_fp = obj_fingerprint(oid);
+
+    ldout(store->ctx(), 20) << "oid_fp=" << oid_fp << dendl;
+
+    pending[orphan_shard(oid_fp)].push_back(oid);
+
+#define COUNT_BEFORE_FLUSH 1000
+    ++seen;
+    ++since_flush;
+    if (since_flush >= COUNT_BEFORE_FLUSH) {
+      ldout(store->ctx(), 1) << "iterated through " << seen << " objects" << dendl;
+      ret = log_oids(all_objs_index, pending);
+      if (ret < 0) {
+        cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl;
+        return ret;
+      }
+      since_flush = 0;
+      pending.clear();
+    }
+  }
+
+  /* write whatever is left over from the last partial batch */
+  ret = log_oids(all_objs_index, pending);
+  if (ret < 0) {
+    cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Enumerate all "bucket.instance" metadata keys and record them in the
+ * bucket instance index, sharded with orphan_shard().  Buffered keys are
+ * written out every COUNT_BEFORE_FLUSH keys and once more at the end.
+ *
+ * The metadata listing handle is now released on every exit path; it was
+ * previously leaked whenever listing or logging failed.
+ */
+int RGWOrphanSearch::build_buckets_instance_index()
+{
+  void *handle;
+  int max = 1000;
+  string section = "bucket.instance";
+  int ret = store->meta_mgr->list_keys_init(section, &handle);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: can't get key: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  map<int, list<string> > instances;
+
+  bool truncated;
+
+  // NOTE(review): obj_ctx is not referenced below; kept as in the original
+  RGWObjectCtx obj_ctx(store);
+
+  int count = 0;
+  uint64_t total = 0;
+
+  do {
+    list<string> keys;
+    ret = store->meta_mgr->list_keys_next(handle, max, keys, &truncated);
+    if (ret < 0) {
+      lderr(store->ctx()) << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << dendl;
+      store->meta_mgr->list_keys_complete(handle);
+      return ret;
+    }
+
+    for (list<string>::iterator iter = keys.begin(); iter != keys.end(); ++iter) {
+      ++total;
+      ldout(store->ctx(), 10) << "bucket_instance=" << *iter << " total=" << total << dendl;
+      int shard = orphan_shard(*iter);
+      instances[shard].push_back(*iter);
+
+      if (++count >= COUNT_BEFORE_FLUSH) {
+        ret = log_oids(buckets_instance_index, instances);
+        if (ret < 0) {
+          lderr(store->ctx()) << __func__ << ": ERROR: log_oids() returned ret=" << ret << dendl;
+          store->meta_mgr->list_keys_complete(handle);
+          return ret;
+        }
+        count = 0;
+        instances.clear();
+      }
+    }
+
+  } while (truncated);
+
+  /* the listing is finished; release the handle before the final flush */
+  store->meta_mgr->list_keys_complete(handle);
+
+  ret = log_oids(buckets_instance_index, instances);
+  if (ret < 0) {
+    lderr(store->ctx()) << __func__ << ": ERROR: log_oids() returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Turn the result of an object stat into the fingerprints of the rados
+ * objects it references and queue them, per shard, in oids.
+ *
+ * Without a manifest the object's own oid is used, plus a variant built
+ * with the "shadow" argument.  With a manifest every location it lists is
+ * used, unless detailed_mode is off and the object fits in its head.
+ * Always returns 0.
+ */
+int RGWOrphanSearch::handle_stat_result(map<int, list<string> >& oids, RGWRados::Object::Stat::Result& result)
+{
+  set<string> fingerprints;
+
+  if (result.has_manifest) {
+    RGWObjManifest& manifest = result.manifest;
+
+    const bool fits_in_head = manifest.get_obj_size() <= manifest.get_head_size();
+    if (!detailed_mode && fits_in_head) {
+      ldout(store->ctx(), 5) << "skipping object as it fits in a head" << dendl;
+      return 0;
+    }
+
+    RGWObjManifest::obj_iterator part;
+    for (part = manifest.obj_begin(); part != manifest.obj_end(); ++part) {
+      const rgw_raw_obj& raw = part.get_location().get_raw_obj(store);
+      string raw_oid = raw.oid;
+      fingerprints.insert(obj_fingerprint(raw_oid));
+    }
+  } else {
+    /* a very very old object, or part of a multipart upload during upload */
+    rgw_bucket& bucket = result.obj.bucket;
+    const string loc = bucket.bucket_id + "_" + result.obj.get_oid();
+    fingerprints.insert(obj_fingerprint(loc));
+
+    /*
+     * multipart parts don't have manifest on them, it's in the meta object. Instead of reading the
+     * meta object, just add a "shadow" object to the mix
+     */
+    fingerprints.insert(obj_fingerprint(loc, "shadow"));
+  }
+
+  for (const string& fp : fingerprints) {
+    ldout(store->ctx(), 20) << __func__ << ": oid for obj=" << result.obj << ": " << fp << dendl;
+
+    const int shard = orphan_shard(fp);
+    oids[shard].push_back(fp);
+  }
+
+  return 0;
+}
+
+/*
+ * Wait for the oldest queued stat operation, feed its result to
+ * handle_stat_result() and remove it from the queue.  The operation is
+ * popped whether or not it succeeded.  Returns the wait/handling result;
+ * -ENOENT from the wait is returned without being logged.
+ */
+int RGWOrphanSearch::pop_and_handle_stat_op(map<int, list<string> >& oids, std::deque<RGWRados::Object::Stat>& ops)
+{
+  RGWRados::Object::Stat& pending = ops.front();
+
+  int ret = pending.wait();
+  if (ret >= 0) {
+    ret = handle_stat_result(oids, pending.result);
+    if (ret < 0) {
+      lderr(store->ctx()) << "ERROR: handle_stat_response() returned error: " << cpp_strerror(-ret) << dendl;
+    }
+  } else if (ret != -ENOENT) {
+    lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl;
+  }
+
+  ops.pop_front();
+  return ret;
+}
+
+/*
+ * Collect, into oids, the fingerprints of all rados objects referenced by
+ * the entries of one bucket instance.
+ *
+ * The instance is skipped (returning 0) when the bucket no longer exists,
+ * when it is not the bucket's current instance, or while a reshard is in
+ * progress.  Every listed entry is stat'ed asynchronously with at most
+ * max_concurrent_ios operations in flight; unless detailed_mode is set,
+ * entries no larger than rgw_max_chunk_size are not stat'ed at all.
+ *
+ * Buffered oids are flushed to the linked objects index whenever their
+ * total number reaches COUNT_BEFORE_FLUSH.  The check used to look at
+ * oids.size(), which is the number of shards present in the map rather
+ * than the number of buffered oids, so it practically never triggered.
+ */
+int RGWOrphanSearch::build_linked_oids_for_bucket(const string& bucket_instance_id, map<int, list<string> >& oids)
+{
+  RGWObjectCtx obj_ctx(store);
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  rgw_bucket orphan_bucket;
+  int shard_id;
+  int ret = rgw_bucket_parse_bucket_key(store->ctx(), bucket_instance_id,
+                                        &orphan_bucket, &shard_id);
+  if (ret < 0) {
+    ldout(store->ctx(),0) << __func__ << " failed to parse bucket instance: "
+                          << bucket_instance_id << " skipping" << dendl;
+    return ret;
+  }
+
+  RGWBucketInfo cur_bucket_info;
+  ret = store->get_bucket_info(sysobj_ctx, orphan_bucket.tenant,
+                               orphan_bucket.name, cur_bucket_info, nullptr);
+  if (ret < 0) {
+    if (ret == -ENOENT) {
+      /* probably raced with bucket removal */
+      return 0;
+    }
+    /* this message used to name get_bucket_instance_info() by mistake */
+    lderr(store->ctx()) << __func__ << ": ERROR: RGWRados::get_bucket_info() returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  if (cur_bucket_info.bucket.bucket_id != orphan_bucket.bucket_id) {
+    ldout(store->ctx(), 0) << __func__ << ": Skipping stale bucket instance: "
+                           << orphan_bucket.name << ": "
+                           << orphan_bucket.bucket_id << dendl;
+    return 0;
+  }
+
+  if (cur_bucket_info.reshard_status == CLS_RGW_RESHARD_IN_PROGRESS) {
+    ldout(store->ctx(), 0) << __func__ << ": reshard in progress. Skipping "
+                           << orphan_bucket.name << ": "
+                           << orphan_bucket.bucket_id << dendl;
+    return 0;
+  }
+
+  RGWBucketInfo bucket_info;
+  ret = store->get_bucket_instance_info(sysobj_ctx, bucket_instance_id, bucket_info, nullptr, nullptr);
+  if (ret < 0) {
+    if (ret == -ENOENT) {
+      /* probably raced with bucket removal */
+      return 0;
+    }
+    lderr(store->ctx()) << __func__ << ": ERROR: RGWRados::get_bucket_instance_info() returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  ldout(store->ctx(), 10) << "building linked oids for bucket instance: " << bucket_instance_id << dendl;
+  RGWRados::Bucket target(store, bucket_info);
+  RGWRados::Bucket::List list_op(&target);
+
+  string marker;
+  list_op.params.marker = rgw_obj_key(marker);
+  list_op.params.list_versions = true;
+  list_op.params.enforce_ns = false;
+
+  bool truncated;
+
+  deque<RGWRados::Object::Stat> stat_ops;
+
+  /* total number of oids currently buffered across all shards */
+  auto buffered_oids = [&oids]() {
+    size_t n = 0;
+    for (const auto& shard : oids) {
+      n += shard.second.size();
+    }
+    return n;
+  };
+
+  do {
+    vector<rgw_bucket_dir_entry> result;
+
+    ret = list_op.list_objects(max_list_bucket_entries,
+                               &result, nullptr, &truncated);
+    if (ret < 0) {
+      cerr << "ERROR: store->list_objects(): " << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+
+    for (vector<rgw_bucket_dir_entry>::iterator iter = result.begin(); iter != result.end(); ++iter) {
+      rgw_bucket_dir_entry& entry = *iter;
+      if (entry.key.instance.empty()) {
+        ldout(store->ctx(), 20) << "obj entry: " << entry.key.name << dendl;
+      } else {
+        ldout(store->ctx(), 20) << "obj entry: " << entry.key.name << " [" << entry.key.instance << "]" << dendl;
+      }
+
+      ldout(store->ctx(), 20) << __func__ << ": entry.key.name=" << entry.key.name << " entry.key.instance=" << entry.key.instance << dendl;
+
+      if (!detailed_mode &&
+          entry.meta.accounted_size <= (uint64_t)store->ctx()->_conf->rgw_max_chunk_size) {
+        ldout(store->ctx(),5) << __func__ << "skipping stat as the object " << entry.key.name
+                              << "fits in a head" << dendl;
+        continue;
+      }
+
+      rgw_obj obj(bucket_info.bucket, entry.key);
+
+      // NOTE(review): the queued Stat is built from &op_target, which goes
+      // out of scope at the end of this iteration while the Stat stays in
+      // stat_ops -- confirm Stat does not dereference it after stat_async()
+      RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+
+      stat_ops.push_back(RGWRados::Object::Stat(&op_target));
+      RGWRados::Object::Stat& op = stat_ops.back();
+
+
+      ret = op.stat_async();
+      if (ret < 0) {
+        lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+      if (stat_ops.size() >= max_concurrent_ios) {
+        ret = pop_and_handle_stat_op(oids, stat_ops);
+        if (ret < 0) {
+          if (ret != -ENOENT) {
+            lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl;
+          }
+        }
+      }
+      if (buffered_oids() >= COUNT_BEFORE_FLUSH) {
+        ret = log_oids(linked_objs_index, oids);
+        if (ret < 0) {
+          cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl;
+          return ret;
+        }
+        oids.clear();
+      }
+    }
+  } while (truncated);
+
+  /* drain the stat operations that are still in flight */
+  while (!stat_ops.empty()) {
+    ret = pop_and_handle_stat_op(oids, stat_ops);
+    if (ret < 0) {
+      if (ret != -ENOENT) {
+        lderr(store->ctx()) << "ERROR: stat_async() returned error: " << cpp_strerror(-ret) << dendl;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Go through the bucket instance index, starting at the shard and marker
+ * recorded in search_stage, and gather the linked oids of every bucket
+ * instance found there.  Whatever is still buffered afterwards is written
+ * to the linked objects index and the job state is saved.
+ */
+int RGWOrphanSearch::build_linked_oids_index()
+{
+  map<int, list<string> > linked;
+
+  for (auto shard_iter = buckets_instance_index.find(search_stage.shard);
+       shard_iter != buckets_instance_index.end();
+       ++shard_iter) {
+    ldout(store->ctx(), 0) << "building linked oids index: " << shard_iter->first << "/" << buckets_instance_index.size() << dendl;
+
+    const string shard_oid = shard_iter->second;
+    bool truncated;
+
+    do {
+      map<string, bufferlist> entries;
+      int ret = orphan_store.read_entries(shard_oid, search_stage.marker, &entries, &truncated);
+      if (ret == -ENOENT) {
+        /* a missing index object is the same as an empty one */
+        truncated = false;
+        ret = 0;
+      }
+
+      if (ret < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: read_entries() oid=" << shard_oid << " returned ret=" << ret << dendl;
+        return ret;
+      }
+
+      if (entries.empty()) {
+        break;
+      }
+
+      for (const auto& entry : entries) {
+        ldout(store->ctx(), 20) << " indexed entry: " << entry.first << dendl;
+        ret = build_linked_oids_for_bucket(entry.first, linked);
+        if (ret < 0) {
+          lderr(store->ctx()) << __func__ << ": ERROR: build_linked_oids_for_bucket() indexed entry=" << entry.first
+                              << " returned ret=" << ret << dendl;
+          return ret;
+        }
+      }
+
+      /* remember how far we got: current shard and its last processed key */
+      search_stage.shard = shard_iter->first;
+      search_stage.marker = entries.rbegin()->first;
+    } while (truncated);
+
+    search_stage.marker.clear();
+  }
+
+  int ret = log_oids(linked_objs_index, linked);
+  if (ret < 0) {
+    cerr << __func__ << ": ERROR: log_oids() returned ret=" << ret << std::endl;
+    return ret;
+  }
+
+  ret = save_state();
+  if (ret < 0) {
+    cerr << __func__ << ": ERROR: failed to write state ret=" << ret << std::endl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/*
+ * Sequential reader over the omap entries of a single object: entries are
+ * fetched in batches and handed out one at a time through get_next().
+ */
+class OMAPReader {
+  librados::IoCtx ioctx;
+  string oid;
+
+  map<string, bufferlist> entries;           /* current batch */
+  map<string, bufferlist>::iterator iter;    /* next entry to hand out */
+  string marker;                             /* last key handed out */
+  bool truncated;                            /* whether another batch may follow */
+
+public:
+  OMAPReader(librados::IoCtx& _ioctx, const string& _oid)
+    : ioctx(_ioctx), oid(_oid), iter(entries.end()), truncated(true) {}
+
+  int get_next(string *key, bufferlist *pbl, bool *done);
+};
+
+/*
+ * Hand out the next omap entry.
+ *
+ * On return *done tells whether the end was reached; otherwise *key (and
+ * *pbl, when pbl is not NULL) hold the entry.  A new batch is fetched when
+ * the current one is used up and the previous fetch came back full.  A
+ * missing object (-ENOENT) is reported as the end; other read errors are
+ * returned.
+ */
+int OMAPReader::get_next(string *key, bufferlist *pbl, bool *done)
+{
+#define MAX_OMAP_GET_ENTRIES 100
+  for (;;) {
+    if (iter != entries.end()) {
+      *key = iter->first;
+      if (pbl) {
+        *pbl = iter->second;
+      }
+      ++iter;
+      *done = false;
+      marker = *key;
+      return 0;
+    }
+
+    if (!truncated) {
+      *done = true;
+      return 0;
+    }
+
+    /* current batch exhausted: read the one that follows the last key */
+    const int ret = ioctx.omap_get_vals(oid, marker, MAX_OMAP_GET_ENTRIES, &entries);
+    if (ret == -ENOENT) {
+      *done = true;
+      return 0;
+    }
+    if (ret < 0) {
+      return ret;
+    }
+
+    truncated = (entries.size() == MAX_OMAP_GET_ENTRIES);
+    iter = entries.begin();
+  }
+}
+
+/*
+ * Walk the "all objects" and "linked objects" indexes shard by shard and
+ * print every rados object whose fingerprint is not found in the linked
+ * index as "leaked: <oid>" on stdout.
+ *
+ * For each object key, the linked reader is advanced until its current
+ * entry is no longer smaller than the object's fingerprint. Objects that
+ * fail to stat are skipped, as are objects whose mtime is at or after
+ * (job start time - stale_secs) when stale_secs is non-zero.
+ *
+ * Returns 0, or the error from opening the data pool / reading an index.
+ */
+int RGWOrphanSearch::compare_oid_indexes()
+{
+ ceph_assert(linked_objs_index.size() == all_objs_index.size());
+
+ librados::IoCtx& ioctx = orphan_store.get_ioctx();
+
+ librados::IoCtx data_ioctx;
+
+ int ret = rgw_init_ioctx(store->get_rados_handle(), search_info.pool, data_ioctx);
+ if (ret < 0) {
+ lderr(store->ctx()) << __func__ << ": rgw_init_ioctx() returned ret=" << ret << dendl;
+ return ret;
+ }
+
+ uint64_t time_threshold = search_info.start_time.sec() - stale_secs;
+
+ /* both maps have the same size (asserted above) and are walked in lockstep */
+ map<int, string>::iterator liter = linked_objs_index.begin();
+ map<int, string>::iterator aiter = all_objs_index.begin();
+
+ for (; liter != linked_objs_index.end(); ++liter, ++aiter) {
+ OMAPReader linked_entries(ioctx, liter->second);
+ OMAPReader all_entries(ioctx, aiter->second);
+
+ bool done;
+
+ string cur_linked;
+ bool linked_done = false;
+
+
+ do {
+ string key;
+ int r = all_entries.get_next(&key, NULL, &done);
+ if (r < 0) {
+ return r;
+ }
+ if (done) {
+ break;
+ }
+
+ string key_fp = obj_fingerprint(key);
+
+ /* advance the linked reader until it catches up with this fingerprint */
+ while (cur_linked < key_fp && !linked_done) {
+ r = linked_entries.get_next(&cur_linked, NULL, &linked_done);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (cur_linked == key_fp) {
+ ldout(store->ctx(), 20) << "linked: " << key << dendl;
+ continue;
+ }
+
+ /* not linked: stat the object to get its mtime before reporting it */
+ time_t mtime;
+ r = data_ioctx.stat(key, NULL, &mtime);
+ if (r < 0) {
+ if (r != -ENOENT) {
+ lderr(store->ctx()) << "ERROR: ioctx.stat(" << key << ") returned ret=" << r << dendl;
+ }
+ continue;
+ }
+ if (stale_secs && (uint64_t)mtime >= time_threshold) {
+ ldout(store->ctx(), 20) << "skipping: " << key << " (mtime=" << mtime << " threshold=" << time_threshold << ")" << dendl;
+ continue;
+ }
+ ldout(store->ctx(), 20) << "leaked: " << key << dendl;
+ cout << "leaked: " << key << std::endl;
+ } while (!done);
+ }
+
+ return 0;
+}
+
+/*
+ * Run the search from the stage recorded in search_stage through to the
+ * final comparison.  After each stage completes, the next stage is stored
+ * with save_state() before execution falls through into it.
+ *
+ * Returns 0 on success or the first error from a stage or from saving the
+ * state.  The error messages now name the stage that actually failed; they
+ * all used to blame build_all_objs_index.
+ */
+int RGWOrphanSearch::run()
+{
+  int r;
+
+  switch (search_stage.stage) {
+
+    case ORPHAN_SEARCH_STAGE_INIT:
+      ldout(store->ctx(), 0) << __func__ << "(): initializing state" << dendl;
+      search_stage = RGWOrphanSearchStage(ORPHAN_SEARCH_STAGE_LSPOOL);
+      r = save_state();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: failed to save state, ret=" << r << dendl;
+        return r;
+      }
+      // fall through
+    case ORPHAN_SEARCH_STAGE_LSPOOL:
+      ldout(store->ctx(), 0) << __func__ << "(): building index of all objects in pool" << dendl;
+      r = build_all_oids_index();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: build_all_oids_index returned ret=" << r << dendl;
+        return r;
+      }
+
+      search_stage = RGWOrphanSearchStage(ORPHAN_SEARCH_STAGE_LSBUCKETS);
+      r = save_state();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: failed to save state, ret=" << r << dendl;
+        return r;
+      }
+      // fall through
+
+    case ORPHAN_SEARCH_STAGE_LSBUCKETS:
+      ldout(store->ctx(), 0) << __func__ << "(): building index of all bucket indexes" << dendl;
+      r = build_buckets_instance_index();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: build_buckets_instance_index returned ret=" << r << dendl;
+        return r;
+      }
+
+      search_stage = RGWOrphanSearchStage(ORPHAN_SEARCH_STAGE_ITERATE_BI);
+      r = save_state();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: failed to save state, ret=" << r << dendl;
+        return r;
+      }
+      // fall through
+
+
+    case ORPHAN_SEARCH_STAGE_ITERATE_BI:
+      ldout(store->ctx(), 0) << __func__ << "(): building index of all linked objects" << dendl;
+      r = build_linked_oids_index();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: build_linked_oids_index returned ret=" << r << dendl;
+        return r;
+      }
+
+      search_stage = RGWOrphanSearchStage(ORPHAN_SEARCH_STAGE_COMPARE);
+      r = save_state();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: failed to save state, ret=" << r << dendl;
+        return r;
+      }
+      // fall through
+
+    case ORPHAN_SEARCH_STAGE_COMPARE:
+      r = compare_oid_indexes();
+      if (r < 0) {
+        lderr(store->ctx()) << __func__ << ": ERROR: compare_oid_indexes returned ret=" << r << dendl;
+        return r;
+      }
+
+      break;
+
+    default:
+      ceph_abort();
+  }
+
+  return 0;
+}
+
+
+/*
+ * Remove every index object named in index.
+ *
+ * Removal is best effort: a failure is logged and the remaining objects
+ * are still removed.  Objects that are already gone (-ENOENT) are not
+ * treated as failures.  Returns 0 when nothing failed, otherwise the first
+ * error seen; it used to return 0 unconditionally, which made the
+ * caller's error checks dead code.
+ */
+int RGWOrphanSearch::remove_index(map<int, string>& index)
+{
+  librados::IoCtx& ioctx = orphan_store.get_ioctx();
+
+  int first_error = 0;
+  for (map<int, string>::iterator iter = index.begin(); iter != index.end(); ++iter) {
+    int r = ioctx.remove(iter->second);
+    if (r < 0 && r != -ENOENT) {
+      ldout(store->ctx(), 0) << "ERROR: couldn't remove " << iter->second << ": ret=" << r << dendl;
+      if (first_error == 0) {
+        first_error = r;
+      }
+    }
+  }
+  return first_error;
+}
+
+/*
+ * Clean up after a job: remove the three sets of index objects and then
+ * the job entry itself.  Index removal failures are only logged; the
+ * return value is the result of removing the job entry.
+ */
+int RGWOrphanSearch::finish()
+{
+  /* remove one family of index objects, logging a failure */
+  auto drop_index = [this](map<int, string>& index) {
+    const int rc = remove_index(index);
+    if (rc < 0) {
+      ldout(store->ctx(), 0) << "ERROR: remove_index(" << index << ") returned ret=" << rc << dendl;
+    }
+  };
+
+  drop_index(all_objs_index);
+  drop_index(buckets_instance_index);
+  drop_index(linked_objs_index);
+
+  const int r = orphan_store.remove_job(search_info.job_name);
+  if (r < 0) {
+    ldout(store->ctx(), 0) << "ERROR: could not remove job name (" << search_info.job_name << ") ret=" << r << dendl;
+  }
+
+  return r;
+}
+
+
+/*
+ * Work out which rados oids back the object described by result and
+ * return them in obj_oids (which is cleared first).
+ *
+ * - already visited oid: nothing is added (guards against DLO/SLO loops)
+ * - no manifest: the object's own oid
+ * - DLO (user manifest attr): own oid; the "<bucket>/<prefix>" it points
+ *   at is queued with add_bucket_prefix()
+ * - SLO (slo manifest attr): own oid; every listed segment is queued with
+ *   add_bucket_filter()
+ * - regular manifest: every location in the manifest, plus the own oid
+ *   when the max head size is 0 or the manifest lists no objects
+ *
+ * Returns 0 on success, -EINVAL when a DLO/SLO path has no '/' separator,
+ * -EIO when the SLO manifest cannot be decoded.
+ */
+int RGWRadosList::handle_stat_result(RGWRados::Object::Stat::Result& result,
+ std::set<string>& obj_oids)
+{
+ obj_oids.clear();
+
+ rgw_bucket& bucket = result.obj.bucket;
+
+ ldout(store->ctx(), 20) << "RGWRadosList::" << __func__ <<
+ " bucket=" << bucket << ", has_manifest=" << result.has_manifest <<
+ dendl;
+
+ // iterator to store result of dlo/slo attribute find
+ decltype(result.attrs)::iterator attr_it = result.attrs.end();
+ const std::string oid = bucket.marker + "_" + result.obj.get_oid();
+ ldout(store->ctx(), 20) << "radoslist processing object=\"" <<
+ oid << "\"" << dendl;
+ if (visited_oids.find(oid) != visited_oids.end()) {
+ // apparently we hit a loop; don't continue with this oid
+ ldout(store->ctx(), 15) <<
+ "radoslist stopped loop at already visited object=\"" <<
+ oid << "\"" << dendl;
+ return 0;
+ }
+
+ if (!result.has_manifest) {
+ /* a very very old object, or part of a multipart upload during upload */
+ obj_oids.insert(oid);
+
+ /*
+ * multipart parts don't have manifest on them, it's in the meta
+ * object; we'll process them in
+ * RGWRadosList::do_incomplete_multipart
+ */
+ } else if ((attr_it = result.attrs.find(RGW_ATTR_USER_MANIFEST)) !=
+ result.attrs.end()) {
+ // *** handle DLO object ***
+
+ obj_oids.insert(oid);
+ visited_oids.insert(oid); // prevent dlo loops
+ ldout(store->ctx(), 15) << "radoslist added to visited list DLO=\"" <<
+ oid << "\"" << dendl;
+
+ // NOTE(review): the string is built from c_str() without a length, so
+ // this assumes the attr value is NUL-terminated -- confirm
+ char* prefix_path_c = attr_it->second.c_str();
+ const std::string& prefix_path = prefix_path_c;
+
+ // the attr value is split at the first '/' into bucket name and prefix
+ const size_t sep_pos = prefix_path.find('/');
+ if (string::npos == sep_pos) {
+ return -EINVAL;
+ }
+
+ const std::string bucket_name = prefix_path.substr(0, sep_pos);
+ const std::string prefix = prefix_path.substr(sep_pos + 1);
+
+ add_bucket_prefix(bucket_name, prefix);
+ ldout(store->ctx(), 25) << "radoslist DLO oid=\"" << oid <<
+ "\" added bucket=\"" << bucket_name << "\" prefix=\"" <<
+ prefix << "\" to process list" << dendl;
+ } else if ((attr_it = result.attrs.find(RGW_ATTR_SLO_MANIFEST)) !=
+ result.attrs.end()) {
+ // *** handle SLO object ***
+
+ obj_oids.insert(oid);
+ visited_oids.insert(oid); // prevent slo loops
+ ldout(store->ctx(), 15) << "radoslist added to visited list SLO=\"" <<
+ oid << "\"" << dendl;
+
+ RGWSLOInfo slo_info;
+ bufferlist::const_iterator bliter = attr_it->second.begin();
+ try {
+ ::decode(slo_info, bliter);
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0) <<
+ "ERROR: failed to decode slo manifest for " << oid << dendl;
+ return -EIO;
+ }
+
+ // each entry path is "/<bucket>/<object>", both parts url-encoded
+ for (const auto& iter : slo_info.entries) {
+ const string& path_str = iter.path;
+
+ const size_t sep_pos = path_str.find('/', 1 /* skip initial slash */);
+ if (string::npos == sep_pos) {
+ return -EINVAL;
+ }
+
+ std::string bucket_name;
+ std::string obj_name;
+
+ bucket_name = url_decode(path_str.substr(1, sep_pos - 1));
+ obj_name = url_decode(path_str.substr(sep_pos + 1));
+
+ const rgw_obj_key obj_key(obj_name);
+ add_bucket_filter(bucket_name, obj_key);
+ ldout(store->ctx(), 25) << "radoslist SLO oid=\"" << oid <<
+ "\" added bucket=\"" << bucket_name << "\" obj_key=\"" <<
+ obj_key << "\" to process list" << dendl;
+ }
+ } else {
+ RGWObjManifest& manifest = result.manifest;
+
+ // in multipart, the head object contains no data and just has the
+ // manifest AND empty objects have no manifest, but they're
+ // realized as empty rados objects
+ if (0 == manifest.get_max_head_size() ||
+ manifest.obj_begin() == manifest.obj_end()) {
+ obj_oids.insert(oid);
+ // first_insert = true;
+ }
+
+ RGWObjManifest::obj_iterator miter;
+ for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) {
+ const rgw_raw_obj& loc = miter.get_location().get_raw_obj(store);
+ string s = loc.oid;
+ obj_oids.insert(s);
+ }
+ }
+
+ return 0;
+} // RGWRadosList::handle_stat_result
+
+/*
+ * Wait for the oldest queued stat operation, print the rados oids derived
+ * from its result to stdout (one per line), then drop the operation from
+ * the queue.  Oids collected before handle_stat_result() reported an error
+ * are still printed.  Returns the wait/handling result; -ENOENT from the
+ * wait is returned without being logged.
+ */
+int RGWRadosList::pop_and_handle_stat_op(
+  RGWObjectCtx& obj_ctx,
+  std::deque<RGWRados::Object::Stat>& ops)
+{
+  std::set<string> found_oids;
+  RGWRados::Object::Stat& pending = ops.front();
+
+  int ret = pending.wait();
+  if (ret < 0) {
+    if (ret != -ENOENT) {
+      lderr(store->ctx()) << "ERROR: stat_async() returned error: " <<
+        cpp_strerror(-ret) << dendl;
+    }
+  } else {
+    ret = handle_stat_result(pending.result, found_oids);
+    if (ret < 0) {
+      lderr(store->ctx()) << "ERROR: handle_stat_result() returned error: " <<
+        cpp_strerror(-ret) << dendl;
+    }
+
+    // output results
+    for (const auto& found : found_oids) {
+      std::cout << found << std::endl;
+    }
+  }
+
+  // invalidate object context for this object to avoid memory leak
+  // (see pr https://github.com/ceph/ceph/pull/30174)
+  obj_ctx.invalidate(pending.result.obj);
+
+  ops.pop_front();
+  return ret;
+}
+
+
+#if 0 // code that may be the basis for expansion
+/*
+ * Disabled code: lists all "bucket.instance" metadata keys and logs them,
+ * sharded with orphan_shard(), through log_oids().
+ * NOTE(review): list_keys_complete() is only reached on the success path,
+ * so the listing handle would be leaked on the error returns if this code
+ * is ever enabled.
+ */
+int RGWRadosList::build_buckets_instance_index()
+{
+ void *handle;
+ int max = 1000;
+ string section = "bucket.instance";
+ int ret = store->meta_mgr->list_keys_init(section, &handle);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: can't get key: " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ map<int, list<string> > instances;
+
+ bool truncated;
+
+ RGWObjectCtx obj_ctx(store);
+
+ int count = 0;
+ uint64_t total = 0;
+
+ do {
+ list<string> keys;
+ ret = store->meta_mgr->list_keys_next(handle, max, keys, &truncated);
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: lists_keys_next(): " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ for (list<string>::iterator iter = keys.begin(); iter != keys.end(); ++iter) {
+ ++total;
+ ldout(store->ctx(), 10) << "bucket_instance=" << *iter << " total=" << total << dendl;
+ int shard = orphan_shard(*iter);
+ instances[shard].push_back(*iter);
+
+ if (++count >= COUNT_BEFORE_FLUSH) {
+ ret = log_oids(buckets_instance_index, instances);
+ if (ret < 0) {
+ lderr(store->ctx()) << __func__ << ": ERROR: log_oids() returned ret=" << ret << dendl;
+ return ret;
+ }
+ count = 0;
+ instances.clear();
+ }
+ }
+ } while (truncated);
+
+ ret = log_oids(buckets_instance_index, instances);
+ if (ret < 0) {
+ lderr(store->ctx()) << __func__ << ": ERROR: log_oids() returned ret=" << ret << dendl;
+ return ret;
+ }
+ store->meta_mgr->list_keys_complete(handle);
+
+ return 0;
+}
+#endif
+
+
+/**
+ * List the index of one bucket instance and issue a stat for its entries.
+ *
+ * Every listed entry that passes the optional filter gets an asynchronous
+ * stat queued on a local deque; once max_concurrent_ios stats are pending,
+ * one of them is handed to pop_and_handle_stat_op(). For entries that carry
+ * a version instance, the un-instanced key is stat'ed as well, once per
+ * object name.
+ *
+ * @param bucket_instance_id  key passed to get_bucket_instance_info()
+ * @param prefix              value assigned to the listing's prefix param
+ * @param entries_filter      when non-empty, entries whose key is not in
+ *                            this set are skipped
+ * @return 0 on success, and also when the bucket instance or the listing
+ *         reports -ENOENT (treated as a race with bucket removal);
+ *         otherwise the negative error code from loading the bucket info,
+ *         from list_objects(), or from stat_async().
+ */
+int RGWRadosList::process_bucket(
+  const std::string& bucket_instance_id,
+  const std::string& prefix,
+  const std::set<rgw_obj_key>& entries_filter)
+{
+  ldout(store->ctx(), 10) << "RGWRadosList::" << __func__ <<
+    " bucket_instance_id=" << bucket_instance_id <<
+    ", prefix=" << prefix <<
+    ", entries_filter.size=" << entries_filter.size() << dendl;
+
+  RGWBucketInfo bucket_info;
+  RGWSysObjectCtx sys_obj_ctx = store->svc.sysobj->init_obj_ctx();
+  int ret = store->get_bucket_instance_info(sys_obj_ctx, bucket_instance_id,
+                                            bucket_info, nullptr, nullptr);
+  if (ret < 0) {
+    if (ret == -ENOENT) {
+      // probably raced with bucket removal
+      return 0;
+    }
+    lderr(store->ctx()) << __func__ <<
+      ": ERROR: RGWRados::get_bucket_instance_info() returned ret=" <<
+      ret << dendl;
+    return ret;
+  }
+
+  RGWRados::Bucket target(store, bucket_info);
+  RGWRados::Bucket::List list_op(&target);
+
+  std::string marker;
+  list_op.params.marker = rgw_obj_key(marker);
+  list_op.params.list_versions = true;
+  list_op.params.enforce_ns = false;
+  list_op.params.allow_unordered = false;
+  list_op.params.prefix = prefix;
+
+  bool truncated;
+
+  std::deque<RGWRados::Object::Stat> stat_ops;
+  std::string prev_versioned_key_name = "";
+
+  RGWObjectCtx obj_ctx(store);
+
+  // Queues an async stat for one key and, when the queue is full, drains
+  // one pending op. Needed for both the head object and the listed entry.
+  auto do_stat_key = [&](const rgw_obj_key& key) -> int {
+    rgw_obj obj(bucket_info.bucket, key);
+
+    RGWRados::Object op_target(store, bucket_info, obj_ctx, obj);
+
+    // NOTE(review): the Stat op is built from the address of op_target,
+    // which goes out of scope when this lambda returns while the op stays
+    // queued in stat_ops -- confirm Stat never dereferences it afterwards.
+    stat_ops.push_back(RGWRados::Object::Stat(&op_target));
+    RGWRados::Object::Stat& op = stat_ops.back();
+
+    int r = op.stat_async();
+    if (r < 0) {
+      lderr(store->ctx()) << "ERROR: stat_async() returned error: " <<
+        cpp_strerror(-r) << dendl;
+      return r;
+    }
+
+    if (stat_ops.size() >= max_concurrent_ios) {
+      r = pop_and_handle_stat_op(obj_ctx, stat_ops);
+      if (r < 0) {
+        if (r != -ENOENT) {
+          lderr(store->ctx()) <<
+            "ERROR: pop_and_handle_stat_op() returned error: " <<
+            cpp_strerror(-r) << dendl;
+        }
+
+        // clear error, so we'll continue processing directory
+        r = 0;
+      }
+    }
+
+    return r;
+  }; // do_stat_key lambda
+
+  do {
+    std::vector<rgw_bucket_dir_entry> result;
+
+    constexpr int64_t LIST_OBJS_MAX_ENTRIES = 100;
+    ret = list_op.list_objects(LIST_OBJS_MAX_ENTRIES, &result,
+                               nullptr, &truncated);
+    if (ret == -ENOENT) {
+      // race with bucket delete?
+      ret = 0;
+      break;
+    } else if (ret < 0) {
+      std::cerr << "ERROR: store->list_objects(): " << cpp_strerror(-ret) <<
+        std::endl;
+      return ret;
+    }
+
+    for (rgw_bucket_dir_entry& entry : result) {
+      if (entry.key.instance.empty()) {
+        ldout(store->ctx(), 20) << "obj entry: " << entry.key.name << dendl;
+      } else {
+        ldout(store->ctx(), 20) << "obj entry: " << entry.key.name <<
+          " [" << entry.key.instance << "]" << dendl;
+      }
+
+      ldout(store->ctx(), 20) << __func__ << ": entry.key.name=" <<
+        entry.key.name << " entry.key.instance=" << entry.key.instance <<
+        dendl;
+
+      // ignore entries that are not in the filter if there is a filter
+      if (!entries_filter.empty() &&
+          entries_filter.find(entry.key) == entries_filter.cend()) {
+        continue;
+      }
+
+      // for versioned objects, make sure the head object is handled
+      // as well by ignoring the instance identifier
+      if (!entry.key.instance.empty() &&
+          entry.key.name != prev_versioned_key_name) {
+        // don't do the same key twice; this listing is ordered
+        // (allow_unordered is false above) and all versions of an
+        // object use the same bucket index key, so they'll all end up
+        // together and sorted
+        prev_versioned_key_name = entry.key.name;
+
+        rgw_obj_key uninstanced(entry.key.name);
+
+        ret = do_stat_key(uninstanced);
+        if (ret < 0) {
+          return ret;
+        }
+      }
+
+      ret = do_stat_key(entry.key);
+      if (ret < 0) {
+        return ret;
+      }
+    } // for each listed entry
+  } while (truncated);
+
+  // drain whatever is still pending; failures here are logged but do not
+  // fail the bucket
+  while (!stat_ops.empty()) {
+    ret = pop_and_handle_stat_op(obj_ctx, stat_ops);
+    if (ret < 0) {
+      if (ret != -ENOENT) {
+        lderr(store->ctx()) <<
+          "ERROR: pop_and_handle_stat_op() returned error: " <<
+          cpp_strerror(-ret) << dendl;
+      }
+    }
+  }
+
+  return 0;
+}
+
+
+/**
+ * Walk every key of the "bucket" metadata section and run the per-bucket
+ * pass (run(const std::string&)) on each of them.
+ *
+ * @return 0 on success; the negative error code of list_keys_init(),
+ *         list_keys_next() or of the first per-bucket run that fails with
+ *         something other than -ENOENT (a missing bucket is skipped).
+ */
+int RGWRadosList::run()
+{
+  void* handle = nullptr;
+
+  int ret = store->meta_mgr->list_keys_init("bucket", &handle);
+  if (ret < 0) {
+    lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+      " ERROR: list_keys_init returned " <<
+      cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  const int max_keys = 1000;
+  bool truncated = true;
+
+  do {
+    std::list<std::string> buckets;
+    ret = store->meta_mgr->list_keys_next(handle, max_keys, buckets, &truncated);
+    if (ret < 0) {
+      // without this check a failing listing would be retried forever,
+      // because `truncated` would keep its previous value
+      lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+        " ERROR: list_keys_next returned " <<
+        cpp_strerror(-ret) << dendl;
+      break;
+    }
+
+    for (const std::string& bucket_id : buckets) {
+      ret = run(bucket_id);
+      if (ret == -ENOENT) {
+        // bucket vanished in the meantime; not an error for the full scan
+        ret = 0;
+        continue;
+      } else if (ret < 0) {
+        break;
+      }
+    }
+  } while (ret >= 0 && truncated);
+
+  // release the listing handle on every path once init succeeded
+  store->meta_mgr->list_keys_complete(handle);
+
+  return ret < 0 ? ret : 0;
+} // RGWRadosList::run()
+
+
+/**
+ * Process one starting bucket plus every bucket queued in
+ * bucket_process_map, then handle the starting bucket's incomplete
+ * multipart uploads.
+ *
+ * Each map entry is either processed entirely or restricted to its
+ * filter keys and/or prefixes, as recorded in its process_t.
+ *
+ * @param start_bucket_name  bucket name looked up under tenant_name
+ * @return 0 on success or when a bucket has disappeared (-ENOENT);
+ *         otherwise the first negative error code encountered.
+ */
+int RGWRadosList::run(const std::string& start_bucket_name)
+{
+  RGWSysObjectCtx sys_obj_ctx = store->svc.sysobj->init_obj_ctx();
+  // NOTE(review): obj_ctx is not referenced anywhere below; kept in case
+  // its construction matters -- confirm and remove if it does not.
+  RGWObjectCtx obj_ctx(store);
+  int ret;
+
+  // __func__ inside a lambda names the closure's operator(), so capture
+  // the enclosing function's name for the log messages emitted there
+  const char* const func_name = __func__;
+
+  add_bucket_entire(start_bucket_name);
+
+  while (! bucket_process_map.empty()) {
+    // pop item from map and capture its key data
+    auto front = bucket_process_map.begin();
+    std::string bucket_name = front->first;
+    process_t process;
+    std::swap(process, front->second);
+    bucket_process_map.erase(front);
+
+    RGWBucketInfo bucket_info;
+    ret = store->get_bucket_info(sys_obj_ctx,
+                                 tenant_name, bucket_name, bucket_info,
+                                 nullptr, nullptr);
+    if (ret == -ENOENT) {
+      std::cerr << "WARNING: bucket " << bucket_name <<
+        " does not exist; could it have been deleted very recently?" <<
+        std::endl;
+      continue;
+    } else if (ret < 0) {
+      std::cerr << "ERROR: could not get info for bucket " << bucket_name <<
+        " -- " << cpp_strerror(-ret) << std::endl;
+      return ret;
+    }
+
+    const std::string bucket_id = bucket_info.bucket.get_key();
+
+    static const std::set<rgw_obj_key> empty_filter;
+    static const std::string empty_prefix;
+
+    // wraps process_bucket(): maps -ENOENT to success and logs other errors
+    auto do_process_bucket =
+      [&bucket_id, func_name, this]
+      (const std::string& prefix,
+       const std::set<rgw_obj_key>& entries_filter) -> int {
+        int r = process_bucket(bucket_id, prefix, entries_filter);
+        if (r == -ENOENT) {
+          // bucket deletion race?
+          return 0;
+        } else if (r < 0) {
+          lderr(store->ctx()) << "RGWRadosList::" << func_name <<
+            ": ERROR: process_bucket(); bucket_id=" <<
+            bucket_id << " returned ret=" << r << dendl;
+        }
+
+        return r;
+      };
+
+    // either process the whole bucket *or* process the filters and/or
+    // the prefixes
+    if (process.entire_container) {
+      ret = do_process_bucket(empty_prefix, empty_filter);
+      if (ret < 0) {
+        return ret;
+      }
+    } else {
+      if (! process.filter_keys.empty()) {
+        ret = do_process_bucket(empty_prefix, process.filter_keys);
+        if (ret < 0) {
+          return ret;
+        }
+      }
+      for (const auto& p : process.prefixes) {
+        ret = do_process_bucket(p, empty_filter);
+        if (ret < 0) {
+          return ret;
+        }
+      }
+    }
+  } // while (! bucket_process_map.empty())
+
+  // now handle incomplete multipart uploads by going back to the
+  // initial bucket
+
+  RGWBucketInfo bucket_info;
+  ret = store->get_bucket_info(sys_obj_ctx,
+                               tenant_name, start_bucket_name, bucket_info,
+                               nullptr, nullptr);
+  if (ret == -ENOENT) {
+    // bucket deletion race?
+    return 0;
+  } else if (ret < 0) {
+    lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+      ": ERROR: get_bucket_info returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  ret = do_incomplete_multipart(store, bucket_info);
+  if (ret < 0) {
+    lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+      ": ERROR: do_incomplete_multipart returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  return 0;
+} // RGWRadosList::run(string)
+
+
+/**
+ * Print the rados oids that belong to the parts of incomplete multipart
+ * uploads in one bucket.
+ *
+ * The bucket's multipart namespace is listed page by page; each entry
+ * that parses as an upload meta object is treated as one upload. For
+ * every upload its parts are listed page by page and the raw object oid
+ * of every manifest location is written to std::cout, one per line.
+ *
+ * @param store        store used for listing (shadows the member)
+ * @param bucket_info  bucket whose multipart namespace is scanned
+ * @return 0 on success or when the namespace listing reports -ENOENT;
+ *         otherwise the negative error code from list_objects() or
+ *         list_multipart_parts(). An upload whose part listing reports
+ *         -ENOENT is skipped.
+ */
+int RGWRadosList::do_incomplete_multipart(
+  RGWRados* store,
+  RGWBucketInfo& bucket_info)
+{
+  constexpr int max_uploads = 1000;
+  constexpr int max_parts = 1000;
+  static const std::string mp_ns = RGW_OBJ_NS_MULTIPART;
+  static MultipartMetaFilter mp_filter;
+
+  int ret;
+
+  RGWRados::Bucket target(store, bucket_info);
+  RGWRados::Bucket::List list_op(&target);
+  list_op.params.ns = mp_ns;
+  list_op.params.filter = &mp_filter;
+  // use empty string for initial list_op.params.marker
+  // use empty strings for list_op.params.{prefix,delim}
+
+  bool is_listing_truncated;
+
+  do {
+    std::vector<rgw_bucket_dir_entry> objs;
+    std::map<string, bool> common_prefixes;
+    ret = list_op.list_objects(max_uploads, &objs, &common_prefixes,
+                               &is_listing_truncated);
+    if (ret == -ENOENT) {
+      // could bucket have been removed while this is running?
+      ldout(store->ctx(), 20) << "RGWRadosList::" << __func__ <<
+        ": WARNING: call to list_objects of multipart namespace got ENOENT; "
+        "assuming bucket removal race" << dendl;
+      break;
+    } else if (ret < 0) {
+      lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+        ": ERROR: list_objects op returned ret=" << ret << dendl;
+      return ret;
+    }
+
+    if (!objs.empty()) {
+      std::vector<RGWMultipartUploadEntry> uploads;
+      RGWMultipartUploadEntry entry;
+      for (const rgw_bucket_dir_entry& obj : objs) {
+        const rgw_obj_key& key = obj.key;
+        if (!entry.mp.from_meta(key.name)) {
+          // we only want the meta objects, so skip all the components
+          continue;
+        }
+        entry.obj = obj;
+        uploads.push_back(entry);
+        ldout(store->ctx(), 20) << "RGWRadosList::" << __func__ <<
+          " processing incomplete multipart entry " <<
+          entry << dendl;
+      }
+
+      // now process the uploads vector; the part marker and truncation
+      // flag are per upload, and the marker is advanced by each page so
+      // a truncated listing makes progress instead of repeating page one
+      for (const auto& upload : uploads) {
+        const RGWMPObj& mp = upload.mp;
+        int parts_marker = 0;
+        bool is_parts_truncated = false;
+
+        do {
+          map<uint32_t, RGWUploadPartInfo> parts;
+
+          ret = list_multipart_parts(store, bucket_info, store->ctx(),
+                                     mp.get_upload_id(), mp.get_meta(),
+                                     max_parts,
+                                     parts_marker, parts, &parts_marker,
+                                     &is_parts_truncated);
+          if (ret == -ENOENT) {
+            // upload went away; move on to the next one
+            break;
+          } else if (ret < 0) {
+            lderr(store->ctx()) << "RGWRadosList::" << __func__ <<
+              ": ERROR: list_multipart_parts returned ret=" << ret << dendl;
+            return ret;
+          }
+
+          for (auto& p : parts) {
+            RGWObjManifest& manifest = p.second.manifest;
+            for (auto obj_it = manifest.obj_begin();
+                 obj_it != manifest.obj_end();
+                 ++obj_it) {
+              const rgw_raw_obj& loc = obj_it.get_location().get_raw_obj(store);
+              std::cout << loc.oid << std::endl;
+            }
+          }
+        } while (is_parts_truncated);
+      } // for each upload
+    } // if objs not empty
+  } while (is_listing_truncated);
+
+  return 0;
+} // RGWRadosList::do_incomplete_multipart
diff --git a/src/rgw/rgw_orphan.h b/src/rgw/rgw_orphan.h
new file mode 100644
index 00000000..fe737b4f
--- /dev/null
+++ b/src/rgw/rgw_orphan.h
@@ -0,0 +1,290 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_ORPHAN_H
+#define CEPH_RGW_ORPHAN_H
+
+#include "common/config.h"
+#include "common/Formatter.h"
+#include "common/errno.h"
+
+#include "rgw_rados.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#define RGW_ORPHAN_INDEX_OID "orphan.index"
+#define RGW_ORPHAN_INDEX_PREFIX "orphan.scan"
+
+
+// Identifies the phase an orphan-search job is in. The numeric value is
+// what RGWOrphanSearchStage::encode() writes (cast to int), so changing
+// an existing value would change the encoded form.
+enum RGWOrphanSearchStageId {
+ ORPHAN_SEARCH_STAGE_UNKNOWN = 0,
+ ORPHAN_SEARCH_STAGE_INIT = 1,
+ ORPHAN_SEARCH_STAGE_LSPOOL = 2,
+ ORPHAN_SEARCH_STAGE_LSBUCKETS = 3,
+ ORPHAN_SEARCH_STAGE_ITERATE_BI = 4,
+ ORPHAN_SEARCH_STAGE_COMPARE = 5,
+};
+
+
+// Progress record of an orphan search: the stage reached plus a shard
+// number and a marker string. Encodes as (stage as int, shard, marker).
+struct RGWOrphanSearchStage {
+  RGWOrphanSearchStageId stage;
+  int shard;
+  string marker;
+
+  // default: unknown stage, shard 0, empty marker
+  RGWOrphanSearchStage()
+    : stage(ORPHAN_SEARCH_STAGE_UNKNOWN),
+      shard(0) {}
+
+  // given stage, shard 0, empty marker
+  explicit RGWOrphanSearchStage(RGWOrphanSearchStageId _stage)
+    : stage(_stage),
+      shard(0) {}
+
+  // fully specified position
+  RGWOrphanSearchStage(RGWOrphanSearchStageId _stage,
+                       int _shard,
+                       const string& _marker)
+    : stage(_stage),
+      shard(_shard),
+      marker(_marker) {}
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    // the enum travels as a plain int
+    encode(static_cast<int>(stage), bl);
+    encode(shard, bl);
+    encode(marker, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    int raw_stage;
+    decode(raw_stage, bl);
+    stage = static_cast<RGWOrphanSearchStageId>(raw_stage);
+    decode(shard, bl);
+    decode(marker, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(RGWOrphanSearchStage)
+
+// Description of one orphan-search job: its name, the pool it covers,
+// the shard count and the start time. The pool is encoded in the string
+// form produced by rgw_pool::to_str().
+struct RGWOrphanSearchInfo {
+  string job_name;
+  rgw_pool pool;
+  uint16_t num_shards;
+  utime_t start_time;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(2, 1, bl);
+    encode(job_name, bl);
+    const string pool_name = pool.to_str();
+    encode(pool_name, bl);
+    encode(num_shards, bl);
+    encode(start_time, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(2, bl);
+    decode(job_name, bl);
+    // pool is stored as text; rebuild the rgw_pool from it
+    string pool_name;
+    decode(pool_name, bl);
+    pool.from_str(pool_name);
+    decode(num_shards, bl);
+    decode(start_time, bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(RGWOrphanSearchInfo)
+
+// Persisted state of an orphan-search job: the job description (info)
+// followed by the stage it has reached. Encoded in that order.
+struct RGWOrphanSearchState {
+ RGWOrphanSearchInfo info;
+ RGWOrphanSearchStage stage;
+
+ // starts out in ORPHAN_SEARCH_STAGE_UNKNOWN
+ RGWOrphanSearchState() : stage(ORPHAN_SEARCH_STAGE_UNKNOWN) {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(info, bl);
+ encode(stage, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(info, bl);
+ decode(stage, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(RGWOrphanSearchState)
+
+// Accessor for orphan-search job records and index entries. Only the
+// declarations are visible in this header; the member functions are
+// defined elsewhere. `oid` is initialised to RGW_ORPHAN_INDEX_OID.
+class RGWOrphanStore {
+ RGWRados *store;
+ librados::IoCtx ioctx;
+
+ string oid;
+
+public:
+ explicit RGWOrphanStore(RGWRados *_store) : store(_store), oid(RGW_ORPHAN_INDEX_OID) {}
+
+ // exposes the IoCtx member by reference
+ librados::IoCtx& get_ioctx() { return ioctx; }
+
+ // NOTE(review): presumably prepares ioctx before any other call -- confirm
+ int init();
+
+ // job records, keyed by job name
+ int read_job(const string& job_name, RGWOrphanSearchState& state);
+ int write_job(const string& job_name, const RGWOrphanSearchState& state);
+ int remove_job(const string& job_name);
+ int list_jobs(map<string,RGWOrphanSearchState> &job_list);
+
+
+ // key/value entries stored under the given object id
+ int store_entries(const string& oid, const map<string, bufferlist>& entries);
+ int read_entries(const string& oid, const string& marker, map<string, bufferlist> *entries, bool *truncated);
+};
+
+
+// Drives an orphan search job: holds the job description and current
+// stage, the per-shard index object names, and the tuning knobs passed
+// in by the caller. State is persisted through an RGWOrphanStore.
+class RGWOrphanSearch {
+  RGWRados *store;
+
+  RGWOrphanStore orphan_store;
+
+  RGWOrphanSearchInfo search_info;
+  RGWOrphanSearchStage search_stage;
+
+  // shard number -> index object name, one map per index kind
+  map<int, string> all_objs_index;
+  map<int, string> buckets_instance_index;
+  map<int, string> linked_objs_index;
+
+  string index_objs_prefix;
+
+  uint16_t max_concurrent_ios;
+  uint64_t stale_secs;
+  // not set by the constructor's init list; given defined defaults here
+  // so the object never holds indeterminate values
+  int64_t max_list_bucket_entries = 0;
+
+  bool detailed_mode = false;
+
+  struct log_iter_info {
+    string oid;
+    list<string>::iterator cur;
+    list<string>::iterator end;
+  };
+
+  int log_oids(map<int, string>& log_shards, map<int, list<string> >& oids);
+
+#define RGW_ORPHANSEARCH_HASH_PRIME 7877
+  // Maps a string to a shard: hash, reduce modulo the prime, then modulo
+  // the job's shard count (search_info.num_shards must be non-zero).
+  int orphan_shard(const string& str) {
+    return ceph_str_hash_linux(str.c_str(), str.size()) % RGW_ORPHANSEARCH_HASH_PRIME % search_info.num_shards;
+  }
+
+  int handle_stat_result(map<int, list<string> >& oids, RGWRados::Object::Stat::Result& result);
+  int pop_and_handle_stat_op(map<int, list<string> >& oids, std::deque<RGWRados::Object::Stat>& ops);
+
+
+  int remove_index(map<int, string>& index);
+public:
+  RGWOrphanSearch(RGWRados *_store, int _max_ios, uint64_t _stale_secs) : store(_store), orphan_store(store), max_concurrent_ios(_max_ios), stale_secs(_stale_secs) {}
+
+  // Writes the current info and stage under the job's name.
+  int save_state() {
+    RGWOrphanSearchState state;
+    state.info = search_info;
+    state.stage = search_stage;
+    return orphan_store.write_job(search_info.job_name, state);
+  }
+
+  int init(const string& job_name, RGWOrphanSearchInfo *info, bool _detailed_mode=false);
+
+  int create(const string& job_name, int num_shards);
+
+  int build_all_oids_index();
+  int build_buckets_instance_index();
+  int build_linked_oids_for_bucket(const string& bucket_instance_id, map<int, list<string> >& oids);
+  int build_linked_oids_index();
+  int compare_oid_indexes();
+
+  int run();
+  int finish();
+};
+
+
+// Lists the rados objects referenced by buckets: either all buckets
+// (run()) or starting from one bucket (run(bucket_id)).
+class RGWRadosList {
+
+  /*
+   * process_t describes how to process a directory: either the whole
+   * thing (entire_container == true) or only a portion of it
+   * (entire_container == false). For a portion, the specific keys
+   * and/or the specific lexical prefixes to process are recorded.
+   */
+  struct process_t {
+    bool entire_container;
+    std::set<rgw_obj_key> filter_keys;
+    std::set<std::string> prefixes;
+
+    process_t() :
+      entire_container(false)
+    {}
+  };
+
+  // pending work, keyed by bucket name
+  std::map<std::string,process_t> bucket_process_map;
+  std::set<std::string> visited_oids;
+
+  // Each helper below finds or creates the bucket's map entry (a new
+  // entry starts out as a default process_t) and then records the request.
+
+  void add_bucket_entire(const std::string& bucket_name) {
+    process_t& how = bucket_process_map[bucket_name];
+    how.entire_container = true;
+  }
+
+  void add_bucket_prefix(const std::string& bucket_name,
+                         const std::string& prefix) {
+    process_t& how = bucket_process_map[bucket_name];
+    how.prefixes.insert(prefix);
+  }
+
+  void add_bucket_filter(const std::string& bucket_name,
+                         const rgw_obj_key& obj_key) {
+    process_t& how = bucket_process_map[bucket_name];
+    how.filter_keys.insert(obj_key);
+  }
+
+  RGWRados *store;
+
+  uint16_t max_concurrent_ios;
+  uint64_t stale_secs;
+  std::string tenant_name;
+
+  int handle_stat_result(RGWRados::Object::Stat::Result& result,
+                         std::set<string>& obj_oids);
+  int pop_and_handle_stat_op(RGWObjectCtx& obj_ctx,
+                             std::deque<RGWRados::Object::Stat>& ops);
+
+public:
+
+  RGWRadosList(RGWRados *_store,
+               int _max_ios,
+               uint64_t _stale_secs,
+               const std::string& _tenant_name) :
+    store(_store),
+    max_concurrent_ios(_max_ios),
+    stale_secs(_stale_secs),
+    tenant_name(_tenant_name)
+  {}
+
+  int process_bucket(const std::string& bucket_instance_id,
+                     const std::string& prefix,
+                     const std::set<rgw_obj_key>& entries_filter);
+
+  int do_incomplete_multipart(RGWRados* store, RGWBucketInfo& bucket_info);
+
+  int build_linked_oids_index();
+
+  int run(const std::string& bucket_id);
+  int run();
+}; // class RGWRadosList
+
+#endif
diff --git a/src/rgw/rgw_os_lib.cc b/src/rgw/rgw_os_lib.cc
new file mode 100644
index 00000000..e43bf418
--- /dev/null
+++ b/src/rgw/rgw_os_lib.cc
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_user.h"
+#include "rgw_os_lib.h"
+#include "rgw_file.h"
+#include "rgw_lib_frontend.h"
+
+namespace rgw {
+
+/* static */
+  /**
+   * Parse the request's relative URI into query args, bucket name and
+   * object key.
+   *
+   * Query args are taken from the URI itself when it starts with '?',
+   * otherwise from s->info.request_params. A URI that does not start
+   * with '/', or that is just "/", leaves bucket and object untouched.
+   * When s->bucket_name is already set, everything after the leading
+   * '/' becomes the object name; otherwise the first path component
+   * becomes the bucket and the remainder (if any) the object. The
+   * object's instance comes from the "versionId" arg.
+   *
+   * @param s  request state, read and updated in place
+   * @return always 0
+   */
+  int RGWHandler_Lib::init_from_header(struct req_state *s)
+  {
+    const char *req_name = s->relative_uri.c_str();
+    const char *p;
+
+    /* skip request_params parsing, rgw_file should not be
+     * seeing any */
+    if (*req_name == '?') {
+      p = req_name;
+    } else {
+      p = s->info.request_params.c_str();
+    }
+
+    s->info.args.set(p);
+    s->info.args.parse();
+
+    if (*req_name != '/')
+      return 0;
+
+    req_name++;
+
+    if (!*req_name)
+      return 0;
+
+    const string req = req_name;
+    // keep the find() result in its own type instead of narrowing to int
+    const string::size_type pos = req.find('/');
+    const bool has_slash = (pos != string::npos);
+
+    if (s->bucket_name.empty()) {
+      // first path component is the bucket
+      s->bucket_name = has_slash ? req.substr(0, pos) : req;
+      if (has_slash) {
+        // XXX ugh, another copy
+        string encoded_obj_str = req.substr(pos + 1);
+        s->object = rgw_obj_key(encoded_obj_str, s->info.args.get("versionId"));
+      }
+    } else {
+      s->object = rgw_obj_key(req_name, s->info.args.get("versionId"));
+    }
+    return 0;
+  } /* init_from_header */
+
+} /* namespace rgw */
diff --git a/src/rgw/rgw_os_lib.h b/src/rgw/rgw_os_lib.h
new file mode 100644
index 00000000..78071b7d
--- /dev/null
+++ b/src/rgw/rgw_os_lib.h
@@ -0,0 +1,12 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_OS_LIB_H
+#define RGW_OS_LIB_H
+
+#include <functional>
+#include "rgw_common.h"
+#include "rgw_lib.h"
+
+
+#endif /* RGW_OS_LIB_H */
diff --git a/src/rgw/rgw_otp.cc b/src/rgw/rgw_otp.cc
new file mode 100644
index 00000000..e00a9344
--- /dev/null
+++ b/src/rgw/rgw_otp.cc
@@ -0,0 +1,158 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include <string>
+#include <map>
+#include <boost/algorithm/string.hpp>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+
+#include "include/types.h"
+
+#include "rgw_common.h"
+#include "rgw_tools.h"
+
+#include "services/svc_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+
+// file-local OTP metadata handler; assigned by rgw_otp_init()
+static RGWMetadataHandler *otp_meta_handler = nullptr;
+
+
+// Metadata object wrapping a list of OTP device infos; dumped as the
+// JSON field "devices".
+class RGWOTPMetadataObject : public RGWMetadataObject {
+ list<rados::cls::otp::otp_info_t> result;
+public:
+ // Takes over the contents of _result via swap (the caller's list is
+ // left holding this object's previous, empty, contents) and records
+ // the version and mtime.
+ RGWOTPMetadataObject(list<rados::cls::otp::otp_info_t>& _result, obj_version& v, real_time m) {
+ result.swap(_result);
+ objv = v;
+ mtime = m;
+ }
+
+ void dump(Formatter *f) const override {
+ encode_json("devices", result, f);
+ }
+};
+
+// Metadata handler for the "otp" section: reads and writes MFA device
+// lists through RGWRados and enumerates keys from the zone's otp pool.
+class RGWOTPMetadataHandler : public RGWMetadataHandler {
+public:
+  string get_type() override { return "otp"; }
+
+  // Loads the device list for `entry` and hands back a newly allocated
+  // RGWOTPMetadataObject through *obj. Returns the list_mfa() error
+  // unchanged when it is negative, else 0.
+  int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override {
+    RGWObjVersionTracker objv_tracker;
+    real_time mtime;
+    list<rados::cls::otp::otp_info_t> devices;
+
+    const int r = store->list_mfa(entry, &devices, &objv_tracker, &mtime);
+    if (r < 0) {
+      return r;
+    }
+
+    *obj = new RGWOTPMetadataObject(devices, objv_tracker.read_version, mtime);
+    return 0;
+  }
+
+  // Decodes "devices" from the JSON object and stores it via set_mfa()
+  // inside meta_mgr->mutate(). Returns -EINVAL on a decode error, the
+  // mutate() error when negative, else STATUS_APPLIED.
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          real_time mtime, JSONObj *obj, sync_type_t sync_mode) override {
+
+    list<rados::cls::otp::otp_info_t> devices;
+    try {
+      JSONDecoder::decode_json("devices", devices, obj);
+    } catch (JSONDecoder::err&) {
+      return -EINVAL;
+    }
+
+    const int ret = store->meta_mgr->mutate(this, entry, mtime, &objv_tracker,
+                                            MDLOG_STATUS_WRITE, sync_mode,
+                                            [&] {
+         return store->set_mfa(entry, devices, true, &objv_tracker, mtime);
+      });
+    if (ret < 0) {
+      return ret;
+    }
+
+    return STATUS_APPLIED;
+  }
+
+  int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+    return store->meta_mgr->remove_entry(this, entry, &objv_tracker);
+  }
+
+  // The key is used as the oid; the pool is the zone's otp_pool.
+  void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {
+    pool = store->svc.zone->get_zone_params().otp_pool;
+    oid = key;
+  }
+
+  // State behind the opaque listing handle.
+  struct list_keys_info {
+    RGWRados *store;
+    RGWListRawObjsCtx ctx;
+  };
+
+  // Starts a raw-object listing of the otp pool at `marker`. On success
+  // ownership of the listing state moves into *phandle; it is freed by
+  // list_keys_complete().
+  int list_keys_init(RGWRados *store, const string& marker, void **phandle) override
+  {
+    auto state = std::make_unique<list_keys_info>();
+    state->store = store;
+
+    const int ret = store->list_raw_objects_init(
+      store->svc.zone->get_zone_params().otp_pool, marker, &state->ctx);
+    if (ret < 0) {
+      return ret;
+    }
+
+    *phandle = (void *)state.release();
+    return 0;
+  }
+
+  // Fetches up to `max` keys. -ENOENT from the store is reported as an
+  // empty, non-truncated page; other negative results are returned.
+  int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
+    list_keys_info *state = static_cast<list_keys_info *>(handle);
+
+    string no_filter;
+    keys.clear();
+
+    const int ret = state->store->list_raw_objects_next(no_filter, max, state->ctx,
+                                                        keys, truncated);
+    if (ret == -ENOENT) {
+      if (truncated) {
+        *truncated = false;
+      }
+      return 0;
+    }
+
+    return ret < 0 ? ret : 0;
+  }
+
+  // Frees the listing state created by list_keys_init().
+  void list_keys_complete(void *handle) override {
+    delete static_cast<list_keys_info *>(handle);
+  }
+
+  // Returns the store's cursor for the listing behind `handle`.
+  string get_marker(void *handle) override {
+    list_keys_info *state = static_cast<list_keys_info *>(handle);
+    return state->store->list_raw_objs_get_cursor(state->ctx);
+  }
+};
+
+// Returns the file-local OTP handler pointer; this is NULL until
+// rgw_otp_init() has assigned it.
+RGWMetadataHandler *rgw_otp_get_handler()
+{
+ return otp_meta_handler;
+}
+
+// Allocates the OTP metadata handler, stores it in the file-local
+// pointer and registers it with the store's metadata manager.
+// NOTE(review): a second call would overwrite the pointer without
+// freeing the first handler -- presumably this is called once; confirm.
+void rgw_otp_init(RGWRados *store)
+{
+ otp_meta_handler = new RGWOTPMetadataHandler;
+ store->meta_mgr->register_handler(otp_meta_handler);
+}
diff --git a/src/rgw/rgw_otp.h b/src/rgw/rgw_otp.h
new file mode 100644
index 00000000..54491343
--- /dev/null
+++ b/src/rgw/rgw_otp.h
@@ -0,0 +1,15 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_OTP_H
+#define CEPH_RGW_OTP_H
+
+class RGWRados;
+
+class RGWMetadataHandler;
+
+// Accessor for the OTP metadata handler.
+RGWMetadataHandler *rgw_otp_get_handler(void);
+// Sets up the OTP metadata handler for the given store.
+void rgw_otp_init(RGWRados *store);
+
+#endif
+
diff --git a/src/rgw/rgw_perf_counters.cc b/src/rgw/rgw_perf_counters.cc
new file mode 100644
index 00000000..21d1a363
--- /dev/null
+++ b/src/rgw/rgw_perf_counters.cc
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_perf_counters.h"
+#include "common/perf_counters.h"
+#include "common/ceph_context.h"
+
+// Global rgw perf counters instance; created by rgw_perf_start() and
+// deleted by rgw_perf_stop().
+PerfCounters *perfcounter = NULL;
+
+// Builds the "rgw" perf counter set (indices l_rgw_first..l_rgw_last),
+// stores it in the global `perfcounter` and adds it to the context's
+// perf counters collection. Always returns 0.
+int rgw_perf_start(CephContext *cct)
+{
+ PerfCountersBuilder plb(cct, "rgw", l_rgw_first, l_rgw_last);
+
+ // RGW emits comparatively few metrics, so let's be generous
+ // and mark them all USEFUL to get transmission to ceph-mgr by default.
+ plb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
+
+ // request totals
+ plb.add_u64_counter(l_rgw_req, "req", "Requests");
+ plb.add_u64_counter(l_rgw_failed_req, "failed_req", "Aborted requests");
+
+ // GET / PUT counts, byte totals and latencies
+ plb.add_u64_counter(l_rgw_get, "get", "Gets");
+ plb.add_u64_counter(l_rgw_get_b, "get_b", "Size of gets");
+ plb.add_time_avg(l_rgw_get_lat, "get_initial_lat", "Get latency");
+ plb.add_u64_counter(l_rgw_put, "put", "Puts");
+ plb.add_u64_counter(l_rgw_put_b, "put_b", "Size of puts");
+ plb.add_time_avg(l_rgw_put_lat, "put_initial_lat", "Put latency");
+
+ // request queue gauges
+ plb.add_u64(l_rgw_qlen, "qlen", "Queue length");
+ plb.add_u64(l_rgw_qactive, "qactive", "Active requests queue");
+
+ // cache statistics
+ plb.add_u64_counter(l_rgw_cache_hit, "cache_hit", "Cache hits");
+ plb.add_u64_counter(l_rgw_cache_miss, "cache_miss", "Cache miss");
+
+ plb.add_u64_counter(l_rgw_keystone_token_cache_hit, "keystone_token_cache_hit", "Keystone token cache hits");
+ plb.add_u64_counter(l_rgw_keystone_token_cache_miss, "keystone_token_cache_miss", "Keystone token cache miss");
+
+ plb.add_u64_counter(l_rgw_gc_retire, "gc_retire_object", "GC object retires");
+
+ // pubsub statistics
+ plb.add_u64_counter(l_rgw_pubsub_event_triggered, "pubsub_event_triggered", "Pubsub events with at least one topic");
+ plb.add_u64_counter(l_rgw_pubsub_event_lost, "pubsub_event_lost", "Pubsub events lost");
+ plb.add_u64_counter(l_rgw_pubsub_store_ok, "pubsub_store_ok", "Pubsub events successfully stored");
+ plb.add_u64_counter(l_rgw_pubsub_store_fail, "pubsub_store_fail", "Pubsub events failed to be stored");
+ plb.add_u64(l_rgw_pubsub_events, "pubsub_events", "Pubsub events in store");
+ plb.add_u64_counter(l_rgw_pubsub_push_ok, "pubsub_push_ok", "Pubsub events pushed to an endpoint");
+ plb.add_u64_counter(l_rgw_pubsub_push_failed, "pubsub_push_failed", "Pubsub events failed to be pushed to an endpoint");
+ plb.add_u64(l_rgw_pubsub_push_pending, "pubsub_push_pending", "Pubsub events pending reply from endpoint");
+ plb.add_u64_counter(l_rgw_pubsub_missing_conf, "pubsub_missing_conf", "Pubsub events could not be handled because of missing configuration");
+
+ perfcounter = plb.create_perf_counters();
+ cct->get_perfcounters_collection()->add(perfcounter);
+ return 0;
+}
+
+// Removes the global rgw perf counters from the context's collection and
+// destroys them. Asserts that the counters exist. The global pointer is
+// reset afterwards so it never dangles and a repeated stop trips the
+// assert instead of deleting twice.
+void rgw_perf_stop(CephContext *cct)
+{
+  ceph_assert(perfcounter);
+  cct->get_perfcounters_collection()->remove(perfcounter);
+  delete perfcounter;
+  perfcounter = nullptr;
+}
+
diff --git a/src/rgw/rgw_perf_counters.h b/src/rgw/rgw_perf_counters.h
new file mode 100644
index 00000000..1f0b6fc3
--- /dev/null
+++ b/src/rgw/rgw_perf_counters.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+class CephContext;
+class PerfCounters;
+
+extern PerfCounters *perfcounter;
+
+extern int rgw_perf_start(CephContext *cct);
+extern void rgw_perf_stop(CephContext *cct);
+
+// Indices of the rgw perf counters. l_rgw_first and l_rgw_last are the
+// bounds handed to PerfCountersBuilder; every counter index lies
+// strictly between them.
+enum {
+ l_rgw_first = 15000,
+ l_rgw_req,
+ l_rgw_failed_req,
+
+ l_rgw_get,
+ l_rgw_get_b,
+ l_rgw_get_lat,
+
+ l_rgw_put,
+ l_rgw_put_b,
+ l_rgw_put_lat,
+
+ l_rgw_qlen,
+ l_rgw_qactive,
+
+ l_rgw_cache_hit,
+ l_rgw_cache_miss,
+
+ l_rgw_keystone_token_cache_hit,
+ l_rgw_keystone_token_cache_miss,
+
+ l_rgw_gc_retire,
+
+ l_rgw_pubsub_event_triggered,
+ l_rgw_pubsub_event_lost,
+ l_rgw_pubsub_store_ok,
+ l_rgw_pubsub_store_fail,
+ l_rgw_pubsub_events,
+ l_rgw_pubsub_push_ok,
+ l_rgw_pubsub_push_failed,
+ l_rgw_pubsub_push_pending,
+ l_rgw_pubsub_missing_conf,
+
+ l_rgw_last,
+};
+
diff --git a/src/rgw/rgw_period_history.cc b/src/rgw/rgw_period_history.cc
new file mode 100644
index 00000000..cf0f3cfc
--- /dev/null
+++ b/src/rgw/rgw_period_history.cc
@@ -0,0 +1,354 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_period_history.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "rgw period history: ")
+
+/// a contiguous run of periods, stored oldest-first and addressed by realm
+/// epoch relative to the oldest entry
+class RGWPeriodHistory::History : public bi::avl_set_base_hook<> {
+ public:
+  std::deque<RGWPeriod> periods;
+
+  /// realm epoch of the first period in the run
+  epoch_t get_oldest_epoch() const {
+    const RGWPeriod& oldest = periods.front();
+    return oldest.get_realm_epoch();
+  }
+  /// realm epoch of the last period in the run
+  epoch_t get_newest_epoch() const {
+    const RGWPeriod& newest = periods.back();
+    return newest.get_realm_epoch();
+  }
+  /// true when the epoch lies within [oldest, newest]
+  bool contains(epoch_t epoch) const {
+    if (epoch < get_oldest_epoch()) {
+      return false;
+    }
+    return epoch <= get_newest_epoch();
+  }
+  /// period at the given realm epoch; performs no range check
+  RGWPeriod& get(epoch_t epoch) {
+    const auto offset = epoch - get_oldest_epoch();
+    return periods[offset];
+  }
+  const RGWPeriod& get(epoch_t epoch) const {
+    const auto offset = epoch - get_oldest_epoch();
+    return periods[offset];
+  }
+  /// id of the period that precedes the oldest one held here
+  const std::string& get_predecessor_id() const {
+    const RGWPeriod& oldest = periods.front();
+    return oldest.get_predecessor();
+  }
+};
+
+/// orders histories in the avl_set by their newest realm epoch
+bool operator<(const RGWPeriodHistory::History& lhs,
+               const RGWPeriodHistory::History& rhs)
+{
+  const epoch_t left = lhs.get_newest_epoch();
+  const epoch_t right = rhs.get_newest_epoch();
+  return left < right;
+}
+
+/// compares a history against a bare realm epoch, for keyed avl_set lookups
+struct NewestEpochLess {
+  bool operator()(const RGWPeriodHistory::History& value, epoch_t key) const {
+    const epoch_t newest = value.get_newest_epoch();
+    return newest < key;
+  }
+};
+
+
+using Cursor = RGWPeriodHistory::Cursor;
+
+/// period at the cursor's epoch, looked up while holding the history mutex
+const RGWPeriod& Cursor::get_period() const
+{
+  std::lock_guard<std::mutex> guard{*mutex};
+  const RGWPeriod& found = history->get(epoch);
+  return found;
+}
+/// true when the history holds a period older than the cursor's epoch
+bool Cursor::has_prev() const
+{
+  std::lock_guard<std::mutex> guard{*mutex};
+  const epoch_t oldest = history->get_oldest_epoch();
+  return oldest < epoch;
+}
+/// true when the history holds a period newer than the cursor's epoch
+bool Cursor::has_next() const
+{
+  std::lock_guard<std::mutex> guard{*mutex};
+  const epoch_t newest = history->get_newest_epoch();
+  return newest > epoch;
+}
+
+/// cursors are equal when they point at the same epoch of the same history
+bool operator==(const Cursor& lhs, const Cursor& rhs)
+{
+  if (lhs.history != rhs.history) {
+    return false;
+  }
+  return lhs.epoch == rhs.epoch;
+}
+
+/// cursors differ when either the history or the epoch differs
+bool operator!=(const Cursor& lhs, const Cursor& rhs)
+{
+  return lhs.history != rhs.history || lhs.epoch != rhs.epoch;
+}
+
+/// private implementation behind RGWPeriodHistory: owns the set of histories,
+/// the mutex that guards them, and the cursor to the realm's current period
+class RGWPeriodHistory::Impl final {
+ public:
+ Impl(CephContext* cct, Puller* puller, const RGWPeriod& current_period);
+ ~Impl();
+
+ /// return a copy of the cursor stored at construction
+ Cursor get_current() const { return current_cursor; }
+ /// insert the period and pull predecessors until it joins current_history
+ Cursor attach(RGWPeriod&& period);
+ /// insert the period without pulling anything
+ Cursor insert(RGWPeriod&& period);
+ /// find the period at the given realm epoch within current_history
+ Cursor lookup(epoch_t realm_epoch);
+
+ private:
+ /// an intrusive set of histories, ordered by their newest epoch. although
+ /// the newest epoch of each history is mutable, the ordering cannot change
+ /// because we prevent the histories from overlapping
+ using Set = bi::avl_set<RGWPeriodHistory::History>;
+
+ /// insert the given period into the period history, creating new unconnected
+ /// histories or merging existing histories as necessary. expects the caller
+ /// to hold a lock on mutex. returns a valid cursor regardless of whether it
+ /// ends up in current_history, though cursors in other histories are only
+ /// valid within the context of the lock
+ Cursor insert_locked(RGWPeriod&& period);
+
+ /// merge the periods from the src history onto the end of the dst history,
+ /// and return an iterator to the merged history
+ Set::iterator merge(Set::iterator dst, Set::iterator src);
+
+ /// construct a Cursor object using Cursor's private constructor
+ Cursor make_cursor(Set::const_iterator history, epoch_t epoch);
+
+ CephContext *const cct;
+ Puller *const puller; //< interface for pulling missing periods
+ Cursor current_cursor; //< Cursor to realm's current period
+
+ mutable std::mutex mutex; //< protects the histories
+
+ /// set of disjoint histories that are missing intermediate periods needed to
+ /// connect them together
+ Set histories;
+
+ /// iterator to the history that contains the realm's current period
+ Set::const_iterator current_history;
+};
+
+/// seed the history set with the given current period, if it has an id
+RGWPeriodHistory::Impl::Impl(CephContext* cct, Puller* puller,
+                             const RGWPeriod& current_period)
+  : cct(cct), puller(puller)
+{
+  // without a current period there is no history to seed
+  if (current_period.get_id().empty()) {
+    current_history = histories.end();
+    return;
+  }
+
+  // start a new history holding a copy of the current period
+  History* seed = new History;
+  seed->periods.push_back(current_period);
+
+  // the seeded history becomes our current history
+  auto inserted = histories.insert(*seed);
+  current_history = inserted.first;
+
+  // remember a cursor to the current period
+  const epoch_t current_epoch = current_period.get_realm_epoch();
+  current_cursor = make_cursor(current_history, current_epoch);
+}
+
+RGWPeriodHistory::Impl::~Impl()
+{
+  // the histories were allocated with new; unlink and delete every one
+  const std::default_delete<History> disposer{};
+  histories.clear_and_dispose(disposer);
+}
+
+/// Insert the given period, then keep pulling and inserting predecessor
+/// periods until the period's realm epoch is covered by current_history.
+/// Returns a cursor into current_history at that epoch. Returns an error
+/// Cursor instead when there is no current history (-EINVAL), when
+/// insert_locked() fails, when a predecessor id is empty (-EINVAL), or when
+/// the puller fails (its negative return code).
+Cursor RGWPeriodHistory::Impl::attach(RGWPeriod&& period)
+{
+ if (current_history == histories.end()) {
+ return Cursor{-EINVAL};
+ }
+
+ // remember the requested epoch; 'period' itself is moved from in the loop
+ // and then reused as the output of each pull
+ const auto epoch = period.get_realm_epoch();
+
+ std::string predecessor_id;
+ for (;;) {
+ {
+ // hold the lock over insert, and while accessing the unsafe cursor
+ std::lock_guard<std::mutex> lock(mutex);
+
+ auto cursor = insert_locked(std::move(period));
+ if (!cursor) {
+ // insert_locked() produced an error cursor; pass it on
+ return cursor;
+ }
+ if (current_history->contains(epoch)) {
+ break; // the history is complete
+ }
+
+ // take the predecessor id of the most recent history
+ if (cursor.get_epoch() > current_cursor.get_epoch()) {
+ predecessor_id = cursor.history->get_predecessor_id();
+ } else {
+ predecessor_id = current_history->get_predecessor_id();
+ }
+ }
+
+ if (predecessor_id.empty()) {
+ lderr(cct) << "reached a period with an empty predecessor id" << dendl;
+ return Cursor{-EINVAL};
+ }
+
+ // pull the period outside of the lock
+ int r = puller->pull(predecessor_id, period);
+ if (r < 0) {
+ return Cursor{r};
+ }
+ }
+
+ // return a cursor to the requested period
+ return make_cursor(current_history, epoch);
+}
+
+/// insert the period under the mutex without pulling anything. returns the
+/// resulting cursor on error or when it lies in current_history, and an
+/// empty Cursor otherwise
+Cursor RGWPeriodHistory::Impl::insert(RGWPeriod&& period)
+{
+  // nothing can be inserted without a current history
+  if (current_history == histories.end()) {
+    return Cursor{-EINVAL};
+  }
+
+  std::lock_guard<std::mutex> guard(mutex);
+
+  const Cursor result = insert_locked(std::move(period));
+
+  // errors are passed through. otherwise only cursors within current_history
+  // are safe to use outside of the mutex, because other histories can
+  // disappear in a merge. see merge() for the special handling of
+  // current_history
+  if (result.get_error() != 0 || result.history == &*current_history) {
+    return result;
+  }
+  return Cursor{};
+}
+
+/// Search current_history for the given realm epoch. Returns a cursor at that
+/// epoch when current_history exists and contains it, and an empty Cursor
+/// otherwise.
+Cursor RGWPeriodHistory::Impl::lookup(epoch_t realm_epoch)
+{
+  // contains() reads the history's period deque, which attach() and insert()
+  // modify while holding the mutex, so the read must be serialized with them
+  std::lock_guard<std::mutex> lock(mutex);
+
+  if (current_history != histories.end() &&
+      current_history->contains(realm_epoch)) {
+    return make_cursor(current_history, realm_epoch);
+  }
+  return Cursor{};
+}
+
+/// Place the period into the history whose epoch range it extends or already
+/// falls in, or into a new single-period history when it touches none.
+/// Returns a cursor to the period, or Cursor{-EEXIST} when a different period
+/// id is already stored at the same realm epoch. The caller holds the mutex.
+/// NOTE(review): the first branch decrements histories.end(), so the set must
+/// not be empty here; attach() and insert() both bail out first when
+/// current_history is end().
+Cursor RGWPeriodHistory::Impl::insert_locked(RGWPeriod&& period)
+{
+ auto epoch = period.get_realm_epoch();
+
+ // find the first history whose newest epoch comes at or after this period
+ auto i = histories.lower_bound(epoch, NewestEpochLess{});
+
+ if (i == histories.end()) {
+ // epoch is past the end of our newest history
+ auto last = --Set::iterator{i}; // last = i - 1
+
+ if (epoch == last->get_newest_epoch() + 1) {
+ // insert at the back of the last history
+ last->periods.emplace_back(std::move(period));
+ return make_cursor(last, epoch);
+ }
+
+ // create a new history for this period
+ auto history = new History;
+ history->periods.emplace_back(std::move(period));
+ histories.insert(last, *history);
+
+ i = Set::s_iterator_to(*history);
+ return make_cursor(i, epoch);
+ }
+
+ if (i->contains(epoch)) {
+ // already resident in this history
+ auto& existing = i->get(epoch);
+ // verify that the period ids match; otherwise we've forked the history
+ if (period.get_id() != existing.get_id()) {
+ lderr(cct) << "Got two different periods, " << period.get_id()
+ << " and " << existing.get_id() << ", with the same realm epoch "
+ << epoch << "! This indicates a fork in the period history." << dendl;
+ return Cursor{-EEXIST};
+ }
+ // update the existing period if we got a newer period epoch
+ if (period.get_epoch() > existing.get_epoch()) {
+ existing = std::move(period);
+ }
+ return make_cursor(i, epoch);
+ }
+
+ // from here on, epoch is older than everything in history i
+ if (epoch + 1 == i->get_oldest_epoch()) {
+ // insert at the front of this history
+ i->periods.emplace_front(std::move(period));
+
+ // try to merge with the previous history
+ if (i != histories.begin()) {
+ auto prev = --Set::iterator{i};
+ if (epoch == prev->get_newest_epoch() + 1) {
+ // the new period closed the gap; merge() returns the surviving history
+ i = merge(prev, i);
+ }
+ }
+ return make_cursor(i, epoch);
+ }
+
+ if (i != histories.begin()) {
+ auto prev = --Set::iterator{i};
+ if (epoch == prev->get_newest_epoch() + 1) {
+ // insert at the back of the previous history
+ prev->periods.emplace_back(std::move(period));
+ return make_cursor(prev, epoch);
+ }
+ }
+
+ // create a new history for this period
+ auto history = new History;
+ history->periods.emplace_back(std::move(period));
+ histories.insert(i, *history);
+
+ i = Set::s_iterator_to(*history);
+ return make_cursor(i, epoch);
+}
+
+/// Join two adjacent histories into one and delete the emptied one. dst must
+/// end exactly one epoch before src begins (asserted). Returns an iterator to
+/// the surviving history: src when src is current_history, dst otherwise.
+RGWPeriodHistory::Impl::Set::iterator
+RGWPeriodHistory::Impl::merge(Set::iterator dst, Set::iterator src)
+{
+ ceph_assert(dst->get_newest_epoch() + 1 == src->get_oldest_epoch());
+
+ // always merge into current_history
+ if (src == current_history) {
+ // move the periods from dst onto the front of src
+ src->periods.insert(src->periods.begin(),
+ std::make_move_iterator(dst->periods.begin()),
+ std::make_move_iterator(dst->periods.end()));
+ // dst is now drained; unlink it from the set and free it
+ histories.erase_and_dispose(dst, std::default_delete<History>{});
+ return src;
+ }
+
+ // move the periods from src onto the end of dst
+ dst->periods.insert(dst->periods.end(),
+ std::make_move_iterator(src->periods.begin()),
+ std::make_move_iterator(src->periods.end()));
+ // src is now drained; unlink it from the set and free it
+ histories.erase_and_dispose(src, std::default_delete<History>{});
+ return dst;
+}
+
+/// build a Cursor that points at the given epoch of the given history and
+/// shares this Impl's mutex
+Cursor RGWPeriodHistory::Impl::make_cursor(Set::const_iterator history,
+                                           epoch_t epoch)
+{
+  const History* target = &*history;
+  return Cursor{target, &mutex, epoch};
+}
+
+
+// all state lives in Impl; the public class only owns it
+RGWPeriodHistory::RGWPeriodHistory(CephContext* cct, Puller* puller,
+                                   const RGWPeriod& current_period)
+  : impl(new Impl(cct, puller, current_period))
+{
+}
+
+// defined here, where Impl is a complete type
+RGWPeriodHistory::~RGWPeriodHistory() = default;
+
+// public entry points; each one forwards to the Impl method of the same name
+Cursor RGWPeriodHistory::get_current() const
+{
+  const Impl& self = *impl;
+  return self.get_current();
+}
+
+Cursor RGWPeriodHistory::attach(RGWPeriod&& period)
+{
+  Impl& self = *impl;
+  return self.attach(std::move(period));
+}
+
+Cursor RGWPeriodHistory::insert(RGWPeriod&& period)
+{
+  Impl& self = *impl;
+  return self.insert(std::move(period));
+}
+
+Cursor RGWPeriodHistory::lookup(epoch_t realm_epoch)
+{
+  Impl& self = *impl;
+  return self.lookup(realm_epoch);
+}
diff --git a/src/rgw/rgw_period_history.h b/src/rgw/rgw_period_history.h
new file mode 100644
index 00000000..11a5c2aa
--- /dev/null
+++ b/src/rgw/rgw_period_history.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_PERIOD_HISTORY_H
+#define RGW_PERIOD_HISTORY_H
+
+#include <deque>
+#include <mutex>
+#include <system_error>
+#include <boost/intrusive/avl_set.hpp>
+#include "include/ceph_assert.h"
+#include "include/types.h"
+
+namespace bi = boost::intrusive;
+
+class RGWPeriod;
+
+/**
+ * RGWPeriodHistory tracks the relative history of all inserted periods,
+ * coordinates the pulling of missing intermediate periods, and provides a
+ * Cursor object for traversing through the connected history.
+ */
+class RGWPeriodHistory final {
+ private:
+ /// an ordered history of consecutive periods
+ class History;
+
+ // comparisons for avl_set ordering
+ friend bool operator<(const History& lhs, const History& rhs);
+ friend struct NewestEpochLess;
+
+ // the implementation is hidden behind this pointer; Impl is only declared
+ // here
+ class Impl;
+ std::unique_ptr<Impl> impl;
+
+ public:
+ /**
+ * Puller is a synchronous interface for pulling periods from the master
+ * zone. The abstraction exists mainly to support unit testing.
+ */
+ class Puller {
+ public:
+ virtual ~Puller() = default;
+
+ /// fill 'period' with the period identified by period_id. callers treat a
+ /// negative return value as an error code
+ virtual int pull(const std::string& period_id, RGWPeriod& period) = 0;
+ };
+
+ RGWPeriodHistory(CephContext* cct, Puller* puller,
+ const RGWPeriod& current_period);
+ ~RGWPeriodHistory();
+
+ /**
+ * Cursor tracks a position in the period history and allows forward and
+ * backward traversal. Only periods that are fully connected to the
+ * current_period are reachable via a Cursor, because other histories are
+ * temporary and can be merged away. Cursors to periods in disjoint
+ * histories, as provided by insert() or lookup(), are therefore invalid and
+ * their operator bool() will return false.
+ */
+ class Cursor final {
+ public:
+ Cursor() = default;
+ /// construct an invalid cursor that carries an error code
+ explicit Cursor(int error) : error(error) {}
+
+ int get_error() const { return error; }
+
+ /// return false for a default-constructed or error Cursor
+ operator bool() const { return history != nullptr; }
+
+ epoch_t get_epoch() const { return epoch; }
+ // get_period(), has_prev() and has_next() dereference the history and the
+ // mutex, so they may only be called on a valid cursor
+ const RGWPeriod& get_period() const;
+
+ bool has_prev() const;
+ bool has_next() const;
+
+ // prev() and next() only adjust the epoch and perform no range check
+ void prev() { epoch--; }
+ void next() { epoch++; }
+
+ friend bool operator==(const Cursor& lhs, const Cursor& rhs);
+ friend bool operator!=(const Cursor& lhs, const Cursor& rhs);
+
+ private:
+ // private constructors for RGWPeriodHistory
+ friend class RGWPeriodHistory::Impl;
+
+ Cursor(const History* history, std::mutex* mutex, epoch_t epoch)
+ : history(history), mutex(mutex), epoch(epoch) {}
+
+ int error{0};
+ const History* history{nullptr};
+ std::mutex* mutex{nullptr}; //< locked while reading from history
+ epoch_t epoch{0}; //< realm epoch of cursor position
+ };
+
+ /// return a cursor to the current period
+ Cursor get_current() const;
+
+ /// build up a connected period history that covers the span between
+ /// current_period and the given period, reading predecessor periods or
+ /// fetching them from the master as necessary. returns a cursor at the
+ /// given period that can be used to traverse the current_history
+ Cursor attach(RGWPeriod&& period);
+
+ /// insert the given period into an existing history, or create a new
+ /// unconnected history. similar to attach(), but it doesn't try to fetch
+ /// missing periods. returns a cursor to the inserted period iff it's in
+ /// the current_history
+ Cursor insert(RGWPeriod&& period);
+
+ /// search for a period by realm epoch, returning a valid Cursor iff it's in
+ /// the current_history
+ Cursor lookup(epoch_t realm_epoch);
+};
+
+#endif // RGW_PERIOD_HISTORY_H
diff --git a/src/rgw/rgw_period_puller.cc b/src/rgw/rgw_period_puller.cc
new file mode 100644
index 00000000..934eb000
--- /dev/null
+++ b/src/rgw/rgw_period_puller.cc
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_rest_conn.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+
+#include "services/svc_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "rgw period puller: ")
+
+namespace {
+
+// request the period with the given id from the peer behind 'conn' and decode
+// the JSON reply into 'period'. returns 0 on success, the negative error from
+// the request or the parser, or -EINVAL when the reply cannot be decoded
+int pull_period(RGWRESTConn* conn, const std::string& period_id,
+                const std::string& realm_id, RGWPeriod& period)
+{
+  // describe a GET on the period admin resource
+  RGWEnv env;
+  req_info info(conn->get_ctx(), &env);
+  info.method = "GET";
+  info.request_uri = "/admin/realm/period";
+
+  auto& query = info.args.get_params();
+  query["realm_id"] = realm_id;
+  query["period_id"] = period_id;
+
+  // send it with a default-constructed user and collect the reply body
+  rgw_user user;
+  bufferlist response;
+#define MAX_REST_RESPONSE (128 * 1024)
+  int ret = conn->forward(user, info, nullptr, MAX_REST_RESPONSE, nullptr,
+                          &response);
+  if (ret < 0) {
+    return ret;
+  }
+
+  JSONParser parser;
+  ret = parser.parse(response.c_str(), response.length());
+  if (ret < 0) {
+    lderr(conn->get_ctx()) << "request failed: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  try {
+    decode_json_obj(period, &parser);
+  } catch (const JSONDecoder::err& e) {
+    lderr(conn->get_ctx()) << "failed to decode JSON input: "
+        << e.message << dendl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+} // anonymous namespace
+
+/// load the period from local storage, falling back to pulling it from the
+/// master zone and writing it locally. returns 0 on success or a negative
+/// error code
+int RGWPeriodPuller::pull(const std::string& period_id, RGWPeriod& period)
+{
+  // first attempt: read the period from rados
+  period.set_id(period_id);
+  period.set_epoch(0);
+  int ret = period.init(store->ctx(), store->svc.sysobj);
+  if (ret >= 0) {
+    ldout(store->ctx(), 14) << "found period " << period_id
+        << " in local storage" << dendl;
+    return 0;
+  }
+
+  // can't pull if we're the master
+  if (store->svc.zone->is_meta_master()) {
+    ldout(store->ctx(), 1) << "metadata master failed to read period "
+        << period_id << " from local storage: " << cpp_strerror(ret) << dendl;
+    return ret;
+  }
+
+  // second attempt: request the period from the master zone
+  ldout(store->ctx(), 14) << "pulling period " << period_id
+      << " from master" << dendl;
+  ret = pull_period(store->svc.zone->get_master_conn(), period_id,
+                    store->svc.zone->get_realm().get_id(), period);
+  if (ret < 0) {
+    lderr(store->ctx()) << "failed to pull period " << period_id << dendl;
+    return ret;
+  }
+
+  // write the period to rados; an already existing copy is not an error
+  ret = period.store_info(true);
+  if (ret < 0 && ret != -EEXIST) {
+    lderr(store->ctx()) << "failed to store period " << period_id << dendl;
+    return ret;
+  }
+
+  // update latest epoch
+  ret = period.update_latest_epoch(period.get_epoch());
+  if (ret == -EEXIST) {
+    // already have this epoch (or a more recent one)
+    return 0;
+  }
+  if (ret < 0) {
+    lderr(store->ctx()) << "failed to update latest_epoch for period "
+        << period_id << dendl;
+    return ret;
+  }
+
+  // reflect period objects if this is the latest version
+  if (store->svc.zone->get_realm().get_current_period() == period_id) {
+    ret = period.reflect();
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  ldout(store->ctx(), 14) << "period " << period_id
+      << " pulled and written to local storage" << dendl;
+  return 0;
+}
diff --git a/src/rgw/rgw_period_puller.h b/src/rgw/rgw_period_puller.h
new file mode 100644
index 00000000..9018d584
--- /dev/null
+++ b/src/rgw/rgw_period_puller.h
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_PERIOD_PULLER_H
+#define CEPH_RGW_PERIOD_PULLER_H
+
+#include "rgw_period_history.h"
+
+class RGWRados;
+class RGWPeriod;
+
+/// RGWPeriodHistory::Puller implementation backed by an RGWRados store
+class RGWPeriodPuller : public RGWPeriodHistory::Puller {
+ public:
+  explicit RGWPeriodPuller(RGWRados* store) : store(store) {}
+
+  /// fill 'period' with the period identified by period_id
+  int pull(const std::string& period_id, RGWPeriod& period) override;
+
+ private:
+  RGWRados *const store;
+};
+
+#endif // CEPH_RGW_PERIOD_PULLER_H
diff --git a/src/rgw/rgw_period_pusher.cc b/src/rgw/rgw_period_pusher.cc
new file mode 100644
index 00000000..e3db85df
--- /dev/null
+++ b/src/rgw/rgw_period_pusher.cc
@@ -0,0 +1,307 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <map>
+#include <thread>
+
+#include "rgw_period_pusher.h"
+#include "rgw_cr_rest.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include "common/errno.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "rgw period pusher: ")
+
+/// A coroutine to post the period over the given connection.
+using PushCR = RGWPostRESTResourceCR<RGWPeriod, int>;
+
+/// A coroutine that calls PushCR, and retries with backoff until success.
+/// The zone name and the period are held by reference, so both must outlive
+/// this coroutine.
+class PushAndRetryCR : public RGWCoroutine {
+ const std::string& zone;
+ RGWRESTConn *const conn;
+ RGWHTTPManager *const http;
+ RGWPeriod& period;
+ const std::string epoch; //< epoch string for params
+ double timeout; //< current interval between retries
+ const double timeout_max; //< maximum interval between retries
+ uint32_t counter; //< number of failures since backoff increased
+
+ public:
+ // the retry intervals come from the rgw_period_push_interval and
+ // rgw_period_push_interval_max config options
+ PushAndRetryCR(CephContext* cct, const std::string& zone, RGWRESTConn* conn,
+ RGWHTTPManager* http, RGWPeriod& period)
+ : RGWCoroutine(cct), zone(zone), conn(conn), http(http), period(period),
+ epoch(std::to_string(period.get_epoch())),
+ timeout(cct->_conf->rgw_period_push_interval),
+ timeout_max(cct->_conf->rgw_period_push_interval_max),
+ counter(0)
+ {}
+
+ int operate() override;
+};
+
+/// Post the period to the zone until a push succeeds. After as many failures
+/// as the connection has endpoints, wait for 'timeout' seconds and double it,
+/// capped at timeout_max, before trying again.
+int PushAndRetryCR::operate()
+{
+ reenter(this) {
+ for (;;) {
+ yield {
+ ldout(cct, 10) << "pushing period " << period.get_id()
+ << " to " << zone << dendl;
+ // initialize the http params
+ rgw_http_param_pair params[] = {
+ { "period", period.get_id().c_str() },
+ { "epoch", epoch.c_str() },
+ { nullptr, nullptr }
+ };
+ call(new PushCR(cct, conn, http, "/admin/realm/period",
+ params, period, nullptr));
+ }
+
+ // stop on success
+ if (get_ret_status() == 0) {
+ ldout(cct, 10) << "push to " << zone << " succeeded" << dendl;
+ return set_cr_done();
+ }
+
+ // try each endpoint in the connection before waiting
+ if (++counter < conn->get_endpoint_count())
+ continue;
+ counter = 0;
+
+ // wait with exponential backoff up to timeout_max
+ yield {
+ utime_t dur;
+ dur.set_from_double(timeout);
+
+ ldout(cct, 10) << "waiting " << dur << "s for retry.." << dendl;
+ wait(dur);
+
+ // the doubled value takes effect on the next wait
+ timeout *= 2;
+ if (timeout > timeout_max)
+ timeout = timeout_max;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * PushAllCR is a coroutine that sends the period over all of the given
+ * connections, retrying until they are all marked as completed. It owns the
+ * period and the connection map; the PushAndRetryCR coroutines it spawns
+ * refer to them by reference or pointer.
+ */
+class PushAllCR : public RGWCoroutine {
+ RGWHTTPManager *const http;
+ RGWPeriod period; //< period object to push
+ std::map<std::string, RGWRESTConn> conns; //< zones that need the period
+
+ public:
+ // takes ownership of the period and the connections by moving from them
+ PushAllCR(CephContext* cct, RGWHTTPManager* http, RGWPeriod&& period,
+ std::map<std::string, RGWRESTConn>&& conns)
+ : RGWCoroutine(cct), http(http),
+ period(std::move(period)),
+ conns(std::move(conns))
+ {}
+
+ int operate() override;
+};
+
+/// Spawn one PushAndRetryCR per connection, then wait for all of them.
+int PushAllCR::operate()
+{
+ reenter(this) {
+ // spawn a coroutine to push the period over each connection
+ yield {
+ ldout(cct, 4) << "sending " << conns.size() << " periods" << dendl;
+ // each child gets the map key as its zone name and a pointer to the
+ // mapped connection, both owned by this coroutine
+ for (auto& c : conns)
+ spawn(new PushAndRetryCR(cct, c.first, &c.second, http, period), false);
+ }
+ // wait for all to complete
+ drain_all();
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/// A background thread to run the PushAllCR coroutine and exit.
+class RGWPeriodPusher::CRThread {
+ // 'http' is constructed from coroutines.get_completion_mgr() and
+ // 'push_all' from &http, so the declaration order below is significant
+ RGWCoroutinesManager coroutines;
+ RGWHTTPManager http;
+ boost::intrusive_ptr<PushAllCR> push_all;
+ std::thread thread;
+
+ public:
+ CRThread(CephContext* cct, RGWPeriod&& period,
+ std::map<std::string, RGWRESTConn>&& conns)
+ : coroutines(cct, NULL),
+ http(cct, coroutines.get_completion_mgr()),
+ push_all(new PushAllCR(cct, &http, std::move(period), std::move(conns)))
+ {
+ http.start();
+ // must spawn the CR thread after start
+ thread = std::thread([this] { coroutines.run(push_all.get()); });
+ }
+ // NOTE(review): push_all is reset here while the thread may still be
+ // running, and the thread reads push_all without synchronization --
+ // confirm that the coroutine manager tolerates this ordering
+ ~CRThread()
+ {
+ push_all.reset();
+ coroutines.stop();
+ http.stop();
+ if (thread.joinable())
+ thread.join();
+ }
+};
+
+
+/// load the realm's period on startup and hand it to handle_notify()
+RGWPeriodPusher::RGWPeriodPusher(RGWRados* store)
+  : cct(store->ctx()), store(store)
+{
+  const auto& realm = store->svc.zone->get_realm();
+  const auto& realm_id = realm.get_id();
+  // no realm configuration
+  if (realm_id.empty()) {
+    return;
+  }
+
+  // always send out the current period on startup
+  RGWPeriod current;
+  const int ret = current.init(cct, store->svc.sysobj, realm_id,
+                               realm.get_name());
+  if (ret < 0) {
+    lderr(cct) << "failed to load period for realm " << realm_id << dendl;
+    return;
+  }
+
+  std::lock_guard<std::mutex> guard(mutex);
+  handle_notify(std::move(current));
+}
+
+// destructor is here because CRThread is incomplete in the header, and the
+// std::unique_ptr<CRThread> member needs the complete type to delete it
+RGWPeriodPusher::~RGWPeriodPusher() = default;
+
+/// decode the period carried by a realm notification, then either process it
+/// or queue it while paused
+void RGWPeriodPusher::handle_notify(RGWRealmNotify type,
+                                    bufferlist::const_iterator& p)
+{
+  // decode the period
+  RGWZonesNeedPeriod incoming;
+  try {
+    decode(incoming, p);
+  } catch (const buffer::error& e) {
+    lderr(cct) << "Failed to decode the period: " << e.what() << dendl;
+    return;
+  }
+
+  std::lock_guard<std::mutex> guard(mutex);
+
+  if (store != nullptr) {
+    handle_notify(std::move(incoming));
+    return;
+  }
+
+  // we can't process this notification without access to our current realm
+  // configuration. queue it until resume()
+  pending_periods.emplace_back(std::move(incoming));
+}
+
+// Decide whether this node must push the given period, and if so start a new
+// CRThread that pushes it to every other zone of our zonegroup that has
+// endpoints. When our zonegroup is the period's master zonegroup, the other
+// zonegroups with endpoints are included as well. Periods that are not newer
+// than the last one pushed are discarded.
+// expects the caller to hold a lock on mutex
+void RGWPeriodPusher::handle_notify(RGWZonesNeedPeriod&& period)
+{
+ if (period.get_realm_epoch() < realm_epoch) {
+ ldout(cct, 10) << "period's realm epoch " << period.get_realm_epoch()
+ << " is not newer than current realm epoch " << realm_epoch
+ << ", discarding update" << dendl;
+ return;
+ }
+ if (period.get_realm_epoch() == realm_epoch &&
+ period.get_epoch() <= period_epoch) {
+ ldout(cct, 10) << "period epoch " << period.get_epoch() << " is not newer "
+ "than current epoch " << period_epoch << ", discarding update" << dendl;
+ return;
+ }
+
+ // find our zonegroup in the new period
+ auto& zonegroups = period.get_map().zonegroups;
+ auto i = zonegroups.find(store->svc.zone->get_zonegroup().get_id());
+ if (i == zonegroups.end()) {
+ lderr(cct) << "The new period does not contain my zonegroup!" << dendl;
+ return;
+ }
+ auto& my_zonegroup = i->second;
+
+ // if we're not a master zone, we're not responsible for pushing any updates
+ if (my_zonegroup.master_zone != store->svc.zone->get_zone_params().get_id())
+ return;
+
+ // construct a map of the zones that need this period. the map uses the same
+ // keys/ordering as the zone[group] map, so we can use a hint for insertions
+ std::map<std::string, RGWRESTConn> conns;
+ auto hint = conns.end();
+
+ // are we the master zonegroup in this period?
+ if (period.get_map().master_zonegroup == store->svc.zone->get_zonegroup().get_id()) {
+ // update other zonegroup endpoints
+ for (auto& zg : zonegroups) {
+ auto& zonegroup = zg.second;
+ if (zonegroup.get_id() == store->svc.zone->get_zonegroup().get_id())
+ continue;
+ if (zonegroup.endpoints.empty())
+ continue;
+
+ hint = conns.emplace_hint(
+ hint, std::piecewise_construct,
+ std::forward_as_tuple(zonegroup.get_id()),
+ std::forward_as_tuple(cct, store->svc.zone, zonegroup.get_id(), zonegroup.endpoints));
+ }
+ }
+
+ // update other zone endpoints
+ for (auto& z : my_zonegroup.zones) {
+ auto& zone = z.second;
+ if (zone.id == store->svc.zone->get_zone_params().get_id())
+ continue;
+ if (zone.endpoints.empty())
+ continue;
+
+ hint = conns.emplace_hint(
+ hint, std::piecewise_construct,
+ std::forward_as_tuple(zone.id),
+ std::forward_as_tuple(cct, store->svc.zone, zone.id, zone.endpoints));
+ }
+
+ if (conns.empty()) {
+ ldout(cct, 4) << "No zones to update" << dendl;
+ return;
+ }
+
+ // record the epochs only once we know there is something to push; they
+ // must be read before 'period' is moved from below
+ realm_epoch = period.get_realm_epoch();
+ period_epoch = period.get_epoch();
+
+ ldout(cct, 4) << "Zone master pushing period " << period.get_id()
+ << " epoch " << period_epoch << " to "
+ << conns.size() << " other zones" << dendl;
+
+ // spawn a new coroutine thread, destroying the previous one
+ cr_thread.reset(new CRThread(cct, std::move(period), std::move(conns)));
+}
+
+/// drop the store pointer so that notifications get queued until resume()
+void RGWPeriodPusher::pause()
+{
+  ldout(cct, 4) << "paused for realm update" << dendl;
+
+  std::lock_guard<std::mutex> guard(mutex);
+  this->store = nullptr;
+}
+
+/// adopt the new store and process every notification queued while paused
+void RGWPeriodPusher::resume(RGWRados* store)
+{
+  std::lock_guard<std::mutex> guard(mutex);
+  this->store = store;
+
+  ldout(cct, 4) << "resume with " << pending_periods.size()
+      << " periods pending" << dendl;
+
+  // process notification queue in arrival order
+  for (auto it = pending_periods.begin(); it != pending_periods.end(); ++it) {
+    handle_notify(std::move(*it));
+  }
+  pending_periods.clear();
+}
diff --git a/src/rgw/rgw_period_pusher.h b/src/rgw/rgw_period_pusher.h
new file mode 100644
index 00000000..fdadd226
--- /dev/null
+++ b/src/rgw/rgw_period_pusher.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_PERIOD_PUSHER_H
+#define RGW_PERIOD_PUSHER_H
+
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "rgw_realm_reloader.h"
+
+class RGWRados;
+class RGWPeriod;
+
+// RGWRealmNotify payload for push coordination
+using RGWZonesNeedPeriod = RGWPeriod;
+
+/**
+ * RGWPeriodPusher coordinates with other nodes via the realm watcher to manage
+ * the responsibility for pushing period updates to other zones or zonegroups.
+ */
+class RGWPeriodPusher final : public RGWRealmWatcher::Watcher,
+ public RGWRealmReloader::Pauser {
+ public:
+ explicit RGWPeriodPusher(RGWRados* store);
+ ~RGWPeriodPusher() override;
+
+ /// respond to realm notifications by pushing new periods to other zones
+ void handle_notify(RGWRealmNotify type, bufferlist::const_iterator& p) override;
+
+ /// avoid accessing RGWRados while dynamic reconfiguration is in progress.
+ /// notifications will be enqueued until resume()
+ void pause() override;
+
+ /// continue processing notifications with a new RGWRados instance
+ void resume(RGWRados* store) override;
+
+ private:
+ /// process a decoded period; the caller must hold the mutex
+ void handle_notify(RGWZonesNeedPeriod&& period);
+
+ CephContext *const cct;
+ RGWRados* store; //< set to nullptr by pause(), replaced by resume()
+
+ std::mutex mutex;
+ epoch_t realm_epoch{0}; //< the current realm epoch being sent
+ epoch_t period_epoch{0}; //< the current period epoch being sent
+
+ /// while paused for reconfiguration, we need to queue up notifications
+ std::vector<RGWZonesNeedPeriod> pending_periods;
+
+ class CRThread; //< contains thread, coroutine manager, http manager
+ std::unique_ptr<CRThread> cr_thread; //< thread to run the push coroutines
+};
+
+#endif // RGW_PERIOD_PUSHER_H
diff --git a/src/rgw/rgw_policy_s3.cc b/src/rgw/rgw_policy_s3.cc
new file mode 100644
index 00000000..17a4e953
--- /dev/null
+++ b/src/rgw/rgw_policy_s3.cc
@@ -0,0 +1,303 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include "common/ceph_json.h"
+#include "rgw_policy_s3.h"
+#include "rgw_common.h"
+#include "rgw_crypt_sanitize.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/**
+ * Base class for a two-operand POST policy condition. Each operand is either
+ * a literal or a "$name" reference that is resolved through RGWPolicyEnv
+ * before the operator-specific comparison runs.
+ */
+class RGWPolicyCondition {
+protected:
+  string v1;
+  string v2;
+
+  /// operator-specific comparison of the two resolved operands
+  virtual bool check(const string& first, const string& second, string& err_msg) = 0;
+
+public:
+  virtual ~RGWPolicyCondition() {}
+
+  /// store the raw (unresolved) operands of this condition
+  void set_vals(const string& _v1, const string& _v2) {
+    v1 = _v1;
+    v2 = _v2;
+  }
+
+  /// Resolve both operands against env (recording referenced variables in
+  /// checked_vars), log them, and evaluate the condition. On failure the raw
+  /// operands are appended to err_msg as ": <v1>, <v2>".
+  bool check(RGWPolicyEnv *env, map<string, bool, ltstr_nocase>& checked_vars, string& err_msg) {
+    string lhs, rhs;
+    // the lookup results are intentionally not inspected here
+    env->get_value(v1, lhs, checked_vars);
+    env->get_value(v2, rhs, checked_vars);
+    dout(1) << "policy condition check " << v1 << " ["
+            << rgw::crypt_sanitize::s3_policy{v1, lhs}
+            << "] " << v2 << " ["
+            << rgw::crypt_sanitize::s3_policy{v2, rhs}
+            << "]" << dendl;
+    if (check(lhs, rhs, err_msg)) {
+      return true;
+    }
+    err_msg.append(": ").append(v1).append(", ").append(v2);
+    return false;
+  }
+
+};
+
+
+/// "eq" condition: both resolved operands must be byte-for-byte identical.
+class RGWPolicyCondition_StrEqual : public RGWPolicyCondition {
+protected:
+  bool check(const string& first, const string& second, string& msg) override {
+    if (first == second) {
+      return true;
+    }
+    msg = "Policy condition failed: eq";
+    return false;
+  }
+};
+
+/// "starts-with" condition: the first operand must begin with the second.
+class RGWPolicyCondition_StrStartsWith : public RGWPolicyCondition {
+protected:
+  bool check(const string& first, const string& second, string& msg) override {
+    // compare only the leading second.size() characters of first
+    const bool has_prefix = (first.compare(0, second.size(), second) == 0);
+    if (has_prefix) {
+      return true;
+    }
+    msg = "Policy condition failed: starts-with";
+    return false;
+  }
+};
+
+// Set (or overwrite) the value of a variable in the environment.
+void RGWPolicyEnv::add_var(const string& name, const string& value)
+{
+ vars[name] = value;
+}
+
+// Look up a variable by name. On success copies its value into val and
+// returns true; otherwise returns false and leaves val untouched.
+bool RGWPolicyEnv::get_var(const string& name, string& val)
+{
+  auto found = vars.find(name);
+  if (found != vars.end()) {
+    val = found->second;
+    return true;
+  }
+  return false;
+}
+
+// Resolve a condition operand. Anything not starting with '$' is a literal
+// and is returned as-is. "$name" marks name as checked and is looked up in
+// the environment; the return value is then whether the variable exists.
+bool RGWPolicyEnv::get_value(const string& s, string& val, map<string, bool, ltstr_nocase>& checked_vars)
+{
+  const bool is_reference = !s.empty() && s[0] == '$';
+  if (!is_reference) {
+    val = s;
+    return true;
+  }
+
+  // strip the leading '$' to obtain the variable name
+  const string var_name = s.substr(1);
+  checked_vars[var_name] = true;
+  return get_var(var_name, val);
+}
+
+
+// Verify that every variable in the environment is covered by the policy.
+// Variables whose name starts (case-insensitively) with "x-ignore-" are
+// exempt. On the first uncovered variable, err_msg is set and false returned.
+bool RGWPolicyEnv::match_policy_vars(map<string, bool, ltstr_nocase>& policy_vars, string& err_msg)
+{
+  const string ignore_prefix = "x-ignore-";
+  for (const auto& entry : vars) {
+    const string& var = entry.first;
+    const bool ignored =
+      strncasecmp(ignore_prefix.c_str(), var.c_str(), ignore_prefix.size()) == 0;
+    if (ignored || policy_vars.count(var) != 0) {
+      continue;
+    }
+    err_msg = "Policy missing condition: ";
+    err_msg.append(var);
+    dout(1) << "env var missing in policy: " << var << dendl;
+    return false;
+  }
+  return true;
+}
+
+// The policy owns the condition objects allocated by add_condition().
+RGWPolicy::~RGWPolicy()
+{
+  for (RGWPolicyCondition *cond : conditions) {
+    delete cond;
+  }
+}
+
+// Parse an ISO 8601 timestamp and store it as the policy expiration.
+// Returns 0 on success, -EINVAL if the string cannot be parsed.
+int RGWPolicy::set_expires(const string& e)
+{
+  struct tm parsed;
+  if (parse_iso8601(e.c_str(), &parsed)) {
+    expires = internal_timegm(&parsed);
+    return 0;
+  }
+  return -EINVAL;
+}
+
+// Register one three-element policy condition [op, first, second].
+//  - "content-length-range" narrows [min_length, max_length]; it never
+//    widens a range set by an earlier condition.
+//  - "eq" / "starts-with" allocate a condition object owned by this policy.
+// Operator names are matched case-insensitively. Returns 0 on success or a
+// negative error code with err_msg describing the problem.
+int RGWPolicy::add_condition(const string& op, const string& first, const string& second, string& err_msg)
+{
+  if (stringcasecmp(op, "content-length-range") == 0) {
+    off_t min, max;
+    int r = stringtoll(first, &min);
+    if (r < 0) {
+      err_msg = "Bad content-length-range param";
+      dout(0) << "bad content-length-range param: " << first << dendl;
+      return r;
+    }
+
+    r = stringtoll(second, &max);
+    if (r < 0) {
+      err_msg = "Bad content-length-range param";
+      dout(0) << "bad content-length-range param: " << second << dendl;
+      return r;
+    }
+
+    // only ever tighten the allowed range
+    if (min > min_length)
+      min_length = min;
+
+    if (max < max_length)
+      max_length = max;
+
+    return 0;
+  }
+
+  RGWPolicyCondition *cond = NULL;
+  if (stringcasecmp(op, "eq") == 0) {
+    cond = new RGWPolicyCondition_StrEqual;
+  } else if (stringcasecmp(op, "starts-with") == 0) {
+    cond = new RGWPolicyCondition_StrStartsWith;
+  } else {
+    err_msg = "Invalid condition: ";
+    err_msg.append(op);
+    dout(0) << "invalid condition: " << op << dendl;
+    return -EINVAL;
+  }
+
+  cond->set_vals(first, second);
+  conditions.push_back(cond);
+  return 0;
+}
+
+// Evaluate the policy against the request environment:
+//   1. the policy must not have expired,
+//   2. every simple check (name == value) must hold,
+//   3. every condition object must pass,
+//   4. every environment variable must be covered by the policy.
+// Returns 0 when everything passes, otherwise -EACCES with err_msg set.
+int RGWPolicy::check(RGWPolicyEnv *env, string& err_msg)
+{
+  const uint64_t now = ceph_clock_now().sec();
+  if (expires <= now) {
+    dout(0) << "NOTICE: policy calculated as expired: " << expiration_str << dendl;
+    err_msg = "Policy expired";
+    return -EACCES; // change to condition about expired policy following S3
+  }
+
+  for (const auto& var_check : var_checks) {
+    const string& name = var_check.first;
+    const string& check_val = var_check.second;
+    string val;
+    if (!env->get_var(name, val)) {
+      dout(20) << " policy check failed, variable not found: '" << name << "'" << dendl;
+      err_msg = "Policy check failed, variable not found: ";
+      err_msg.append(name);
+      return -EACCES;
+    }
+
+    // remember that the policy covers this variable (used in step 4)
+    set_var_checked(name);
+
+    dout(20) << "comparing " << name << " [" << val << "], " << check_val << dendl;
+    if (val != check_val) {
+      err_msg = "Policy check failed, variable not met condition: ";
+      err_msg.append(name);
+      dout(1) << "policy check failed, val=" << val << " != " << check_val << dendl;
+      return -EACCES;
+    }
+  }
+
+  for (RGWPolicyCondition *cond : conditions) {
+    if (!cond->check(env, checked_vars, err_msg)) {
+      return -EACCES;
+    }
+  }
+
+  if (!env->match_policy_vars(checked_vars, err_msg)) {
+    dout(1) << "missing policy condition" << dendl;
+    return -EACCES;
+  }
+  return 0;
+}
+
+
+// Populate the policy from its JSON document.
+//
+// The document must contain an "expiration" (ISO 8601) and a "conditions"
+// array. Each entry of "conditions" is either
+//   - a three-element array [op, first, second], passed to add_condition(), or
+//   - an object whose first member {name: value} becomes a simple check.
+//
+// Returns 0 on success, or a negative error code with err_msg describing the
+// problem.
+int RGWPolicy::from_json(bufferlist& bl, string& err_msg)
+{
+  JSONParser parser;
+
+  if (!parser.parse(bl.c_str(), bl.length())) {
+    err_msg = "Malformed JSON";
+    dout(0) << "malformed json" << dendl;
+    return -EINVAL;
+  }
+
+  // as no time was included in the request, we hope that the user has included a short timeout
+  JSONObjIter iter = parser.find_first("expiration");
+  if (iter.end()) {
+    err_msg = "Policy missing expiration";
+    dout(0) << "expiration not found" << dendl;
+    return -EINVAL; // change to a "no expiration" error following S3
+  }
+
+  JSONObj *obj = *iter;
+  expiration_str = obj->get_data();
+  int r = set_expires(expiration_str);
+  if (r < 0) {
+    err_msg = "Failed to parse policy expiration";
+    return r;
+  }
+
+  iter = parser.find_first("conditions");
+  if (iter.end()) {
+    err_msg = "Policy missing conditions";
+    dout(0) << "conditions not found" << dendl;
+    return -EINVAL; // change to a "no conditions" error following S3
+  }
+
+  obj = *iter;
+
+  iter = obj->find_first();
+  for (; !iter.end(); ++iter) {
+    JSONObj *child = *iter;
+    dout(20) << "data=" << child->get_data() << dendl;
+    dout(20) << "is_object=" << child->is_object() << dendl;
+    dout(20) << "is_array=" << child->is_array() << dendl;
+    JSONObjIter citer = child->find_first();
+    if (child->is_array()) {
+      vector<string> v;
+      int i;
+      // collect at most 3 elements; a leftover element is detected below
+      for (i = 0; !citer.end() && i < 3; ++citer, ++i) {
+        JSONObj *o = *citer;
+        v.push_back(o->get_data());
+      }
+      if (i != 3 || !citer.end()) { /* we expect exactly 3 arguments here */
+        err_msg = "Bad condition array, expecting 3 arguments";
+        return -EINVAL;
+      }
+
+      // reuse the outer r instead of shadowing it
+      r = add_condition(v[0], v[1], v[2], err_msg);
+      if (r < 0)
+        return r;
+    } else if (!citer.end()) {
+      JSONObj *c = *citer;
+      dout(20) << "adding simple_check: " << c->get_name() << " : " << c->get_data() << dendl;
+
+      add_simple_check(c->get_name(), c->get_data());
+    } else {
+      // neither an array nor an object with a member: report it like every
+      // other failure path instead of returning with an empty err_msg
+      err_msg = "Bad condition, expecting an array or a non-empty object";
+      dout(0) << "bad policy condition entry" << dendl;
+      return -EINVAL;
+    }
+  }
+  return 0;
+}
diff --git a/src/rgw/rgw_policy_s3.h b/src/rgw/rgw_policy_s3.h
new file mode 100644
index 00000000..9768055d
--- /dev/null
+++ b/src/rgw/rgw_policy_s3.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_POLICY_H
+#define CEPH_RGW_POLICY_H
+
+#include <limits.h>
+
+#include <map>
+#include <list>
+#include <string>
+
+#include "include/utime.h"
+
+#include "rgw_string.h"
+
+
+// Set of named variables that policy conditions are evaluated against.
+// Names are ordered with ltstr_nocase.
+class RGWPolicyEnv {
+ std::map<std::string, std::string, ltstr_nocase> vars;
+
+public:
+ // set or overwrite a variable
+ void add_var(const string& name, const string& value);
+ // fetch a variable by name; returns false when it is not present
+ bool get_var(const string& name, string& val);
+ // resolve a condition operand: a literal, or "$name" which is looked up
+ // and recorded in checked_vars
+ bool get_value(const string& s, string& val, std::map<std::string, bool, ltstr_nocase>& checked_vars);
+ // check that every variable (except "x-ignore-*") appears in policy_vars
+ bool match_policy_vars(map<string, bool, ltstr_nocase>& policy_vars, string& err_msg);
+};
+
+class RGWPolicyCondition;
+
+
+/**
+ * A parsed POST policy document: an expiration time, simple name == value
+ * checks, operator conditions, and the allowed content-length range.
+ *
+ * The policy owns the RGWPolicyCondition objects stored in `conditions` and
+ * deletes them in its destructor, so it must not be copied.
+ */
+class RGWPolicy {
+  uint64_t expires;
+  std::string expiration_str;
+  std::list<RGWPolicyCondition *> conditions;            // owned, freed in dtor
+  std::list<std::pair<std::string, std::string> > var_checks;
+  std::map<std::string, bool, ltstr_nocase> checked_vars;
+
+public:
+  off_t min_length;
+  off_t max_length;
+
+  RGWPolicy() : expires(0), min_length(0), max_length(LLONG_MAX) {}
+  ~RGWPolicy();
+
+  // copying would leave two policies deleting the same condition objects
+  RGWPolicy(const RGWPolicy&) = delete;
+  RGWPolicy& operator=(const RGWPolicy&) = delete;
+
+  /// parse an ISO 8601 expiration; returns 0 or -EINVAL
+  int set_expires(const std::string& e);
+
+  /// record that the policy covers the given variable
+  void set_var_checked(const std::string& var) {
+    checked_vars[var] = true;
+  }
+
+  /// add an [op, first, second] condition; returns 0 or a negative error
+  int add_condition(const std::string& op, const std::string& first, const std::string& second, std::string& err_msg);
+
+  /// add a simple check requiring variable `var` to equal `value`
+  void add_simple_check(const std::string& var, const std::string& value) {
+    var_checks.push_back(std::pair<std::string, std::string>(var, value));
+  }
+
+  /// evaluate the policy against env; returns 0 or -EACCES
+  int check(RGWPolicyEnv *env, std::string& err_msg);
+  /// populate the policy from a JSON document; returns 0 or a negative error
+  int from_json(bufferlist& bl, std::string& err_msg);
+};
+#endif
diff --git a/src/rgw/rgw_process.cc b/src/rgw/rgw_process.cc
new file mode 100644
index 00000000..ad43b5d3
--- /dev/null
+++ b/src/rgw/rgw_process.cc
@@ -0,0 +1,323 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "common/Throttle.h"
+#include "common/WorkQueue.h"
+#include "include/scope_guard.h"
+
+#include "rgw_rados.h"
+#include "rgw_dmclock_scheduler.h"
+#include "rgw_rest.h"
+#include "rgw_frontend.h"
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_loadgen.h"
+#include "rgw_client_io.h"
+#include "rgw_opa.h"
+#include "rgw_perf_counters.h"
+
+#include "services/svc_zone_utils.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using rgw::dmclock::Scheduler;
+
+// Log every queued request pointer at debug level 20.
+void RGWProcess::RGWWQ::_dump_queue()
+{
+  // skip the walk entirely unless rgw level-20 output is being gathered
+  if (!g_conf()->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    return;
+  }
+  const auto& queue = process->m_req_queue;
+  if (queue.empty()) {
+    dout(20) << "RGWWQ: empty" << dendl;
+    return;
+  }
+  dout(20) << "RGWWQ:" << dendl;
+  for (RGWRequest* const queued : queue) {
+    dout(20) << "req: " << hex << queued << dec << dendl;
+  }
+} /* RGWProcess::RGWWQ::_dump_queue */
+
+// Hand the op to the dmclock scheduler, if one is configured.
+// Returns a pair of (status, completer). Without a scheduler the status is 0
+// and the completer is default-constructed.
+auto schedule_request(Scheduler *scheduler, req_state *s, RGWOp *op)
+{
+ using rgw::dmclock::SchedulerCompleter;
+ if (!scheduler)
+ return std::make_pair(0,SchedulerCompleter{});
+
+ // the op itself decides which dmclock client class and cost apply
+ const auto client = op->dmclock_client();
+ const auto cost = op->dmclock_cost();
+ ldpp_dout(op,10) << "scheduling with dmclock client=" << static_cast<int>(client)
+ << " cost=" << cost << dendl;
+ return scheduler->schedule_request(client, {},
+ req_state::Clock::to_double(s->time),
+ cost,
+ s->yield);
+}
+
+// Append a request to the owning process's queue and bump the queue-length
+// perf counter. Always returns true.
+bool RGWProcess::RGWWQ::_enqueue(RGWRequest* req) {
+ process->m_req_queue.push_back(req);
+ perfcounter->inc(l_rgw_qlen);
+ dout(20) << "enqueued request req=" << hex << req << dec << dendl;
+ _dump_queue();
+ return true;
+}
+
+// Pop the oldest queued request and decrement the queue-length perf counter.
+// Returns a null pointer when the queue is empty.
+RGWRequest* RGWProcess::RGWWQ::_dequeue() {
+  auto& queue = process->m_req_queue;
+  if (queue.empty()) {
+    return nullptr;
+  }
+  RGWRequest* const next = queue.front();
+  queue.pop_front();
+  dout(20) << "dequeued request req=" << hex << next << dec << dendl;
+  _dump_queue();
+  perfcounter->inc(l_rgw_qlen, -1);
+  return next;
+}
+
+// Worker-thread entry for one request: track it in the "active" perf counter
+// while the process handles it, then return one unit to the request throttle.
+void RGWProcess::RGWWQ::_process(RGWRequest *req, ThreadPool::TPHandle &) {
+ perfcounter->inc(l_rgw_qactive);
+ process->handle_request(req);
+ process->req_throttle.put(1);
+ perfcounter->inc(l_rgw_qactive, -1);
+}
+
+/**
+ * Run the post-authentication stages of a request, in order: init
+ * permissions, retarget (unless skip_retarget), read permissions, init
+ * processing, op mask, optional OPA authorization, permission check, param
+ * check, then pre_exec / execute / complete.
+ *
+ * @param handler        REST handler for the request
+ * @param op             in/out: may be replaced by handler->retarget()
+ * @param req            request whose op pointer is updated after retargeting
+ * @param s              request state
+ * @param skip_retarget  when true the retarget stage is skipped
+ * @return 0 once complete() has run, or the first negative value returned by
+ *         one of the checked stages
+ */
+int rgw_process_authenticated(RGWHandler_REST * const handler,
+ RGWOp *& op,
+ RGWRequest * const req,
+ req_state * const s,
+ const bool skip_retarget)
+{
+ ldpp_dout(op, 2) << "init permissions" << dendl;
+ int ret = handler->init_permissions(op);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /**
+ * Only some accesses support website mode, and website mode does NOT apply
+ * if you are using the REST endpoint either (ergo, no authenticated access)
+ */
+ if (! skip_retarget) {
+ ldpp_dout(op, 2) << "recalculating target" << dendl;
+ ret = handler->retarget(op, &op);
+ if (ret < 0) {
+ return ret;
+ }
+ // keep the request pointing at the (possibly replaced) op
+ req->op = op;
+ } else {
+ ldpp_dout(op, 2) << "retargeting skipped because of SubOp mode" << dendl;
+ }
+
+ /* If necessary extract object ACL and put them into req_state. */
+ ldpp_dout(op, 2) << "reading permissions" << dendl;
+ ret = handler->read_permissions(op);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ldpp_dout(op, 2) << "init op" << dendl;
+ ret = op->init_processing();
+ if (ret < 0) {
+ return ret;
+ }
+
+ ldpp_dout(op, 2) << "verifying op mask" << dendl;
+ ret = op->verify_op_mask();
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Check if OPA is used to authorize requests */
+ if (s->cct->_conf->rgw_use_opa_authz) {
+ ret = rgw_opa_authorize(op, s);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ ldpp_dout(op, 2) << "verifying op permissions" << dendl;
+ ret = op->verify_permission();
+ if (ret < 0) {
+ // a permission failure is not fatal for system requests or for an
+ // identity that is admin of the request's user
+ if (s->system_request) {
+ dout(2) << "overriding permissions due to system operation" << dendl;
+ } else if (s->auth.identity->is_admin_of(s->user->user_id)) {
+ dout(2) << "overriding permissions due to admin operation" << dendl;
+ } else {
+ return ret;
+ }
+ }
+
+ ldpp_dout(op, 2) << "verifying op params" << dendl;
+ ret = op->verify_params();
+ if (ret < 0) {
+ return ret;
+ }
+
+ // the remaining stages have no return value checked here
+ ldpp_dout(op, 2) << "pre-executing" << dendl;
+ op->pre_exec();
+
+ ldpp_dout(op, 2) << "executing" << dendl;
+ op->execute();
+
+ ldpp_dout(op, 2) << "completing" << dendl;
+ op->complete();
+
+ return 0;
+}
+
+/**
+ * Process a single REST request end to end: initialise the client I/O and
+ * request state, resolve the handler and op, schedule the op, authenticate,
+ * run the authenticated stages, then complete the response and log.
+ *
+ * @param store            RGWRados instance serving the request
+ * @param rest             REST dispatcher used to obtain/release the handler
+ * @param req              the request being processed
+ * @param frontend_prefix  URI prefix configured for the frontend
+ * @param auth_registry    authentication strategies
+ * @param client_io        client connection I/O
+ * @param olog             ops log socket passed to rgw_log_op()
+ * @param yield            stored into the request state
+ * @param scheduler        optional dmclock scheduler (may be null)
+ * @param http_ret         optional out: receives s->err.http_ret
+ * @return ret if it is negative, otherwise s->err.ret
+ */
+int process_request(RGWRados* const store,
+                    RGWREST* const rest,
+                    RGWRequest* const req,
+                    const std::string& frontend_prefix,
+                    const rgw_auth_registry_t& auth_registry,
+                    RGWRestfulIO* const client_io,
+                    OpsLogSocket* const olog,
+                    optional_yield yield,
+                    rgw::dmclock::Scheduler *scheduler,
+                    int* http_ret)
+{
+  // the result is only inspected after the request state has been built, so
+  // that abort_early() has a req_state to work with
+  int ret = client_io->init(g_ceph_context);
+
+  dout(1) << "====== starting new request req=" << hex << req << dec
+          << " =====" << dendl;
+  perfcounter->inc(l_rgw_req);
+
+  RGWEnv& rgw_env = client_io->get_env();
+
+  RGWUserInfo userinfo;
+
+  struct req_state rstate(g_ceph_context, &rgw_env, &userinfo, req->id);
+  struct req_state *s = &rstate;
+
+  RGWObjectCtx rados_ctx(store, s);
+  s->obj_ctx = &rados_ctx;
+
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+  s->sysobj_ctx = &sysobj_ctx;
+
+  if (ret < 0) {
+    s->cio = client_io;
+    abort_early(s, nullptr, ret, nullptr);
+    return ret;
+  }
+
+  s->req_id = store->svc.zone_utils->unique_id(req->id);
+  s->trans_id = store->svc.zone_utils->unique_trans_id(req->id);
+  s->host_id = store->host_id;
+  s->yield = yield;
+
+  ldpp_dout(s, 2) << "initializing for trans_id = " << s->trans_id << dendl;
+
+  // everything the `done` label uses is declared before the first goto
+  RGWOp* op = nullptr;
+  int init_error = 0;
+  bool should_log = false;
+  RGWRESTMgr *mgr;
+  RGWHandler_REST *handler = rest->get_handler(store, s,
+                                               auth_registry,
+                                               frontend_prefix,
+                                               client_io, &mgr, &init_error);
+  rgw::dmclock::SchedulerCompleter c;
+  if (init_error != 0) {
+    abort_early(s, nullptr, init_error, nullptr);
+    goto done;
+  }
+  dout(10) << "handler=" << typeid(*handler).name() << dendl;
+
+  should_log = mgr->get_logging();
+
+  ldpp_dout(s, 2) << "getting op " << s->op << dendl;
+  op = handler->get_op(store);
+  if (!op) {
+    abort_early(s, NULL, -ERR_METHOD_NOT_ALLOWED, handler);
+    goto done;
+  }
+  std::tie(ret,c) = schedule_request(scheduler, s, op);
+  if (ret < 0) {
+    // the scheduler's -EAGAIN is reported to the client as rate limiting
+    if (ret == -EAGAIN) {
+      ret = -ERR_RATE_LIMITED;
+    }
+    ldpp_dout(op,0) << "Scheduling request failed with " << ret << dendl;
+    abort_early(s, op, ret, handler);
+    goto done;
+  }
+  req->op = op;
+  dout(10) << "op=" << typeid(*op).name() << dendl;
+
+  s->op_type = op->get_type();
+
+  try {
+    ldpp_dout(op, 2) << "verifying requester" << dendl;
+    ret = op->verify_requester(auth_registry);
+    if (ret < 0) {
+      dout(10) << "failed to authorize request" << dendl;
+      abort_early(s, op, ret, handler);
+      goto done;
+    }
+
+    /* FIXME: remove this after switching all handlers to the new authentication
+     * infrastructure. */
+    if (nullptr == s->auth.identity) {
+      s->auth.identity = rgw::auth::transform_old_authinfo(s);
+    }
+
+    ldpp_dout(op, 2) << "normalizing buckets and tenants" << dendl;
+    ret = handler->postauth_init();
+    if (ret < 0) {
+      dout(10) << "failed to run post-auth init" << dendl;
+      abort_early(s, op, ret, handler);
+      goto done;
+    }
+
+    if (s->user->suspended) {
+      dout(10) << "user is suspended, uid=" << s->user->user_id << dendl;
+      abort_early(s, op, -ERR_USER_SUSPENDED, handler);
+      goto done;
+    }
+
+    ret = rgw_process_authenticated(handler, op, req, s);
+    if (ret < 0) {
+      abort_early(s, op, ret, handler);
+      goto done;
+    }
+  } catch (const ceph::crypto::DigestException& e) {
+    // separate the fixed text from the exception message in the log line
+    dout(0) << "authentication failed: " << e.what() << dendl;
+    abort_early(s, op, -ERR_INVALID_SECRET_KEY, handler);
+  }
+
+done:
+  try {
+    client_io->complete_request();
+  } catch (rgw::io::Exception& e) {
+    dout(0) << "ERROR: client_io->complete_request() returned "
+            << e.what() << dendl;
+  }
+
+  if (should_log) {
+    rgw_log_op(store, rest, s, (op ? op->name() : "unknown"), olog);
+  }
+
+  if (http_ret != nullptr) {
+    *http_ret = s->err.http_ret;
+  }
+  int op_ret = 0;
+  if (op) {
+    op_ret = op->get_ret();
+    ldpp_dout(op, 2) << "op status=" << op_ret << dendl;
+    ldpp_dout(op, 2) << "http status=" << s->err.http_ret << dendl;
+  } else {
+    ldpp_dout(s, 2) << "http status=" << s->err.http_ret << dendl;
+  }
+  // release the op before handing the handler back
+  if (handler)
+    handler->put_op(op);
+  rest->put_handler(handler);
+
+  dout(1) << "====== req done req=" << hex << req << dec
+          << " op status=" << op_ret
+          << " http_status=" << s->err.http_ret
+          << " latency=" << s->time_elapsed()
+          << " ======"
+          << dendl;
+
+  return (ret < 0 ? ret : s->err.ret);
+} /* process_request */
diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h
new file mode 100644
index 00000000..c3b27bd7
--- /dev/null
+++ b/src/rgw/rgw_process.h
@@ -0,0 +1,199 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_PROCESS_H
+#define RGW_PROCESS_H
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_auth_registry.h"
+#include "rgw_user.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+
+#include "include/ceph_assert.h"
+
+#include "common/WorkQueue.h"
+#include "common/Throttle.h"
+
+#include <atomic>
+
+#if !defined(dout_subsys)
+#define dout_subsys ceph_subsys_rgw
+#define def_dout_subsys
+#endif
+
+#define dout_context g_ceph_context
+
+extern void signal_shutdown();
+
+namespace rgw::dmclock {
+ class Scheduler;
+}
+
+// Bundle of shared objects and settings passed to an RGWProcess at
+// construction time. The members are not initialised by this struct.
+struct RGWProcessEnv {
+ RGWRados *store;
+ RGWREST *rest;
+ OpsLogSocket *olog;
+ int port;
+ std::string uri_prefix;
+ std::shared_ptr<rgw::auth::StrategyRegistry> auth_registry;
+};
+
+class RGWFrontendConfig;
+
+// Abstract base for request-processing frontends. Owns a thread pool, a
+// request throttle and a work queue (req_wq) of RGWRequest pointers that is
+// backed by m_req_queue. Subclasses implement run() and handle_request().
+class RGWProcess {
+ // FIFO of pending requests: pushed at the back, popped from the front
+ deque<RGWRequest*> m_req_queue;
+protected:
+ CephContext *cct;
+ RGWRados* store;
+ rgw_auth_registry_ptr_t auth_registry;
+ OpsLogSocket* olog;
+ ThreadPool m_tp;
+ Throttle req_throttle;
+ RGWREST* rest;
+ RGWFrontendConfig* conf;
+ int sock_fd; // -1 when no socket is held (see close_fd())
+ std::string uri_prefix;
+
+ // ThreadPool work queue adapter that stores its items in m_req_queue
+ struct RGWWQ : public ThreadPool::WorkQueue<RGWRequest> {
+ RGWProcess* process;
+ RGWWQ(RGWProcess* p, time_t timeout, time_t suicide_timeout, ThreadPool* tp)
+ : ThreadPool::WorkQueue<RGWRequest>("RGWWQ", timeout, suicide_timeout,
+ tp), process(p) {}
+
+ bool _enqueue(RGWRequest* req) override;
+
+ // removing a specific request is not supported; calling this aborts
+ void _dequeue(RGWRequest* req) override {
+ ceph_abort();
+ }
+
+ bool _empty() override {
+ return process->m_req_queue.empty();
+ }
+
+ RGWRequest* _dequeue() override;
+
+ using ThreadPool::WorkQueue<RGWRequest>::_process;
+
+ void _process(RGWRequest *req, ThreadPool::TPHandle &) override;
+
+ void _dump_queue();
+
+ // nothing is discarded here: the queue must already be empty
+ void _clear() override {
+ ceph_assert(process->m_req_queue.empty());
+ }
+ } req_wq;
+
+public:
+ // The throttle is sized at twice the number of worker threads.
+ RGWProcess(CephContext* const cct,
+ RGWProcessEnv* const pe,
+ const int num_threads,
+ RGWFrontendConfig* const conf)
+ : cct(cct),
+ store(pe->store),
+ auth_registry(pe->auth_registry),
+ olog(pe->olog),
+ m_tp(cct, "RGWProcess::m_tp", "tp_rgw_process", num_threads),
+ req_throttle(cct, "rgw_ops", num_threads * 2),
+ rest(pe->rest),
+ conf(conf),
+ sock_fd(-1),
+ uri_prefix(pe->uri_prefix),
+ req_wq(this, g_conf()->rgw_op_thread_timeout,
+ g_conf()->rgw_op_thread_suicide_timeout, &m_tp) {
+ }
+
+ virtual ~RGWProcess() = default;
+
+ virtual void run() = 0;
+ virtual void handle_request(RGWRequest *req) = 0;
+
+ // pause the thread pool
+ void pause() {
+ m_tp.pause();
+ }
+
+ // swap in a new store and auth registry, then unpause the thread pool
+ void unpause_with_new_config(RGWRados* const store,
+ rgw_auth_registry_ptr_t auth_registry) {
+ this->store = store;
+ this->auth_registry = std::move(auth_registry);
+ m_tp.unpause();
+ }
+
+ // close the socket if one is open; safe to call repeatedly
+ void close_fd() {
+ if (sock_fd >= 0) {
+ ::close(sock_fd);
+ sock_fd = -1;
+ }
+ }
+}; /* RGWProcess */
+
+// FCGX flavour of RGWProcess; run() and handle_request() are defined
+// elsewhere.
+class RGWFCGXProcess : public RGWProcess {
+ int max_connections;
+public:
+
+ /* have a bit more connections than threads so that requests are
+ * still accepted even if we're still processing older requests */
+ RGWFCGXProcess(CephContext* const cct,
+ RGWProcessEnv* const pe,
+ const int num_threads,
+ RGWFrontendConfig* const conf)
+ : RGWProcess(cct, pe, num_threads, conf),
+ // num_threads plus one eighth of num_threads (integer shift)
+ max_connections(num_threads + (num_threads >> 3)) {
+ }
+
+ void run() override;
+ void handle_request(RGWRequest* req) override;
+};
+
+// Thread whose entry point simply runs the given RGWProcess.
+class RGWProcessControlThread : public Thread {
+  RGWProcess *pprocess;
+public:
+  explicit RGWProcessControlThread(RGWProcess *_pprocess)
+    : pprocess(_pprocess)
+  {}
+
+  // runs the process to completion; the thread result is always null
+  void *entry() override {
+    pprocess->run();
+    return nullptr;
+  }
+};
+
+// Load-generator flavour of RGWProcess. Requests are produced through
+// gen_request() using the access key supplied via set_access_key().
+class RGWLoadGenProcess : public RGWProcess {
+ RGWAccessKey access_key;
+public:
+ RGWLoadGenProcess(CephContext* cct, RGWProcessEnv* pe, int num_threads,
+ RGWFrontendConfig* _conf) :
+ RGWProcess(cct, pe, num_threads, _conf) {}
+ void run() override;
+ void checkpoint();
+ void handle_request(RGWRequest* req) override;
+ void gen_request(const string& method, const string& resource,
+ int content_length, std::atomic<bool>* fail_flag);
+
+ // copies the key into this process
+ void set_access_key(RGWAccessKey& key) { access_key = key; }
+};
+/* process stream request */
+/* http_ret is optional; when given a non-null pointer it receives the HTTP
+ * status recorded in the request state. */
+extern int process_request(RGWRados* store,
+ RGWREST* rest,
+ RGWRequest* req,
+ const std::string& frontend_prefix,
+ const rgw_auth_registry_t& auth_registry,
+ RGWRestfulIO* client_io,
+ OpsLogSocket* olog,
+ optional_yield y,
+ rgw::dmclock::Scheduler *scheduler,
+ int* http_ret = nullptr);
+
+/* Run the post-authentication stages of a request. `op` is taken by
+ * reference; retargeting is skipped when skip_retarget is true. */
+extern int rgw_process_authenticated(RGWHandler_REST* handler,
+ RGWOp*& op,
+ RGWRequest* req,
+ req_state* s,
+ bool skip_retarget = false);
+
+#if defined(def_dout_subsys)
+#undef def_dout_subsys
+#undef dout_subsys
+#endif
+#undef dout_context
+
+#endif /* RGW_PROCESS_H */
diff --git a/src/rgw/rgw_pubsub.cc b/src/rgw/rgw_pubsub.cc
new file mode 100644
index 00000000..f3ff342f
--- /dev/null
+++ b/src/rgw/rgw_pubsub.cc
@@ -0,0 +1,872 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "services/svc_zone.h"
+#include "rgw_b64.h"
+#include "rgw_rados.h"
+#include "rgw_pubsub.h"
+#include "rgw_tools.h"
+#include "rgw_xml.h"
+#include "rgw_arn.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_rados.h"
+#include <regex>
+#include <algorithm>
+
+#define dout_subsys ceph_subsys_rgw
+
+// Build an event id of the form "<sec>.<usec>.<hash>", with seconds
+// zero-padded to 10 digits and microseconds to 6.
+//
+// Only the fixed-size timestamp prefix goes through the stack buffer; the
+// hash is appended as a string. snprintf() returns the length the output
+// *would* have had, so formatting the hash into buf as well could make
+// id.assign() read past the buffer for a long hash.
+void set_event_id(std::string& id, const std::string& hash, const utime_t& ts) {
+  char buf[64];
+  const auto len = snprintf(buf, sizeof(buf), "%010ld.%06ld.", (long)ts.sec(), (long)ts.usec());
+  if (len > 0) {
+    // never trust len beyond what actually fits in buf
+    id.assign(buf, std::min(static_cast<size_t>(len), sizeof(buf) - 1));
+    id.append(hash);
+  }
+}
+
+// Decode the <FilterRule> children of an S3Key filter. Each rule must have a
+// Name of "prefix", "suffix" or "regex" and a Value; each name may appear at
+// most once. Throws RGWXMLDecoder::err on an unknown or repeated name.
+bool rgw_s3_key_filter::decode_xml(XMLObj* obj) {
+  const auto throw_if_missing = true;
+  bool prefix_seen = false;
+  bool suffix_seen = false;
+  bool regex_seen = false;
+  std::string name;
+
+  XMLObjIter iter = obj->find("FilterRule");
+  for (XMLObj* rule = iter.get_next(); rule != nullptr; rule = iter.get_next()) {
+    RGWXMLDecoder::decode_xml("Name", name, rule, throw_if_missing);
+    if (name == "prefix" && !prefix_seen) {
+      prefix_seen = true;
+      RGWXMLDecoder::decode_xml("Value", prefix_rule, rule, throw_if_missing);
+    } else if (name == "suffix" && !suffix_seen) {
+      suffix_seen = true;
+      RGWXMLDecoder::decode_xml("Value", suffix_rule, rule, throw_if_missing);
+    } else if (name == "regex" && !regex_seen) {
+      regex_seen = true;
+      RGWXMLDecoder::decode_xml("Value", regex_rule, rule, throw_if_missing);
+    } else {
+      throw RGWXMLDecoder::err("invalid/duplicate S3Key filter rule name: '" + name + "'");
+    }
+  }
+  return true;
+}
+
+// Emit one <FilterRule> section per non-empty rule, in the order prefix,
+// suffix, regex.
+void rgw_s3_key_filter::dump_xml(Formatter *f) const {
+  const auto dump_rule = [f](const char* rule_name, const std::string& rule_value) {
+    if (rule_value.empty()) {
+      return;
+    }
+    f->open_object_section("FilterRule");
+    ::encode_xml("Name", rule_name, f);
+    ::encode_xml("Value", rule_value, f);
+    f->close_section();
+  };
+
+  dump_rule("prefix", prefix_rule);
+  dump_rule("suffix", suffix_rule);
+  dump_rule("regex", regex_rule);
+}
+
+// True when at least one of the prefix/suffix/regex rules is set.
+bool rgw_s3_key_filter::has_content() const {
+  return !prefix_rule.empty() || !suffix_rule.empty() || !regex_rule.empty();
+}
+
+// Replace the filter's key/value list with the <FilterRule> children of obj.
+// Both Name and Value are mandatory for every rule.
+bool rgw_s3_key_value_filter::decode_xml(XMLObj* obj) {
+  kvl.clear();
+
+  const auto throw_if_missing = true;
+  std::string key;
+  std::string value;
+
+  XMLObjIter iter = obj->find("FilterRule");
+  for (XMLObj* rule = iter.get_next(); rule != nullptr; rule = iter.get_next()) {
+    RGWXMLDecoder::decode_xml("Name", key, rule, throw_if_missing);
+    RGWXMLDecoder::decode_xml("Value", value, rule, throw_if_missing);
+    kvl.emplace(key, value);
+  }
+  return true;
+}
+
+// Emit one <FilterRule> section (Name + Value) per stored key/value pair.
+void rgw_s3_key_value_filter::dump_xml(Formatter *f) const {
+ for (const auto& key_value : kvl) {
+ f->open_object_section("FilterRule");
+ ::encode_xml("Name", key_value.first, f);
+ ::encode_xml("Value", key_value.second, f);
+ f->close_section();
+ }
+}
+
+// True when the filter holds at least one key/value pair.
+bool rgw_s3_key_value_filter::has_content() const {
+ return !kvl.empty();
+}
+
+// Decode the S3Key, S3Metadata and S3Tags sub-filters. None of them is
+// requested as mandatory here (no throw-if-missing argument is passed).
+bool rgw_s3_filter::decode_xml(XMLObj* obj) {
+ RGWXMLDecoder::decode_xml("S3Key", key_filter, obj);
+ RGWXMLDecoder::decode_xml("S3Metadata", metadata_filter, obj);
+ RGWXMLDecoder::decode_xml("S3Tags", tag_filter, obj);
+ return true;
+}
+
+// Emit only the sub-filters that actually have content.
+void rgw_s3_filter::dump_xml(Formatter *f) const {
+ if (key_filter.has_content()) {
+ ::encode_xml("S3Key", key_filter, f);
+ }
+ if (metadata_filter.has_content()) {
+ ::encode_xml("S3Metadata", metadata_filter, f);
+ }
+ if (tag_filter.has_content()) {
+ ::encode_xml("S3Tags", tag_filter, f);
+ }
+}
+
+// True when any of the key, metadata or tag sub-filters has content.
+bool rgw_s3_filter::has_content() const {
+ return key_filter.has_content() ||
+ metadata_filter.has_content() ||
+ tag_filter.has_content();
+}
+
+// Check an object key against a key filter. Every rule that is set (prefix,
+// suffix, regex) must match; rules that are empty are ignored.
+bool match(const rgw_s3_key_filter& filter, const std::string& key) {
+  const auto key_size = key.size();
+
+  const auto& prefix = filter.prefix_rule;
+  if (!prefix.empty()) {
+    // a prefix longer than the key can never match
+    if (prefix.size() > key_size ||
+        key.compare(0, prefix.size(), prefix) != 0) {
+      return false;
+    }
+  }
+
+  const auto& suffix = filter.suffix_rule;
+  if (!suffix.empty()) {
+    // a suffix longer than the key can never match
+    if (suffix.size() > key_size ||
+        key.compare(key_size - suffix.size(), suffix.size(), suffix) != 0) {
+      return false;
+    }
+  }
+
+  if (!filter.regex_rule.empty()) {
+    // TODO add regex caching in the filter
+    // the regex must match the whole key, not just a part of it
+    const std::regex base_regex(filter.regex_rule);
+    if (!std::regex_match(key, base_regex)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Check an object's metadata/tags against a key-value filter.
+// NOTE(review): std::includes requires both ranges to be sorted with the
+// same ordering -- confirm that KeyValueList guarantees this.
+bool match(const rgw_s3_key_value_filter& filter, const KeyValueList& kvl) {
+ // all filter pairs must exist with the same value in the object's metadata/tags
+ // object metadata/tags may include items not in the filter
+ return std::includes(kvl.begin(), kvl.end(), filter.kvl.begin(), filter.kvl.end());
+}
+
+// Check an event type against a notification's event list. An empty list
+// accepts every event; otherwise the event must be present in the list.
+bool match(const rgw::notify::EventTypeList& events, rgw::notify::EventType event) {
+  if (events.empty()) {
+    return true;
+  }
+  return std::find(events.begin(), events.end(), event) != events.end();
+}
+
+// Replace l with the event types parsed from every child element called
+// `name` under obj.
+void do_decode_xml_obj(rgw::notify::EventTypeList& l, const string& name, XMLObj *obj) {
+  l.clear();
+
+  XMLObjIter iter = obj->find(name);
+  for (XMLObj *child = iter.get_next(); child != nullptr; child = iter.get_next()) {
+    std::string event_name;
+    decode_xml_obj(event_name, child);
+    l.push_back(rgw::notify::from_string(event_name));
+  }
+}
+
+// Decode one notification configuration: Id and Topic are requested with
+// throw_if_missing, Filter is not, and the Event list defaults to
+// ObjectCreated + ObjectRemoved when no Event element is given.
+bool rgw_pubsub_s3_notification::decode_xml(XMLObj *obj) {
+ const auto throw_if_missing = true;
+ RGWXMLDecoder::decode_xml("Id", id, obj, throw_if_missing);
+
+ RGWXMLDecoder::decode_xml("Topic", topic_arn, obj, throw_if_missing);
+
+ RGWXMLDecoder::decode_xml("Filter", filter, obj);
+
+ do_decode_xml_obj(events, "Event", obj);
+ if (events.empty()) {
+ // if no events are provided, we assume all events
+ events.push_back(rgw::notify::ObjectCreated);
+ events.push_back(rgw::notify::ObjectRemoved);
+ }
+ return true;
+}
+
+// Emit Id, Topic, the Filter (only when it has content) and one Event
+// element per configured event type.
+void rgw_pubsub_s3_notification::dump_xml(Formatter *f) const {
+ ::encode_xml("Id", id, f);
+ ::encode_xml("Topic", topic_arn.c_str(), f);
+ if (filter.has_content()) {
+ ::encode_xml("Filter", filter, f);
+ }
+ for (const auto& event : events) {
+ ::encode_xml("Event", rgw::notify::to_string(event), f);
+ }
+}
+
+// Decode all TopicConfiguration elements; throws RGWXMLDecoder::err when
+// there is none.
+bool rgw_pubsub_s3_notifications::decode_xml(XMLObj *obj) {
+ do_decode_xml_obj(list, "TopicConfiguration", obj);
+ if (list.empty()) {
+ throw RGWXMLDecoder::err("at least one 'TopicConfiguration' must exist");
+ }
+ return true;
+}
+
+// Build an S3 notification from a stored topic filter by copying its S3 id,
+// event list, topic ARN and S3 filter.
+rgw_pubsub_s3_notification::rgw_pubsub_s3_notification(const rgw_pubsub_topic_filter& topic_filter) :
+ id(topic_filter.s3_id), events(topic_filter.events), topic_arn(topic_filter.topic.arn), filter(topic_filter.s3_filter) {}
+
+// Emit the list through do_encode_xml() using the "NotificationConfiguration"
+// and "TopicConfiguration" names.
+void rgw_pubsub_s3_notifications::dump_xml(Formatter *f) const {
+ do_encode_xml("NotificationConfiguration", list, "TopicConfiguration", f);
+}
+
+// Dump the record as a nested structure. Each inner scope below opens a
+// Formatter::ObjectSection whose lifetime is that scope.
+void rgw_pubsub_s3_record::dump(Formatter *f) const {
+ encode_json("eventVersion", eventVersion, f);
+ encode_json("eventSource", eventSource, f);
+ encode_json("awsRegion", awsRegion, f);
+ // eventTime is encoded through a utime_t
+ utime_t ut(eventTime);
+ encode_json("eventTime", ut, f);
+ encode_json("eventName", eventName, f);
+ {
+ Formatter::ObjectSection s(*f, "userIdentity");
+ encode_json("principalId", userIdentity, f);
+ }
+ {
+ Formatter::ObjectSection s(*f, "requestParameters");
+ encode_json("sourceIPAddress", sourceIPAddress, f);
+ }
+ {
+ Formatter::ObjectSection s(*f, "responseElements");
+ encode_json("x-amz-request-id", x_amz_request_id, f);
+ encode_json("x-amz-id-2", x_amz_id_2, f);
+ }
+ {
+ // "s3" holds the configuration id plus the bucket and object sections
+ Formatter::ObjectSection s(*f, "s3");
+ encode_json("s3SchemaVersion", s3SchemaVersion, f);
+ encode_json("configurationId", configurationId, f);
+ {
+ Formatter::ObjectSection sub_s(*f, "bucket");
+ encode_json("name", bucket_name, f);
+ {
+ Formatter::ObjectSection sub_sub_s(*f, "ownerIdentity");
+ encode_json("principalId", bucket_ownerIdentity, f);
+ }
+ encode_json("arn", bucket_arn, f);
+ encode_json("id", bucket_id, f);
+ }
+ {
+ Formatter::ObjectSection sub_s(*f, "object");
+ encode_json("key", object_key, f);
+ encode_json("size", object_size, f);
+ encode_json("etag", object_etag, f);
+ encode_json("versionId", object_versionId, f);
+ encode_json("sequencer", object_sequencer, f);
+ encode_json("metadata", x_meta_map, f);
+ encode_json("tags", tags, f);
+ }
+ }
+ encode_json("eventId", id, f);
+ encode_json("opaqueData", opaque_data, f);
+}
+
+ // Dump the event as "id", "event", "timestamp" and "info" fields.
+ // NOTE(review): the `source` member is not emitted here - confirm this is
+ // intentional.
+ void rgw_pubsub_event::dump(Formatter *f) const
+ {
+ encode_json("id", id, f);
+ encode_json("event", event_name, f);
+ // timestamp is converted to utime_t before being encoded
+ utime_t ut(timestamp);
+ encode_json("timestamp", ut, f);
+ encode_json("info", info, f);
+ }
+
+ // Dump the topic as "user", "name", "dest", "arn" and "opaqueData" fields.
+ void rgw_pubsub_topic::dump(Formatter *f) const
+ {
+ encode_json("user", user, f);
+ encode_json("name", name, f);
+ encode_json("dest", dest, f);
+ encode_json("arn", arn, f);
+ encode_json("opaqueData", opaque_data, f);
+ }
+
+ // Dump the topic as XML elements "User", "Name", "EndPoint", "TopicArn"
+ // and "OpaqueData"; the destination is written under "EndPoint".
+ void rgw_pubsub_topic::dump_xml(Formatter *f) const
+ {
+ encode_xml("User", user, f);
+ encode_xml("Name", name, f);
+ encode_xml("EndPoint", dest, f);
+ encode_xml("TopicArn", arn, f);
+ encode_xml("OpaqueData", opaque_data, f);
+ }
+
+ // Dump an event type list as an array section called `name`; every element
+ // is written as a string named "obj" holding its ceph string form.
+ void encode_json(const char *name, const rgw::notify::EventTypeList& l, Formatter *f)
+ {
+   f->open_array_section(name);
+   for (const auto& event_type : l) {
+     f->dump_string("obj", rgw::notify::to_ceph_string(event_type));
+   }
+   f->close_section();
+ }
+
+ // Dump the filter as "topic" and "events" fields.
+ // NOTE(review): s3_id and s3_filter are not emitted here - confirm this is
+ // intentional.
+ void rgw_pubsub_topic_filter::dump(Formatter *f) const
+ {
+ encode_json("topic", topic, f);
+ encode_json("events", events, f);
+ }
+
+ // Dump the topic together with the names of its subscriptions as "topic"
+ // and "subs" fields.
+ void rgw_pubsub_topic_subs::dump(Formatter *f) const
+ {
+ encode_json("topic", topic, f);
+ encode_json("subs", subs, f);
+ }
+
+ // Dump all bucket topic filters inside a "topics" array section; each entry
+ // is encoded under its map key.
+ void rgw_pubsub_bucket_topics::dump(Formatter *f) const
+ {
+   Formatter::ArraySection topics_section(*f, "topics");
+   for (const auto& [topic_name, topic_filter] : topics) {
+     encode_json(topic_name.c_str(), topic_filter, f);
+   }
+ }
+
+ // Dump all user topics inside a "topics" array section; each entry is
+ // encoded under its map key.
+ void rgw_pubsub_user_topics::dump(Formatter *f) const
+ {
+   Formatter::ArraySection topics_section(*f, "topics");
+   for (const auto& [topic_name, topic_subs] : topics) {
+     encode_json(topic_name.c_str(), topic_subs, f);
+   }
+ }
+
+ // Dump the topic of every map entry as an XML "member" element; the
+ // subscription names are not written.
+ void rgw_pubsub_user_topics::dump_xml(Formatter *f) const
+ {
+   for (const auto& entry : topics) {
+     const auto& topic_subs = entry.second;
+     encode_xml("member", topic_subs.topic, f);
+   }
+ }
+
+ // Dump the destination fields; arn_topic is written under the key
+ // "push_endpoint_topic".
+ // NOTE(review): stored_secret is not emitted here - confirm this is
+ // intentional.
+ void rgw_pubsub_sub_dest::dump(Formatter *f) const
+ {
+ encode_json("bucket_name", bucket_name, f);
+ encode_json("oid_prefix", oid_prefix, f);
+ encode_json("push_endpoint", push_endpoint, f);
+ encode_json("push_endpoint_args", push_endpoint_args, f);
+ encode_json("push_endpoint_topic", arn_topic, f);
+ }
+
+ // Dump only the push endpoint fields as XML elements "EndpointAddress",
+ // "EndpointArgs" and "EndpointTopic"; bucket_name and oid_prefix are not
+ // written.
+ void rgw_pubsub_sub_dest::dump_xml(Formatter *f) const
+ {
+ encode_xml("EndpointAddress", push_endpoint, f);
+ encode_xml("EndpointArgs", push_endpoint_args, f);
+ encode_xml("EndpointTopic", arn_topic, f);
+ }
+
+ // Dump the subscription configuration as "user", "name", "topic", "dest"
+ // and "s3_id" fields.
+ void rgw_pubsub_sub_config::dump(Formatter *f) const
+ {
+ encode_json("user", user, f);
+ encode_json("name", name, f);
+ encode_json("topic", topic, f);
+ encode_json("dest", dest, f);
+ encode_json("s3_id", s3_id, f);
+ }
+
+
+ // Delete the given raw object through rgw_delete_system_obj.
+ // Returns 0 on success, or the negative error code of the delete.
+ int RGWUserPubSub::remove(const rgw_raw_obj& obj, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = rgw_delete_system_obj(store, obj.pool, obj.oid, objv_tracker);
+   return (r < 0) ? r : 0;
+ }
+
+ // Read the user's topics from user_meta_obj into result.
+ // Returns 0 on success; any negative result of the read (including -ENOENT)
+ // is logged as a warning and returned to the caller.
+ int RGWUserPubSub::read_user_topics(rgw_pubsub_user_topics *result, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = read(user_meta_obj, result, objv_tracker);
+   if (r >= 0) {
+     return 0;
+   }
+   ldout(store->ctx(), 10) << "WARNING: failed to read topics info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Write the user's topics to user_meta_obj.
+ // Returns 0 on success and also when the write reports -ENOENT; any other
+ // negative result is logged and returned.
+ int RGWUserPubSub::write_user_topics(const rgw_pubsub_user_topics& topics, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = write(user_meta_obj, topics, objv_tracker);
+   const bool failed = (r < 0) && (r != -ENOENT);
+   if (!failed) {
+     return 0;
+   }
+   ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Read the user's topics into result without a version tracker.
+ // Returns whatever read_user_topics returns.
+ int RGWUserPubSub::get_user_topics(rgw_pubsub_user_topics *result)
+ {
+ return read_user_topics(result, nullptr);
+ }
+
+ // Read the topics associated with the bucket from bucket_meta_obj.
+ // Returns 0 on success and also when the read reports -ENOENT (result is
+ // then left as the read left it); any other negative result is logged and
+ // returned.
+ int RGWUserPubSub::Bucket::read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = ps->read(bucket_meta_obj, result, objv_tracker);
+   if (r >= 0 || r == -ENOENT) {
+     return 0;
+   }
+   ldout(ps->store->ctx(), 1) << "ERROR: failed to read bucket topics info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Write the topics associated with the bucket to bucket_meta_obj.
+ // Returns 0 on success; a negative result is logged and returned.
+ int RGWUserPubSub::Bucket::write_topics(const rgw_pubsub_bucket_topics& topics, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = ps->write(bucket_meta_obj, topics, objv_tracker);
+   if (r >= 0) {
+     return 0;
+   }
+   ldout(ps->store->ctx(), 1) << "ERROR: failed to write bucket topics info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Read the topics associated with the bucket into result without a version
+ // tracker. Returns whatever read_topics returns.
+ int RGWUserPubSub::Bucket::get_topics(rgw_pubsub_bucket_topics *result)
+ {
+ return read_topics(result, nullptr);
+ }
+
+ // Look up the topic called `name` among the user's topics and copy the topic
+ // together with its subscriptions into result.
+ // Returns 0 on success, -ENOENT when no such topic exists, or the error
+ // returned while reading the user's topics.
+ int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic_subs *result)
+ {
+   rgw_pubsub_user_topics user_topics;
+   const int r = get_user_topics(&user_topics);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   const auto& by_name = user_topics.topics;
+   const auto found = by_name.find(name);
+   if (found == by_name.end()) {
+     ldout(store->ctx(), 1) << "ERROR: topic not found" << dendl;
+     return -ENOENT;
+   }
+
+   *result = found->second;
+   return 0;
+ }
+
+ // Look up the topic called `name` among the user's topics and copy only the
+ // topic (without its subscriptions) into result.
+ // Delegates to the rgw_pubsub_topic_subs overload, which does the read, the
+ // lookup and the error logging, so both overloads share one implementation.
+ // Returns 0 on success, -ENOENT when no such topic exists, or the error
+ // returned while reading the user's topics; result is untouched on failure.
+ int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic *result)
+ {
+   rgw_pubsub_topic_subs topic_subs;
+   const int ret = get_topic(name, &topic_subs);
+   if (ret < 0) {
+     return ret;
+   }
+
+   *result = topic_subs.topic;
+   return 0;
+ }
+
+ // Convenience overload: create a notification with no s3 filter and an
+ // empty notification name.
+ int RGWUserPubSub::Bucket::create_notification(const string& topic_name, const rgw::notify::EventTypeList& events) {
+ return create_notification(topic_name, events, std::nullopt, "");
+ }
+
+ // Associate the user's topic `topic_name` with this bucket.
+ // The topic is read from the user's topics, the bucket's topic map is read
+ // with a version tracker, the entry for topic_name is created or overwritten
+ // (topic, events, notification name and - only when one is given - the s3
+ // filter), and the map is written back using the same tracker.
+ // Returns 0 on success, or the first error encountered.
+ int RGWUserPubSub::Bucket::create_notification(const string& topic_name, const rgw::notify::EventTypeList& events, OptionalFilter s3_filter, const std::string& notif_name) {
+   rgw_pubsub_topic_subs topic_info;
+   RGWRados *store = ps->store;
+
+   // the topic must already exist for the user
+   int r = ps->get_topic(topic_name, &topic_info);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topic '" << topic_name << "' info: ret=" << r << dendl;
+     return r;
+   }
+   ldout(store->ctx(), 20) << "successfully read topic '" << topic_name << "' info" << dendl;
+
+   RGWObjVersionTracker tracker;
+   rgw_pubsub_bucket_topics current;
+
+   r = read_topics(&current, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topics from bucket '" <<
+       bucket.name << "': ret=" << r << dendl;
+     return r;
+   }
+   ldout(store->ctx(), 20) << "successfully read " << current.topics.size() << " topics from bucket '" <<
+     bucket.name << "'" << dendl;
+
+   // operator[] creates the entry when the topic is not yet on the bucket
+   auto& entry = current.topics[topic_name];
+   entry.topic = topic_info.topic;
+   entry.events = events;
+   entry.s3_id = notif_name;
+   if (s3_filter.has_value()) {
+     entry.s3_filter = s3_filter.value();
+   }
+
+   r = write_topics(current, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to write topics to bucket '" << bucket.name << "': ret=" << r << dendl;
+     return r;
+   }
+
+   ldout(store->ctx(), 20) << "successfully wrote " << current.topics.size() << " topics to bucket '" << bucket.name << "'" << dendl;
+
+   return 0;
+ }
+
+ // Remove the association between the topic `topic_name` and this bucket.
+ // The topic must exist among the user's topics (its content is not used
+ // otherwise); the bucket's topic map is read with a version tracker, the
+ // entry is erased and the map is written back using the same tracker.
+ // Returns 0 on success, or the first error encountered.
+ int RGWUserPubSub::Bucket::remove_notification(const string& topic_name)
+ {
+   rgw_pubsub_topic_subs topic_info;
+   RGWRados *store = ps->store;
+
+   int r = ps->get_topic(topic_name, &topic_info);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topic info: ret=" << r << dendl;
+     return r;
+   }
+
+   RGWObjVersionTracker tracker;
+   rgw_pubsub_bucket_topics current;
+
+   r = read_topics(&current, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read bucket topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   current.topics.erase(topic_name);
+
+   r = write_topics(current, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   return 0;
+ }
+
+ // Convenience overload: create a topic with a default-constructed
+ // destination, an empty ARN and empty opaque data.
+ int RGWUserPubSub::create_topic(const string& name) {
+ return create_topic(name, rgw_pubsub_sub_dest(), "", "");
+ }
+
+ // Create the topic `name` for the user, or overwrite the topic part of an
+ // existing entry (an existing entry's subscription set is left as is).
+ // The user's topics are read with a version tracker and written back using
+ // the same tracker.
+ // Returns 0 on success, or the first error encountered.
+ int RGWUserPubSub::create_topic(const string& name, const rgw_pubsub_sub_dest& dest, const std::string& arn, const std::string& opaque_data) {
+   RGWObjVersionTracker tracker;
+   rgw_pubsub_user_topics user_topics;
+
+   int r = read_user_topics(&user_topics, &tracker);
+   const bool read_failed = (r < 0) && (r != -ENOENT);
+   if (read_failed) {
+     // it's not an error if no topics exist yet, we create the first one
+     ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   // operator[] creates the entry when the topic does not exist yet
+   rgw_pubsub_topic& topic = user_topics.topics[name].topic;
+   topic.user = user;
+   topic.name = name;
+   topic.dest = dest;
+   topic.arn = arn;
+   topic.opaque_data = opaque_data;
+
+   r = write_user_topics(user_topics, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   return 0;
+ }
+
+ // Remove the topic `name` from the user's topics.
+ // When the user has no topics object (-ENOENT) the removal is a no-op and 0
+ // is returned. Otherwise the entry is erased and the topics are written back
+ // using the version tracker obtained by the read.
+ // Returns 0 on success, or the first error encountered.
+ int RGWUserPubSub::remove_topic(const string& name)
+ {
+   RGWObjVersionTracker tracker;
+   rgw_pubsub_user_topics user_topics;
+
+   int r = read_user_topics(&user_topics, &tracker);
+   if (r == -ENOENT) {
+     // it's not an error if no topics exist, just a no-op
+     ldout(store->ctx(), 10) << "WARNING: failed to read topics info, deletion is a no-op: ret=" << r << dendl;
+     return 0;
+   }
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   user_topics.topics.erase(name);
+
+   r = write_user_topics(user_topics, &tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to remove topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   return 0;
+ }
+
+ // Read the subscription configuration from sub_meta_obj into result.
+ // Returns 0 on success and also when the read reports -ENOENT; any other
+ // negative result is logged and returned.
+ int RGWUserPubSub::Sub::read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = ps->read(sub_meta_obj, result, objv_tracker);
+   if (r >= 0 || r == -ENOENT) {
+     return 0;
+   }
+   ldout(ps->store->ctx(), 1) << "ERROR: failed to read subscription info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Write the subscription configuration to sub_meta_obj.
+ // Returns 0 on success; a negative result is logged and returned.
+ int RGWUserPubSub::Sub::write_sub(const rgw_pubsub_sub_config& sub_conf, RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = ps->write(sub_meta_obj, sub_conf, objv_tracker);
+   if (r >= 0) {
+     return 0;
+   }
+   ldout(ps->store->ctx(), 1) << "ERROR: failed to write subscription info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Remove the subscription configuration object sub_meta_obj.
+ // Returns 0 on success; a negative result is logged and returned.
+ int RGWUserPubSub::Sub::remove_sub(RGWObjVersionTracker *objv_tracker)
+ {
+   const int r = ps->remove(sub_meta_obj, objv_tracker);
+   if (r >= 0) {
+     return 0;
+   }
+   ldout(ps->store->ctx(), 1) << "ERROR: failed to remove subscription info: ret=" << r << dendl;
+   return r;
+ }
+
+ // Read the subscription configuration into result without a version
+ // tracker. Returns whatever read_sub returns.
+ int RGWUserPubSub::Sub::get_conf(rgw_pubsub_sub_config *result)
+ {
+ return read_sub(result, nullptr);
+ }
+
+ // Subscribe this subscription to the user's topic `topic`.
+ // The subscription name is added to the topic's subscription set and the
+ // user's topics are written back (using the version tracker from the read);
+ // the subscription configuration object is then written without a tracker.
+ // Returns 0 on success, -EINVAL when the topics object or the topic does
+ // not exist, or the first other error encountered.
+ int RGWUserPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest& dest, const std::string& s3_id)
+ {
+   RGWObjVersionTracker topics_tracker;
+   rgw_pubsub_user_topics user_topics;
+   RGWRados *store = ps->store;
+
+   int r = ps->read_user_topics(&user_topics, &topics_tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << r << dendl;
+     // a missing topics object is reported to the caller as -EINVAL
+     return (r == -ENOENT) ? -EINVAL : r;
+   }
+
+   const auto found = user_topics.topics.find(topic);
+   if (found == user_topics.topics.end()) {
+     ldout(store->ctx(), 1) << "ERROR: cannot add subscription to topic: topic not found" << dendl;
+     return -EINVAL;
+   }
+
+   rgw_pubsub_sub_config new_conf;
+   new_conf.user = ps->user;
+   new_conf.name = sub;
+   new_conf.topic = topic;
+   new_conf.dest = dest;
+   new_conf.s3_id = s3_id;
+
+   found->second.subs.insert(sub);
+
+   r = ps->write_user_topics(user_topics, &topics_tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << r << dendl;
+     return r;
+   }
+
+   r = write_sub(new_conf, nullptr);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to write subscription info: ret=" << r << dendl;
+     return r;
+   }
+   return 0;
+ }
+
+ // Unsubscribe this subscription from a topic and remove its configuration.
+ // When _topic is empty the topic name is taken from the stored subscription
+ // configuration. A failure to read the user's topics is only logged; when
+ // the topic is found, the subscription name is erased from it and the
+ // topics are written back. The subscription object is removed in all of
+ // these cases.
+ // Returns 0 on success, or the first error that aborts the operation.
+ int RGWUserPubSub::Sub::unsubscribe(const string& _topic)
+ {
+   string topic_name = _topic;
+   RGWObjVersionTracker sub_tracker;
+   RGWRados *store = ps->store;
+
+   if (topic_name.empty()) {
+     rgw_pubsub_sub_config stored_conf;
+     const int read_ret = read_sub(&stored_conf, &sub_tracker);
+     if (read_ret < 0) {
+       ldout(store->ctx(), 1) << "ERROR: failed to read subscription info: ret=" << read_ret << dendl;
+       return read_ret;
+     }
+     topic_name = stored_conf.topic;
+   }
+
+   RGWObjVersionTracker topics_tracker;
+   rgw_pubsub_user_topics user_topics;
+
+   int r = ps->read_user_topics(&user_topics, &topics_tracker);
+   if (r < 0) {
+     // not an error - could be that topic was already deleted
+     ldout(store->ctx(), 10) << "WARNING: failed to read topics info: ret=" << r << dendl;
+   } else if (auto found = user_topics.topics.find(topic_name); found != user_topics.topics.end()) {
+     found->second.subs.erase(sub);
+
+     r = ps->write_user_topics(user_topics, &topics_tracker);
+     if (r < 0) {
+       ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << r << dendl;
+       return r;
+     }
+   }
+
+   r = remove_sub(&sub_tracker);
+   if (r < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to delete subscription info: ret=" << r << dendl;
+     return r;
+   }
+   return 0;
+ }
+
+ // Dump the listing result: "next_marker", "is_truncated", then every event
+ // inside an array section named by EventType::json_type_plural.
+ template<typename EventType>
+ void RGWUserPubSub::SubWithEvents<EventType>::list_events_result::dump(Formatter *f) const
+ {
+   encode_json("next_marker", next_marker, f);
+   encode_json("is_truncated", is_truncated, f);
+
+   Formatter::ArraySection events_section(*f, EventType::json_type_plural);
+   for (const auto& ev : events) {
+     encode_json("", ev, f);
+   }
+ }
+
+ // List the events stored for this subscription and append them to `list`.
+ // The subscription configuration gives the events bucket and the object
+ // prefix; up to max_events objects are listed starting from marker. Each
+ // object's user_data is base64-decoded and then decoded into an EventType.
+ // Entries that fail either decoding step are logged and skipped.
+ // list.is_truncated and (when truncated) list.next_marker are updated.
+ // Returns 0 on success and also when the events bucket does not exist
+ // (-ENOENT); otherwise the first error encountered.
+ template<typename EventType>
+ int RGWUserPubSub::SubWithEvents<EventType>::list_events(const string& marker, int max_events)
+ {
+   RGWRados *store = ps->store;
+   rgw_pubsub_sub_config sub_conf;
+   int ret = get_conf(&sub_conf);
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read sub config: ret=" << ret << dendl;
+     return ret;
+   }
+
+   RGWBucketInfo bucket_info;
+   string tenant;
+   RGWSysObjectCtx obj_ctx(store->svc.sysobj->init_obj_ctx());
+   ret = store->get_bucket_info(obj_ctx, tenant, sub_conf.dest.bucket_name, bucket_info, nullptr, nullptr);
+   if (ret == -ENOENT) {
+     // no events bucket: nothing to list
+     list.is_truncated = false;
+     return 0;
+   }
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read bucket info for events bucket: bucket=" << sub_conf.dest.bucket_name << " ret=" << ret << dendl;
+     return ret;
+   }
+
+   RGWRados::Bucket target(store, bucket_info);
+   RGWRados::Bucket::List list_op(&target);
+
+   list_op.params.prefix = sub_conf.dest.oid_prefix;
+   list_op.params.marker = marker;
+
+   std::vector<rgw_bucket_dir_entry> objs;
+
+   ret = list_op.list_objects(max_events, &objs, nullptr, &list.is_truncated);
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to list bucket: bucket=" << sub_conf.dest.bucket_name << " ret=" << ret << dendl;
+     return ret;
+   }
+   if (list.is_truncated) {
+     list.next_marker = list_op.get_next_marker().name;
+   }
+
+   for (auto& obj : objs) {
+     // the event is stored base64-encoded in the entry's user_data
+     bufferlist bl64;
+     bufferlist bl;
+     bl64.append(obj.meta.user_data);
+     try {
+       bl.decode_base64(bl64);
+     } catch (const buffer::error&) {
+       ldout(store->ctx(), 1) << "ERROR: failed to decode event (not a valid base64)" << dendl;
+       continue;
+     }
+     EventType event;
+
+     auto iter = bl.cbegin();
+     try {
+       decode(event, iter);
+     } catch (const buffer::error&) {
+       ldout(store->ctx(), 1) << "ERROR: failed to decode event" << dendl;
+       continue;
+     }
+
+     list.events.push_back(event);
+   }
+   return 0;
+ }
+
+ // Remove (ack) the event `event_id` of this subscription by deleting the
+ // object "<oid_prefix><event_id>" from the subscription's events bucket.
+ // Returns 0 on success and also when the delete reports -ENOENT (the event
+ // is already gone); any other failure is logged and its error code is
+ // returned instead of being silently reported as success.
+ template<typename EventType>
+ int RGWUserPubSub::SubWithEvents<EventType>::remove_event(const string& event_id)
+ {
+   RGWRados *store = ps->store;
+   rgw_pubsub_sub_config sub_conf;
+   int ret = get_conf(&sub_conf);
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read sub config: ret=" << ret << dendl;
+     return ret;
+   }
+
+   RGWBucketInfo bucket_info;
+   string tenant;
+   RGWSysObjectCtx sysobj_ctx(store->svc.sysobj->init_obj_ctx());
+   ret = store->get_bucket_info(sysobj_ctx, tenant, sub_conf.dest.bucket_name, bucket_info, nullptr, nullptr);
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to read bucket info for events bucket: bucket=" << sub_conf.dest.bucket_name << " ret=" << ret << dendl;
+     return ret;
+   }
+
+   rgw_bucket& bucket = bucket_info.bucket;
+
+   RGWObjectCtx obj_ctx(store);
+   rgw_obj obj(bucket, sub_conf.dest.oid_prefix + event_id);
+
+   obj_ctx.set_atomic(obj);
+
+   RGWRados::Object del_target(store, bucket_info, obj_ctx, obj);
+   RGWRados::Object::Delete del_op(&del_target);
+
+   del_op.params.bucket_owner = bucket_info.owner;
+   del_op.params.versioning_status = bucket_info.versioning_status();
+
+   ret = del_op.delete_obj();
+   if (ret < 0) {
+     ldout(store->ctx(), 1) << "ERROR: failed to remove event (obj=" << obj << "): ret=" << ret << dendl;
+     // removing an event that no longer exists keeps the ack idempotent
+     if (ret != -ENOENT) {
+       return ret;
+     }
+   }
+   return 0;
+ }
+
+ // Dump the result of the event listing held in `list` to the formatter.
+ template<typename EventType>
+ void RGWUserPubSub::SubWithEvents<EventType>::dump(Formatter* f) const {
+ list.dump(f);
+ }
+
+ // Explicit instantiation of SubWithEvents for the only two event types it is
+ // used with (rgw_pubsub_event and rgw_pubsub_s3_record).
+ // This lets the template member definitions stay in this file, with no need
+ // to move the implementation to the header.
+ template class RGWUserPubSub::SubWithEvents<rgw_pubsub_event>;
+ template class RGWUserPubSub::SubWithEvents<rgw_pubsub_s3_record>;
+
diff --git a/src/rgw/rgw_pubsub.h b/src/rgw/rgw_pubsub.h
new file mode 100644
index 00000000..d7b1758a
--- /dev/null
+++ b/src/rgw/rgw_pubsub.h
@@ -0,0 +1,812 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#ifndef CEPH_RGW_PUBSUB_H
+#define CEPH_RGW_PUBSUB_H
+
+#include "rgw_common.h"
+#include "rgw_tools.h"
+#include "rgw_zone.h"
+#include "rgw_rados.h"
+#include "rgw_notify_event_type.h"
+#include "services/svc_sys_obj.h"
+#include <boost/container/flat_map.hpp>
+
+class XMLObj;
+
+ // Filter on the object key, made of a prefix rule, a suffix rule and a
+ // regex rule.
+ struct rgw_s3_key_filter {
+ std::string prefix_rule;
+ std::string suffix_rule;
+ std::string regex_rule;
+
+ bool has_content() const;
+
+ bool decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+
+ // the three rules are encoded in declaration order; decode() must read
+ // them back in the same order
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(prefix_rule, bl);
+ encode(suffix_rule, bl);
+ encode(regex_rule, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(prefix_rule, bl);
+ decode(suffix_rule, bl);
+ decode(regex_rule, bl);
+ DECODE_FINISH(bl);
+ }
+ };
+ WRITE_CLASS_ENCODER(rgw_s3_key_filter)
+
+using KeyValueList = boost::container::flat_map<std::string, std::string>;
+
+ // Filter made of a list of key/value pairs; used by rgw_s3_filter for both
+ // the metadata filter and the tag filter.
+ struct rgw_s3_key_value_filter {
+ KeyValueList kvl;
+
+ bool has_content() const;
+
+ bool decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(kvl, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(kvl, bl);
+ DECODE_FINISH(bl);
+ }
+ };
+ WRITE_CLASS_ENCODER(rgw_s3_key_value_filter)
+
+ // Complete S3 notification filter: a key filter, a metadata filter and a
+ // tag filter.
+ struct rgw_s3_filter {
+ rgw_s3_key_filter key_filter;
+ rgw_s3_key_value_filter metadata_filter;
+ rgw_s3_key_value_filter tag_filter;
+
+ bool has_content() const;
+
+ bool decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(key_filter, bl);
+ encode(metadata_filter, bl);
+ encode(tag_filter, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(key_filter, bl);
+ decode(metadata_filter, bl);
+ // tag_filter is only read when the encoded struct version is 2 or newer
+ if (struct_v >= 2) {
+ decode(tag_filter, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ };
+ WRITE_CLASS_ENCODER(rgw_s3_filter)
+
+using OptionalFilter = std::optional<rgw_s3_filter>;
+
+struct rgw_pubsub_topic_filter;
+/* S3 notification configuration
+ * based on: https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketPUTnotification.html
+<NotificationConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+ <TopicConfiguration>
+ <Filter>
+ <S3Key>
+ <FilterRule>
+ <Name>suffix</Name>
+ <Value>jpg</Value>
+ </FilterRule>
+ </S3Key>
+ <S3Metadata>
+ <FilterRule>
+ <Name></Name>
+ <Value></Value>
+ </FilterRule>
+ </S3Metadata>
+ <S3Tags>
+ <FilterRule>
+ <Name></Name>
+ <Value></Value>
+ </FilterRule>
+ </S3Tags>
+ </Filter>
+ <Id>notification1</Id>
+ <Topic>arn:aws:sns:<region>:<account>:<topic></Topic>
+ <Event>s3:ObjectCreated:*</Event>
+ <Event>s3:ObjectRemoved:*</Event>
+ </TopicConfiguration>
+</NotificationConfiguration>
+*/
+ // A single S3 notification: its id, the event types it covers, the ARN of
+ // the topic and the filter rules. It can be decoded from and dumped to XML.
+ struct rgw_pubsub_s3_notification {
+ // notification id
+ std::string id;
+ // types of events
+ rgw::notify::EventTypeList events;
+ // topic ARN
+ std::string topic_arn;
+ // filter rules
+ rgw_s3_filter filter;
+
+ bool decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+
+ rgw_pubsub_s3_notification() = default;
+ // construct from rgw_pubsub_topic_filter (used by get/list notifications)
+ explicit rgw_pubsub_s3_notification(const rgw_pubsub_topic_filter& topic_filter);
+ };
+
+// return true if the key matches the prefix/suffix/regex rules of the key filter
+bool match(const rgw_s3_key_filter& filter, const std::string& key);
+// return true if the key matches the metadata/tags rules of the metadata/tags filter
+bool match(const rgw_s3_key_value_filter& filter, const KeyValueList& kvl);
+// return true if the event type matches (equal or contained in) one of the events in the list
+bool match(const rgw::notify::EventTypeList& events, rgw::notify::EventType event);
+
+ // List of S3 notifications that can be decoded from and dumped to XML.
+ struct rgw_pubsub_s3_notifications {
+ std::list<rgw_pubsub_s3_notification> list;
+ bool decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+ };
+
+ /* S3 event records structure
+ * based on: https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html
+ {
+ "Records":[
+ {
+ "eventVersion":"",
+ "eventSource":"",
+ "awsRegion":"",
+ "eventTime":"",
+ "eventName":"",
+ "userIdentity":{
+ "principalId":""
+ },
+ "requestParameters":{
+ "sourceIPAddress":""
+ },
+ "responseElements":{
+ "x-amz-request-id":"",
+ "x-amz-id-2":""
+ },
+ "s3":{
+ "s3SchemaVersion":"1.0",
+ "configurationId":"",
+ "bucket":{
+ "name":"",
+ "ownerIdentity":{
+ "principalId":""
+ },
+ "arn":"",
+ "id": ""
+ },
+ "object":{
+ "key":"",
+ "size": ,
+ "eTag":"",
+ "versionId":"",
+ "sequencer": "",
+ "metadata": "",
+ "tags": ""
+ }
+ },
+ "eventId":"",
+ "opaqueData":""
+ }
+ ]
+ }*/
+
+ // One S3-style event record.
+ // Fields are encoded in a fixed order; fields added in later struct versions
+ // (bucket_id and x_meta_map in v2, tags in v3, opaque_data in v4) are only
+ // decoded when the encoded version is new enough.
+ struct rgw_pubsub_s3_record {
+ // name of the JSON array section used when a list of records is dumped
+ constexpr static const char* const json_type_plural = "Records";
+ std::string eventVersion = "2.2";
+ // event source; defaults to "ceph:s3" (the AWS equivalent is aws:s3)
+ std::string eventSource = "ceph:s3";
+ // zonegroup
+ std::string awsRegion;
+ // time of the request
+ ceph::real_time eventTime;
+ // type of the event
+ std::string eventName;
+ // user that sent the request
+ std::string userIdentity;
+ // IP address of source of the request (not implemented)
+ std::string sourceIPAddress;
+ // request ID (not implemented)
+ std::string x_amz_request_id;
+ // radosgw that received the request
+ std::string x_amz_id_2;
+ std::string s3SchemaVersion = "1.0";
+ // ID received in the notification request
+ std::string configurationId;
+ // bucket name
+ std::string bucket_name;
+ // bucket owner
+ std::string bucket_ownerIdentity;
+ // bucket ARN
+ std::string bucket_arn;
+ // object key
+ std::string object_key;
+ // object size
+ uint64_t object_size = 0;
+ // object etag
+ std::string object_etag;
+ // object version id (if the bucket is versioned)
+ std::string object_versionId;
+ // hexadecimal value used to determine event order for specific key
+ std::string object_sequencer;
+ // this is an rgw extension (not S3 standard)
+ // used to store a globally unique identifier of the event
+ // that could be used for acking or any other identification of the event
+ std::string id;
+ // this is an rgw extension holding the internal bucket id
+ std::string bucket_id;
+ // meta data
+ KeyValueList x_meta_map;
+ // tags
+ KeyValueList tags;
+ // opaque data received from the topic
+ // could be used to identify the gateway
+ std::string opaque_data;
+
+ // encode all fields; the order here defines the order decode() must follow
+ void encode(bufferlist& bl) const {
+ ENCODE_START(4, 1, bl);
+ encode(eventVersion, bl);
+ encode(eventSource, bl);
+ encode(awsRegion, bl);
+ encode(eventTime, bl);
+ encode(eventName, bl);
+ encode(userIdentity, bl);
+ encode(sourceIPAddress, bl);
+ encode(x_amz_request_id, bl);
+ encode(x_amz_id_2, bl);
+ encode(s3SchemaVersion, bl);
+ encode(configurationId, bl);
+ encode(bucket_name, bl);
+ encode(bucket_ownerIdentity, bl);
+ encode(bucket_arn, bl);
+ encode(object_key, bl);
+ encode(object_size, bl);
+ encode(object_etag, bl);
+ encode(object_versionId, bl);
+ encode(object_sequencer, bl);
+ encode(id, bl);
+ encode(bucket_id, bl);
+ encode(x_meta_map, bl);
+ encode(tags, bl);
+ encode(opaque_data, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // decode in the same order as encode(); newer fields are version-gated
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(4, bl);
+ decode(eventVersion, bl);
+ decode(eventSource, bl);
+ decode(awsRegion, bl);
+ decode(eventTime, bl);
+ decode(eventName, bl);
+ decode(userIdentity, bl);
+ decode(sourceIPAddress, bl);
+ decode(x_amz_request_id, bl);
+ decode(x_amz_id_2, bl);
+ decode(s3SchemaVersion, bl);
+ decode(configurationId, bl);
+ decode(bucket_name, bl);
+ decode(bucket_ownerIdentity, bl);
+ decode(bucket_arn, bl);
+ decode(object_key, bl);
+ decode(object_size, bl);
+ decode(object_etag, bl);
+ decode(object_versionId, bl);
+ decode(object_sequencer, bl);
+ decode(id, bl);
+ if (struct_v >= 2) {
+ decode(bucket_id, bl);
+ decode(x_meta_map, bl);
+ }
+ if (struct_v >= 3) {
+ decode(tags, bl);
+ }
+ if (struct_v >= 4) {
+ decode(opaque_data, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_s3_record)
+
+ // A pubsub event: id, event name, source, timestamp and free-form info.
+ struct rgw_pubsub_event {
+ // name of the JSON array section used when a list of events is dumped
+ constexpr static const char* const json_type_plural = "events";
+ std::string id;
+ std::string event_name;
+ std::string source;
+ ceph::real_time timestamp;
+ JSONFormattable info;
+
+ // fields are encoded in declaration order; decode() reads them back in
+ // the same order
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(id, bl);
+ encode(event_name, bl);
+ encode(source, bl);
+ encode(timestamp, bl);
+ encode(info, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(id, bl);
+ decode(event_name, bl);
+ decode(source, bl);
+ decode(timestamp, bl);
+ decode(info, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_event)
+
+ // set a unique ID for an event/record based on the object hash and timestamp
+void set_event_id(std::string& id, const std::string& hash, const utime_t& ts);
+
+ // Destination of a subscription or topic: a bucket name and object id
+ // prefix, plus a push endpoint with its arguments and topic.
+ // Fields added in later struct versions (push_endpoint_args in v2, arn_topic
+ // in v3, stored_secret in v4) are only decoded when the encoded version is
+ // new enough.
+ struct rgw_pubsub_sub_dest {
+ std::string bucket_name;
+ std::string oid_prefix;
+ std::string push_endpoint;
+ std::string push_endpoint_args;
+ std::string arn_topic;
+ bool stored_secret = false;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(4, 1, bl);
+ encode(bucket_name, bl);
+ encode(oid_prefix, bl);
+ encode(push_endpoint, bl);
+ encode(push_endpoint_args, bl);
+ encode(arn_topic, bl);
+ encode(stored_secret, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // decode in the same order as encode(); newer fields are version-gated
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(4, bl);
+ decode(bucket_name, bl);
+ decode(oid_prefix, bl);
+ decode(push_endpoint, bl);
+ if (struct_v >= 2) {
+ decode(push_endpoint_args, bl);
+ }
+ if (struct_v >= 3) {
+ decode(arn_topic, bl);
+ }
+ if (struct_v >= 4) {
+ decode(stored_secret, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_sub_dest)
+
+ // Configuration of a subscription: owning user, subscription name, topic
+ // name, destination and S3 notification id.
+ struct rgw_pubsub_sub_config {
+ rgw_user user;
+ std::string name;
+ std::string topic;
+ rgw_pubsub_sub_dest dest;
+ std::string s3_id;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(user, bl);
+ encode(name, bl);
+ encode(topic, bl);
+ encode(dest, bl);
+ encode(s3_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(user, bl);
+ decode(name, bl);
+ decode(topic, bl);
+ decode(dest, bl);
+ // s3_id is only read when the encoded struct version is 2 or newer
+ if (struct_v >= 2) {
+ decode(s3_id, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_sub_config)
+
+ // A pubsub topic: owning user, topic name, destination, ARN and opaque data.
+ // Fields added in later struct versions (dest and arn in v2, opaque_data in
+ // v3) are only decoded when the encoded version is new enough.
+ struct rgw_pubsub_topic {
+   rgw_user user;
+   std::string name;
+   rgw_pubsub_sub_dest dest;
+   std::string arn;
+   std::string opaque_data;
+
+   void encode(bufferlist& bl) const {
+     ENCODE_START(3, 1, bl);
+     encode(user, bl);
+     encode(name, bl);
+     encode(dest, bl);
+     encode(arn, bl);
+     encode(opaque_data, bl);
+     ENCODE_FINISH(bl);
+   }
+
+   // decode in the same order as encode(); newer fields are version-gated
+   void decode(bufferlist::const_iterator& bl) {
+     DECODE_START(3, bl);
+     decode(user, bl);
+     decode(name, bl);
+     if (struct_v >= 2) {
+       decode(dest, bl);
+       decode(arn, bl);
+     }
+     if (struct_v >= 3) {
+       decode(opaque_data, bl);
+     }
+     DECODE_FINISH(bl);
+   }
+
+   // string form of the topic: "<user>/<name>"
+   string to_str() const {
+     return user.to_str() + "/" + name;
+   }
+
+   void dump(Formatter *f) const;
+   void dump_xml(Formatter *f) const;
+
+   // Order topics by their "<user>/<name>" string.
+   // compare() returns an int that is non-zero for any two different
+   // strings, so it has to be tested against 0; returning it directly would
+   // make "a < b" and "b < a" both true for every pair of distinct topics.
+   bool operator<(const rgw_pubsub_topic& t) const {
+     return to_str().compare(t.to_str()) < 0;
+   }
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_topic)
+
+ // A topic together with the set of subscription names attached to it.
+ struct rgw_pubsub_topic_subs {
+ rgw_pubsub_topic topic;
+ std::set<std::string> subs;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(topic, bl);
+ encode(subs, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(topic, bl);
+ decode(subs, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_topic_subs)
+
+ // Association of a topic with a bucket: the topic, the event types of
+ // interest, the S3 notification id and the S3 filter.
+ struct rgw_pubsub_topic_filter {
+ rgw_pubsub_topic topic;
+ rgw::notify::EventTypeList events;
+ std::string s3_id;
+ rgw_s3_filter s3_filter;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(3, 1, bl);
+ encode(topic, bl);
+ // events are stored as a vector of strings
+ std::vector<std::string> tmp_events;
+ // the string form depends on whether an s3 id is set: to_ceph_string when
+ // s3_id is empty, to_string otherwise
+ const auto converter = s3_id.empty() ? rgw::notify::to_ceph_string : rgw::notify::to_string;
+ std::transform(events.begin(), events.end(), std::back_inserter(tmp_events), converter);
+ encode(tmp_events, bl);
+ encode(s3_id, bl);
+ encode(s3_filter, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(3, bl);
+ decode(topic, bl);
+ // events are stored as a vector of strings
+ // any previously held events are dropped before the decoded ones are added
+ events.clear();
+ std::vector<std::string> tmp_events;
+ decode(tmp_events, bl);
+ std::transform(tmp_events.begin(), tmp_events.end(), std::back_inserter(events), rgw::notify::from_string);
+ // s3_id and s3_filter are only read when the encoded version has them
+ if (struct_v >= 2) {
+ decode(s3_id, bl);
+ }
+ if (struct_v >= 3) {
+ decode(s3_filter, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_topic_filter)
+
+ // Topic filters associated with a bucket, held in a map with string keys.
+ struct rgw_pubsub_bucket_topics {
+ std::map<std::string, rgw_pubsub_topic_filter> topics;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(topics, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(topics, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_bucket_topics)
+
+ // Topics of a user (each with its subscriptions), held in a map with
+ // string keys.
+ struct rgw_pubsub_user_topics {
+ std::map<std::string, rgw_pubsub_topic_subs> topics;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(topics, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(topics, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ };
+ WRITE_CLASS_ENCODER(rgw_pubsub_user_topics)
+
+ // Prefix of the object ids of all per-user pubsub metadata objects.
+ // Declared const: as a static in a header every translation unit gets its
+ // own copy, so a mutable value could silently diverge between them.
+ static const std::string pubsub_user_oid_prefix = "pubsub.user.";
+
+// per-user access to the pubsub metadata: topics, bucket notifications and subscriptions
+// the metadata objects live in the zone's log_pool and their names start with
+// pubsub_user_oid_prefix followed by the user id
+class RGWUserPubSub
+{
+ friend class Bucket;
+
+ RGWRados *store;
+ rgw_user user;
+ RGWSysObjectCtx obj_ctx;
+
+ // the per-user metadata object, set once in the constructor
+ rgw_raw_obj user_meta_obj;
+
+ // name of the per-user metadata object
+ std::string user_meta_oid() const {
+ return pubsub_user_oid_prefix + user.to_str();
+ }
+
+ // name of the metadata object of a bucket: built from the user, the bucket name and the bucket id
+ std::string bucket_meta_oid(const rgw_bucket& bucket) const {
+ return pubsub_user_oid_prefix + user.to_str() + ".bucket." + bucket.name + "/" + bucket.bucket_id;
+ }
+
+ // name of the metadata object of a subscription: built from the user and the subscription name
+ std::string sub_meta_oid(const string& name) const {
+ return pubsub_user_oid_prefix + user.to_str() + ".sub." + name;
+ }
+
+ // read and decode a metadata object into "data"
+ template <class T>
+ int read(const rgw_raw_obj& obj, T *data, RGWObjVersionTracker *objv_tracker);
+
+ // encode "info" and write it into a metadata object
+ template <class T>
+ int write(const rgw_raw_obj& obj, const T& info, RGWObjVersionTracker *obj_tracker);
+
+ int remove(const rgw_raw_obj& obj, RGWObjVersionTracker *objv_tracker);
+
+ int read_user_topics(rgw_pubsub_user_topics *result, RGWObjVersionTracker *objv_tracker);
+ int write_user_topics(const rgw_pubsub_user_topics& topics, RGWObjVersionTracker *objv_tracker);
+
+public:
+ RGWUserPubSub(RGWRados *_store, const rgw_user& _user) : store(_store),
+ user(_user),
+ obj_ctx(store->svc.sysobj->init_obj_ctx()) {
+ get_user_meta_obj(&user_meta_obj);
+ }
+
+ // the notifications (topics + filters) of a single bucket
+ class Bucket {
+ friend class RGWUserPubSub;
+ RGWUserPubSub *ps;
+ rgw_bucket bucket;
+ rgw_raw_obj bucket_meta_obj;
+
+ // read the list of topics associated with a bucket and populate into result
+ // use version tracker to enforce atomicity between read/write
+ // return 0 on success or if no topic was associated with the bucket, error code otherwise
+ int read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker *objv_tracker);
+ // set the list of topics associated with a bucket
+ // use version tracker to enforce atomicity between read/write
+ // return 0 on success, error code otherwise
+ int write_topics(const rgw_pubsub_bucket_topics& topics, RGWObjVersionTracker *objv_tracker);
+ public:
+ Bucket(RGWUserPubSub *_ps, const rgw_bucket& _bucket) : ps(_ps), bucket(_bucket) {
+ ps->get_bucket_meta_obj(bucket, &bucket_meta_obj);
+ }
+
+ // read the list of topics associated with a bucket and populate into result
+ // return 0 on success or if no topic was associated with the bucket, error code otherwise
+ int get_topics(rgw_pubsub_bucket_topics *result);
+ // adds a topic + filter (event list, and possibly name metadata or tags filters) to a bucket
+ // assigning a notification name is optional (needed for S3 compatible notifications)
+ // if the topic already exists on the bucket, the filter event list may be updated
+ // for S3 compliant notifications the version with: s3_filter and notif_name should be used
+ // return -ENOENT if the topic does not exist
+ // return 0 on success, error code otherwise
+ int create_notification(const string& topic_name, const rgw::notify::EventTypeList& events);
+ int create_notification(const string& topic_name, const rgw::notify::EventTypeList& events, OptionalFilter s3_filter, const std::string& notif_name);
+ // remove a topic and filter from bucket
+ // if the topic does not exist on the bucket it is a no-op (considered success)
+ // return -ENOENT if the topic does not exist
+ // return 0 on success, error code otherwise
+ int remove_notification(const string& topic_name);
+ };
+
+ // base class for subscription
+ class Sub {
+ friend class RGWUserPubSub;
+ protected:
+ RGWUserPubSub* const ps;
+ const std::string sub;
+ rgw_raw_obj sub_meta_obj;
+
+ int read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker *objv_tracker);
+ int write_sub(const rgw_pubsub_sub_config& sub_conf, RGWObjVersionTracker *objv_tracker);
+ int remove_sub(RGWObjVersionTracker *objv_tracker);
+ public:
+ Sub(RGWUserPubSub *_ps, const std::string& _sub) : ps(_ps), sub(_sub) {
+ ps->get_sub_meta_obj(sub, &sub_meta_obj);
+ }
+
+ virtual ~Sub() = default;
+
+ int subscribe(const string& topic_name, const rgw_pubsub_sub_dest& dest, const std::string& s3_id="");
+ int unsubscribe(const string& topic_name);
+ int get_conf(rgw_pubsub_sub_config* result);
+
+ static const int DEFAULT_MAX_EVENTS = 100;
+ // the following virtual methods should only be called on derived classes
+ // the base class implementations assert
+ virtual int list_events(const string& marker, int max_events) {ceph_assert(false);}
+ virtual int remove_event(const string& event_id) {ceph_assert(false);}
+ virtual void dump(Formatter* f) const {ceph_assert(false);}
+ };
+
+ // subscription with templated list of events to support both S3 compliant and Ceph specific events
+ template<typename EventType>
+ class SubWithEvents : public Sub {
+ private:
+ // NOTE(review): presumably filled by list_events() and printed by dump() - their definitions are not part of this header section
+ struct list_events_result {
+ std::string next_marker;
+ bool is_truncated{false};
+ void dump(Formatter *f) const;
+ std::vector<EventType> events;
+ } list;
+
+ public:
+ SubWithEvents(RGWUserPubSub *_ps, const string& _sub) : Sub(_ps, _sub) {}
+
+ virtual ~SubWithEvents() = default;
+
+ int list_events(const string& marker, int max_events) override;
+ int remove_event(const string& event_id) override;
+ void dump(Formatter* f) const override;
+ };
+
+ using BucketRef = std::shared_ptr<Bucket>;
+ using SubRef = std::shared_ptr<Sub>;
+
+ // create a handle for the notifications of a bucket
+ BucketRef get_bucket(const rgw_bucket& bucket) {
+ return std::make_shared<Bucket>(this, bucket);
+ }
+
+ // create a handle for a subscription, without support for the event related methods
+ SubRef get_sub(const string& sub) {
+ return std::make_shared<Sub>(this, sub);
+ }
+
+ // create a handle for a subscription that supports the event related methods
+ // the event type is selected by the stored configuration of the subscription:
+ // Ceph specific events if its s3_id is empty, S3 records otherwise
+ // return nullptr if the configuration of the subscription could not be fetched
+ SubRef get_sub_with_events(const string& sub) {
+ auto tmpsub = Sub(this, sub);
+ rgw_pubsub_sub_config conf;
+ if (tmpsub.get_conf(&conf) < 0) {
+ return nullptr;
+ }
+ if (conf.s3_id.empty()) {
+ return std::make_shared<SubWithEvents<rgw_pubsub_event>>(this, sub);
+ }
+ return std::make_shared<SubWithEvents<rgw_pubsub_s3_record>>(this, sub);
+ }
+
+ // the following helpers populate "obj" with the metadata object (in the zone's log_pool)
+ // of the user, of a bucket and of a subscription
+ void get_user_meta_obj(rgw_raw_obj *obj) const {
+ *obj = rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, user_meta_oid());
+ }
+
+ void get_bucket_meta_obj(const rgw_bucket& bucket, rgw_raw_obj *obj) const {
+ *obj = rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, bucket_meta_oid(bucket));
+ }
+
+ void get_sub_meta_obj(const string& name, rgw_raw_obj *obj) const {
+ *obj = rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sub_meta_oid(name));
+ }
+
+ // get all topics defined for the user and populate them into "result"
+ // return 0 on success or if no topics exist, error code otherwise
+ int get_user_topics(rgw_pubsub_user_topics *result);
+ // get a topic with its subscriptions by its name and populate it into "result"
+ // return -ENOENT if the topic does not exist
+ // return 0 on success, error code otherwise
+ int get_topic(const string& name, rgw_pubsub_topic_subs *result);
+ // get a topic by its name and populate it into "result"
+ // return -ENOENT if the topic does not exist
+ // return 0 on success, error code otherwise
+ int get_topic(const string& name, rgw_pubsub_topic *result);
+ // create a topic with a name only
+ // if the topic already exists it is a no-op (considered success)
+ // return 0 on success, error code otherwise
+ int create_topic(const string& name);
+ // create a topic with push destination information and ARN
+ // if the topic already exists the destination and ARN values may be updated (considered success)
+ // return 0 on success, error code otherwise
+ int create_topic(const string& name, const rgw_pubsub_sub_dest& dest, const std::string& arn, const std::string& opaque_data);
+ // remove a topic according to its name
+ // if the topic does not exist it is a no-op (considered success)
+ // return 0 on success, error code otherwise
+ int remove_topic(const string& name);
+};
+
+// fetch the system object "obj" and decode its content into "result"
+// return 0 on success, the error code of rgw_get_system_obj() if the fetch failed,
+// or -EIO if the content could not be decoded
+template <class T>
+int RGWUserPubSub::read(const rgw_raw_obj& obj, T *result, RGWObjVersionTracker *objv_tracker)
+{
+  bufferlist raw;
+  const int r = rgw_get_system_obj(store, obj_ctx,
+                                   obj.pool, obj.oid,
+                                   raw,
+                                   objv_tracker,
+                                   nullptr, nullptr, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  auto pos = raw.cbegin();
+  try {
+    decode(*result, pos);
+  } catch (const buffer::error&) {
+    // the stored content is malformed
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// encode "info" and store it in the system object "obj"
+// on success the cached entry of "obj" in the object context is invalidated
+// return 0 on success, the error code of rgw_put_system_obj() otherwise
+template <class T>
+int RGWUserPubSub::write(const rgw_raw_obj& obj, const T& info, RGWObjVersionTracker *objv_tracker)
+{
+  bufferlist encoded;
+  encode(info, encoded);
+
+  const int r = rgw_put_system_obj(store, obj.pool, obj.oid,
+                                   encoded, false, objv_tracker,
+                                   real_time());
+  if (r >= 0) {
+    obj_ctx.invalidate(const_cast<rgw_raw_obj&>(obj));
+    return 0;
+  }
+  return r;
+}
+
+#endif
diff --git a/src/rgw/rgw_pubsub_push.cc b/src/rgw/rgw_pubsub_push.cc
new file mode 100644
index 00000000..a1719d9f
--- /dev/null
+++ b/src/rgw/rgw_pubsub_push.cc
@@ -0,0 +1,749 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_pubsub_push.h"
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include "include/buffer_fwd.h"
+#include "common/Formatter.h"
+#include "common/async/completion.h"
+#include "rgw_common.h"
+#include "rgw_data_sync.h"
+#include "rgw_pubsub.h"
+#include "acconfig.h"
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+#include "rgw_amqp.h"
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+#include "rgw_kafka.h"
+#endif
+#include <boost/asio/yield.hpp>
+#include <boost/algorithm/string.hpp>
+#include <functional>
+#include "rgw_perf_counters.h"
+
+using namespace rgw;
+
+// format an event as a JSON string
+// the output is an object section wrapping an array section (both named
+// EventType::json_type_plural) with the encoded event as the single array entry
+template<typename EventType>
+std::string json_format_pubsub_event(const EventType& event) {
+  std::stringstream out;
+  JSONFormatter formatter(false);
+  {
+    Formatter::ObjectSection object_section(formatter, EventType::json_type_plural);
+    {
+      Formatter::ArraySection array_section(formatter, EventType::json_type_plural);
+      encode_json("", event, &formatter);
+    }
+    // the array section is closed here, before the object section
+  }
+  formatter.flush(out);
+  return out.str();
+}
+
+class RGWPubSubHTTPEndpoint : public RGWPubSubEndpoint {
+private:
+ const std::string endpoint;
+ std::string str_ack_level;
+ typedef unsigned ack_level_t;
+ ack_level_t ack_level; // TODO: not used for now
+ bool verify_ssl;
+ static const ack_level_t ACK_LEVEL_ANY = 0;
+ static const ack_level_t ACK_LEVEL_NON_ERROR = 1;
+
+ // PostCR implements async execution of RGWPostHTTPData via coroutine
+ // it is both the HTTP request (RGWPostHTTPData) and the coroutine (RGWSimpleCoroutine) driving it
+ class PostCR : public RGWPostHTTPData, public RGWSimpleCoroutine {
+ private:
+ RGWDataSyncEnv* const sync_env;
+ // buffer handed to RGWPostHTTPData in the constructor
+ bufferlist read_bl;
+ const ack_level_t ack_level;
+
+ public:
+ PostCR(const std::string& _post_data,
+ RGWDataSyncEnv* _sync_env,
+ const std::string& endpoint,
+ ack_level_t _ack_level,
+ bool verify_ssl) :
+ RGWPostHTTPData(_sync_env->cct, "POST", endpoint, &read_bl, verify_ssl),
+ RGWSimpleCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ ack_level (_ack_level) {
+ // ctor also set the data to send
+ set_post_data(_post_data);
+ set_send_length(_post_data.length());
+ }
+
+ // send message to endpoint
+ // return the error of the HTTP manager if the request could not be added, 0 otherwise
+ int send_request() override {
+ init_new_io(this);
+ const auto rc = sync_env->http_manager->add_request(this);
+ if (rc < 0) {
+ return rc;
+ }
+ // the request is counted as pending only after it was added successfully
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending);
+ return 0;
+ }
+
+ // wait for reply
+ // note: only ACK_LEVEL_ANY is reported as success (0)
+ // checking the result code is not implemented yet (see the TODOs), so every other ack level ends up returning -1
+ int request_complete() override {
+ if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending);
+ if (ack_level == ACK_LEVEL_ANY) {
+ return 0;
+ } else if (ack_level == ACK_LEVEL_NON_ERROR) {
+ // TODO check result code to be non-error
+ } else {
+ // TODO: check that result code == ack_level
+ }
+ return -1;
+ }
+ };
+
+public:
+ // parse the endpoint arguments:
+ // "http-ack-level": "any" (default), "non-error", or a number in the range [100, 600)
+ // "verify-ssl": "true" (default) or "false", case insensitive
+ // throws configuration_error for any other value
+ RGWPubSubHTTPEndpoint(const std::string& _endpoint,
+     const RGWHTTPArgs& args) : endpoint(_endpoint) {
+   bool found;
+
+   str_ack_level = args.get("http-ack-level", &found);
+   if (found && str_ack_level != "any") {
+     if (str_ack_level == "non-error") {
+       ack_level = ACK_LEVEL_NON_ERROR;
+     } else {
+       // anything else is expected to be a numeric HTTP status
+       ack_level = std::atoi(str_ack_level.c_str());
+       if (ack_level < 100 || ack_level >= 600) {
+         throw configuration_error("HTTP/S: invalid http-ack-level: " + str_ack_level);
+       }
+     }
+   } else {
+     // "any" is default
+     ack_level = ACK_LEVEL_ANY;
+   }
+
+   auto str_verify_ssl = args.get("verify-ssl", &found);
+   boost::algorithm::to_lower(str_verify_ssl);
+   if (found && str_verify_ssl != "true") {
+     if (str_verify_ssl != "false") {
+       throw configuration_error("HTTP/S: verify-ssl must be true/false, not: " + str_verify_ssl);
+     }
+     verify_ssl = false;
+   } else {
+     // verify server certificate by default
+     verify_ssl = true;
+   }
+ }
+
+ // create a coroutine that posts the JSON formatted (Ceph specific) event to the endpoint
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_event& event, RGWDataSyncEnv* env) override {
+   const auto payload = json_format_pubsub_event(event);
+   return new PostCR(payload, env, endpoint, ack_level, verify_ssl);
+ }
+
+ // create a coroutine that posts the JSON formatted (S3 compliant) record to the endpoint
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_s3_record& record, RGWDataSyncEnv* env) override {
+   const auto payload = json_format_pubsub_event(record);
+   return new PostCR(payload, env, endpoint, ack_level, verify_ssl);
+ }
+
+ // post the JSON formatted (S3 compliant) record to the endpoint and wait for the request to be processed
+ // return the result of RGWHTTP::process()
+ int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_record& record, optional_yield y) override {
+   const auto body = json_format_pubsub_event(record);
+   bufferlist reply;
+   RGWPostHTTPData http_post(cct, "POST", endpoint, &reply, verify_ssl);
+   http_post.set_post_data(body);
+   http_post.set_send_length(body.length());
+   // the request is counted as pending only while it is being processed
+   if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending);
+   const auto rc = RGWHTTP::process(&http_post, y);
+   if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending);
+   // TODO: use read_bl to process return code and handle according to ack level
+   return rc;
+ }
+
+ // human readable, multi-line description of the endpoint configuration
+ std::string to_str() const override {
+   std::string desc("HTTP/S Endpoint");
+   desc.append("\nURI: ").append(endpoint);
+   desc.append("\nAck Level: ").append(str_ack_level);
+   desc.append(verify_ssl ? "\nverify SSL" : "\ndon't verify SSL");
+   return desc;
+ }
+};
+
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+class RGWPubSubAMQPEndpoint : public RGWPubSubEndpoint {
+private:
+ enum class ack_level_t {
+ None,
+ Broker,
+ Routable
+ };
+ CephContext* const cct;
+ const std::string endpoint;
+ const std::string topic;
+ const std::string exchange;
+ amqp::connection_ptr_t conn;
+ ack_level_t ack_level;
+ std::string str_ack_level;
+
+ // fetch the mandatory "amqp-exchange" argument
+ // throws configuration_error if the argument was not provided
+ static std::string get_exchange(const RGWHTTPArgs& args) {
+   bool found;
+   const auto name = args.get("amqp-exchange", &found);
+   if (found) {
+     return name;
+   }
+   throw configuration_error("AMQP: missing amqp-exchange");
+ }
+
+ // NoAckPublishCR implements async amqp publishing via coroutine
+ // This coroutine ends when it sends the message and does not wait for an ack
+ class NoAckPublishCR : public RGWCoroutine {
+ private:
+ const std::string topic;
+ amqp::connection_ptr_t conn;
+ const std::string message;
+
+ public:
+ NoAckPublishCR(CephContext* cct,
+ const std::string& _topic,
+ amqp::connection_ptr_t& _conn,
+ const std::string& _message) :
+ RGWCoroutine(cct),
+ topic(_topic), conn(_conn), message(_message) {}
+
+ // send message to endpoint, without waiting for reply
+ // the coroutine is marked as failed if amqp::publish() returns an error, and as done otherwise
+ int operate() override {
+ reenter(this) {
+ const auto rc = amqp::publish(conn, topic, message);
+ if (rc < 0) {
+ return set_cr_error(rc);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ // AckPublishCR implements async amqp publishing via coroutine
+ // This coroutine ends when an ack is received from the broker
+ // note that it does not wait for an ack from the end client
+ class AckPublishCR : public RGWCoroutine, public RGWIOProvider {
+ private:
+ const std::string topic;
+ amqp::connection_ptr_t conn;
+ const std::string message;
+ [[maybe_unused]] const ack_level_t ack_level; // TODO not used for now
+
+ public:
+ AckPublishCR(CephContext* cct,
+ const std::string& _topic,
+ amqp::connection_ptr_t& _conn,
+ const std::string& _message,
+ ack_level_t _ack_level) :
+ RGWCoroutine(cct),
+ topic(_topic), conn(_conn), message(_message), ack_level(_ack_level) {}
+
+ // send message to endpoint, waiting for reply
+ // request_complete() is registered as the callback that reports the reply
+ int operate() override {
+ reenter(this) {
+ yield {
+ init_new_io(this);
+ const auto rc = amqp::publish_with_confirm(conn,
+ topic,
+ message,
+ std::bind(&AckPublishCR::request_complete, this, std::placeholders::_1));
+ if (rc < 0) {
+ // failed to publish, does not wait for reply
+ return set_cr_error(rc);
+ }
+ // mark as blocked on the amqp answer
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending);
+ io_block();
+ return 0;
+ }
+ // reached when the coroutine is re-entered after the yield above
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+ // callback invoked from the amqp manager thread when ack/nack is received
+ // a non-zero status marks the coroutine as failed
+ void request_complete(int status) {
+ ceph_assert(!is_done());
+ if (status != 0) {
+ // server replied with a nack
+ set_cr_error(status);
+ }
+ io_complete();
+ if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending);
+ }
+
+ // TODO: why are these mandatory in RGWIOProvider?
+ // no user info is kept: the setter ignores its argument and the getter returns nullptr
+ void set_io_user_info(void *_user_info) override {
+ }
+
+ void *get_io_user_info() override {
+ return nullptr;
+ }
+ };
+
+public:
+ // connect to the AMQP endpoint (using the mandatory "amqp-exchange" argument) and parse the ack level
+ // "amqp-ack-level": "broker" (default), "none" or "routable"
+ // throws configuration_error if the exchange is missing, the connection could not be created,
+ // or the ack level is invalid
+ RGWPubSubAMQPEndpoint(const std::string& _endpoint,
+     const std::string& _topic,
+     const RGWHTTPArgs& args,
+     CephContext* _cct) :
+       cct(_cct),
+       endpoint(_endpoint),
+       topic(_topic),
+       exchange(get_exchange(args)),
+       conn(amqp::connect(endpoint, exchange)) {
+   if (!conn) {
+     throw configuration_error("AMQP: failed to create connection to: " + endpoint);
+   }
+   // get ack level
+   bool found;
+   str_ack_level = args.get("amqp-ack-level", &found);
+   if (found && str_ack_level != "broker") {
+     if (str_ack_level == "none") {
+       ack_level = ack_level_t::None;
+     } else if (str_ack_level == "routable") {
+       ack_level = ack_level_t::Routable;
+     } else {
+       throw configuration_error("AMQP: invalid amqp-ack-level: " + str_ack_level);
+     }
+   } else {
+     // "broker" is default
+     ack_level = ack_level_t::Broker;
+   }
+ }
+
+ // create a coroutine that publishes the JSON formatted (Ceph specific) event
+ // the coroutine waits for an ack unless the ack level is "none"
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_event& event, RGWDataSyncEnv* env) override {
+   ceph_assert(conn);
+   if (ack_level != ack_level_t::None) {
+     // TODO: currently broker and routable are the same - this will require different flags
+     // but the same mechanism
+     return new AckPublishCR(cct, topic, conn, json_format_pubsub_event(event), ack_level);
+   }
+   return new NoAckPublishCR(cct, topic, conn, json_format_pubsub_event(event));
+ }
+
+ // create a coroutine that publishes the JSON formatted (S3 compliant) record
+ // the coroutine waits for an ack unless the ack level is "none"
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_s3_record& record, RGWDataSyncEnv* env) override {
+   ceph_assert(conn);
+   if (ack_level != ack_level_t::None) {
+     // TODO: currently broker and routable are the same - this will require different flags
+     // but the same mechanism
+     return new AckPublishCR(cct, topic, conn, json_format_pubsub_event(record), ack_level);
+   }
+   return new NoAckPublishCR(cct, topic, conn, json_format_pubsub_event(record));
+ }
+
+ // this allows waiting until "finish()" is called from a different thread
+ // waiting could be blocking the waiting thread or yielding, depending
+ // on compilation flag support and whether the optional_yield is set
+ class Waiter {
+   using Signature = void(boost::system::error_code);
+   using Completion = ceph::async::Completion<Signature>;
+   // set only when waiting in a coroutine, posted once the result is available
+   std::unique_ptr<Completion> completion = nullptr;
+   // the result passed to finish(), meaningful only once "done" is set
+   int ret = 0;
+
+   mutable std::atomic<bool> done = false;
+   mutable std::mutex lock;
+   mutable std::condition_variable cond;
+
+   // register a completion for the calling coroutine and suspend it until the completion is posted
+   template <typename ExecutionContext, typename CompletionToken>
+   auto async_wait(ExecutionContext& ctx, CompletionToken&& token) {
+     boost::asio::async_completion<CompletionToken, Signature> init(token);
+     auto& handler = init.completion_handler;
+     {
+       std::unique_lock l{lock};
+       completion = Completion::create(ctx.get_executor(), std::move(handler));
+       if (done) {
+         // finish() was called after wait() tested "done" but before the completion was registered
+         // finish() did not see the completion, so it has to be posted from here,
+         // otherwise the coroutine would never be resumed
+         auto pending = std::move(completion);
+         boost::system::error_code ec(-ret, boost::system::system_category());
+         l.unlock();
+         Completion::post(std::move(pending), ec);
+       }
+     }
+     return init.result.get();
+   }
+
+ public:
+   // wait until finish() is called and return the value that was passed to it
+   // yield if "y" is set (and coroutines are supported), block the calling thread otherwise
+   int wait(optional_yield y) {
+     if (done) {
+       return ret;
+     }
+#ifdef HAVE_BOOST_CONTEXT
+     if (y) {
+       auto& io_ctx = y.get_io_context();
+       auto& yield_ctx = y.get_yield_context();
+       boost::system::error_code ec;
+       async_wait(io_ctx, yield_ctx[ec]);
+       // finish() stores the negated result in the error code
+       return -ec.value();
+     }
+#endif
+     std::unique_lock l(lock);
+     cond.wait(l, [this]{return (done==true);});
+     return ret;
+   }
+
+   // publish the result "r" and wake up the waiter
+   void finish(int r) {
+     std::unique_lock l{lock};
+     ret = r;
+     done = true;
+     if (completion) {
+       // take the completion and release the lock before posting it:
+       // once posted, the waiting coroutine may resume and destroy this object
+       // while this function is still running
+       auto pending = std::move(completion);
+       boost::system::error_code ec(-ret, boost::system::system_category());
+       l.unlock();
+       Completion::post(std::move(pending), ec);
+     } else {
+       cond.notify_all();
+     }
+   }
+ };
+
+ // publish the JSON formatted (S3 compliant) record
+ // with ack level "none" return the result of the publish call without waiting
+ // otherwise wait (blocking or yielding, according to "y") for the confirmation and return its status
+ int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_record& record, optional_yield y) override {
+   ceph_assert(conn);
+   if (ack_level == ack_level_t::None) {
+     return amqp::publish(conn, topic, json_format_pubsub_event(record));
+   }
+   // TODO: currently broker and routable are the same - this will require different flags but the same mechanism
+   // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine
+   auto waiter = std::make_unique<Waiter>();
+   const auto rc = amqp::publish_with_confirm(conn,
+       topic,
+       json_format_pubsub_event(record),
+       [w = waiter.get()](int status) { w->finish(status); });
+   if (rc < 0) {
+     // failed to publish, does not wait for reply
+     return rc;
+   }
+   return waiter->wait(y);
+ }
+
+ // human readable, multi-line description of the endpoint configuration
+ std::string to_str() const override {
+   std::string desc("AMQP(0.9.1) Endpoint");
+   desc.append("\nURI: ").append(endpoint);
+   desc.append("\nTopic: ").append(topic);
+   desc.append("\nExchange: ").append(exchange);
+   desc.append("\nAck Level: ").append(str_ack_level);
+   return desc;
+ }
+};
+
+static const std::string AMQP_0_9_1("0-9-1");
+static const std::string AMQP_1_0("1-0");
+static const std::string AMQP_SCHEMA("amqp");
+#endif // ifdef WITH_RADOSGW_AMQP_ENDPOINT
+
+
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+class RGWPubSubKafkaEndpoint : public RGWPubSubEndpoint {
+private:
+ enum class ack_level_t {
+ None,
+ Broker,
+ };
+ CephContext* const cct;
+ const std::string topic;
+ kafka::connection_ptr_t conn;
+ const ack_level_t ack_level;
+
+ // parse the optional "verify-ssl" argument: "true" (default) or "false", case insensitive
+ // throws configuration_error for any other value
+ static bool get_verify_ssl(const RGWHTTPArgs& args) {
+   bool found;
+   auto value = args.get("verify-ssl", &found);
+   if (!found) {
+     // verify server certificate by default
+     return true;
+   }
+   boost::algorithm::to_lower(value);
+   if (value == "true" || value == "false") {
+     return value == "true";
+   }
+   throw configuration_error("'verify-ssl' must be true/false, not: " + value);
+ }
+
+ // parse the optional "use-ssl" argument: "true" or "false" (default), case insensitive
+ // throws configuration_error for any other value
+ static bool get_use_ssl(const RGWHTTPArgs& args) {
+   bool found;
+   auto value = args.get("use-ssl", &found);
+   if (!found) {
+     // by default ssl not used
+     return false;
+   }
+   boost::algorithm::to_lower(value);
+   if (value == "true" || value == "false") {
+     return value == "true";
+   }
+   throw configuration_error("'use-ssl' must be true/false, not: " + value);
+ }
+
+ // parse the optional "kafka-ack-level" argument: "broker" (default) or "none"
+ // throws configuration_error for any other value
+ static ack_level_t get_ack_level(const RGWHTTPArgs& args) {
+   bool found;
+   const auto level = args.get("kafka-ack-level", &found);
+   if (found && level != "broker") {
+     if (level != "none") {
+       throw configuration_error("Kafka: invalid kafka-ack-level: " + level);
+     }
+     return ack_level_t::None;
+   }
+   // "broker" is default
+   return ack_level_t::Broker;
+ }
+
+ // NoAckPublishCR implements async kafka publishing via coroutine
+ // This coroutine ends when it sends the message and does not wait for an ack
+ class NoAckPublishCR : public RGWCoroutine {
+ private:
+ const std::string topic;
+ kafka::connection_ptr_t conn;
+ const std::string message;
+
+ public:
+ NoAckPublishCR(CephContext* cct,
+ const std::string& _topic,
+ kafka::connection_ptr_t& _conn,
+ const std::string& _message) :
+ RGWCoroutine(cct),
+ topic(_topic), conn(_conn), message(_message) {}
+
+ // send message to endpoint, without waiting for reply
+ // the coroutine is marked as failed if kafka::publish() returns an error, and as done otherwise
+ int operate() override {
+ reenter(this) {
+ const auto rc = kafka::publish(conn, topic, message);
+ if (rc < 0) {
+ return set_cr_error(rc);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ // AckPublishCR implements async kafka publishing via coroutine
+ // This coroutine ends when an ack is received from the broker
+ // note that it does not wait for an ack from the end client
+ class AckPublishCR : public RGWCoroutine, public RGWIOProvider {
+ private:
+ const std::string topic;
+ kafka::connection_ptr_t conn;
+ const std::string message;
+
+ public:
+ AckPublishCR(CephContext* cct,
+ const std::string& _topic,
+ kafka::connection_ptr_t& _conn,
+ const std::string& _message) :
+ RGWCoroutine(cct),
+ topic(_topic), conn(_conn), message(_message) {}
+
+ // send message to endpoint, waiting for reply
+ // request_complete() is registered as the callback that reports the reply
+ int operate() override {
+ reenter(this) {
+ yield {
+ init_new_io(this);
+ const auto rc = kafka::publish_with_confirm(conn,
+ topic,
+ message,
+ std::bind(&AckPublishCR::request_complete, this, std::placeholders::_1));
+ if (rc < 0) {
+ // failed to publish, does not wait for reply
+ return set_cr_error(rc);
+ }
+ // mark as blocked on the kafka answer
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending);
+ io_block();
+ return 0;
+ }
+ // reached when the coroutine is re-entered after the yield above
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+ // callback invoked from the kafka manager thread when ack/nack is received
+ // a non-zero status marks the coroutine as failed
+ void request_complete(int status) {
+ ceph_assert(!is_done());
+ if (status != 0) {
+ // server replied with a nack
+ set_cr_error(status);
+ }
+ io_complete();
+ if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending);
+ }
+
+ // TODO: why are these mandatory in RGWIOProvider?
+ // no user info is kept: the setter ignores its argument and the getter returns nullptr
+ void set_io_user_info(void *_user_info) override {
+ }
+
+ void *get_io_user_info() override {
+ return nullptr;
+ }
+ };
+
+public:
+ // connect to the kafka endpoint according to the "use-ssl", "verify-ssl" and "ca-location" arguments
+ // and parse the "kafka-ack-level" argument
+ // throws configuration_error if an argument is invalid or if the connection could not be created
+ RGWPubSubKafkaEndpoint(const std::string& _endpoint,
+ const std::string& _topic,
+ const RGWHTTPArgs& args,
+ CephContext* _cct) :
+ cct(_cct),
+ topic(_topic),
+ conn(kafka::connect(_endpoint, get_use_ssl(args), get_verify_ssl(args), args.get_optional("ca-location"))) ,
+ ack_level(get_ack_level(args)) {
+ if (!conn) {
+ throw configuration_error("Kafka: failed to create connection to: " + _endpoint);
+ }
+ }
+
+ // create a coroutine that publishes the JSON formatted (Ceph specific) event
+ // the coroutine waits for an ack unless the ack level is "none"
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_event& event, RGWDataSyncEnv* env) override {
+   ceph_assert(conn);
+   if (ack_level != ack_level_t::None) {
+     return new AckPublishCR(cct, topic, conn, json_format_pubsub_event(event));
+   }
+   return new NoAckPublishCR(cct, topic, conn, json_format_pubsub_event(event));
+ }
+
+ // create a coroutine that publishes the JSON formatted (S3 compliant) record
+ // the coroutine waits for an ack unless the ack level is "none"
+ RGWCoroutine* send_to_completion_async(const rgw_pubsub_s3_record& record, RGWDataSyncEnv* env) override {
+   ceph_assert(conn);
+   if (ack_level != ack_level_t::None) {
+     return new AckPublishCR(cct, topic, conn, json_format_pubsub_event(record));
+   }
+   return new NoAckPublishCR(cct, topic, conn, json_format_pubsub_event(record));
+ }
+
+ // this allows waiting until "finish()" is called from a different thread
+ // waiting could be blocking the waiting thread or yielding, depending
+ // on compilation flag support and whether the optional_yield is set
+ class Waiter {
+   using Signature = void(boost::system::error_code);
+   using Completion = ceph::async::Completion<Signature>;
+   // set only when waiting in a coroutine, posted once the result is available
+   std::unique_ptr<Completion> completion = nullptr;
+   // the result passed to finish(), meaningful only once "done" is set
+   int ret = 0;
+
+   mutable std::atomic<bool> done = false;
+   mutable std::mutex lock;
+   mutable std::condition_variable cond;
+
+   // register a completion for the calling coroutine and suspend it until the completion is posted
+   template <typename ExecutionContext, typename CompletionToken>
+   auto async_wait(ExecutionContext& ctx, CompletionToken&& token) {
+     boost::asio::async_completion<CompletionToken, Signature> init(token);
+     auto& handler = init.completion_handler;
+     {
+       std::unique_lock l{lock};
+       completion = Completion::create(ctx.get_executor(), std::move(handler));
+       if (done) {
+         // finish() was called after wait() tested "done" but before the completion was registered
+         // finish() did not see the completion, so it has to be posted from here,
+         // otherwise the coroutine would never be resumed
+         auto pending = std::move(completion);
+         boost::system::error_code ec(-ret, boost::system::system_category());
+         l.unlock();
+         Completion::post(std::move(pending), ec);
+       }
+     }
+     return init.result.get();
+   }
+
+ public:
+   // wait until finish() is called and return the value that was passed to it
+   // yield if "y" is set (and coroutines are supported), block the calling thread otherwise
+   int wait(optional_yield y) {
+     if (done) {
+       return ret;
+     }
+#ifdef HAVE_BOOST_CONTEXT
+     if (y) {
+       auto& io_ctx = y.get_io_context();
+       auto& yield_ctx = y.get_yield_context();
+       boost::system::error_code ec;
+       async_wait(io_ctx, yield_ctx[ec]);
+       // finish() stores the negated result in the error code
+       return -ec.value();
+     }
+#endif
+     std::unique_lock l(lock);
+     cond.wait(l, [this]{return (done==true);});
+     return ret;
+   }
+
+   // publish the result "r" and wake up the waiter
+   void finish(int r) {
+     std::unique_lock l{lock};
+     ret = r;
+     done = true;
+     if (completion) {
+       // take the completion and release the lock before posting it:
+       // once posted, the waiting coroutine may resume and destroy this object
+       // while this function is still running
+       auto pending = std::move(completion);
+       boost::system::error_code ec(-ret, boost::system::system_category());
+       l.unlock();
+       Completion::post(std::move(pending), ec);
+     } else {
+       cond.notify_all();
+     }
+   }
+ };
+
+ // publish the JSON formatted (S3 compliant) record
+ // with ack level "none" return the result of the publish call without waiting
+ // otherwise wait (blocking or yielding, according to "y") for the confirmation and return its status
+ int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_record& record, optional_yield y) override {
+   ceph_assert(conn);
+   if (ack_level == ack_level_t::None) {
+     return kafka::publish(conn, topic, json_format_pubsub_event(record));
+   }
+   // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine
+   auto waiter = std::make_unique<Waiter>();
+   const auto rc = kafka::publish_with_confirm(conn,
+       topic,
+       json_format_pubsub_event(record),
+       [w = waiter.get()](int status) { w->finish(status); });
+   if (rc < 0) {
+     // failed to publish, does not wait for reply
+     return rc;
+   }
+   return waiter->wait(y);
+ }
+
+ // human readable description of the endpoint: the connection details followed by the topic
+ std::string to_str() const override {
+   std::string desc("Kafka Endpoint");
+   desc.append(kafka::to_string(conn));
+   desc.append("\nTopic: ").append(topic);
+   return desc;
+ }
+};
+
+static const std::string KAFKA_SCHEMA("kafka");
+#endif // ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+
+static const std::string WEBHOOK_SCHEMA("webhook");
+static const std::string UNKNOWN_SCHEMA("unknown");
+static const std::string NO_SCHEMA("");
+
+// map the schema of an endpoint URI (the part before the first ':') to a known schema name:
+// - empty endpoint: NO_SCHEMA
+// - "http"/"https": WEBHOOK_SCHEMA
+// - "amqp"/"kafka": AMQP_SCHEMA/KAFKA_SCHEMA, only when the matching support was compiled in
+// - anything else (including an endpoint without ':'): UNKNOWN_SCHEMA
+const std::string& get_schema(const std::string& endpoint) {
+  if (endpoint.empty()) {
+    return NO_SCHEMA;
+  }
+  const auto colon = endpoint.find(':');
+  if (colon == std::string::npos) {
+    return UNKNOWN_SCHEMA;
+  }
+  const std::string prefix(endpoint, 0, colon);
+  if (prefix == "http" || prefix == "https") {
+    return WEBHOOK_SCHEMA;
+  }
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+  if (prefix == "amqp") {
+    return AMQP_SCHEMA;
+  }
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+  if (prefix == "kafka") {
+    return KAFKA_SCHEMA;
+  }
+#endif
+  return UNKNOWN_SCHEMA;
+}
+
+// factory method for the notification endpoints
+// the endpoint type is selected by the schema of the "endpoint" URI:
+// - http/https: HTTP endpoint
+// - amqp: AMQP endpoint (when compiled in), only version 0-9-1 (the default "amqp-version") is supported
+// - kafka: Kafka endpoint (when compiled in)
+// throws configuration_error if the schema or the AMQP version is unknown or not supported,
+// or if the selected endpoint rejects its arguments
+RGWPubSubEndpoint::Ptr RGWPubSubEndpoint::create(const std::string& endpoint,
+    const std::string& topic,
+    const RGWHTTPArgs& args,
+    CephContext* cct) {
+  const auto& schema = get_schema(endpoint);
+  if (schema == WEBHOOK_SCHEMA) {
+    return Ptr(new RGWPubSubHTTPEndpoint(endpoint, args));
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+  } else if (schema == AMQP_SCHEMA) {
+    bool exists;
+    std::string version = args.get("amqp-version", &exists);
+    if (!exists) {
+      version = AMQP_0_9_1;
+    }
+    if (version == AMQP_0_9_1) {
+      return Ptr(new RGWPubSubAMQPEndpoint(endpoint, topic, args, cct));
+    } else if (version == AMQP_1_0) {
+      throw configuration_error("AMQP: v1.0 not supported");
+    } else {
+      throw configuration_error("AMQP: unknown version: " + version);
+    }
+  } else if (endpoint.compare(0, 6, "amqps:") == 0) {
+    // get_schema() reports "amqps" as an unknown schema, so it cannot be detected by comparing
+    // "schema" - the URI itself is tested in order to report the more specific error
+    throw configuration_error("AMQP: ssl not supported");
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+  } else if (schema == KAFKA_SCHEMA) {
+    return Ptr(new RGWPubSubKafkaEndpoint(endpoint, topic, args, cct));
+#endif
+  }
+
+  throw configuration_error("unknown schema in: " + endpoint);
+}
+
+
diff --git a/src/rgw/rgw_pubsub_push.h b/src/rgw/rgw_pubsub_push.h
new file mode 100644
index 00000000..8cfdeb5f
--- /dev/null
+++ b/src/rgw/rgw_pubsub_push.h
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+
+#include <string>
+#include <memory>
+#include <stdexcept>
+#include "include/buffer_fwd.h"
+#include "common/async/yield_context.h"
+
+// TODO the env should be used as a template parameter to differentiate the source that triggers the pushes
+class RGWDataSyncEnv;
+class RGWCoroutine;
+class RGWHTTPArgs;
+class CephContext;
+struct rgw_pubsub_event;
+struct rgw_pubsub_s3_record;
+
+// base class of the notification endpoints - all endpoint types should derive from it
+class RGWPubSubEndpoint {
+public:
+ RGWPubSubEndpoint() = default;
+ // endpoint should not be copied
+ RGWPubSubEndpoint(const RGWPubSubEndpoint&) = delete;
+ const RGWPubSubEndpoint& operator=(const RGWPubSubEndpoint&) = delete;
+
+ typedef std::unique_ptr<RGWPubSubEndpoint> Ptr;
+
+ // factory method for the actual notification endpoint
+ // derived class specific arguments are passed in http args format
+ // may throw a configuration_error if creation fails
+ static Ptr create(const std::string& endpoint, const std::string& topic, const RGWHTTPArgs& args, CephContext *cct=nullptr);
+
+ // this method is used in order to send notification (Ceph specific) and wait for completion
+ // in async manner via a coroutine when invoked in the data sync environment
+ virtual RGWCoroutine* send_to_completion_async(const rgw_pubsub_event& event, RGWDataSyncEnv* env) = 0;
+
+ // this method is used in order to send notification (S3 compliant) and wait for completion
+ // in async manner via a coroutine when invoked in the data sync environment
+ virtual RGWCoroutine* send_to_completion_async(const rgw_pubsub_s3_record& record, RGWDataSyncEnv* env) = 0;
+
+ // this method is used in order to send notification (S3 compliant) and wait for completion
+ // in async manner via a coroutine when invoked in the frontend environment
+ virtual int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_record& record, optional_yield y) = 0;
+
+ // present as string
+ // the default implementation returns an empty string
+ virtual std::string to_str() const { return ""; }
+
+ virtual ~RGWPubSubEndpoint() = default;
+
+ // exception object for configuration error
+ // the message is prefixed with "pubsub endpoint configuration error: "
+ struct configuration_error : public std::logic_error {
+ configuration_error(const std::string& what_arg) :
+ std::logic_error("pubsub endpoint configuration error: " + what_arg) {}
+ };
+};
+
diff --git a/src/rgw/rgw_putobj.cc b/src/rgw/rgw_putobj.cc
new file mode 100644
index 00000000..39410972
--- /dev/null
+++ b/src/rgw/rgw_putobj.cc
@@ -0,0 +1,99 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_putobj.h"
+
+namespace rgw::putobj {
+
+int ChunkProcessor::process(bufferlist&& data, uint64_t offset)
+{
+  // 'offset' is where 'data' starts; the bytes still buffered in 'chunk' sit
+  // immediately before it, so the next downstream write begins at 'write_pos'
+  ceph_assert(offset >= chunk.length());
+  uint64_t write_pos = offset - chunk.length();
+  if (data.length() != 0) {
+    // take over the new bytes, then emit every complete chunk
+    chunk.claim_append(data);
+    for (; chunk.length() >= chunk_size; write_pos += chunk_size) {
+      bufferlist full;
+      chunk.splice(0, chunk_size, &full);
+      const int ret = Pipe::process(std::move(full), write_pos);
+      if (ret < 0) {
+        return ret;
+      }
+    }
+    return 0;
+  }
+
+  // empty input is a flush request: push out the partial chunk (if any) ...
+  if (chunk.length() > 0) {
+    const int ret = Pipe::process(std::move(chunk), write_pos);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+  // ... and then forward the flush itself at the caller's offset
+  return Pipe::process({}, offset);
+}
+
+
+int StripeProcessor::process(bufferlist&& data, uint64_t offset) // split incoming data at stripe boundaries
+{
+ ceph_assert(offset >= bounds.first);
+ // offsets passed downstream are relative to the start of the current stripe
+ const bool flush = (data.length() == 0);
+ if (flush) {
+ return Pipe::process({}, offset - bounds.first);
+ }
+ // room left in the current stripe, counted from 'offset'
+ auto max = bounds.second - offset;
+ while (data.length() > max) { // data does not fit in what is left of the stripe
+ if (max > 0) {
+ bufferlist bl;
+ data.splice(0, max, &bl);
+
+ int r = Pipe::process(std::move(bl), offset - bounds.first);
+ if (r < 0) {
+ return r;
+ }
+ offset += max;
+ }
+
+ // flush the current chunk
+ int r = Pipe::process({}, offset - bounds.first);
+ if (r < 0) {
+ return r;
+ }
+ // generate the next stripe
+ uint64_t stripe_size;
+ r = gen->next(offset, &stripe_size);
+ if (r < 0) {
+ return r;
+ }
+ ceph_assert(stripe_size > 0);
+ // the new stripe begins at the current offset
+ bounds.first = offset;
+ bounds.second = offset + stripe_size;
+
+ max = stripe_size;
+ }
+
+ if (data.length() == 0) { // don't flush the chunk here
+ return 0;
+ }
+ return Pipe::process(std::move(data), offset - bounds.first);
+}
+
+} // namespace rgw::putobj
diff --git a/src/rgw/rgw_putobj.h b/src/rgw/rgw_putobj.h
new file mode 100644
index 00000000..367bc5c0
--- /dev/null
+++ b/src/rgw/rgw_putobj.h
@@ -0,0 +1,79 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "include/buffer.h"
+
+namespace rgw::putobj {
+
+// minimal interface for a stage that consumes streamed object data
+class DataProcessor {
+ public:
+  virtual ~DataProcessor() = default;
+
+  // take the whole bufferlist, which belongs at the given object offset.
+  // passing an empty bufferlist asks the stage to flush whatever it has
+  // buffered; the flush does not wait for completions
+  virtual int process(bufferlist&& data, uint64_t offset) = 0;
+};
+
+// a processor that forwards to another one; the base for composable stages
+class Pipe : public DataProcessor {
+  DataProcessor *next; // downstream stage; never deleted by this class
+ public:
+  explicit Pipe(DataProcessor *next) : next{next} {}
+  // hand the data, untouched, to the downstream stage
+  int process(bufferlist&& data, uint64_t offset) override {
+    DataProcessor& downstream = *next;
+    return downstream.process(std::move(data), offset);
+  }
+};
+
+// pipe that buffers its input and writes to the next processor in chunk_size pieces
+class ChunkProcessor : public Pipe {
+ uint64_t chunk_size; // size of every full chunk passed downstream
+ bufferlist chunk; // leftover bytes from the last call to process()
+ public:
+ ChunkProcessor(DataProcessor *next, uint64_t chunk_size)
+ : Pipe(next), chunk_size(chunk_size)
+ {}
+ // NOTE(review): process() loops while chunk.length() >= chunk_size, so chunk_size == 0 looks unsafe - confirm it is always reset before use
+ int process(bufferlist&& data, uint64_t offset) override;
+};
+
+
+// callback interface that StripeProcessor uses to obtain the following stripe
+class StripeGenerator {
+ public:
+  virtual ~StripeGenerator() = default;
+  // start the stripe at 'offset'; report its size via 'stripe_size', < 0 on error
+  virtual int next(uint64_t offset, uint64_t *stripe_size) = 0;
+};
+
+// pipe that respects stripe boundaries and restarts each stripe at offset 0
+class StripeProcessor : public Pipe {
+ StripeGenerator *gen; // asked for the next stripe's size when the current one is full
+ std::pair<uint64_t, uint64_t> bounds; // bounds of current stripe
+ public:
+ StripeProcessor(DataProcessor *next, StripeGenerator *gen,
+ uint64_t first_stripe_size)
+ : Pipe(next), gen(gen), bounds(0, first_stripe_size) // first stripe covers [0, first_stripe_size)
+ {}
+ // data_offset must not be below the start of the current stripe (asserted in process())
+ int process(bufferlist&& data, uint64_t data_offset) override;
+};
+
+} // namespace rgw::putobj
diff --git a/src/rgw/rgw_putobj_processor.cc b/src/rgw/rgw_putobj_processor.cc
new file mode 100644
index 00000000..3de30a82
--- /dev/null
+++ b/src/rgw/rgw_putobj_processor.cc
@@ -0,0 +1,670 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_aio.h"
+#include "rgw_putobj_processor.h"
+#include "rgw_multi.h"
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw::putobj {
+
+int HeadObjectProcessor::process(bufferlist&& data, uint64_t logical_offset) // logical_offset is not read; data_offset tracks the position
+{
+ const bool flush = (data.length() == 0);
+
+ // capture the first chunk for special handling
+ if (data_offset < head_chunk_size || data_offset == 0) {
+ if (flush) {
+ // flush partial chunk
+ return process_first_chunk(std::move(head_data), &processor);
+ }
+ // move as much of 'data' into head_data as the head chunk still has room for
+ auto remaining = head_chunk_size - data_offset;
+ auto count = std::min<uint64_t>(data.length(), remaining);
+ data.splice(0, count, &head_data);
+ data_offset += count;
+
+ if (data_offset == head_chunk_size) {
+ // process the first complete chunk
+ ceph_assert(head_data.length() == head_chunk_size);
+ int r = process_first_chunk(std::move(head_data), &processor);
+ if (r < 0) {
+ return r;
+ }
+ }
+ if (data.length() == 0) { // avoid flushing stripe processor
+ return 0;
+ }
+ }
+ ceph_assert(processor); // process_first_chunk() must initialize
+
+ // send everything else through the processor
+ auto write_offset = data_offset; // offset of this write; data_offset then advances past it
+ data_offset += data.length();
+ return processor->process(std::move(data), write_offset);
+}
+
+
+static int process_completed(const AioResultList& completed, RawObjSet *written)
+{
+  int first_error = 0; // first failing result; successful writes go into 'written'
+  for (const auto& entry : completed) {
+    if (entry.result >= 0) {
+      written->insert(entry.obj.get_ref().obj);
+    } else if (first_error == 0) { // later errors do not replace the first
+      first_error = entry.result;
+    }
+  }
+  return first_error;
+}
+
+int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj) // make raw_obj the target of subsequent writes
+{
+ stripe_obj = store->svc.rados->obj(raw_obj);
+ return stripe_obj.open(); // the result of open() is returned to the caller unchanged
+}
+
+int RadosWriter::process(bufferlist&& bl, uint64_t offset)
+{
+  bufferlist payload = std::move(bl);
+  const uint64_t cost = payload.length(); // byte count, handed to aio as the cost
+  if (cost == 0) {
+    return 0; // no empty writes, use aio directly for creates
+  }
+  // offset 0 uses write_full(); any other offset is a plain write()
+  librados::ObjectWriteOperation op;
+  if (offset != 0) {
+    op.write(offset, payload);
+  } else {
+    op.write_full(payload);
+  }
+  constexpr uint64_t unused_id = 0;
+  return process_completed(aio->submit(stripe_obj, &op, cost, unused_id), &written);
+}
+
+int RadosWriter::write_exclusive(const bufferlist& data)
+{
+  // build an exclusive create followed by a full write of 'data'
+  librados::ObjectWriteOperation op;
+  op.create(true);
+  op.write_full(data);
+  const uint64_t cost = data.length();
+  constexpr uint64_t unused_id = 0;
+  auto results = aio->submit(stripe_obj, &op, cost, unused_id);
+  // drain outstanding aio and fold those completions into the same list
+  auto drained = aio->drain();
+  results.splice(results.end(), drained);
+  return process_completed(results, &written);
+}
+
+int RadosWriter::drain() // wait on outstanding aio completions and record their results
+{
+ return process_completed(aio->drain(), &written); // 0, or the first error among the completions
+}
+
+RadosWriter::~RadosWriter() // deletes every object still listed in 'written' (see clear_written())
+{
+ // wait on any outstanding aio completions
+ process_completed(aio->drain(), &written);
+
+ bool need_to_remove_head = false;
+ std::optional<rgw_raw_obj> raw_head; // raw form of head_obj; stays empty when head_obj is empty
+ if (!head_obj.empty()) {
+ raw_head.emplace();
+ store->obj_to_raw(bucket_info.placement_rule, head_obj, &*raw_head);
+ }
+
+ /**
+ * We should delete the object in the "multipart" namespace to avoid a race condition.
+ * The race is caused by the fact that the multipart object is the gatekeeper of a multipart
+ * upload: when it is deleted, a second upload would start with the same suffix("2/"), therefore objects
+ * written by the second upload may be deleted by the first upload.
+ * Details are described in #11749.
+ *
+ * The above comment still stands, but instead of searching for a specific object in the multipart
+ * namespace, we just make sure that we remove the object that is marked as the head object after
+ * we remove all the other raw objects. Note that we use a different call to remove the head object,
+ * as this one needs to go via the bucket index prepare/complete 2-phase commit scheme.
+ */
+ for (const auto& obj : written) {
+ if (raw_head && obj == *raw_head) {
+ ldout(store->ctx(), 5) << "NOTE: we should not process the head object (" << obj << ") here" << dendl;
+ need_to_remove_head = true;
+ continue;
+ }
+
+ int r = store->delete_raw_obj(obj);
+ if (r < 0 && r != -ENOENT) { // an already-missing object is not reported
+ ldout(store->ctx(), 5) << "WARNING: failed to remove obj (" << obj << "), leaked" << dendl;
+ }
+ }
+ // the head object is removed last, and through delete_obj() rather than delete_raw_obj()
+ if (need_to_remove_head) {
+ ldout(store->ctx(), 5) << "NOTE: we are going to process the head obj (" << *raw_head << ")" << dendl;
+ int r = store->delete_obj(obj_ctx, bucket_info, head_obj, 0, 0);
+ if (r < 0 && r != -ENOENT) {
+ ldout(store->ctx(), 0) << "WARNING: failed to remove obj (" << *raw_head << "), leaked" << dendl;
+ }
+ }
+}
+
+
+// StripeGenerator hook: move on to the stripe that starts at 'offset'
+int ManifestObjectProcessor::next(uint64_t offset, uint64_t *pstripe_size)
+{
+  // step the manifest generator forward to the new stripe
+  if (const int ret = manifest_gen.create_next(offset); ret < 0) {
+    return ret;
+  }
+  rgw_raw_obj next_obj = manifest_gen.get_cur_obj(store);
+
+  // look up the max chunk size for the pool of the new stripe object
+  uint64_t max_chunk = 0;
+  if (const int ret = store->get_max_chunk_size(next_obj.pool, &max_chunk);
+      ret < 0) {
+    return ret;
+  }
+  // point the writer at the new stripe object
+  if (const int ret = writer.set_stripe_obj(next_obj); ret < 0) {
+    return ret;
+  }
+
+  // replace the chunk processor and report the new stripe's max size
+  chunk = ChunkProcessor(&writer, max_chunk);
+  *pstripe_size = manifest_gen.cur_stripe_max_size();
+  return 0;
+}
+
+
+int AtomicObjectProcessor::process_first_chunk(bufferlist&& data,
+ DataProcessor **processor)
+{
+ first_chunk = std::move(data); // held in memory; complete() passes it as obj_op.meta.data
+ *processor = &stripe; // all later data goes through the stripe processor
+ return 0;
+}
+
+int AtomicObjectProcessor::prepare() // set up the manifest, the writer and the chunk/stripe processors
+{
+ uint64_t max_head_chunk_size;
+ uint64_t head_max_size;
+ uint64_t chunk_size = 0;
+ uint64_t alignment;
+ rgw_pool head_pool;
+
+ if (!store->get_obj_data_pool(bucket_info.placement_rule, head_obj, &head_pool)) {
+ return -EIO;
+ }
+
+ int r = store->get_max_chunk_size(head_pool, &max_head_chunk_size, &alignment);
+ if (r < 0) {
+ return r;
+ }
+
+ bool same_pool = true;
+ // a different tail placement rule may resolve to a different data pool
+ if (bucket_info.placement_rule != tail_placement_rule) {
+ rgw_pool tail_pool;
+ if (!store->get_obj_data_pool(tail_placement_rule, head_obj, &tail_pool)) {
+ return -EIO;
+ }
+
+ if (tail_pool != head_pool) {
+ same_pool = false;
+
+ r = store->get_max_chunk_size(tail_pool, &chunk_size);
+ if (r < 0) {
+ return r;
+ }
+
+ head_max_size = 0; // with a separate tail pool the head chunk size is 0
+ }
+ }
+
+ if (same_pool) { // head and tail share a pool: both use the head pool's max chunk size
+ head_max_size = max_head_chunk_size;
+ chunk_size = max_head_chunk_size;
+ }
+
+ uint64_t stripe_size;
+ const uint64_t default_stripe_size = store->ctx()->_conf->rgw_obj_stripe_size;
+ // derive stripe_size from the configured default and the head pool's alignment
+ store->get_max_aligned_size(default_stripe_size, alignment, &stripe_size);
+
+ manifest.set_trivial_rule(head_max_size, stripe_size);
+
+ r = manifest_gen.create_begin(store->ctx(), &manifest,
+ bucket_info.placement_rule,
+ &tail_placement_rule,
+ head_obj.bucket, head_obj);
+ if (r < 0) {
+ return r;
+ }
+
+ rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store);
+
+ r = writer.set_stripe_obj(stripe_obj);
+ if (r < 0) {
+ return r;
+ }
+
+ set_head_chunk_size(head_max_size);
+ // initialize the processors
+ chunk = ChunkProcessor(&writer, chunk_size);
+ stripe = StripeProcessor(&chunk, this, head_max_size);
+ return 0;
+}
+
+int AtomicObjectProcessor::complete(size_t accounted_size,
+ const std::string& etag, // not read in this implementation
+ ceph::real_time *mtime,
+ ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match,
+ const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace,
+ bool *pcanceled) // optional: only written when non-null
+{
+ int r = writer.drain(); // wait for outstanding writes; any error aborts completion
+ if (r < 0) {
+ return r;
+ }
+ const uint64_t actual_size = get_actual_size();
+ r = manifest_gen.create_next(actual_size);
+ if (r < 0) {
+ return r;
+ }
+
+ obj_ctx.set_atomic(head_obj);
+
+ RGWRados::Object op_target(store, bucket_info, obj_ctx, head_obj);
+
+ /* some object types shouldn't be versioned, e.g., multipart parts */
+ op_target.set_versioning_disabled(!bucket_info.versioning_enabled());
+
+ RGWRados::Object::Write obj_op(&op_target);
+ // the first chunk captured by process_first_chunk() is written along with the head metadata
+ obj_op.meta.data = &first_chunk;
+ obj_op.meta.manifest = &manifest;
+ obj_op.meta.ptag = &unique_tag; /* use req_id as operation tag */
+ obj_op.meta.if_match = if_match;
+ obj_op.meta.if_nomatch = if_nomatch;
+ obj_op.meta.mtime = mtime;
+ obj_op.meta.set_mtime = set_mtime;
+ obj_op.meta.owner = owner;
+ obj_op.meta.flags = PUT_OBJ_CREATE;
+ obj_op.meta.olh_epoch = olh_epoch;
+ obj_op.meta.delete_at = delete_at;
+ obj_op.meta.user_data = user_data;
+ obj_op.meta.zones_trace = zones_trace;
+ obj_op.meta.modify_tail = true;
+
+ r = obj_op.write_meta(actual_size, accounted_size, attrs);
+ if (r < 0) {
+ return r; // 'written' is left intact here, so ~RadosWriter() removes the objects
+ }
+ if (!obj_op.meta.canceled) {
+ // on success, clear the set of objects for deletion
+ writer.clear_written();
+ }
+ if (pcanceled) {
+ *pcanceled = obj_op.meta.canceled;
+ }
+ return 0;
+}
+
+
+int MultipartObjectProcessor::process_first_chunk(bufferlist&& data,
+                                                  DataProcessor **processor)
+{
+  // the head's first chunk goes out as an exclusive create, and
+  // write_exclusive() drains so that a racing upload shows up as EEXIST
+  int ret = writer.write_exclusive(data);
+  if (ret == -EEXIST) {
+    // lost the race: pick a random oid prefix and rebuild head/manifest
+    const std::string random_suffix = gen_rand_alphanumeric(store->ctx(), 32);
+    const std::string& part_name = target_obj.key.name;
+    mp.init(part_name, upload_id, random_suffix);
+    manifest.set_prefix(part_name + "." + random_suffix);
+
+    ret = prepare_head();
+    if (ret >= 0) {
+      // retry the exclusive write against the new head object
+      ret = writer.write_exclusive(data);
+    }
+  }
+  if (ret < 0) {
+    return ret;
+  }
+  // success: the stripe processor handles the rest of the part
+  *processor = &stripe;
+  return 0;
+}
+
+int MultipartObjectProcessor::prepare_head() // (re)build manifest, head object and processors for this part
+{
+ const uint64_t default_stripe_size = store->ctx()->_conf->rgw_obj_stripe_size;
+ uint64_t chunk_size;
+ uint64_t stripe_size;
+ uint64_t alignment;
+
+ int r = store->get_max_chunk_size(tail_placement_rule, target_obj, &chunk_size, &alignment);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: unexpected: get_max_chunk_size(): placement_rule=" << tail_placement_rule.to_str() << " obj=" << target_obj << " returned r=" << r << dendl;
+ return r;
+ }
+ store->get_max_aligned_size(default_stripe_size, alignment, &stripe_size);
+
+ manifest.set_multipart_part_rule(stripe_size, part_num);
+
+ r = manifest_gen.create_begin(store->ctx(), &manifest,
+ bucket_info.placement_rule,
+ &tail_placement_rule,
+ target_obj.bucket, target_obj);
+ if (r < 0) {
+ return r;
+ }
+ // the generator's current raw object becomes this part's head object
+ rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store);
+ rgw_raw_obj_to_obj(head_obj.bucket, stripe_obj, &head_obj);
+ head_obj.index_hash_source = target_obj.key.name;
+
+ r = writer.set_stripe_obj(stripe_obj);
+ if (r < 0) {
+ return r;
+ }
+ stripe_size = manifest_gen.cur_stripe_max_size(); // from here on, use the generator's stripe size
+ set_head_chunk_size(stripe_size);
+ // initialize the processors
+ chunk = ChunkProcessor(&writer, chunk_size);
+ stripe = StripeProcessor(&chunk, this, stripe_size);
+ return 0;
+}
+
+int MultipartObjectProcessor::prepare() // set the initial manifest prefix, then prepare the head
+{
+ manifest.set_prefix(target_obj.key.name + "." + upload_id); // "<object name>.<upload id>"
+
+ return prepare_head();
+}
+
+int MultipartObjectProcessor::complete(size_t accounted_size,
+ const std::string& etag,
+ ceph::real_time *mtime,
+ ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, // not read in this implementation
+ const char *if_nomatch, // not read in this implementation
+ const std::string *user_data, // not read in this implementation
+ rgw_zone_set *zones_trace,
+ bool *pcanceled) // optional: only written when non-null
+{
+ int r = writer.drain(); // wait for outstanding writes; any error aborts completion
+ if (r < 0) {
+ return r;
+ }
+ const uint64_t actual_size = get_actual_size();
+ r = manifest_gen.create_next(actual_size);
+ if (r < 0) {
+ return r;
+ }
+ // step 1: write the part's head object metadata, with versioning disabled
+ RGWRados::Object op_target(store, bucket_info, obj_ctx, head_obj);
+ op_target.set_versioning_disabled(true);
+ RGWRados::Object::Write obj_op(&op_target);
+
+ obj_op.meta.set_mtime = set_mtime;
+ obj_op.meta.mtime = mtime;
+ obj_op.meta.owner = owner;
+ obj_op.meta.delete_at = delete_at;
+ obj_op.meta.zones_trace = zones_trace;
+ obj_op.meta.modify_tail = true;
+
+ r = obj_op.write_meta(actual_size, accounted_size, attrs);
+ if (r < 0)
+ return r;
+ // step 2: build the part info and the omap key it is stored under
+ bufferlist bl;
+ RGWUploadPartInfo info;
+ string p = "part.";
+ bool sorted_omap = is_v2_upload_id(upload_id);
+
+ if (sorted_omap) {
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%08d", part_num); // zero-padded to 8 digits
+ p.append(buf);
+ } else {
+ p.append(part_num_str);
+ }
+ info.num = part_num;
+ info.etag = etag;
+ info.size = actual_size;
+ info.accounted_size = accounted_size;
+ info.modified = real_clock::now();
+ info.manifest = manifest;
+
+ bool compressed; // only passed to the call below; not read afterwards
+ r = rgw_compression_info_from_attrset(attrs, compressed, info.cs_info);
+ if (r < 0) {
+ ldout(store->ctx(), 1) << "cannot get compression info" << dendl;
+ return r;
+ }
+
+ encode(info, bl);
+ // step 3: record the part in the omap of the upload's meta object
+ rgw_obj meta_obj;
+ meta_obj.init_ns(bucket_info.bucket, mp.get_meta(), RGW_OBJ_NS_MULTIPART);
+ meta_obj.set_in_extra_data(true);
+
+ rgw_raw_obj raw_meta_obj;
+
+ store->obj_to_raw(bucket_info.placement_rule, meta_obj, &raw_meta_obj);
+ // NOTE(review): this local obj_ctx shadows the obj_ctx member used for op_target above
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ auto sysobj = obj_ctx.get_obj(raw_meta_obj);
+
+ r = sysobj.omap()
+ .set_must_exist(true)
+ .set(p, bl);
+ if (r < 0) {
+ return r == -ENOENT ? -ERR_NO_SUCH_UPLOAD : r; // ENOENT from the omap set is reported as "no such upload"
+ }
+
+ if (!obj_op.meta.canceled) {
+ // on success, clear the set of objects for deletion
+ writer.clear_written();
+ }
+ if (pcanceled) {
+ *pcanceled = obj_op.meta.canceled;
+ }
+ return 0;
+}
+
+int AppendObjectProcessor::process_first_chunk(bufferlist &&data, rgw::putobj::DataProcessor **processor)
+{
+  // the first chunk goes out via write_exclusive(); on success the stripe
+  // processor takes over for everything that follows
+  if (const int ret = writer.write_exclusive(data); ret < 0) {
+    return ret;
+  }
+  *processor = &stripe; return 0;
+}
+
+int AppendObjectProcessor::prepare() // validate the append against the current object state and set up the processors
+{
+ RGWObjState *astate;
+ int r = store->get_obj_state(&obj_ctx, bucket_info, head_obj, &astate);
+ if (r < 0) {
+ return r;
+ }
+ cur_size = astate->size;
+ *cur_accounted_size = astate->accounted_size; // written through the caller-supplied pointer
+ if (!astate->exists) {
+ if (position != 0) { // a new object can only be appended to at position 0
+ ldout(store->ctx(), 5) << "ERROR: Append position should be zero" << dendl;
+ return -ERR_POSITION_NOT_EQUAL_TO_LENGTH;
+ } else {
+ cur_part_num = 1;
+ //set the prefix
+ char buf[33];
+ gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
+ string oid_prefix = head_obj.key.name; // prefix: "<object name>.<random>_"
+ oid_prefix.append(".");
+ oid_prefix.append(buf);
+ oid_prefix.append("_");
+ manifest.set_prefix(oid_prefix);
+ }
+ } else {
+ // check whether the object is appendable
+ map<string, bufferlist>::iterator iter = astate->attrset.find(RGW_ATTR_APPEND_PART_NUM);
+ if (iter == astate->attrset.end()) {
+ ldout(store->ctx(), 5) << "ERROR: The object is not appendable" << dendl;
+ return -ERR_OBJECT_NOT_APPENDABLE;
+ }
+ if (position != *cur_accounted_size) {
+ ldout(store->ctx(), 5) << "ERROR: Append position should be equal to the obj size" << dendl;
+ return -ERR_POSITION_NOT_EQUAL_TO_LENGTH;
+ }
+ try {
+ decode(cur_part_num, iter->second);
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 5) << "ERROR: failed to decode part num" << dendl;
+ return -EIO;
+ }
+ cur_part_num++; // this append becomes the next part
+ //get the current obj etag
+ iter = astate->attrset.find(RGW_ATTR_ETAG);
+ if (iter != astate->attrset.end()) {
+ string s = rgw_string_unquote(iter->second.c_str());
+ size_t pos = s.find("-");
+ cur_etag = s.substr(0, pos); // keep only what precedes the first '-'
+ }
+ // an existing storage class attribute overrides the tail placement's storage class
+ iter = astate->attrset.find(RGW_ATTR_STORAGE_CLASS);
+ if (iter != astate->attrset.end()) {
+ tail_placement_rule.storage_class = iter->second.to_str();
+ }
+ cur_manifest = &astate->manifest; // complete() appends the new manifest to this one
+ manifest.set_prefix(cur_manifest->get_prefix());
+ astate->keep_tail = true;
+ }
+ manifest.set_multipart_part_rule(store->ctx()->_conf->rgw_obj_stripe_size, cur_part_num);
+
+ r = manifest_gen.create_begin(store->ctx(), &manifest, bucket_info.placement_rule, &tail_placement_rule, head_obj.bucket, head_obj);
+ if (r < 0) {
+ return r;
+ }
+ rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store);
+
+ uint64_t chunk_size = 0;
+ r = store->get_max_chunk_size(stripe_obj.pool, &chunk_size);
+ if (r < 0) {
+ return r;
+ }
+ r = writer.set_stripe_obj(std::move(stripe_obj)); // set_stripe_obj() takes a const reference, so nothing is moved
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t stripe_size = manifest_gen.cur_stripe_max_size();
+ // the head chunk is the smaller of the chunk size and the stripe size
+ uint64_t max_head_size = std::min(chunk_size, stripe_size);
+ set_head_chunk_size(max_head_size);
+
+ // initialize the processors
+ chunk = ChunkProcessor(&writer, chunk_size);
+ stripe = StripeProcessor(&chunk, this, stripe_size);
+
+ return 0;
+}
+
+int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, ceph::real_time *mtime,
+ ceph::real_time set_mtime, map <string, bufferlist> &attrs,
+ ceph::real_time delete_at, const char *if_match, const char *if_nomatch, // if_match / if_nomatch are not read here
+ const string *user_data, rgw_zone_set *zones_trace, bool *pcanceled)
+{
+ int r = writer.drain(); // wait for outstanding writes; any error aborts completion
+ if (r < 0)
+ return r;
+ const uint64_t actual_size = get_actual_size();
+ r = manifest_gen.create_next(actual_size);
+ if (r < 0) {
+ return r;
+ }
+ obj_ctx.set_atomic(head_obj);
+ RGWRados::Object op_target(store, bucket_info, obj_ctx, head_obj);
+ //For Append obj, disable versioning
+ op_target.set_versioning_disabled(true);
+ RGWRados::Object::Write obj_op(&op_target);
+ if (cur_manifest) { // object already existed: extend its manifest with the new part
+ cur_manifest->append(manifest, store->svc.zone);
+ obj_op.meta.manifest = cur_manifest;
+ } else {
+ obj_op.meta.manifest = &manifest;
+ }
+ obj_op.meta.ptag = &unique_tag; /* use req_id as operation tag */
+ obj_op.meta.mtime = mtime;
+ obj_op.meta.set_mtime = set_mtime;
+ obj_op.meta.owner = owner;
+ obj_op.meta.flags = PUT_OBJ_CREATE;
+ obj_op.meta.delete_at = delete_at;
+ obj_op.meta.user_data = user_data;
+ obj_op.meta.zones_trace = zones_trace;
+ obj_op.meta.modify_tail = true;
+ obj_op.meta.appendable = true;
+ //Add the append part number
+ bufferlist cur_part_num_bl;
+ encode(cur_part_num, cur_part_num_bl);
+ attrs[RGW_ATTR_APPEND_PART_NUM] = cur_part_num_bl;
+ //calculate the etag: MD5 over (previous etag bytes + this part's etag bytes), then "-<part num>"
+ if (!cur_etag.empty()) {
+ MD5 hash;
+ char petag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE];
+ char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+ hex_to_buf(cur_etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); // return value is not checked
+ hash.Update((const unsigned char *)petag, sizeof(petag));
+ hex_to_buf(etag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); // return value is not checked
+ hash.Update((const unsigned char *)petag, sizeof(petag));
+ hash.Final((unsigned char *)final_etag);
+ buf_to_hex((unsigned char *)final_etag, sizeof(final_etag), final_etag_str);
+ snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2,
+ "-%lld", (long long)cur_part_num);
+ bufferlist etag_bl;
+ etag_bl.append(final_etag_str, strlen(final_etag_str) + 1); // + 1 stores the terminating NUL as well
+ attrs[RGW_ATTR_ETAG] = etag_bl;
+ }
+ r = obj_op.write_meta(actual_size + cur_size, accounted_size + *cur_accounted_size, attrs); // sizes cover old + appended data
+ if (r < 0) {
+ return r;
+ }
+ if (!obj_op.meta.canceled) {
+ // on success, clear the set of objects for deletion
+ writer.clear_written();
+ }
+ if (pcanceled) {
+ *pcanceled = obj_op.meta.canceled;
+ }
+ *cur_accounted_size += accounted_size; // report the new accounted size back through the pointer
+
+ return 0;
+}
+
+} // namespace rgw::putobj
diff --git a/src/rgw/rgw_putobj_processor.h b/src/rgw/rgw_putobj_processor.h
new file mode 100644
index 00000000..8d265f17
--- /dev/null
+++ b/src/rgw/rgw_putobj_processor.h
@@ -0,0 +1,263 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <optional>
+
+#include "rgw_putobj.h"
+#include "rgw_rados.h"
+#include "services/svc_rados.h"
+
+namespace rgw {
+
+class Aio;
+
+namespace putobj {
+
+// a data consumer that writes an object in a bucket
+class ObjectProcessor : public DataProcessor {
+ public:
+ // prepare to start processing object data
+ virtual int prepare() = 0;
+ // the implementations in this file return 0 on success and a negative error code otherwise
+ // complete the operation and make its result visible to clients
+ virtual int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled) = 0; // canceled may be null in the implementations here
+};
+
+// an object processor with special handling for the first chunk of the head.
+// the virtual process_first_chunk() function returns a processor to handle the
+// rest of the object
+class HeadObjectProcessor : public ObjectProcessor {
+ uint64_t head_chunk_size; // number of leading bytes captured in head_data
+ // buffer to capture the first chunk of the head object
+ bufferlist head_data;
+ // initialized after process_first_chunk() to process everything else
+ DataProcessor *processor = nullptr;
+ uint64_t data_offset = 0; // maximum offset of data written (ie compressed)
+ protected:
+ uint64_t get_actual_size() const { return data_offset; } // total bytes passed to process() so far
+
+ // process the first chunk of data and return a processor for the rest
+ virtual int process_first_chunk(bufferlist&& data,
+ DataProcessor **processor) = 0;
+ public:
+ HeadObjectProcessor(uint64_t head_chunk_size)
+ : head_chunk_size(head_chunk_size)
+ {}
+ // lets derived classes set the head chunk size once it is known (they call this from prepare())
+ void set_head_chunk_size(uint64_t size) { head_chunk_size = size; }
+
+ // cache first chunk for process_first_chunk(), then forward everything else
+ // to the returned processor
+ int process(bufferlist&& data, uint64_t logical_offset) final override;
+};
+
+
+using RawObjSet = std::set<rgw_raw_obj>;
+
+// a data sink that writes to rados objects and deletes them on cancelation
+class RadosWriter : public DataProcessor {
+ Aio *const aio; // used to submit writes and to drain their completions
+ RGWRados *const store;
+ const RGWBucketInfo& bucket_info;
+ RGWObjectCtx& obj_ctx;
+ const rgw_obj head_obj; // copy taken at construction; the destructor deletes it last
+ RGWSI_RADOS::Obj stripe_obj; // current stripe object
+ RawObjSet written; // set of written objects for deletion
+
+ public:
+ RadosWriter(Aio *aio, RGWRados *store, const RGWBucketInfo& bucket_info,
+ RGWObjectCtx& obj_ctx, const rgw_obj& head_obj)
+ : aio(aio), store(store), bucket_info(bucket_info),
+ obj_ctx(obj_ctx), head_obj(head_obj)
+ {}
+ ~RadosWriter(); // drains aio, then deletes whatever is still in 'written'
+
+ // change the current stripe object
+ int set_stripe_obj(const rgw_raw_obj& obj);
+
+ // write the data at the given offset of the current stripe object
+ int process(bufferlist&& data, uint64_t stripe_offset) override;
+
+ // write the data as an exclusive create and wait for it to complete
+ int write_exclusive(const bufferlist& data);
+ // wait for outstanding writes; returns 0 or the first error among the completions
+ int drain();
+
+ // when the operation completes successfully, clear the set of written objects
+ // so they aren't deleted on destruction
+ void clear_written() { written.clear(); }
+};
+
+// a rados object processor that stripes according to RGWObjManifest
+class ManifestObjectProcessor : public HeadObjectProcessor,
+ public StripeGenerator {
+ protected:
+ RGWRados *const store;
+ const RGWBucketInfo& bucket_info;
+ rgw_placement_rule tail_placement_rule; // default-constructed unless a rule is passed to the constructor
+ const rgw_user& owner;
+ RGWObjectCtx& obj_ctx;
+ rgw_obj head_obj;
+ // pipeline: stripe -> chunk -> writer (see the constructor's initializer list)
+ RadosWriter writer;
+ RGWObjManifest manifest;
+ RGWObjManifest::generator manifest_gen;
+ ChunkProcessor chunk;
+ StripeProcessor stripe;
+
+ // implements StripeGenerator
+ int next(uint64_t offset, uint64_t *stripe_size) override;
+
+ public:
+ ManifestObjectProcessor(Aio *aio, RGWRados *store,
+ const RGWBucketInfo& bucket_info,
+ const rgw_placement_rule *ptail_placement_rule,
+ const rgw_user& owner, RGWObjectCtx& obj_ctx,
+ const rgw_obj& head_obj)
+ : HeadObjectProcessor(0),
+ store(store), bucket_info(bucket_info),
+ owner(owner),
+ obj_ctx(obj_ctx), head_obj(head_obj),
+ writer(aio, store, bucket_info, obj_ctx, head_obj),
+ chunk(&writer, 0), stripe(&chunk, this, 0) { // sizes start at 0; the derived prepare() sets real values
+ if (ptail_placement_rule) {
+ tail_placement_rule = *ptail_placement_rule;
+ }
+ }
+ // NOTE(review): the parameter is a const rvalue reference, so the assignment below copies
+ void set_tail_placement(const rgw_placement_rule&& tpr) {
+ tail_placement_rule = tpr;
+ }
+};
+
+
+// a processor that completes with an atomic write to the head object as part of
+// a bucket index transaction
+class AtomicObjectProcessor : public ManifestObjectProcessor {
+ const std::optional<uint64_t> olh_epoch; // passed to the head write as meta.olh_epoch
+ const std::string unique_tag; // passed to the head write as meta.ptag
+ bufferlist first_chunk; // written with the head in complete()
+
+ int process_first_chunk(bufferlist&& data, DataProcessor **processor) override;
+ public:
+ AtomicObjectProcessor(Aio *aio, RGWRados *store,
+ const RGWBucketInfo& bucket_info,
+ const rgw_placement_rule *ptail_placement_rule,
+ const rgw_user& owner,
+ RGWObjectCtx& obj_ctx, const rgw_obj& head_obj,
+ std::optional<uint64_t> olh_epoch,
+ const std::string& unique_tag)
+ : ManifestObjectProcessor(aio, store, bucket_info, ptail_placement_rule,
+ owner, obj_ctx, head_obj),
+ olh_epoch(olh_epoch), unique_tag(unique_tag)
+ {}
+
+ // prepare a trivial manifest
+ int prepare() override;
+ // write the head object atomically in a bucket index transaction
+ int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled) override;
+
+};
+
+
+// a processor for multipart parts, which don't require atomic completion. the
+// part's head is written with an exclusive create to detect racing uploads of
+// the same part/upload id, which are restarted with a random oid prefix
+class MultipartObjectProcessor : public ManifestObjectProcessor {
+ const rgw_obj target_obj; // target multipart object
+ const std::string upload_id;
+ const int part_num; // NOTE(review): the constructor takes uint64_t, so this narrows to int
+ const std::string part_num_str; // used in the omap key when the upload id is not v2
+ RGWMPObj mp;
+
+ // write the first chunk and wait on aio->drain() for its completion.
+ // on EEXIST, retry with random prefix
+ int process_first_chunk(bufferlist&& data, DataProcessor **processor) override;
+ // prepare the head stripe and manifest
+ int prepare_head();
+ public:
+ MultipartObjectProcessor(Aio *aio, RGWRados *store,
+ const RGWBucketInfo& bucket_info,
+ const rgw_placement_rule *ptail_placement_rule,
+ const rgw_user& owner, RGWObjectCtx& obj_ctx,
+ const rgw_obj& head_obj,
+ const std::string& upload_id, uint64_t part_num,
+ const std::string& part_num_str)
+ : ManifestObjectProcessor(aio, store, bucket_info, ptail_placement_rule,
+ owner, obj_ctx, head_obj),
+ target_obj(head_obj), upload_id(upload_id),
+ part_num(part_num), part_num_str(part_num_str),
+ mp(head_obj.key.name, upload_id)
+ {}
+
+ // prepare a multipart manifest
+ int prepare() override;
+ // write the head object attributes in a bucket index transaction, then
+ // register the completed part with the multipart meta object
+ int complete(size_t accounted_size, const std::string& etag,
+ ceph::real_time *mtime, ceph::real_time set_mtime,
+ std::map<std::string, bufferlist>& attrs,
+ ceph::real_time delete_at,
+ const char *if_match, const char *if_nomatch,
+ const std::string *user_data,
+ rgw_zone_set *zones_trace, bool *canceled) override;
+};
+
+// processor for appends: prepare() validates the append position against the
+// existing object, and complete() writes the head with the extended manifest
+class AppendObjectProcessor : public ManifestObjectProcessor {
+  uint64_t cur_part_num = 0; // set by prepare(); initialized so it is never indeterminate
+  uint64_t position; // append position requested by the caller
+  uint64_t cur_size; // size of the existing object, read in prepare()
+  uint64_t *cur_accounted_size; // caller-owned; written in prepare() and complete()
+  string cur_etag; // etag of the existing object up to the first '-', if any
+  const std::string unique_tag; // passed to the head write as meta.ptag
+  RGWObjManifest *cur_manifest; // manifest of the existing object; null for a new one
+  int process_first_chunk(bufferlist&& data, DataProcessor **processor) override;
+
+ public:
+  AppendObjectProcessor(Aio *aio, RGWRados *store, const RGWBucketInfo& bucket_info,
+                        const rgw_placement_rule *ptail_placement_rule,
+                        const rgw_user& owner, RGWObjectCtx& obj_ctx,const rgw_obj& head_obj,
+                        const std::string& unique_tag, uint64_t position, uint64_t *cur_accounted_size)
+    : ManifestObjectProcessor(aio, store, bucket_info, ptail_placement_rule, owner, obj_ctx, head_obj),
+      position(position), cur_size(0), cur_accounted_size(cur_accounted_size),
+      unique_tag(unique_tag), cur_manifest(nullptr)
+  {}
+  int prepare() override;
+  int complete(size_t accounted_size, const string& etag,
+               ceph::real_time *mtime, ceph::real_time set_mtime,
+               map<string, bufferlist>& attrs, ceph::real_time delete_at,
+               const char *if_match, const char *if_nomatch, const string *user_data,
+               rgw_zone_set *zones_trace, bool *canceled) override;
+};
+
+} // namespace putobj
+} // namespace rgw
+
diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc
new file mode 100644
index 00000000..052bc7a5
--- /dev/null
+++ b/src/rgw/rgw_quota.cc
@@ -0,0 +1,1034 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "include/utime.h"
+#include "common/lru_map.h"
+#include "common/RefCountedObj.h"
+#include "common/Thread.h"
+#include "common/Mutex.h"
+#include "common/RWLock.h"
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_quota.h"
+#include "rgw_bucket.h"
+#include "rgw_user.h"
+
+#include "services/svc_sys_obj.h"
+
+#include <atomic>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+
+// One cached statistics record plus the two timestamps that control its
+// lifetime; all three fields are filled in by RGWQuotaCache::set_stats().
+struct RGWQuotaCacheStats {
+ RGWStorageStats stats;
+ utime_t expiration; // set to "now + rgw_bucket_quota_ttl" when the entry is stored
+ utime_t async_refresh_time; // set to "now + ttl/2"; reset to 0 by StatsAsyncTestSet once a refresh is claimed
+};
+
+// LRU-backed cache of storage statistics keyed by T (rgw_bucket or rgw_user in
+// this file). Subclasses provide the map access primitives and the backend
+// fetch; this base class implements expiry, the soft-threshold check and the
+// asynchronous refresh bookkeeping.
+template<class T>
+class RGWQuotaCache {
+protected:
+  RGWRados *store;
+  lru_map<T, RGWQuotaCacheStats> stats_map;
+  // Counts in-flight async refreshes; the destructor waits on it.
+  RefCountedWaitObject *async_refcount;
+
+  // Update context used by async_refresh() to claim an entry: it succeeds only
+  // when async_refresh_time is still non-zero and zeroes it, so that a second
+  // caller racing on the same entry gets `false`.
+  class StatsAsyncTestSet : public lru_map<T, RGWQuotaCacheStats>::UpdateContext {
+  public:
+    bool update(RGWQuotaCacheStats *entry) override {
+      if (entry->async_refresh_time.sec() == 0)
+        return false;
+
+      entry->async_refresh_time = utime_t(0, 0);
+
+      return true;
+    }
+  };
+
+  // Reads the authoritative stats for (user, bucket) from the backend.
+  virtual int fetch_stats_from_storage(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats) = 0;
+
+  // Cache lookup; returns true and fills `qs` when an entry exists.
+  virtual bool map_find(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) = 0;
+
+  // Cache lookup that applies `ctx` to the entry if one exists.
+  virtual bool map_find_and_update(const rgw_user& user, const rgw_bucket& bucket, typename lru_map<T, RGWQuotaCacheStats>::UpdateContext *ctx) = 0;
+  // Inserts or replaces the cache entry.
+  virtual void map_add(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) = 0;
+
+  // Hook called by adjust_stats() after the cached entry was updated.
+  virtual void data_modified(const rgw_user& user, rgw_bucket& bucket) {}
+public:
+  RGWQuotaCache(RGWRados *_store, int size) : store(_store), stats_map(size) {
+    async_refcount = new RefCountedWaitObject;
+  }
+  virtual ~RGWQuotaCache() {
+    async_refcount->put_wait(); /* wait for all pending async requests to complete */
+  }
+
+  int get_stats(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats, RGWQuotaInfo& quota);
+  void adjust_stats(const rgw_user& user, rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes);
+
+  virtual bool can_use_cached_stats(RGWQuotaInfo& quota, RGWStorageStats& stats);
+
+  void set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats);
+  int async_refresh(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs);
+  void async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats);
+  void async_refresh_fail(const rgw_user& user, rgw_bucket& bucket);
+
+  // Base for the per-key-type objects that perform one asynchronous refresh.
+  class AsyncRefreshHandler {
+  protected:
+    RGWRados *store;
+    RGWQuotaCache<T> *cache;
+  public:
+    AsyncRefreshHandler(RGWRados *_store, RGWQuotaCache<T> *_cache) : store(_store), cache(_cache) {}
+    virtual ~AsyncRefreshHandler() {}
+
+    // Starts the asynchronous fetch; a negative return means it was not started.
+    virtual int init_fetch() = 0;
+    // Releases the reference held on the handler.
+    virtual void drop_reference() = 0;
+  };
+
+  virtual AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) = 0;
+};
+
+// Returns false as soon as the cached usage has reached the soft threshold of
+// an enabled limit (size first, then object count); true otherwise. The soft
+// thresholds are derived from the configured ratio on first use and stored
+// back into `quota`.
+template<class T>
+bool RGWQuotaCache<T>::can_use_cached_stats(RGWQuotaInfo& quota, RGWStorageStats& cached_stats)
+{
+  const bool size_limited = (quota.max_size >= 0);
+  if (size_limited && quota.max_size_soft_threshold < 0) {
+    quota.max_size_soft_threshold = quota.max_size * store->ctx()->_conf->rgw_bucket_quota_soft_threshold;
+  }
+  if (size_limited &&
+      cached_stats.size_rounded >= (uint64_t)quota.max_size_soft_threshold) {
+    ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (size): "
+      << cached_stats.size_rounded << " >= " << quota.max_size_soft_threshold << dendl;
+    return false;
+  }
+
+  const bool objs_limited = (quota.max_objects >= 0);
+  if (objs_limited && quota.max_objs_soft_threshold < 0) {
+    quota.max_objs_soft_threshold = quota.max_objects * store->ctx()->_conf->rgw_bucket_quota_soft_threshold;
+  }
+  if (objs_limited &&
+      cached_stats.num_objects >= (uint64_t)quota.max_objs_soft_threshold) {
+    ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (num objs): "
+      << cached_stats.num_objects << " >= " << quota.max_objs_soft_threshold << dendl;
+    return false;
+  }
+
+  return true;
+}
+
+// Kicks off an asynchronous refresh of the cached entry for (user, bucket).
+// Returns 0 when the refresh was started or when another caller had already
+// claimed the entry, and the negative init_fetch() result otherwise.
+template<class T>
+int RGWQuotaCache<T>::async_refresh(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs)
+{
+  /* claim the entry so that only one caller starts a refresh */
+  StatsAsyncTestSet claim;
+  const bool claimed = map_find_and_update(user, bucket, &claim);
+  if (!claimed) {
+    /* most likely we just raced with another update */
+    return 0;
+  }
+
+  async_refcount->get();
+
+  AsyncRefreshHandler *refresher = allocate_refresh_handler(user, bucket);
+
+  const int ret = refresher->init_fetch();
+  if (ret >= 0) {
+    return 0;
+  }
+
+  /* the fetch never started: undo the in-flight count and release the handler */
+  async_refcount->put();
+  refresher->drop_reference();
+  return ret;
+}
+
+// Completion path for an asynchronous refresh that failed: logs the failure
+// and releases the in-flight reference taken in async_refresh(). The cached
+// entry itself is left untouched.
+template<class T>
+void RGWQuotaCache<T>::async_refresh_fail(const rgw_user& user, rgw_bucket& bucket)
+{
+  /* previously logged the same "response" text as the success path */
+  ldout(store->ctx(), 20) << "async stats refresh failed for bucket=" << bucket << dendl;
+
+  async_refcount->put();
+}
+
+// Completion path for a successful asynchronous refresh: stores the freshly
+// fetched `stats` in the cache and releases the in-flight reference.
+template<class T>
+void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats)
+{
+ ldout(store->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl;
+
+ RGWQuotaCacheStats qs;
+
+ // The lookup result is ignored; set_stats() overwrites every field of qs.
+ map_find(user, bucket, qs);
+
+ set_stats(user, bucket, qs, stats);
+
+ async_refcount->put();
+}
+
+// Fills `qs` with `stats`, stamps it with an expiration of now + ttl and an
+// async-refresh time of now + ttl/2 (ttl = rgw_bucket_quota_ttl), and stores
+// it in the cache.
+template<class T>
+void RGWQuotaCache<T>::set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats)
+{
+  const utime_t stored_at = ceph_clock_now();
+
+  qs.stats = stats;
+
+  qs.expiration = stored_at;
+  qs.expiration += store->ctx()->_conf->rgw_bucket_quota_ttl;
+
+  qs.async_refresh_time = stored_at;
+  qs.async_refresh_time += store->ctx()->_conf->rgw_bucket_quota_ttl / 2;
+
+  map_add(user, bucket, qs);
+}
+
+// Returns the statistics for (user, bucket) in `stats`.
+// A cached entry is used when it has not expired and can_use_cached_stats()
+// accepts it; otherwise the stats are fetched from storage and cached.
+// Returns 0 on success or the negative error from fetch_stats_from_storage().
+template<class T>
+int RGWQuotaCache<T>::get_stats(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats, RGWQuotaInfo& quota) {
+ RGWQuotaCacheStats qs;
+ utime_t now = ceph_clock_now();
+ if (map_find(user, bucket, qs)) {
+ // Past the refresh point (and no refresh claimed yet): start one in the background.
+ if (qs.async_refresh_time.sec() > 0 && now >= qs.async_refresh_time) {
+ int r = async_refresh(user, bucket, qs);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: quota async refresh returned ret=" << r << dendl;
+
+ /* continue processing, might be a transient error, async refresh is just optimization */
+ }
+ }
+
+ // Note: the expiration test reads the clock again rather than reusing `now`.
+ if (can_use_cached_stats(quota, qs.stats) && qs.expiration >
+ ceph_clock_now()) {
+ stats = qs.stats;
+ return 0;
+ }
+ }
+
+ // -ENOENT is not treated as an error: whatever `stats` holds is cached and 0 is returned.
+ int ret = fetch_stats_from_storage(user, bucket, stats);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+
+ set_stats(user, bucket, qs, stats);
+
+ return 0;
+}
+
+
+// Update context that applies an object-count delta and added/removed byte
+// counts to a cached entry. Each counter is clamped to 0 when the adjusted
+// value, reinterpreted as a signed 64-bit number, would be negative.
+template<class T>
+class RGWQuotaStatsUpdate : public lru_map<T, RGWQuotaCacheStats>::UpdateContext {
+  const int objs_delta;
+  const uint64_t added_bytes;
+  const uint64_t removed_bytes;
+public:
+  RGWQuotaStatsUpdate(const int objs_delta,
+                      const uint64_t added_bytes,
+                      const uint64_t removed_bytes)
+    : objs_delta(objs_delta),
+      added_bytes(added_bytes),
+      removed_bytes(removed_bytes) {
+  }
+
+  bool update(RGWQuotaCacheStats * const entry) override {
+    RGWStorageStats& cached = entry->stats;
+
+    /* raw size */
+    const bool size_ok =
+      ((int64_t)(cached.size + added_bytes - removed_bytes)) >= 0;
+    if (!size_ok) {
+      cached.size = 0;
+    } else {
+      cached.size += added_bytes - removed_bytes;
+    }
+
+    /* rounded size, using the rounded form of both byte counts */
+    const uint64_t plus_rounded = rgw_rounded_objsize(added_bytes);
+    const uint64_t minus_rounded = rgw_rounded_objsize(removed_bytes);
+    const bool rounded_ok =
+      ((int64_t)(cached.size_rounded + plus_rounded - minus_rounded)) >= 0;
+    if (!rounded_ok) {
+      cached.size_rounded = 0;
+    } else {
+      cached.size_rounded += plus_rounded - minus_rounded;
+    }
+
+    /* object count */
+    const bool count_ok = ((int64_t)(cached.num_objects + objs_delta)) >= 0;
+    if (!count_ok) {
+      cached.num_objects = 0;
+    } else {
+      cached.num_objects += objs_delta;
+    }
+
+    return true;
+  }
+};
+
+
+// Applies the given deltas to the cached entry for (user, bucket), if one
+// exists, and then notifies the subclass through data_modified().
+template<class T>
+void RGWQuotaCache<T>::adjust_stats(const rgw_user& user, rgw_bucket& bucket, int objs_delta,
+ uint64_t added_bytes, uint64_t removed_bytes)
+{
+ RGWQuotaStatsUpdate<T> update(objs_delta, added_bytes, removed_bytes);
+ // The return value (entry found or not) is deliberately not checked.
+ map_find_and_update(user, bucket, &update);
+
+ data_modified(user, bucket);
+}
+
+// Asynchronous refresh handler for the bucket stats cache. It is both the
+// cache's AsyncRefreshHandler and the RGWGetBucketStats_CB callback passed to
+// RGWRados::get_bucket_stats_async().
+class BucketAsyncRefreshHandler : public RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler,
+ public RGWGetBucketStats_CB {
+ rgw_user user; // forwarded to the cache when the response arrives
+public:
+ BucketAsyncRefreshHandler(RGWRados *_store, RGWQuotaCache<rgw_bucket> *_cache,
+ const rgw_user& _user, const rgw_bucket& _bucket) :
+ RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler(_store, _cache),
+ RGWGetBucketStats_CB(_bucket), user(_user) {}
+
+ // Forwards to put(). NOTE(review): put() is not declared in this chunk; presumably inherited from the callback base.
+ void drop_reference() override { put(); }
+ void handle_response(int r) override;
+ int init_fetch() override;
+};
+
+// Loads the bucket instance info and then starts the asynchronous bucket
+// stats request with this object as the callback.
+// Returns 0 when the request was started, or the negative error of the step
+// that failed.
+int BucketAsyncRefreshHandler::init_fetch()
+{
+  RGWBucketInfo bucket_info;
+
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  int r = store->get_bucket_instance_info(obj_ctx, bucket, bucket_info, NULL, NULL);
+  if (r < 0) {
+    ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl;
+    return r;
+  }
+
+  ldout(store->ctx(), 20) << "initiating async quota refresh for bucket=" << bucket << dendl;
+
+  r = store->get_bucket_stats_async(bucket_info, RGW_NO_SHARD, this);
+  if (r < 0) {
+    /* this is the stats request failing, not the info lookup above; report
+     * it as such and include the error code like the first message does */
+    ldout(store->ctx(), 0) << "could not get bucket stats for bucket=" << bucket.name << " r=" << r << dendl;
+
+    /* get_bucket_stats_async() dropped our reference already */
+    return r;
+  }
+
+  return 0;
+}
+
+// Callback for the asynchronous bucket stats request. On success the
+// per-category results are summed and handed to the cache; on failure the
+// cache is told the refresh failed.
+void BucketAsyncRefreshHandler::handle_response(const int r)
+{
+  if (r >= 0) {
+    RGWStorageStats total;
+
+    for (const auto& category : *stats) {
+      const RGWStorageStats& part = category.second;
+
+      total.size += part.size;
+      total.size_rounded += part.size_rounded;
+      total.num_objects += part.num_objects;
+    }
+
+    cache->async_refresh_response(user, bucket, total);
+    return;
+  }
+
+  ldout(store->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl;
+  cache->async_refresh_fail(user, bucket);
+}
+
+// Quota cache keyed by bucket. The `user` argument of the map primitives is
+// unused here; it only matters to the refresh handler.
+class RGWBucketStatsCache : public RGWQuotaCache<rgw_bucket> {
+protected:
+ bool map_find(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override {
+ return stats_map.find(bucket, qs);
+ }
+
+ bool map_find_and_update(const rgw_user& user, const rgw_bucket& bucket, lru_map<rgw_bucket, RGWQuotaCacheStats>::UpdateContext *ctx) override {
+ return stats_map.find_and_update(bucket, NULL, ctx);
+ }
+
+ void map_add(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override {
+ stats_map.add(bucket, qs);
+ }
+
+ int fetch_stats_from_storage(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats) override;
+
+public:
+ // Cache capacity comes from the rgw_bucket_quota_cache_size option.
+ explicit RGWBucketStatsCache(RGWRados *_store) : RGWQuotaCache<rgw_bucket>(_store, _store->ctx()->_conf->rgw_bucket_quota_cache_size) {
+ }
+
+ AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override {
+ return new BucketAsyncRefreshHandler(store, this, user, bucket);
+ }
+};
+
+// Synchronously reads the bucket's stats from the store and writes the sum
+// over all object categories into `stats`. `stats` is only reset once both
+// store calls have succeeded. Returns 0 or the negative store error.
+int RGWBucketStatsCache::fetch_stats_from_storage(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats)
+{
+  RGWBucketInfo info;
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  int r = store->get_bucket_instance_info(obj_ctx, bucket, info, NULL, NULL);
+  if (r < 0) {
+    ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl;
+    return r;
+  }
+
+  /* the versions are required out-parameters; their values are not used */
+  string bucket_ver;
+  string master_ver;
+  map<RGWObjCategory, RGWStorageStats> per_category;
+
+  r = store->get_bucket_stats(info, RGW_NO_SHARD, &bucket_ver,
+                              &master_ver, per_category, nullptr);
+  if (r < 0) {
+    ldout(store->ctx(), 0) << "could not get bucket stats for bucket="
+                           << bucket.name << dendl;
+    return r;
+  }
+
+  stats = RGWStorageStats();
+
+  for (const auto& category : per_category) {
+    const RGWStorageStats& part = category.second;
+
+    stats.size += part.size;
+    stats.size_rounded += part.size_rounded;
+    stats.num_objects += part.num_objects;
+  }
+
+  return 0;
+}
+
+// Asynchronous refresh handler for the user stats cache. It is both the
+// cache's AsyncRefreshHandler and the RGWGetUserStats_CB callback passed to
+// RGWRados::get_user_stats_async().
+class UserAsyncRefreshHandler : public RGWQuotaCache<rgw_user>::AsyncRefreshHandler,
+ public RGWGetUserStats_CB {
+ rgw_bucket bucket; // forwarded to the cache when the response arrives
+public:
+ UserAsyncRefreshHandler(RGWRados *_store, RGWQuotaCache<rgw_user> *_cache,
+ const rgw_user& _user, const rgw_bucket& _bucket) :
+ RGWQuotaCache<rgw_user>::AsyncRefreshHandler(_store, _cache),
+ RGWGetUserStats_CB(_user),
+ bucket(_bucket) {}
+
+ // Forwards to put(). NOTE(review): put() is not declared in this chunk; presumably inherited from the callback base.
+ void drop_reference() override { put(); }
+ int init_fetch() override;
+ void handle_response(int r) override;
+};
+
+// Starts the asynchronous user stats request with this object as the
+// callback. Returns 0 when the request was started, or the negative error
+// from get_user_stats_async().
+int UserAsyncRefreshHandler::init_fetch()
+{
+  ldout(store->ctx(), 20) << "initiating async quota refresh for user=" << user << dendl;
+  int r = store->get_user_stats_async(user, this);
+  if (r < 0) {
+    /* the failing call fetches user stats, not bucket info; say so and
+     * include the error code */
+    ldout(store->ctx(), 0) << "could not get user stats for user=" << user << " r=" << r << dendl;
+
+    /* get_user_stats_async() dropped our reference already */
+    return r;
+  }
+
+  return 0;
+}
+
+// Callback for the asynchronous user stats request: passes the received
+// stats to the cache on success, or reports the failure to it.
+void UserAsyncRefreshHandler::handle_response(int r)
+{
+  if (r >= 0) {
+    cache->async_refresh_response(user, bucket, stats);
+    return;
+  }
+
+  ldout(store->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl;
+  cache->async_refresh_fail(user, bucket);
+}
+
+// Quota cache keyed by user. In addition to caching it remembers which
+// buckets were modified (see data_modified()) and, when constructed with
+// quota_threads == true, runs two background threads: one that syncs the
+// recently modified buckets and one that periodically syncs all users.
+class RGWUserStatsCache : public RGWQuotaCache<rgw_user> {
+ std::atomic<bool> down_flag = { false };
+ RWLock rwlock; // guards modified_buckets
+ map<rgw_bucket, rgw_user> modified_buckets;
+
+ /* thread, sync recent modified buckets info */
+ class BucketsSyncThread : public Thread {
+ CephContext *cct;
+ RGWUserStatsCache *stats;
+
+ Mutex lock;
+ Cond cond;
+ public:
+
+ BucketsSyncThread(CephContext *_cct, RGWUserStatsCache *_s) : cct(_cct), stats(_s), lock("RGWUserStatsCache::BucketsSyncThread") {}
+
+ void *entry() override {
+ ldout(cct, 20) << "BucketsSyncThread: start" << dendl;
+ do {
+ map<rgw_bucket, rgw_user> buckets;
+
+ // Take ownership of the pending set; the cache's map is left empty.
+ stats->swap_modified_buckets(buckets);
+
+ for (map<rgw_bucket, rgw_user>::iterator iter = buckets.begin(); iter != buckets.end(); ++iter) {
+ rgw_bucket bucket = iter->first;
+ rgw_user& user = iter->second;
+ ldout(cct, 20) << "BucketsSyncThread: sync user=" << user << " bucket=" << bucket << dendl;
+ int r = stats->sync_bucket(user, bucket);
+ if (r < 0) {
+ ldout(cct, 0) << "WARNING: sync_bucket() returned r=" << r << dendl;
+ }
+ }
+
+ if (stats->going_down())
+ break;
+
+ // Sleep for rgw_user_quota_bucket_sync_interval seconds, or until stop() signals.
+ lock.Lock();
+ cond.WaitInterval(lock, utime_t(cct->_conf->rgw_user_quota_bucket_sync_interval, 0));
+ lock.Unlock();
+ } while (!stats->going_down());
+ ldout(cct, 20) << "BucketsSyncThread: done" << dendl;
+
+ return NULL;
+ }
+
+ // Wakes the thread from its interval wait; the caller is expected to have set the down flag.
+ void stop() {
+ Mutex::Locker l(lock);
+ cond.Signal();
+ }
+ };
+
+ /*
+ * thread, full sync all users stats periodically
+ *
+ * only sync non idle users or ones that never got synced before, this is needed so that
+ * users that didn't have quota turned on before (or existed before the user objclass
+ * tracked stats) need to get their backend stats up to date.
+ */
+ class UserSyncThread : public Thread {
+ CephContext *cct;
+ RGWUserStatsCache *stats;
+
+ Mutex lock;
+ Cond cond;
+ public:
+
+ UserSyncThread(CephContext *_cct, RGWUserStatsCache *_s) : cct(_cct), stats(_s), lock("RGWUserStatsCache::UserSyncThread") {}
+
+ void *entry() override {
+ ldout(cct, 20) << "UserSyncThread: start" << dendl;
+ do {
+ int ret = stats->sync_all_users();
+ if (ret < 0) {
+ ldout(cct, 5) << "ERROR: sync_all_users() returned ret=" << ret << dendl;
+ }
+
+ if (stats->going_down())
+ break;
+
+ // Sleep for rgw_user_quota_sync_interval seconds, or until stop() signals.
+ lock.Lock();
+ cond.WaitInterval(lock, utime_t(cct->_conf->rgw_user_quota_sync_interval, 0));
+ lock.Unlock();
+ } while (!stats->going_down());
+ ldout(cct, 20) << "UserSyncThread: done" << dendl;
+
+ return NULL;
+ }
+
+ // Wakes the thread from its interval wait; the caller is expected to have set the down flag.
+ void stop() {
+ Mutex::Locker l(lock);
+ cond.Signal();
+ }
+ };
+
+ // Both are NULL when the cache was constructed without quota threads.
+ BucketsSyncThread *buckets_sync_thread;
+ UserSyncThread *user_sync_thread;
+protected:
+ // The map primitives key on the user; the bucket argument is unused.
+ bool map_find(const rgw_user& user,const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override {
+ return stats_map.find(user, qs);
+ }
+
+ bool map_find_and_update(const rgw_user& user, const rgw_bucket& bucket, lru_map<rgw_user, RGWQuotaCacheStats>::UpdateContext *ctx) override {
+ return stats_map.find_and_update(user, NULL, ctx);
+ }
+
+ void map_add(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override {
+ stats_map.add(user, qs);
+ }
+
+ int fetch_stats_from_storage(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats) override;
+ int sync_bucket(const rgw_user& rgw_user, rgw_bucket& bucket);
+ int sync_user(const rgw_user& user);
+ int sync_all_users();
+
+ void data_modified(const rgw_user& user, rgw_bucket& bucket) override;
+
+ // Exchanges the pending modified-bucket set with `out` under the write lock.
+ void swap_modified_buckets(map<rgw_bucket, rgw_user>& out) {
+ rwlock.get_write();
+ modified_buckets.swap(out);
+ rwlock.unlock();
+ }
+
+ // Signals, joins and deletes *pthr, then clears the pointer; no-op when it is already NULL.
+ template<class T> /* easier doing it as a template, Thread doesn't have ->stop() */
+ void stop_thread(T **pthr) {
+ T *thread = *pthr;
+ if (!thread)
+ return;
+
+ thread->stop();
+ thread->join();
+ delete thread;
+ *pthr = NULL;
+ }
+
+public:
+ // Cache capacity comes from rgw_bucket_quota_cache_size (the bucket option is reused here).
+ RGWUserStatsCache(RGWRados *_store, bool quota_threads) : RGWQuotaCache<rgw_user>(_store, _store->ctx()->_conf->rgw_bucket_quota_cache_size),
+ rwlock("RGWUserStatsCache::rwlock") {
+ if (quota_threads) {
+ buckets_sync_thread = new BucketsSyncThread(store->ctx(), this);
+ buckets_sync_thread->create("rgw_buck_st_syn");
+ user_sync_thread = new UserSyncThread(store->ctx(), this);
+ user_sync_thread->create("rgw_user_st_syn");
+ } else {
+ buckets_sync_thread = NULL;
+ user_sync_thread = NULL;
+ }
+ }
+ ~RGWUserStatsCache() override {
+ stop();
+ }
+
+ AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override {
+ return new UserAsyncRefreshHandler(store, this, user, bucket);
+ }
+
+ bool can_use_cached_stats(RGWQuotaInfo& quota, RGWStorageStats& stats) override {
+ /* in the user case, the cached stats may contain a better estimation of the totals, as
+ * the backend is only periodically getting updated.
+ */
+ return true;
+ }
+
+ bool going_down() {
+ return down_flag;
+ }
+
+ // Sets the down flag and stops both background threads.
+ // NOTE(review): the buckets thread is joined while rwlock is held for write,
+ // and that thread takes the same lock in swap_modified_buckets(); if it is
+ // about to call swap when stop() acquires the lock this looks like it could
+ // deadlock - confirm before relying on it.
+ void stop() {
+ down_flag = true;
+ rwlock.get_write();
+ stop_thread(&buckets_sync_thread);
+ rwlock.unlock();
+ stop_thread(&user_sync_thread);
+ }
+};
+
+// Synchronously reads the user's stats from the store into `stats`.
+// Returns 0 on success or the negative error from get_user_stats().
+int RGWUserStatsCache::fetch_stats_from_storage(const rgw_user& user, const rgw_bucket& bucket, RGWStorageStats& stats)
+{
+  const int r = store->get_user_stats(user, stats);
+  if (r >= 0) {
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "could not get user stats for user=" << user << dendl;
+  return r;
+}
+
+// Loads the bucket instance info and calls rgw_bucket_sync_user_stats() for
+// (user, bucket). Returns 0 or the negative error of the step that failed.
+int RGWUserStatsCache::sync_bucket(const rgw_user& user, rgw_bucket& bucket)
+{
+  RGWBucketInfo info;
+  RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  const int info_ret = store->get_bucket_instance_info(obj_ctx, bucket, info, NULL, NULL);
+  if (info_ret < 0) {
+    ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket << " r=" << info_ret << dendl;
+    return info_ret;
+  }
+
+  const int sync_ret = rgw_bucket_sync_user_stats(store, user, info);
+  if (sync_ret < 0) {
+    ldout(store->ctx(), 0) << "ERROR: rgw_bucket_sync_user_stats() for user=" << user << ", bucket=" << bucket << " returned " << sync_ret << dendl;
+    return sync_ret;
+  }
+
+  return 0;
+}
+
+// Runs a full stats sync for one user via rgw_user_sync_all_stats().
+// Skipped (returning 0) when rgw_user_quota_sync_idle_users is off and the
+// user's header shows no stats update since the last sync.
+// Returns 0 or the negative error of the step that failed.
+int RGWUserStatsCache::sync_user(const rgw_user& user)
+{
+ cls_user_header header;
+ string user_str = user.to_str();
+ int ret = store->cls_user_get_header(user_str, &header);
+ if (ret < 0) {
+ ldout(store->ctx(), 5) << "ERROR: can't read user header: ret=" << ret << dendl;
+ return ret;
+ }
+
+ if (!store->ctx()->_conf->rgw_user_quota_sync_idle_users &&
+ header.last_stats_update < header.last_stats_sync) {
+ ldout(store->ctx(), 20) << "user is idle, not doing a full sync (user=" << user << ")" << dendl;
+ return 0;
+ }
+
+ // NOTE(review): when_need_full_sync is computed but never compared against
+ // anything, so rgw_user_quota_sync_wait_time currently has no effect here.
+ real_time when_need_full_sync = header.last_stats_sync;
+ when_need_full_sync += make_timespan(store->ctx()->_conf->rgw_user_quota_sync_wait_time);
+
+ // check if enough time passed since last full sync
+ /* FIXME: missing check? */
+
+ ret = rgw_user_sync_all_stats(store, user);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: failed user stats sync, ret=" << ret << dendl;
+ return ret;
+ }
+
+ return 0;
+}
+
+// Iterates over all "user" metadata keys in pages of up to 1000 and calls
+// sync_user() for each. A failure for one user is logged and the loop
+// continues with the next. Stops early once going_down() is set.
+// Returns 0, or the negative error from listing the keys.
+int RGWUserStatsCache::sync_all_users()
+{
+  string key = "user";
+  void *handle;
+
+  int ret = store->meta_mgr->list_keys_init(key, &handle);
+  if (ret < 0) {
+    ldout(store->ctx(), 10) << "ERROR: can't get key: ret=" << ret << dendl;
+    return ret;
+  }
+
+  bool truncated = false;
+  const int max = 1000;
+
+  do {
+    list<string> keys;
+    ret = store->meta_mgr->list_keys_next(handle, max, keys, &truncated);
+    if (ret < 0) {
+      ldout(store->ctx(), 0) << "ERROR: lists_keys_next(): ret=" << ret << dendl;
+      goto done;
+    }
+    for (list<string>::iterator iter = keys.begin();
+         iter != keys.end() && !going_down();
+         ++iter) {
+      rgw_user user(*iter);
+      ldout(store->ctx(), 20) << "RGWUserStatsCache: sync user=" << user << dendl;
+      /* separate name: this must not shadow the listing result above */
+      const int sync_ret = sync_user(user);
+      if (sync_ret < 0) {
+        ldout(store->ctx(), 5) << "ERROR: sync_user() failed, user=" << user << " ret=" << sync_ret << dendl;
+
+        /* continuing to next user */
+        continue;
+      }
+    }
+    /* once shutdown was requested there is no point in paging through the
+     * remaining keys just to skip every one of them */
+  } while (truncated && !going_down());
+
+  ret = 0;
+done:
+  store->meta_mgr->list_keys_complete(handle);
+  return ret;
+}
+
+// Records `bucket` (with its user) in modified_buckets so that the buckets
+// sync thread picks it up. The presence check is done under the read lock
+// and the insert under the write lock, so the common already-present case
+// never takes the write lock.
+void RGWUserStatsCache::data_modified(const rgw_user& user, rgw_bucket& bucket)
+{
+ /* racy, but it's ok */
+ rwlock.get_read();
+ bool need_update = modified_buckets.find(bucket) == modified_buckets.end();
+ rwlock.unlock();
+
+ if (need_update) {
+ rwlock.get_write();
+ modified_buckets[bucket] = user;
+ rwlock.unlock();
+ }
+}
+
+
+// Strategy interface deciding whether adding `size` bytes / `num_objs`
+// objects on top of `stats` would exceed the limits in `qinfo`. The concrete
+// strategy is picked by get_instance() from qinfo.check_on_raw.
+class RGWQuotaInfoApplier {
+ /* NOTE: no non-static field allowed as instances are supposed to live in
+ * the static memory only. */
+protected:
+ RGWQuotaInfoApplier() = default;
+
+public:
+ virtual ~RGWQuotaInfoApplier() {}
+
+ // `entity` is only used as a label in the log message.
+ virtual bool is_size_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t size) const = 0;
+
+ virtual bool is_num_objs_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t num_objs) const = 0;
+
+ static const RGWQuotaInfoApplier& get_instance(const RGWQuotaInfo& qinfo);
+};
+
+// Default applier: its size check compares rounded sizes
+// (stats.size_rounded plus the rounded incoming size).
+class RGWQuotaInfoDefApplier : public RGWQuotaInfoApplier {
+public:
+ bool is_size_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t size) const override;
+
+ bool is_num_objs_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t num_objs) const override;
+};
+
+// Raw applier: its size check compares unrounded sizes
+// (stats.size plus the incoming size as given).
+class RGWQuotaInfoRawApplier : public RGWQuotaInfoApplier {
+public:
+ bool is_size_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t size) const override;
+
+ bool is_num_objs_exceeded(const char * const entity,
+ const RGWQuotaInfo& qinfo,
+ const RGWStorageStats& stats,
+ const uint64_t num_objs) const override;
+};
+
+
+// True when the rounded current size plus the rounded incoming size is
+// strictly greater than qinfo.max_size. A negative max_size disables the check.
+bool RGWQuotaInfoDefApplier::is_size_exceeded(const char * const entity,
+                                              const RGWQuotaInfo& qinfo,
+                                              const RGWStorageStats& stats,
+                                              const uint64_t size) const
+{
+  const bool limit_enabled = (qinfo.max_size >= 0);
+  if (!limit_enabled) {
+    return false;
+  }
+
+  const uint64_t used = stats.size_rounded;
+  const uint64_t incoming = rgw_rounded_objsize(size);
+  const uint64_t limit = static_cast<uint64_t>(qinfo.max_size);
+
+  if (used + incoming <= limit) {
+    return false;
+  }
+
+  dout(10) << "quota exceeded: stats.size_rounded=" << stats.size_rounded
+           << " size=" << incoming << " "
+           << entity << "_quota.max_size=" << qinfo.max_size << dendl;
+  return true;
+}
+
+// True when the current object count plus `num_objs` is strictly greater
+// than qinfo.max_objects. A negative max_objects disables the check.
+bool RGWQuotaInfoDefApplier::is_num_objs_exceeded(const char * const entity,
+                                                  const RGWQuotaInfo& qinfo,
+                                                  const RGWStorageStats& stats,
+                                                  const uint64_t num_objs) const
+{
+  const bool limit_enabled = (qinfo.max_objects >= 0);
+  if (!limit_enabled) {
+    return false;
+  }
+
+  const bool within_limit =
+    stats.num_objects + num_objs <= static_cast<uint64_t>(qinfo.max_objects);
+  if (within_limit) {
+    return false;
+  }
+
+  dout(10) << "quota exceeded: stats.num_objects=" << stats.num_objects
+           << " " << entity << "_quota.max_objects=" << qinfo.max_objects
+           << dendl;
+  return true;
+}
+
+// True when the unrounded current size plus the incoming size is strictly
+// greater than qinfo.max_size. A negative max_size disables the check.
+bool RGWQuotaInfoRawApplier::is_size_exceeded(const char * const entity,
+                                              const RGWQuotaInfo& qinfo,
+                                              const RGWStorageStats& stats,
+                                              const uint64_t size) const
+{
+  const bool limit_enabled = (qinfo.max_size >= 0);
+  if (!limit_enabled) {
+    return false;
+  }
+
+  const uint64_t used = stats.size;
+  const uint64_t limit = static_cast<uint64_t>(qinfo.max_size);
+
+  if (used + size <= limit) {
+    return false;
+  }
+
+  dout(10) << "quota exceeded: stats.size=" << stats.size
+           << " size=" << size << " "
+           << entity << "_quota.max_size=" << qinfo.max_size << dendl;
+  return true;
+}
+
+// True when the current object count plus `num_objs` is strictly greater
+// than qinfo.max_objects. A negative max_objects disables the check.
+bool RGWQuotaInfoRawApplier::is_num_objs_exceeded(const char * const entity,
+                                                  const RGWQuotaInfo& qinfo,
+                                                  const RGWStorageStats& stats,
+                                                  const uint64_t num_objs) const
+{
+  const bool limit_enabled = (qinfo.max_objects >= 0);
+  if (!limit_enabled) {
+    return false;
+  }
+
+  const bool within_limit =
+    stats.num_objects + num_objs <= static_cast<uint64_t>(qinfo.max_objects);
+  if (within_limit) {
+    return false;
+  }
+
+  dout(10) << "quota exceeded: stats.num_objects=" << stats.num_objects
+           << " " << entity << "_quota.max_objects=" << qinfo.max_objects
+           << dendl;
+  return true;
+}
+
+// Returns the function-local static applier matching qinfo.check_on_raw:
+// the raw applier when it is set, the default (rounded) one otherwise.
+const RGWQuotaInfoApplier& RGWQuotaInfoApplier::get_instance(
+  const RGWQuotaInfo& qinfo)
+{
+  static RGWQuotaInfoDefApplier default_qapplier;
+  static RGWQuotaInfoRawApplier raw_qapplier;
+
+  if (!qinfo.check_on_raw) {
+    return default_qapplier;
+  }
+  return raw_qapplier;
+}
+
+
+// Concrete RGWQuotaHandler: owns one bucket stats cache and one user stats
+// cache and checks both the bucket and the user quota against them.
+class RGWQuotaHandlerImpl : public RGWQuotaHandler {
+ RGWRados *store;
+ RGWBucketStatsCache bucket_stats_cache;
+ RGWUserStatsCache user_stats_cache;
+
+ // Checks one quota (`entity` is the log label) against `stats`.
+ // Returns 0 when the quota is disabled or not exceeded, and
+ // -ERR_QUOTA_EXCEEDED when either the object or the size limit is exceeded.
+ int check_quota(const char * const entity,
+ const RGWQuotaInfo& quota,
+ const RGWStorageStats& stats,
+ const uint64_t num_objs,
+ const uint64_t size) {
+ if (!quota.enabled) {
+ return 0;
+ }
+
+ const auto& quota_applier = RGWQuotaInfoApplier::get_instance(quota);
+
+ ldout(store->ctx(), 20) << entity
+ << " quota: max_objects=" << quota.max_objects
+ << " max_size=" << quota.max_size << dendl;
+
+
+ if (quota_applier.is_num_objs_exceeded(entity, quota, stats, num_objs)) {
+ return -ERR_QUOTA_EXCEEDED;
+ }
+
+ if (quota_applier.is_size_exceeded(entity, quota, stats, size)) {
+ return -ERR_QUOTA_EXCEEDED;
+ }
+
+ ldout(store->ctx(), 20) << entity << " quota OK:"
+ << " stats.num_objects=" << stats.num_objects
+ << " stats.size=" << stats.size << dendl;
+ return 0;
+ }
+public:
+ RGWQuotaHandlerImpl(RGWRados *_store, bool quota_threads) : store(_store),
+ bucket_stats_cache(_store),
+ user_stats_cache(_store, quota_threads) {}
+
+ // Checks the bucket quota first, then the user quota. Returns 0 when
+ // neither is enabled or both pass, otherwise the first negative result
+ // (a stats lookup error or -ERR_QUOTA_EXCEEDED).
+ int check_quota(const rgw_user& user,
+ rgw_bucket& bucket,
+ RGWQuotaInfo& user_quota,
+ RGWQuotaInfo& bucket_quota,
+ uint64_t num_objs,
+ uint64_t size) override {
+
+ if (!bucket_quota.enabled && !user_quota.enabled) {
+ return 0;
+ }
+
+ /*
+ * we need to fetch bucket stats if the user quota is enabled, because
+ * the whole system relies on us periodically updating the user's bucket
+ * stats in the user's header, this happens in get_stats() if we actually
+ * fetch that info and not rely on cached data
+ */
+ // NOTE(review): the code below only fetches bucket stats when the bucket
+ // quota is enabled, which does not match the comment above - confirm intent.
+
+ if (bucket_quota.enabled) {
+ RGWStorageStats bucket_stats;
+ int ret = bucket_stats_cache.get_stats(user, bucket, bucket_stats,
+ bucket_quota);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = check_quota("bucket", bucket_quota, bucket_stats, num_objs, size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (user_quota.enabled) {
+ RGWStorageStats user_stats;
+ int ret = user_stats_cache.get_stats(user, bucket, user_stats, user_quota);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = check_quota("user", user_quota, user_stats, num_objs, size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ return 0;
+ }
+
+ // Applies the same deltas to both caches.
+ void update_stats(const rgw_user& user, rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) override {
+ bucket_stats_cache.adjust_stats(user, bucket, obj_delta, added_bytes, removed_bytes);
+ user_stats_cache.adjust_stats(user, bucket, obj_delta, added_bytes, removed_bytes);
+ }
+
+ // Sets need_resharding when the bucket's object count plus num_objs exceeds
+ // num_shards * max_objs_per_shard, and in that case optionally suggests a
+ // shard count of twice the projected objects divided by max_objs_per_shard.
+ // Returns 0, or the negative error from the stats lookup.
+ int check_bucket_shards(uint64_t max_objs_per_shard, uint64_t num_shards,
+ const rgw_user& user, const rgw_bucket& bucket, RGWQuotaInfo& bucket_quota,
+ uint64_t num_objs, bool& need_resharding, uint32_t *suggested_num_shards) override
+ {
+ RGWStorageStats bucket_stats;
+ int ret = bucket_stats_cache.get_stats(user, bucket, bucket_stats,
+ bucket_quota);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (bucket_stats.num_objects + num_objs > num_shards * max_objs_per_shard) {
+ ldout(store->ctx(), 0) << __func__ << ": resharding needed: stats.num_objects=" << bucket_stats.num_objects
+ << " shard max_objects=" << max_objs_per_shard * num_shards << dendl;
+ need_resharding = true;
+ if (suggested_num_shards) {
+ // NOTE(review): divides by max_objs_per_shard without a zero check; confirm callers never pass 0.
+ *suggested_num_shards = (bucket_stats.num_objects + num_objs) * 2 / max_objs_per_shard;
+ }
+ } else {
+ need_resharding = false;
+ }
+
+ return 0;
+ }
+
+};
+
+
+// Allocates the concrete handler with `new`; release it with free_handler().
+RGWQuotaHandler *RGWQuotaHandler::generate_handler(RGWRados *store, bool quota_threads)
+{
+ return new RGWQuotaHandlerImpl(store, quota_threads);
+}
+
+// Deletes a handler obtained from generate_handler().
+void RGWQuotaHandler::free_handler(RGWQuotaHandler *handler)
+{
+ delete handler;
+}
+
+
+// Copies each configured bucket default (max objects, max size) that is
+// non-negative into `quota` and enables the quota when at least one applies.
+void rgw_apply_default_bucket_quota(RGWQuotaInfo& quota, const ConfigProxy& conf)
+{
+  const bool has_default_objects = (conf->rgw_bucket_default_quota_max_objects >= 0);
+  if (has_default_objects) {
+    quota.max_objects = conf->rgw_bucket_default_quota_max_objects;
+    quota.enabled = true;
+  }
+
+  const bool has_default_size = (conf->rgw_bucket_default_quota_max_size >= 0);
+  if (has_default_size) {
+    quota.max_size = conf->rgw_bucket_default_quota_max_size;
+    quota.enabled = true;
+  }
+}
+
+// Copies each configured user default (max objects, max size) that is
+// non-negative into `quota` and enables the quota when at least one applies.
+void rgw_apply_default_user_quota(RGWQuotaInfo& quota, const ConfigProxy& conf)
+{
+  const bool has_default_objects = (conf->rgw_user_default_quota_max_objects >= 0);
+  if (has_default_objects) {
+    quota.max_objects = conf->rgw_user_default_quota_max_objects;
+    quota.enabled = true;
+  }
+
+  const bool has_default_size = (conf->rgw_user_default_quota_max_size >= 0);
+  if (has_default_size) {
+    quota.max_size = conf->rgw_user_default_quota_max_size;
+    quota.enabled = true;
+  }
+}
diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h
new file mode 100644
index 00000000..a048aa7d
--- /dev/null
+++ b/src/rgw/rgw_quota.h
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_QUOTA_H
+#define CEPH_RGW_QUOTA_H
+
+#include "include/utime.h"
+#include "common/config_fwd.h"
+#include "common/lru_map.h"
+
+#include <atomic>
+
+// Converts a byte count to KiB, adding 1023 before the integer division so
+// that non-negative partial kilobytes round up.
+static inline int64_t rgw_rounded_kb(int64_t bytes)
+{
+  const int64_t bytes_per_kb = 1024;
+  const int64_t padded = bytes + (bytes_per_kb - 1);
+  return padded / bytes_per_kb;
+}
+
+class RGWRados;
+class JSONObj;
+
+// Quota limits for one entity (a bucket or a user). A negative max_size or
+// max_objects means that limit is not set; `enabled` switches the whole
+// quota on or off.
+struct RGWQuotaInfo {
+  template<class T> friend class RGWQuotaCache;
+protected:
+  /* The quota thresholds after which comparing against cached storage stats
+   * is disallowed. Those fields may be accessed only by the RGWQuotaCache.
+   * They are not intended as tunables but rather as a mean to store results
+   * of repeating calculations in the quota cache subsystem. */
+  int64_t max_size_soft_threshold;
+  int64_t max_objs_soft_threshold;
+
+public:
+  int64_t max_size;
+  int64_t max_objects;
+  bool enabled;
+  /* Do we want to compare with raw, not rounded RGWStorageStats::size (true)
+   * or maybe rounded-to-4KiB RGWStorageStats::size_rounded (false)? */
+  bool check_on_raw;
+
+  RGWQuotaInfo()
+    : max_size_soft_threshold(-1),
+      max_objs_soft_threshold(-1),
+      max_size(-1),
+      max_objects(-1),
+      enabled(false),
+      check_on_raw(false) {
+  }
+
+  // Wire format (struct version 3): the size in KiB as the first field (the
+  // only size field read by version-1 decoders), then max_objects, enabled,
+  // the exact size in bytes (v2) and check_on_raw (v3).
+  void encode(bufferlist& bl) const {
+    ENCODE_START(3, 1, bl);
+    if (max_size < 0) {
+      /* max_size is known to be negative here, so negate it directly
+       * instead of calling unqualified abs(), which may resolve to the
+       * C `int abs(int)` and truncate a 64-bit value. */
+      encode(-rgw_rounded_kb(-max_size), bl);
+    } else {
+      encode(rgw_rounded_kb(max_size), bl);
+    }
+    encode(max_objects, bl);
+    encode(enabled, bl);
+    encode(max_size, bl);
+    encode(check_on_raw, bl);
+    ENCODE_FINISH(bl);
+  }
+  // Version-1 input only carries the KiB value, so the byte size is
+  // reconstructed from it; check_on_raw keeps its current value for input
+  // older than version 3.
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(3, 1, 1, bl);
+    int64_t max_size_kb;
+    decode(max_size_kb, bl);
+    decode(max_objects, bl);
+    decode(enabled, bl);
+    if (struct_v < 2) {
+      max_size = max_size_kb * 1024;
+    } else {
+      decode(max_size, bl);
+    }
+    if (struct_v >= 3) {
+      decode(check_on_raw, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+
+  void decode_json(JSONObj *obj);
+
+};
+WRITE_CLASS_ENCODER(RGWQuotaInfo)
+
+struct rgw_bucket;
+
+// Abstract quota interface. Instances are created with generate_handler()
+// and destroyed with free_handler().
+class RGWQuotaHandler {
+public:
+ RGWQuotaHandler() {}
+ virtual ~RGWQuotaHandler() {
+ }
+ // Checks whether adding num_objs objects / size bytes fits the given user and bucket quotas.
+ virtual int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
+ RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota,
+ uint64_t num_objs, uint64_t size) = 0;
+
+ // Reports through need_resharding (and optionally suggested_num_shards) whether the bucket should be resharded.
+ virtual int check_bucket_shards(uint64_t max_objs_per_shard, uint64_t num_shards,
+ const rgw_user& bucket_owner, const rgw_bucket& bucket,
+ RGWQuotaInfo& bucket_quota, uint64_t num_objs, bool& need_resharding,
+ uint32_t *suggested_num_shards) = 0;
+
+ // Informs the handler of a change in object count and bytes for (bucket_owner, bucket).
+ virtual void update_stats(const rgw_user& bucket_owner, rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) = 0;
+
+ static RGWQuotaHandler *generate_handler(RGWRados *store, bool quota_threads);
+ static void free_handler(RGWQuotaHandler *handler);
+};
+
+// apply default quotas from configuration
+void rgw_apply_default_bucket_quota(RGWQuotaInfo& quota, const ConfigProxy& conf);
+void rgw_apply_default_user_quota(RGWQuotaInfo& quota, const ConfigProxy& conf);
+
+#endif
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
new file mode 100644
index 00000000..4d3ae0b8
--- /dev/null
+++ b/src/rgw/rgw_rados.cc
@@ -0,0 +1,10734 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/compat.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sstream>
+
+#include <boost/algorithm/string.hpp>
+#include <string_view>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/format.hpp>
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+
+#include "common/ceph_json.h"
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/Throttle.h"
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_cache.h"
+#include "rgw_acl.h"
+#include "rgw_acl_s3.h" /* for dumping s3policy in debug log */
+#include "rgw_aio_throttle.h"
+#include "rgw_bucket.h"
+#include "rgw_rest_conn.h"
+#include "rgw_cr_rados.h"
+#include "rgw_cr_rest.h"
+#include "rgw_putobj_processor.h"
+
+#include "cls/rgw/cls_rgw_ops.h"
+#include "cls/rgw/cls_rgw_client.h"
+#include "cls/rgw/cls_rgw_const.h"
+#include "cls/refcount/cls_refcount_client.h"
+#include "cls/version/cls_version_client.h"
+#include "cls/log/cls_log_client.h"
+#include "cls/timeindex/cls_timeindex_client.h"
+#include "cls/lock/cls_lock_client.h"
+#include "cls/user/cls_user_client.h"
+#include "cls/otp/cls_otp_client.h"
+#include "osd/osd_types.h"
+
+#include "rgw_tools.h"
+#include "rgw_coroutine.h"
+#include "rgw_compression.h"
+#include "rgw_etag_verifier.h"
+
+#undef fork // fails to compile RGWPeriod::fork() below
+
+#include "common/Clock.h"
+
+using namespace librados;
+
+#include <string>
+#include <iostream>
+#include <vector>
+#include <atomic>
+#include <list>
+#include <map>
+#include "include/random.h"
+
+#include "rgw_gc.h"
+#include "rgw_lc.h"
+
+#include "rgw_object_expirer_core.h"
+#include "rgw_sync.h"
+#include "rgw_sync_counters.h"
+#include "rgw_sync_trace.h"
+#include "rgw_data_sync.h"
+#include "rgw_realm_watcher.h"
+#include "rgw_reshard.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_zone_utils.h"
+#include "services/svc_quota.h"
+#include "services/svc_sync_modules.h"
+#include "services/svc_sys_obj.h"
+#include "services/svc_sys_obj_cache.h"
+
+#include "compressor/Compressor.h"
+
+#ifdef WITH_LTTNG
+#define TRACEPOINT_DEFINE
+#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#include "tracing/rgw_rados.h"
+#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#undef TRACEPOINT_DEFINE
+#else
+#define tracepoint(...)
+#endif
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+
+// File-local string constants.
+static string shadow_ns = "shadow";
+static string dir_oid_prefix = ".dir.";
+static string default_bucket_index_pool_suffix = "rgw.buckets.index";
+static string default_storage_extra_pool_suffix = "rgw.buckets.non-ec";
+
+static string log_lock_name = "rgw_log_lock";
+static RGWObjCategory main_category = RGWObjCategory::Main;
+#define RGW_USAGE_OBJ_PREFIX "usage."
+
+// NOTE(review): dout_subsys is already defined with this same value a few
+// lines above; this repeat is redundant.
+#define dout_subsys ceph_subsys_rgw
+
+const std::string MP_META_SUFFIX = ".meta";
+
+
+/*
+ * Resolve the pool that holds the data of obj.
+ *
+ * The zone params are asked first, using the head placement rule. When they
+ * cannot resolve a pool, the zonegroup's default placement is looked up and
+ * either its data pool (for the default storage class) or its extra-data
+ * pool (for objects flagged in_extra_data) is used.
+ *
+ * Returns false only when the default placement cannot be found in the zone
+ * params; *pool is not written by the fallback path in that case.
+ */
+static bool rgw_get_obj_data_pool(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params,
+                                  const rgw_placement_rule& head_placement_rule,
+                                  const rgw_obj& obj, rgw_pool *pool)
+{
+  if (zone_params.get_head_data_pool(head_placement_rule, obj, pool)) {
+    return true;
+  }
+
+  RGWZonePlacementInfo fallback;
+  if (!zone_params.get_placement(zonegroup.default_placement.name, &fallback)) {
+    return false;
+  }
+
+  if (obj.in_extra_data) {
+    *pool = fallback.get_data_extra_pool();
+  } else {
+    *pool = fallback.get_data_pool(zonegroup.default_placement.storage_class);
+  }
+  return true;
+}
+
+/*
+ * Fill raw_obj from obj: oid and locator first, then the data pool.
+ * Returns the result of the pool lookup.
+ */
+static bool rgw_obj_to_raw(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params,
+                           const rgw_placement_rule& head_placement_rule,
+                           const rgw_obj& obj, rgw_raw_obj *raw_obj)
+{
+  get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc);
+
+  const bool pool_found = rgw_get_obj_data_pool(zonegroup, zone_params,
+                                                head_placement_rule, obj,
+                                                &raw_obj->pool);
+  return pool_found;
+}
+
+/*
+ * Return the raw object for this selector. A selector that is already raw
+ * returns its stored raw_obj; otherwise the rgw_obj is translated using the
+ * given zonegroup and zone params (the translation result is not checked).
+ */
+rgw_raw_obj rgw_obj_select::get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const
+{
+  if (is_raw) {
+    return raw_obj;
+  }
+
+  rgw_raw_obj translated;
+  rgw_obj_to_raw(zonegroup, zone_params, placement_rule, obj, &translated);
+  return translated;
+}
+
+/*
+ * Same as the zonegroup/zone-params overload, but lets the store perform the
+ * rgw_obj -> raw object translation.
+ */
+rgw_raw_obj rgw_obj_select::get_raw_obj(RGWRados *store) const
+{
+  if (is_raw) {
+    return raw_obj;
+  }
+
+  rgw_raw_obj translated;
+  store->obj_to_raw(placement_rule, obj, &translated);
+  return translated;
+}
+
+/*
+ * Attach version handling to a read op: an equality check against the
+ * version to verify (when there is one), followed by a read of the current
+ * version into read_version.
+ */
+void RGWObjVersionTracker::prepare_op_for_read(ObjectReadOperation *op)
+{
+  if (obj_version *expected = version_for_check()) {
+    cls_version_check(*op, *expected, VER_COND_EQ);
+  }
+
+  cls_version_read(*op, &read_version);
+}
+
+/*
+ * Attach version handling to a write op: an optional equality check, then
+ * either set the explicit write version or, when none is pending, increment
+ * the stored version.
+ */
+void RGWObjVersionTracker::prepare_op_for_write(ObjectWriteOperation *op)
+{
+  obj_version *expected = version_for_check();
+  obj_version *to_write = version_for_write();
+
+  if (expected) {
+    cls_version_check(*op, *expected, VER_COND_EQ);
+  }
+
+  if (!to_write) {
+    cls_version_inc(*op);
+    return;
+  }
+  cls_version_set(*op, *to_write);
+}
+
+/*
+ * Update the tracker's versions after a write.
+ *
+ * If a version had been read (ver != 0) and no explicit write version was
+ * set (ver == 0), mirror the cls_version_inc() by bumping read_version so
+ * the next operation can recheck it. Otherwise the written version becomes
+ * the read version. The pending write version is reset in both cases.
+ */
+void RGWObjVersionTracker::apply_write()
+{
+  const bool had_read_version = (read_version.ver != 0);
+  const bool used_increment = (write_version.ver == 0);
+
+  if (!(had_read_version && used_increment)) {
+    read_version = write_version;
+  } else {
+    ++read_version.ver;
+  }
+
+  write_version = obj_version();
+}
+
+/*
+ * Advance the iterator to the next stripe.
+ *
+ * Explicit manifests simply step explicit_iter. Rule-based manifests first
+ * move from the head to the first tail stripe, then step stripe by stripe,
+ * switching part and rule when the stripe offset crosses their boundaries.
+ * The iterator does not move once ofs equals the object size, or when the
+ * manifest has no rules.
+ */
+void RGWObjManifest::obj_iterator::operator++()
+{
+ if (manifest->explicit_objs) {
+ ++explicit_iter;
+
+ update_explicit_pos();
+
+ update_location();
+ return;
+ }
+
+ uint64_t obj_size = manifest->get_obj_size();
+ uint64_t head_size = manifest->get_head_size();
+
+ // already at the end of the object: nothing to advance to
+ if (ofs == obj_size) {
+ return;
+ }
+
+ if (manifest->rules.empty()) {
+ return;
+ }
+
+ /* are we still pointing at the head? */
+ if (ofs < head_size) {
+ // leave the head: position on the first stripe governed by the first rule
+ rule_iter = manifest->rules.begin();
+ RGWObjManifestRule *rule = &rule_iter->second;
+ ofs = std::min(head_size, obj_size);
+ stripe_ofs = ofs;
+ cur_stripe = 1;
+ stripe_size = std::min(obj_size - ofs, rule->stripe_max_size);
+ if (rule->part_size > 0) {
+ stripe_size = std::min(stripe_size, rule->part_size);
+ }
+ update_location();
+ return;
+ }
+
+ RGWObjManifestRule *rule = &rule_iter->second;
+
+ // step one full stripe forward within the current rule
+ stripe_ofs += rule->stripe_max_size;
+ cur_stripe++;
+ dout(20) << "RGWObjManifest::operator++(): rule->part_size=" << rule->part_size << " rules.size()=" << manifest->rules.size() << dendl;
+
+ if (rule->part_size > 0) {
+ /* multi part, multi stripes object */
+
+ dout(20) << "RGWObjManifest::operator++(): stripe_ofs=" << stripe_ofs << " part_ofs=" << part_ofs << " rule->part_size=" << rule->part_size << dendl;
+
+ if (stripe_ofs >= part_ofs + rule->part_size) {
+ /* moved to the next part */
+ cur_stripe = 0;
+ part_ofs += rule->part_size;
+ stripe_ofs = part_ofs;
+
+ bool last_rule = (next_rule_iter == manifest->rules.end());
+ /* move to the next rule? */
+ if (!last_rule && stripe_ofs >= next_rule_iter->second.start_ofs) {
+ rule_iter = next_rule_iter;
+ // NOTE(review): next_rule_iter has not changed since the check above, so
+ // last_rule is necessarily false here and the increment below always runs.
+ last_rule = (next_rule_iter == manifest->rules.end());
+ if (!last_rule) {
+ ++next_rule_iter;
+ }
+ cur_part_id = rule_iter->second.start_part_num;
+ } else {
+ cur_part_id++;
+ }
+
+ rule = &rule_iter->second;
+ }
+
+ // a stripe never extends past the end of its part
+ stripe_size = std::min(rule->part_size - (stripe_ofs - part_ofs), rule->stripe_max_size);
+ }
+
+ cur_override_prefix = rule->override_prefix;
+
+ ofs = stripe_ofs;
+ // clamp to the end of the object; the resulting stripe is empty
+ if (ofs > obj_size) {
+ ofs = obj_size;
+ stripe_ofs = ofs;
+ stripe_size = 0;
+ }
+
+ dout(20) << "RGWObjManifest::operator++(): result: ofs=" << ofs << " stripe_ofs=" << stripe_ofs << " part_ofs=" << part_ofs << " rule->part_size=" << rule->part_size << dendl;
+ update_location();
+}
+
+/*
+ * Start generating a manifest into _m.
+ *
+ * Sets the tail placement (the head rule when no tail rule is given,
+ * otherwise the tail rule after inherit_from() the head rule), sets the head
+ * object, assigns a random object prefix when the manifest has none, and
+ * initializes the generator's stripe/part cursor from the rule found at
+ * offset 0.
+ *
+ * Returns 0 on success, -EIO when the manifest has no rule for offset 0.
+ */
+int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m,
+ const rgw_placement_rule& head_placement_rule,
+ const rgw_placement_rule *tail_placement_rule,
+ const rgw_bucket& _b, const rgw_obj& _obj)
+{
+ manifest = _m;
+
+ if (!tail_placement_rule) {
+ manifest->set_tail_placement(head_placement_rule, _b);
+ } else {
+ rgw_placement_rule new_tail_rule = *tail_placement_rule;
+ new_tail_rule.inherit_from(head_placement_rule);
+ manifest->set_tail_placement(new_tail_rule, _b);
+ }
+
+ manifest->set_head(head_placement_rule, _obj, 0);
+ last_ofs = 0;
+
+ if (manifest->get_prefix().empty()) {
+ // build a prefix of the form "." + random alphanumerics + "_"
+ char buf[33];
+ gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1);
+
+ string oid_prefix = ".";
+ oid_prefix.append(buf);
+ oid_prefix.append("_");
+
+ manifest->set_prefix(oid_prefix);
+ }
+
+ bool found = manifest->get_rule(0, &rule);
+ if (!found) {
+ derr << "ERROR: manifest->get_rule() could not find rule" << dendl;
+ return -EIO;
+ }
+
+ uint64_t head_size = manifest->get_head_size();
+
+ // the first stripe is the head when it has a size, otherwise a full stripe
+ if (head_size > 0) {
+ cur_stripe_size = head_size;
+ } else {
+ cur_stripe_size = rule.stripe_max_size;
+ }
+
+ cur_part_id = rule.start_part_num;
+
+ manifest->get_implicit_location(cur_part_id, cur_stripe, 0, NULL, &cur_obj);
+
+ // Normal object which not generated through copy operation
+ manifest->set_tail_instance(_obj.key.instance);
+
+ manifest->update_iterators();
+
+ return 0;
+}
+
+/*
+ * Advance the generator to offset ofs and record it as the object size.
+ *
+ * Offsets below the maximum head size only grow the head. Offsets at or past
+ * it fix the head at its maximum size and compute the current stripe index
+ * from the distance past the head. Returns -EINVAL when ofs is lower than
+ * the previously supplied offset, 0 otherwise.
+ */
+int RGWObjManifest::generator::create_next(uint64_t ofs)
+{
+ if (ofs < last_ofs) /* only going forward */
+ return -EINVAL;
+
+ uint64_t max_head_size = manifest->get_max_head_size();
+
+ if (ofs < max_head_size) {
+ manifest->set_head_size(ofs);
+ }
+
+ if (ofs >= max_head_size) {
+ manifest->set_head_size(max_head_size);
+ cur_stripe = (ofs - max_head_size) / rule.stripe_max_size;
+ cur_stripe_size = rule.stripe_max_size;
+
+ // when part 0 has a head, tail stripe numbering is shifted up by one
+ if (cur_part_id == 0 && max_head_size > 0) {
+ cur_stripe++;
+ }
+ }
+
+ last_ofs = ofs;
+ manifest->set_obj_size(ofs);
+
+ manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, NULL, &cur_obj);
+
+ manifest->update_iterators();
+
+ return 0;
+}
+
+// Return a reference to the manifest's stored begin iterator (begin_iter).
+const RGWObjManifest::obj_iterator& RGWObjManifest::obj_begin()
+{
+ return begin_iter;
+}
+
+// Return a reference to the manifest's stored end iterator (end_iter).
+const RGWObjManifest::obj_iterator& RGWObjManifest::obj_end()
+{
+ return end_iter;
+}
+
+/*
+ * Return an iterator positioned by seek() at ofs; offsets past the end of
+ * the object are clamped to the object size.
+ */
+RGWObjManifest::obj_iterator RGWObjManifest::obj_find(uint64_t ofs)
+{
+  const uint64_t target = (ofs > obj_size) ? obj_size : ofs;
+
+  RGWObjManifest::obj_iterator pos(this);
+  pos.seek(target);
+  return pos;
+}
+
+/*
+ * Append manifest m to the end of this manifest.
+ *
+ * If either side is explicit, or m has no rules, the explicit append path is
+ * used. If this manifest has no rules it simply becomes a copy of m.
+ * Otherwise m's rules are compared one by one against this manifest's last
+ * rule: as long as part size, stripe size, prefix and expected part number
+ * line up, the rule is covered by the existing last rule; at the first
+ * mismatch the remaining rules of m are copied over (shifted by the current
+ * object size). Note that part_size of the compared rules is filled in when
+ * it was 0, both in this manifest and in m.
+ *
+ * Always returns 0 on the rule-based path; the explicit path returns
+ * whatever append_explicit() returns.
+ */
+int RGWObjManifest::append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
+                           const RGWZoneParams& zone_params)
+{
+  if (explicit_objs || m.explicit_objs) {
+    return append_explicit(m, zonegroup, zone_params);
+  }
+
+  if (rules.empty()) {
+    *this = m;
+    return 0;
+  }
+
+  // adopt m's prefix when we do not have one yet
+  if (prefix.empty()) {
+    prefix = m.prefix;
+  }
+
+  map<uint64_t, RGWObjManifestRule>::iterator miter = m.rules.begin();
+  if (miter == m.rules.end()) {
+    return append_explicit(m, zonegroup, zone_params);
+  }
+
+  for (; miter != m.rules.end(); ++miter) {
+    map<uint64_t, RGWObjManifestRule>::reverse_iterator last_rule = rules.rbegin();
+
+    RGWObjManifestRule& rule = last_rule->second;
+
+    // an open-ended rule gets closed at the current object size
+    if (rule.part_size == 0) {
+      rule.part_size = obj_size - rule.start_ofs;
+    }
+
+    RGWObjManifestRule& next_rule = miter->second;
+    if (!next_rule.part_size) {
+      next_rule.part_size = m.obj_size - next_rule.start_ofs;
+    }
+
+    string rule_prefix = prefix;
+    if (!rule.override_prefix.empty()) {
+      rule_prefix = rule.override_prefix;
+    }
+
+    string next_rule_prefix = m.prefix;
+    if (!next_rule.override_prefix.empty()) {
+      next_rule_prefix = next_rule.override_prefix;
+    }
+
+    // layout or naming differs: the rest of m's rules must be kept verbatim
+    if (rule.part_size != next_rule.part_size ||
+        rule.stripe_max_size != next_rule.stripe_max_size ||
+        rule_prefix != next_rule_prefix) {
+      if (next_rule_prefix != prefix) {
+        append_rules(m, miter, &next_rule_prefix);
+      } else {
+        append_rules(m, miter, NULL);
+      }
+      break;
+    }
+
+    uint64_t expected_part_num = rule.start_part_num + 1;
+    if (rule.part_size > 0) {
+      expected_part_num = rule.start_part_num + (obj_size + next_rule.start_ofs - rule.start_ofs) / rule.part_size;
+    }
+
+    // part numbering is not contiguous with our last rule
+    if (expected_part_num != next_rule.start_part_num) {
+      append_rules(m, miter, NULL);
+      break;
+    }
+  }
+
+  set_obj_size(obj_size + m.obj_size);
+
+  return 0;
+}
+
+// Convenience overload: take zonegroup and zone params from the zone service.
+int RGWObjManifest::append(RGWObjManifest& m, RGWSI_Zone *zone_svc)
+{
+  const RGWZoneGroup& zonegroup = zone_svc->get_zonegroup();
+  const RGWZoneParams& zone_params = zone_svc->get_zone_params();
+  return append(m, zonegroup, zone_params);
+}
+
+/*
+ * Copy the rules of m from miter to the end into this manifest, shifting
+ * each start offset by the current object size and, when override_prefix is
+ * non-null, stamping it on every copied rule. miter is advanced to
+ * m.rules.end().
+ */
+void RGWObjManifest::append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& miter,
+                                  string *override_prefix)
+{
+  while (miter != m.rules.end()) {
+    RGWObjManifestRule shifted = miter->second;
+    shifted.start_ofs += obj_size;
+    if (override_prefix) {
+      shifted.override_prefix = *override_prefix;
+    }
+    rules[shifted.start_ofs] = shifted;
+    ++miter;
+  }
+}
+
+/*
+ * Turn a rule-based manifest into an explicit one.
+ *
+ * Walks every stripe, recording a part in objs keyed by the stripe offset.
+ * The part at offset 0 points at the head object; every other part points
+ * at the stripe's raw location converted back to an rgw_obj. A part's size
+ * is the distance to the next stripe offset. Afterwards the rules and the
+ * prefix are cleared. No-op when the manifest is already explicit.
+ */
+void RGWObjManifest::convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params)
+{
+ if (explicit_objs) {
+ return;
+ }
+ obj_iterator iter = obj_begin();
+
+ while (iter != obj_end()) {
+ RGWObjManifestPart& part = objs[iter.get_stripe_ofs()];
+ const rgw_obj_select& os = iter.get_location();
+ const rgw_raw_obj& raw_loc = os.get_raw_obj(zonegroup, zone_params);
+ part.loc_ofs = 0;
+
+ uint64_t ofs = iter.get_stripe_ofs();
+
+ if (ofs == 0) {
+ part.loc = obj;
+ } else {
+ rgw_raw_obj_to_obj(tail_placement.bucket, raw_loc, &part.loc);
+ }
+ // advance first: the next stripe's offset marks the end of this part
+ ++iter;
+ uint64_t next_ofs = iter.get_stripe_ofs();
+
+ part.size = next_ofs - ofs;
+ }
+
+ explicit_objs = true;
+ rules.clear();
+ prefix.clear();
+}
+
+/*
+ * Append m part by part. Both manifests are converted to explicit form if
+ * they are not already; m's parts are then copied in, keyed at their offset
+ * plus this manifest's current size, and the object size grows by m's size.
+ * Always returns 0.
+ */
+int RGWObjManifest::append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params)
+{
+  if (!explicit_objs) {
+    convert_to_explicit(zonegroup, zone_params);
+  }
+  if (!m.explicit_objs) {
+    m.convert_to_explicit(zonegroup, zone_params);
+  }
+
+  const uint64_t base = obj_size;
+  for (auto& entry : m.objs) {
+    objs[base + entry.first] = entry.second;
+  }
+  obj_size += m.obj_size;
+
+  return 0;
+}
+
+/*
+ * Copy into *rule the rule with the greatest start offset that is <= ofs,
+ * or the first rule when every rule starts after ofs.
+ * Returns false, leaving *rule untouched, when the manifest has no rules.
+ */
+bool RGWObjManifest::get_rule(uint64_t ofs, RGWObjManifestRule *rule)
+{
+  if (rules.empty()) {
+    return false;
+  }
+
+  // upper_bound() yields the first rule starting after ofs; step back one
+  auto pos = rules.upper_bound(ofs);
+  if (pos != rules.begin()) {
+    --pos;
+  }
+
+  *rule = pos->second;
+  return true;
+}
+
+/*
+ * Prepare a fresh write version: ver is set to 1 and the tag is cleared and
+ * then filled through append_rand_alpha() with TAG_LEN as length argument.
+ */
+void RGWObjVersionTracker::generate_new_write_ver(CephContext *cct)
+{
+ write_version.ver = 1;
+// NOTE: being a preprocessor macro, TAG_LEN stays defined for the rest of the file
+#define TAG_LEN 24
+
+ write_version.tag.clear();
+ append_rand_alpha(cct, write_version.tag, write_version.tag, TAG_LEN);
+}
+
+/*
+ * Coroutine manager that posts a set of metadata log shard ids to the
+ * "/admin/log" resource of every connection it is handed.
+ */
+class RGWMetaNotifierManager : public RGWCoroutinesManager {
+  RGWRados *store;
+  RGWHTTPManager http_manager;
+
+public:
+  RGWMetaNotifierManager(RGWRados *_store)
+    : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()),
+      store(_store),
+      http_manager(store->ctx(), completion_mgr)
+  {
+    http_manager.start();
+  }
+
+  // Build one coroutine stack per connection, each posting `shards`, then
+  // run them all; returns the result of run().
+  int notify_all(map<string, RGWRESTConn *>& conn_map, set<int>& shards) {
+    rgw_http_param_pair pairs[] = { { "type", "metadata" },
+                                    { "notify", NULL },
+                                    { NULL, NULL } };
+
+    list<RGWCoroutinesStack *> stacks;
+    for (auto& peer : conn_map) {
+      RGWRESTConn *conn = peer.second;
+      auto *stack = new RGWCoroutinesStack(store->ctx(), this);
+      stack->call(new RGWPostRESTResourceCR<set<int>, int>(store->ctx(), conn, &http_manager,
+                                                           "/admin/log", pairs, shards, NULL));
+      stacks.push_back(stack);
+    }
+    return run(stacks);
+  }
+};
+
+/*
+ * Coroutine manager that posts a map of data log shards (shard id -> set of
+ * strings) to the "/admin/log" resource of every connection it is handed,
+ * identifying the local zone through the "source-zone" parameter.
+ */
+class RGWDataNotifierManager : public RGWCoroutinesManager {
+  RGWRados *store;
+  RGWHTTPManager http_manager;
+
+public:
+  RGWDataNotifierManager(RGWRados *_store)
+    : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()),
+      store(_store),
+      http_manager(store->ctx(), completion_mgr)
+  {
+    http_manager.start();
+  }
+
+  // Build one coroutine stack per connection, each posting `shards`, then
+  // run them all; returns the result of run().
+  int notify_all(map<string, RGWRESTConn *>& conn_map, map<int, set<string> >& shards) {
+    rgw_http_param_pair pairs[] = { { "type", "data" },
+                                    { "notify", NULL },
+                                    { "source-zone", store->svc.zone->get_zone_params().get_id().c_str() },
+                                    { NULL, NULL } };
+
+    list<RGWCoroutinesStack *> stacks;
+    for (auto& peer : conn_map) {
+      RGWRESTConn *conn = peer.second;
+      auto *stack = new RGWCoroutinesStack(store->ctx(), this);
+      stack->call(new RGWPostRESTResourceCR<map<int, set<string> >, int>(store->ctx(), conn, &http_manager,
+                                                                         "/admin/log", pairs, shards, NULL));
+      stacks.push_back(stack);
+    }
+    return run(stacks);
+  }
+};
+
+/* class RGWRadosThread */
+
+// Allocate the Worker for this processor and launch it under thread_name.
+void RGWRadosThread::start()
+{
+ worker = new Worker(cct, this);
+ worker->create(thread_name.c_str());
+}
+
+/*
+ * Request shutdown: raise down_flag, let the subclass interrupt its work via
+ * stop_process(), then wake, join and destroy the worker if one exists.
+ */
+void RGWRadosThread::stop()
+{
+  down_flag = true;
+  stop_process();
+
+  if (!worker) {
+    return;
+  }
+
+  worker->signal();
+  worker->join();
+  delete worker;
+  worker = NULL;
+}
+
+/*
+ * Worker thread body: call processor->process() repeatedly until the
+ * processor reports going_down().
+ *
+ * With a non-zero interval, each round sleeps for whatever is left of the
+ * interval after process() returned (no sleep if it overran). With a zero
+ * interval the thread blocks in wait() between rounds. The interval is
+ * re-read every round so configuration changes take effect.
+ */
+void *RGWRadosThread::Worker::entry() {
+ uint64_t msec = processor->interval_msec();
+ utime_t interval = utime_t(msec / 1000, (msec % 1000) * 1000000);
+
+ do {
+ utime_t start = ceph_clock_now();
+ int r = processor->process();
+ if (r < 0) {
+ dout(0) << "ERROR: processor->process() returned error r=" << r << dendl;
+ }
+
+ if (processor->going_down())
+ break;
+
+ // `end` becomes the time spent in process()
+ utime_t end = ceph_clock_now();
+ end -= start;
+
+ uint64_t cur_msec = processor->interval_msec();
+ if (cur_msec != msec) { /* was it reconfigured? */
+ msec = cur_msec;
+ interval = utime_t(msec / 1000, (msec % 1000) * 1000000);
+ }
+
+ if (cur_msec > 0) {
+ if (interval <= end)
+ continue; // next round
+
+ utime_t wait_time = interval;
+ wait_time -= end;
+
+ wait_interval(wait_time);
+ } else {
+ wait();
+ }
+ } while (!processor->going_down());
+
+ return NULL;
+}
+
+// Background thread ("meta-notifier") whose process() pushes modified
+// metadata log shards to peer connections through an RGWMetaNotifierManager.
+class RGWMetaNotifier : public RGWRadosThread {
+ RGWMetaNotifierManager notify_mgr;
+ RGWMetadataLog *const log;
+
+ // polling interval, read from rgw_md_notify_interval_msec on every call
+ uint64_t interval_msec() override {
+ return cct->_conf->rgw_md_notify_interval_msec;
+ }
+ // stop the notification manager when the thread is being stopped
+ void stop_process() override {
+ notify_mgr.stop();
+ }
+public:
+ RGWMetaNotifier(RGWRados *_store, RGWMetadataLog* log)
+ : RGWRadosThread(_store, "meta-notifier"), notify_mgr(_store), log(log) {}
+
+ int process() override;
+};
+
+/*
+ * Drain the set of modified mdlog shards and, if there are any, notify every
+ * zone connection about them. The notification result is not examined;
+ * always returns 0.
+ */
+int RGWMetaNotifier::process()
+{
+  set<int> shards;
+  log->read_clear_modified(shards);
+
+  if (!shards.empty()) {
+    for (const int& shard_id : shards) {
+      ldout(cct, 20) << __func__ << "(): notifying mdlog change, shard_id=" << shard_id << dendl;
+    }
+
+    notify_mgr.notify_all(store->svc.zone->get_zone_conn_map(), shards);
+  }
+
+  return 0;
+}
+
+// Background thread ("data-notifier") whose process() pushes modified data
+// log shards to peers through an RGWDataNotifierManager.
+class RGWDataNotifier : public RGWRadosThread {
+ RGWDataNotifierManager notify_mgr;
+
+ // polling interval, read from rgw_data_notify_interval_msec on every call
+ uint64_t interval_msec() override {
+ return cct->_conf.get_val<int64_t>("rgw_data_notify_interval_msec");
+ }
+ // stop the notification manager when the thread is being stopped
+ void stop_process() override {
+ notify_mgr.stop();
+ }
+public:
+ RGWDataNotifier(RGWRados *_store) : RGWRadosThread(_store, "data-notifier"), notify_mgr(_store) {}
+
+ int process() override;
+};
+
+/*
+ * Drain the modified datalog shards and, if there are any, notify the zones
+ * in the data-notify map. Does nothing when the store has no data log. The
+ * notification result is not examined; always returns 0.
+ */
+int RGWDataNotifier::process()
+{
+  if (!store->data_log) {
+    return 0;
+  }
+
+  map<int, set<string> > shards;
+  store->data_log->read_clear_modified(shards);
+
+  if (!shards.empty()) {
+    for (const auto& shard : shards) {
+      ldout(cct, 20) << __func__ << "(): notifying datalog change, shard_id=" << shard.first << ": " << shard.second << dendl;
+    }
+
+    notify_mgr.notify_all(store->svc.zone->get_zone_data_notify_to_map(), shards);
+  }
+
+  return 0;
+}
+
+/*
+ * Common base for the sync worker threads; narrows RGWRadosThread by making
+ * init() and process() pure virtual.
+ *
+ * Only one constructor is declared. A separate (RGWRados *) overload next to
+ * the defaulted thread_name parameter would make every one-argument
+ * construction ambiguous, so it could never be selected.
+ */
+class RGWSyncProcessorThread : public RGWRadosThread {
+public:
+  RGWSyncProcessorThread(RGWRados *_store, const string& thread_name = "radosgw") : RGWRadosThread(_store, thread_name) {}
+  ~RGWSyncProcessorThread() override {}
+  int init() override = 0 ;
+  int process() override = 0;
+};
+
+/*
+ * Thread ("meta-sync") that drives an RGWMetaSyncStatusManager: init()
+ * initializes it, process() runs it, stop_process() stops it.
+ */
+class RGWMetaSyncProcessorThread : public RGWSyncProcessorThread
+{
+  RGWMetaSyncStatusManager sync;
+
+  uint64_t interval_msec() override {
+    return 0; /* no interval associated, it'll run once until stopped */
+  }
+  void stop_process() override {
+    sync.stop();
+  }
+public:
+  RGWMetaSyncProcessorThread(RGWRados *_store, RGWAsyncRadosProcessor *async_rados)
+    : RGWSyncProcessorThread(_store, "meta-sync"), sync(_store, async_rados) {}
+
+  // forward a wakeup for every listed shard to the sync manager
+  void wakeup_sync_shards(set<int>& shard_ids) {
+    for (const int& shard_id : shard_ids) {
+      sync.wakeup(shard_id);
+    }
+  }
+  RGWMetaSyncStatusManager* get_manager() { return &sync; }
+
+  // returns the negative error from sync.init(), or 0 on success
+  int init() override {
+    const int ret = sync.init();
+    if (ret >= 0) {
+      return 0;
+    }
+    ldout(store->ctx(), 0) << "ERROR: sync.init() returned " << ret << dendl;
+    return ret;
+  }
+
+  int process() override {
+    sync.run();
+    return 0;
+  }
+};
+
+/*
+ * Thread ("data-sync") that drives an RGWDataSyncStatusManager for one
+ * source zone.
+ *
+ * Initialization of the manager is deferred to process(): until sync.init()
+ * succeeds the thread reports a retry interval, afterwards it reports no
+ * interval and process() runs the sync.
+ */
+class RGWDataSyncProcessorThread : public RGWSyncProcessorThread
+{
+  PerfCountersRef counters;
+  RGWDataSyncStatusManager sync;
+  bool initialized;
+
+  uint64_t interval_msec() override {
+    if (!initialized) {
+#define DATA_SYNC_INIT_WAIT_SEC 20
+      return DATA_SYNC_INIT_WAIT_SEC * 1000;
+    }
+    return 0; /* no interval associated, it'll run once until stopped */
+  }
+  void stop_process() override {
+    sync.stop();
+  }
+public:
+  RGWDataSyncProcessorThread(RGWRados *_store, RGWAsyncRadosProcessor *async_rados,
+                             const RGWZone* source_zone)
+    : RGWSyncProcessorThread(_store, "data-sync"),
+      counters(sync_counters::build(store->ctx(), std::string("data-sync-from-") + source_zone->name)),
+      sync(_store, async_rados, source_zone->id, counters.get()),
+      initialized(false) {}
+
+  // forward a wakeup for every (shard id, keys) entry to the sync manager
+  void wakeup_sync_shards(map<int, set<string> >& shard_ids) {
+    for (auto& shard : shard_ids) {
+      sync.wakeup(shard.first, shard.second);
+    }
+  }
+  RGWDataSyncStatusManager* get_manager() { return &sync; }
+
+  int init() override {
+    return 0;
+  }
+
+  int process() override {
+    if (!initialized) {
+      if (going_down()) {
+        return 0;
+      }
+      if (sync.init() < 0) {
+        /* we'll be back! */
+        return 0;
+      }
+      initialized = true;
+    }
+    sync.run();
+    return 0;
+  }
+};
+
+/*
+ * Thread ("sync-log-trim") that runs three trimming coroutines side by side:
+ * metadata log trim, data log trim and bucket trim. It also acts as the
+ * DoutPrefixProvider handed to the metadata log trim coroutine.
+ */
+class RGWSyncLogTrimThread : public RGWSyncProcessorThread, DoutPrefixProvider
+{
+  RGWCoroutinesManager crs;
+  RGWRados *store;
+  rgw::BucketTrimManager *bucket_trim;
+  RGWHTTPManager http;
+  const utime_t trim_interval;
+
+  uint64_t interval_msec() override { return 0; }
+  void stop_process() override { crs.stop(); }
+public:
+  // interval is taken as whole seconds (it becomes the seconds field of a utime_t)
+  RGWSyncLogTrimThread(RGWRados *store, rgw::BucketTrimManager *bucket_trim,
+                       int interval)
+    : RGWSyncProcessorThread(store, "sync-log-trim"),
+      crs(store->ctx(), store->get_cr_registry()), store(store),
+      bucket_trim(bucket_trim),
+      http(store->ctx(), crs.get_completion_mgr()),
+      trim_interval(interval, 0)
+  {}
+
+  int init() override {
+    return http.start();
+  }
+
+  // Create one coroutine stack per trimmer and run them together; returns 0
+  // once crs.run() comes back.
+  int process() override {
+    list<RGWCoroutinesStack*> stacks;
+    auto meta = new RGWCoroutinesStack(store->ctx(), &crs);
+    meta->call(create_meta_log_trim_cr(this, store, &http,
+                                       cct->_conf->rgw_md_log_max_shards,
+                                       trim_interval));
+    stacks.push_back(meta);
+
+    auto data = new RGWCoroutinesStack(store->ctx(), &crs);
+    data->call(create_data_log_trim_cr(store, &http,
+                                       cct->_conf->rgw_data_log_num_shards,
+                                       trim_interval));
+    stacks.push_back(data);
+
+    auto bucket = new RGWCoroutinesStack(store->ctx(), &crs);
+    bucket->call(bucket_trim->create_bucket_trim_cr(&http));
+    stacks.push_back(bucket);
+
+    crs.run(stacks);
+    return 0;
+  }
+
+  // implements DoutPrefixProvider; all three are marked override so that a
+  // signature drift in the interface is caught at compile time
+  CephContext *get_cct() const override { return store->ctx(); }
+  unsigned get_subsys() const override
+  {
+    return dout_subsys;
+  }
+
+  std::ostream& gen_prefix(std::ostream& out) const override
+  {
+    return out << "sync log trim: ";
+  }
+
+};
+
+// Wake the given shards on the meta sync thread, if there is one; done under
+// meta_sync_thread_lock.
+void RGWRados::wakeup_meta_sync_shards(set<int>& shard_ids)
+{
+  Mutex::Locker l(meta_sync_thread_lock);
+  if (!meta_sync_processor_thread) {
+    return;
+  }
+  meta_sync_processor_thread->wakeup_sync_shards(shard_ids);
+}
+
+/*
+ * Wake the given shards on the data sync thread registered for source_zone.
+ * When no thread exists for that zone the request is logged and dropped.
+ * The lookup and the wakeup happen under data_sync_thread_lock.
+ */
+void RGWRados::wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids)
+{
+  ldout(ctx(), 20) << __func__ << ": source_zone=" << source_zone << ", shard_ids=" << shard_ids << dendl;
+  Mutex::Locker l(data_sync_thread_lock);
+
+  auto found = data_sync_processor_threads.find(source_zone);
+  if (found == data_sync_processor_threads.end()) {
+    ldout(ctx(), 10) << __func__ << ": couldn't find sync thread for zone " << source_zone << ", skipping async data sync processing" << dendl;
+    return;
+  }
+
+  RGWDataSyncProcessorThread *sync_thread = found->second;
+  ceph_assert(sync_thread);
+  sync_thread->wakeup_sync_shards(shard_ids);
+}
+
+// Return the meta sync thread's status manager, or nullptr when there is no
+// meta sync thread; looked up under meta_sync_thread_lock.
+RGWMetaSyncStatusManager* RGWRados::get_meta_sync_manager()
+{
+  Mutex::Locker l(meta_sync_thread_lock);
+  if (!meta_sync_processor_thread) {
+    return nullptr;
+  }
+  return meta_sync_processor_thread->get_manager();
+}
+
+// Return the status manager of the data sync thread registered for
+// source_zone, or nullptr when none is registered; looked up under
+// data_sync_thread_lock.
+RGWDataSyncStatusManager* RGWRados::get_data_sync_manager(const std::string& source_zone)
+{
+  Mutex::Locker l(data_sync_thread_lock);
+  auto found = data_sync_processor_threads.find(source_zone);
+  if (found != data_sync_processor_threads.end()) {
+    return found->second->get_manager();
+  }
+  return nullptr;
+}
+
+/*
+ * Query the write alignment required by a pool.
+ *
+ * On success returns 0 and stores the alignment in *alignment; 0 is stored
+ * when the pool does not require alignment. On failure returns the negative
+ * error from opening the pool context or from the librados queries and
+ * leaves *alignment untouched.
+ */
+int RGWRados::get_required_alignment(const rgw_pool& pool, uint64_t *alignment)
+{
+  IoCtx ioctx;
+  int r = open_pool_ctx(pool, ioctx, false);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: open_pool_ctx() returned " << r << dendl;
+    return r;
+  }
+
+  // not named `requires`: that identifier is a reserved keyword as of C++20
+  bool requires_alignment;
+  r = ioctx.pool_requires_alignment2(&requires_alignment);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: ioctx.pool_requires_alignment2() returned "
+                  << r << dendl;
+    return r;
+  }
+
+  if (!requires_alignment) {
+    *alignment = 0;
+    return 0;
+  }
+
+  uint64_t align;
+  r = ioctx.pool_required_alignment2(&align);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: ioctx.pool_required_alignment2() returned "
+                  << r << dendl;
+    return r;
+  }
+  if (align != 0) {
+    ldout(cct, 20) << "required alignment=" << align << dendl;
+  }
+  *alignment = align;
+  return 0;
+}
+
+/*
+ * Compute the size to use for a requested size under an alignment:
+ *   - alignment == 0:     size unchanged
+ *   - size <= alignment:  one alignment unit
+ *   - otherwise:          size rounded down to a multiple of alignment
+ * The result is stored in *max_size.
+ */
+void RGWRados::get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size)
+{
+  if (alignment == 0) {
+    *max_size = size;
+  } else if (size <= alignment) {
+    *max_size = alignment;
+  } else {
+    *max_size = size - (size % alignment);
+  }
+}
+
+/*
+ * Compute the maximum chunk size for a pool: the configured
+ * rgw_max_chunk_size adjusted to the pool's required alignment. The
+ * alignment itself is also reported through palignment when that pointer is
+ * non-null. Returns 0, or the error from the alignment query.
+ */
+int RGWRados::get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment)
+{
+  uint64_t pool_alignment;
+  const int r = get_required_alignment(pool, &pool_alignment);
+  if (r < 0) {
+    return r;
+  }
+
+  if (palignment) {
+    *palignment = pool_alignment;
+  }
+
+  const uint64_t configured = cct->_conf->rgw_max_chunk_size;
+  get_max_aligned_size(configured, pool_alignment, max_chunk_size);
+
+  ldout(cct, 20) << "max_chunk_size=" << *max_chunk_size << dendl;
+
+  return 0;
+}
+
+/*
+ * Same as the pool overload, after resolving the data pool of obj under the
+ * given placement rule. Returns -EIO when the pool cannot be resolved.
+ */
+int RGWRados::get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj,
+                                 uint64_t *max_chunk_size, uint64_t *palignment)
+{
+  rgw_pool data_pool;
+  if (get_obj_data_pool(placement_rule, obj, &data_pool)) {
+    return get_max_chunk_size(data_pool, max_chunk_size, palignment);
+  }
+
+  ldout(cct, 0) << "ERROR: failed to get data pool for object " << obj << dendl;
+  return -EIO;
+}
+
+class RGWIndexCompletionManager;
+
+// State carried by one asynchronous bucket index completion: the arguments
+// of the index "complete op" call plus bookkeeping that ties the entry to
+// its RGWIndexCompletionManager shard.
+struct complete_op_data {
+ // guards `stopped`
+ Mutex lock{"complete_op_data"};
+ AioCompletion *rados_completion{nullptr};
+ // index into the owning manager's per-shard sets
+ int manager_shard_id{-1};
+ RGWIndexCompletionManager *manager{nullptr};
+ rgw_obj obj;
+ RGWModifyOp op;
+ string tag;
+ rgw_bucket_entry_ver ver;
+ cls_rgw_obj_key key;
+ rgw_bucket_dir_entry_meta dir_meta;
+ list<cls_rgw_obj_key> remove_objs;
+ // NOTE: op, log_op and bilog_op have no default initializer here
+ bool log_op;
+ uint16_t bilog_op;
+ rgw_zone_set zones_trace;
+
+ bool stopped{false};
+
+ // mark the entry as stopped, under the entry's lock
+ void stop() {
+ Mutex::Locker l(lock);
+ stopped = true;
+ }
+};
+
+// Thread ("index-complete") that processes queued complete_op_data entries.
+// Entries are queued with add_completion(), which also signals the thread.
+class RGWIndexCompletionThread : public RGWRadosThread {
+ RGWRados *store;
+
+ // no polling interval
+ uint64_t interval_msec() override {
+ return 0;
+ }
+
+ // queued entries, guarded by completions_lock
+ list<complete_op_data *> completions;
+
+ Mutex completions_lock;
+public:
+ RGWIndexCompletionThread(RGWRados *_store)
+ : RGWRadosThread(_store, "index-complete"), store(_store), completions_lock("RGWIndexCompletionThread::completions_lock") {}
+
+ int process() override;
+
+ // Queue a completion and wake the thread. process() wraps each queued
+ // pointer in a unique_ptr, so the entry is destroyed there.
+ void add_completion(complete_op_data *completion) {
+ {
+ Mutex::Locker l(completions_lock);
+ completions.push_back(completion);
+ }
+
+ // signal outside the lock scope
+ signal();
+ }
+};
+
+/*
+ * Handle all currently queued completions.
+ *
+ * The queue is swapped out under the lock and then worked through without
+ * it. For each entry the bucket shard is initialized and the index
+ * "complete op" is issued through guard_reshard(); on success a data log
+ * entry is added. Every entry is destroyed when its iteration ends,
+ * including entries skipped because the thread is going down or because a
+ * step failed. Always returns 0.
+ */
+int RGWIndexCompletionThread::process()
+{
+ list<complete_op_data *> comps;
+
+ {
+ Mutex::Locker l(completions_lock);
+ completions.swap(comps);
+ }
+
+ for (auto c : comps) {
+ // take ownership so the entry is freed on every path out of the iteration
+ std::unique_ptr<complete_op_data> up{c};
+
+ if (going_down()) {
+ continue;
+ }
+ ldout(store->ctx(), 20) << __func__ << "(): handling completion for key=" << c->key << dendl;
+
+ RGWRados::BucketShard bs(store);
+ RGWBucketInfo bucket_info;
+
+ int r = bs.init(c->obj.bucket, c->obj, &bucket_info);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: " << __func__ << "(): failed to initialize BucketShard, obj=" << c->obj << " r=" << r << dendl;
+ /* not much to do */
+ continue;
+ }
+
+ r = store->guard_reshard(&bs, c->obj, bucket_info,
+ [&](RGWRados::BucketShard *bs) -> int {
+ librados::ObjectWriteOperation o;
+ cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
+ cls_rgw_bucket_complete_op(o, c->op, c->tag, c->ver, c->key, c->dir_meta, &c->remove_objs,
+ c->log_op, c->bilog_op, &c->zones_trace);
+ return bs->index_ctx.operate(bs->bucket_obj, &o);
+ });
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: " << __func__ << "(): bucket index completion failed, obj=" << c->obj << " r=" << r << dendl;
+ /* ignoring error, can't do anything about it */
+ continue;
+ }
+ r = store->data_log->add_entry(bs.bucket, bs.shard_id);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Tracks in-flight bucket index completions.
+ *
+ * Entries are spread round-robin over num_shards sets, each guarded by its
+ * own mutex. Completions that come back with -ERR_BUSY_RESHARDING are handed
+ * to a dedicated RGWIndexCompletionThread for a retry.
+ */
+class RGWIndexCompletionManager {
+  RGWRados *store{nullptr};
+  vector<Mutex *> locks;
+  vector<set<complete_op_data *> > completions;
+
+  RGWIndexCompletionThread *completion_thread{nullptr};
+
+  int num_shards;
+
+  std::atomic<int> cur_shard {0};
+
+
+public:
+  // one lock and one completion set per shard; the shard count is taken from
+  // rgw_thread_pool_size
+  RGWIndexCompletionManager(RGWRados *_store) : store(_store) {
+    num_shards = store->ctx()->_conf->rgw_thread_pool_size;
+
+    for (int i = 0; i < num_shards; i++) {
+      char buf[64];
+      snprintf(buf, sizeof(buf), "RGWIndexCompletionManager::lock::%d", i);
+      locks.push_back(new Mutex(buf));
+    }
+
+    completions.resize(num_shards);
+  }
+  ~RGWIndexCompletionManager() {
+    stop();
+
+    for (auto l : locks) {
+      delete l;
+    }
+  }
+
+  // Round-robin shard selection. The counter is fetched and incremented in a
+  // single atomic step, and reduced through an unsigned value so that a
+  // wrapped (negative) counter can never yield a negative index.
+  int next_shard() {
+    const unsigned int ticket = static_cast<unsigned int>(cur_shard++);
+    return static_cast<int>(ticket % static_cast<unsigned int>(num_shards));
+  }
+
+  void create_completion(const rgw_obj& obj,
+                         RGWModifyOp op, string& tag,
+                         rgw_bucket_entry_ver& ver,
+                         const cls_rgw_obj_key& key,
+                         rgw_bucket_dir_entry_meta& dir_meta,
+                         list<cls_rgw_obj_key> *remove_objs, bool log_op,
+                         uint16_t bilog_op,
+                         rgw_zone_set *zones_trace,
+                         complete_op_data **result);
+  bool handle_completion(completion_t cb, complete_op_data *arg);
+
+  // Create and start the retry thread. Returns the error from the thread's
+  // init(), or 0.
+  int start() {
+    completion_thread = new RGWIndexCompletionThread(store);
+    int ret = completion_thread->init();
+    if (ret < 0) {
+      return ret;
+    }
+    completion_thread->start();
+    return 0;
+  }
+
+  // Stop and destroy the retry thread, then mark every tracked entry as
+  // stopped and forget it. Safe to call more than once (the destructor calls
+  // it too): the thread pointer is reset after deletion and the loop is
+  // bounded by the current size of `completions`, which is 0 after the first
+  // call.
+  void stop() {
+    if (completion_thread) {
+      completion_thread->stop();
+      delete completion_thread;
+      completion_thread = nullptr;
+    }
+
+    const size_t tracked_shards = completions.size();
+    for (size_t i = 0; i < tracked_shards; ++i) {
+      Mutex::Locker l(*locks[i]);
+      for (auto c : completions[i]) {
+        c->stop();
+      }
+    }
+    completions.clear();
+  }
+};
+
+/*
+ * AIO completion callback for bucket index completions; arg is the
+ * complete_op_data entry.
+ *
+ * With the entry's lock held: a stopped entry is simply disposed of,
+ * otherwise the manager decides (via handle_completion()) whether the entry
+ * is finished or has been handed off for a retry. The entry is deleted after
+ * the lock is released unless it was handed off.
+ */
+static void obj_complete_cb(completion_t cb, void *arg)
+{
+  complete_op_data *entry = (complete_op_data *)arg;
+
+  entry->lock.Lock();
+  /* short-circuit: a stopped entry never reaches the manager */
+  const bool dispose = entry->stopped ||
+                       entry->manager->handle_completion(cb, entry);
+  entry->lock.Unlock();
+
+  if (dispose) {
+    delete entry;
+  }
+}
+
+
+/*
+ * Allocate a completion entry holding copies of the given index-op
+ * arguments, assign it to the next shard, create its rados AIO completion
+ * (with obj_complete_cb as callback) and register it in that shard's set.
+ * When zones_trace is null the entry's trace is seeded with the local zone
+ * id. The new entry is returned through *result.
+ */
+void RGWIndexCompletionManager::create_completion(const rgw_obj& obj,
+                                                  RGWModifyOp op, string& tag,
+                                                  rgw_bucket_entry_ver& ver,
+                                                  const cls_rgw_obj_key& key,
+                                                  rgw_bucket_dir_entry_meta& dir_meta,
+                                                  list<cls_rgw_obj_key> *remove_objs, bool log_op,
+                                                  uint16_t bilog_op,
+                                                  rgw_zone_set *zones_trace,
+                                                  complete_op_data **result)
+{
+  complete_op_data *entry = new complete_op_data;
+
+  const int shard_id = next_shard();
+
+  entry->manager_shard_id = shard_id;
+  entry->manager = this;
+  entry->obj = obj;
+  entry->op = op;
+  entry->tag = tag;
+  entry->ver = ver;
+  entry->key = key;
+  entry->dir_meta = dir_meta;
+  entry->log_op = log_op;
+  entry->bilog_op = bilog_op;
+
+  if (remove_objs) {
+    entry->remove_objs.insert(entry->remove_objs.end(),
+                              remove_objs->begin(), remove_objs->end());
+  }
+
+  if (!zones_trace) {
+    entry->zones_trace.insert(store->svc.zone->get_zone().id);
+  } else {
+    entry->zones_trace = *zones_trace;
+  }
+
+  *result = entry;
+
+  entry->rados_completion = librados::Rados::aio_create_completion(entry, NULL, obj_complete_cb);
+
+  Mutex::Locker l(*locks[shard_id]);
+  completions[shard_id].insert(entry);
+}
+
+ /*
+  * Unregisters `arg` from its shard and inspects the result of the finished
+  * operation. Returns true when the caller should delete `arg`, false when
+  * the operation failed with -ERR_BUSY_RESHARDING and `arg` was handed to
+  * the completion thread instead.
+  */
+ bool RGWIndexCompletionManager::handle_completion(completion_t cb, complete_op_data *arg)
+ {
+   const int shard_id = arg->manager_shard_id;
+   {
+     Mutex::Locker l(*locks[shard_id]);
+
+     auto& shard_comps = completions[shard_id];
+     auto pos = shard_comps.find(arg);
+     if (pos == shard_comps.end()) {
+       // no longer registered with the manager
+       return true;
+     }
+     shard_comps.erase(pos);
+   }
+
+   if (rados_aio_get_return_value(cb) == -ERR_BUSY_RESHARDING) {
+     completion_thread->add_completion(arg);
+     return false;
+   }
+   return true;
+ }
+
+ void RGWRados::finalize() // tears down the objects created by init_rados() and init_complete()
+ {
+ cct->get_admin_socket()->unregister_commands(this); // drops the commands registered in init_rados()
+ if (run_sync_thread) { // first pass: only stop the sync workers; they are deleted further down
+ Mutex::Locker l(meta_sync_thread_lock);
+ meta_sync_processor_thread->stop();
+
+ Mutex::Locker dl(data_sync_thread_lock);
+ for (auto iter : data_sync_processor_threads) {
+ RGWDataSyncProcessorThread *thread = iter.second;
+ thread->stop();
+ }
+ if (sync_log_trimmer) {
+ sync_log_trimmer->stop();
+ }
+ }
+ if (async_rados) { // stopped after the sync workers are stopped and before they are deleted
+ async_rados->stop();
+ }
+ if (run_sync_thread) { // second pass: free the sync workers
+ delete meta_sync_processor_thread;
+ meta_sync_processor_thread = NULL;
+ Mutex::Locker dl(data_sync_thread_lock);
+ for (auto iter : data_sync_processor_threads) {
+ RGWDataSyncProcessorThread *thread = iter.second;
+ delete thread;
+ }
+ data_sync_processor_threads.clear();
+ delete sync_log_trimmer;
+ sync_log_trimmer = nullptr;
+ bucket_trim = boost::none; // resets the bucket trim manager emplaced in init_complete()
+ }
+ if (meta_notifier) {
+ meta_notifier->stop();
+ delete meta_notifier;
+ }
+ if (data_notifier) {
+ data_notifier->stop();
+ delete data_notifier;
+ }
+ delete data_log;
+ delete sync_tracer;
+ if (async_rados) {
+ delete async_rados;
+ }
+
+ delete lc;
+ lc = NULL;
+
+ delete gc;
+ gc = NULL;
+
+ delete obj_expirer;
+ obj_expirer = NULL;
+
+ RGWQuotaHandler::free_handler(quota_handler);
+ if (cr_registry) {
+ cr_registry->put(); // released via put() rather than delete
+ }
+
+ svc.shutdown();
+
+ delete meta_mgr;
+ delete binfo_cache;
+ delete obj_tombstone_cache; // only allocated in init_complete() when a tombstone cache is needed
+
+ if (reshard_wait.get()) {
+ reshard_wait->stop();
+ reshard_wait.reset();
+ }
+
+ if (run_reshard_thread) { // the processor is only started when this flag is set in init_complete()
+ reshard->stop_processor();
+ }
+ delete reshard;
+ delete index_completion_manager; // its destructor calls stop()
+ }
+
+/**
+ * Initialize the RADOS instance and prepare to do other ops
+ * Returns 0 on success, -ERR# on failure.
+ */
+int RGWRados::init_rados()
+{
+ int ret = 0;
+ auto admin_socket = cct->get_admin_socket();
+ for (auto cmd : admin_commands) {
+ int r = admin_socket->register_command(cmd[0], cmd[1], this,
+ cmd[2]);
+ if (r < 0) {
+ lderr(cct) << "ERROR: fail to register admin socket command (r=" << r
+ << ")" << dendl;
+ return r;
+ }
+ }
+
+ ret = rados.init_with_context(cct);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = rados.connect();
+ if (ret < 0) {
+ return ret;
+ }
+
+ auto crs = std::unique_ptr<RGWCoroutinesManagerRegistry>{
+ new RGWCoroutinesManagerRegistry(cct)};
+ ret = crs->hook_to_admin_command("cr dump");
+ if (ret < 0) {
+ return ret;
+ }
+
+ meta_mgr = new RGWMetadataManager(cct, this);
+ data_log = new RGWDataChangesLog(cct, this);
+ cr_registry = crs.release();
+ return ret;
+}
+
+ /*
+  * Registers this daemon in the service map. The caller's metadata is
+  * copied and extended with zone/zonegroup identifiers; a leading "rgw."
+  * is stripped from the daemon name. Returns 0 on success or the negative
+  * value returned by service_daemon_register().
+  */
+ int RGWRados::register_to_service_map(const string& daemon_type, const map<string, string>& meta)
+ {
+   auto& zonegroup = svc.zone->get_zonegroup();
+
+   map<string,string> metadata = meta;
+   metadata["num_handles"] = "1"s;
+   metadata["zonegroup_id"] = zonegroup.get_id();
+   metadata["zonegroup_name"] = zonegroup.get_name();
+   metadata["zone_name"] = svc.zone->zone_name();
+   metadata["zone_id"] = svc.zone->zone_id();
+
+   string name = cct->_conf->name.get_id();
+   if (name.compare(0, 4, "rgw.") == 0) {
+     name.erase(0, 4);
+   }
+
+   const int ret = rados.service_daemon_register(daemon_type, name, metadata);
+   if (ret >= 0) {
+     return 0;
+   }
+   ldout(cct, 0) << "ERROR: service_daemon_register() returned ret=" << ret << ": " << cpp_strerror(-ret) << dendl;
+   return ret;
+ }
+
+ /*
+  * Pushes a new status map for this daemon to the service map.
+  * Returns 0 on success or the negative value returned by
+  * service_daemon_update_status().
+  */
+ int RGWRados::update_service_map(std::map<std::string, std::string>&& status)
+ {
+   const int ret = rados.service_daemon_update_status(std::move(status));
+   if (ret >= 0) {
+     return 0;
+   }
+   ldout(cct, 0) << "ERROR: service_daemon_update_status() returned ret=" << ret << ": " << cpp_strerror(-ret) << dendl;
+   return ret;
+ }
+
+/**
+ * Initialize the RADOS instance and prepare to do other ops
+ * Returns 0 on success, -ERR# on failure.
+ */
+int RGWRados::init_complete()
+{
+ int ret;
+
+ /*
+ * create sync module instance even if we don't run sync thread, might need it for radosgw-admin
+ */
+ auto& zone_public_config = svc.zone->get_zone();
+ ret = svc.sync_modules->get_manager()->create_instance(cct, zone_public_config.tier_type, svc.zone->get_zone_params().tier_config, &sync_module);
+ if (ret < 0) {
+ lderr(cct) << "ERROR: failed to init sync module instance, ret=" << ret << dendl;
+ if (ret == -ENOENT) {
+ lderr(cct) << "ERROR: " << zone_public_config.tier_type
+ << " sync module does not exist. valid sync modules: "
+ << svc.sync_modules->get_manager()->get_registered_module_names()
+ << dendl;
+ }
+ return ret;
+ }
+
+ period_puller.reset(new RGWPeriodPuller(this));
+ period_history.reset(new RGWPeriodHistory(cct, period_puller.get(),
+ svc.zone->get_current_period()));
+
+ ret = open_root_pool_ctx();
+ if (ret < 0)
+ return ret;
+
+ ret = open_gc_pool_ctx();
+ if (ret < 0)
+ return ret;
+
+ ret = open_lc_pool_ctx();
+ if (ret < 0)
+ return ret;
+
+ ret = open_objexp_pool_ctx();
+ if (ret < 0)
+ return ret;
+
+ ret = open_reshard_pool_ctx();
+ if (ret < 0)
+ return ret;
+
+ pools_initialized = true;
+
+ gc = new RGWGC();
+ gc->initialize(cct, this);
+
+ obj_expirer = new RGWObjectExpirer(this);
+
+ if (use_gc_thread) {
+ gc->start_processor();
+ obj_expirer->start_processor();
+ }
+
+ auto& current_period = svc.zone->get_current_period();
+ auto& zonegroup = svc.zone->get_zonegroup();
+ auto& zone_params = svc.zone->get_zone_params();
+ auto& zone = svc.zone->get_zone();
+
+ /* no point of running sync thread if we don't have a master zone configured
+ or there is no rest_master_conn */
+ if (zonegroup.master_zone.empty() || !svc.zone->get_master_conn()
+ || current_period.get_id().empty()) {
+ run_sync_thread = false;
+ }
+
+ if (run_sync_thread) {
+ // initialize the log period history
+ meta_mgr->init_oldest_log_period();
+ }
+
+ async_rados = new RGWAsyncRadosProcessor(this, cct->_conf->rgw_num_async_rados_threads);
+ async_rados->start();
+
+ ret = meta_mgr->init(current_period.get_id());
+ if (ret < 0) {
+ lderr(cct) << "ERROR: failed to initialize metadata log: "
+ << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ if (svc.zone->is_meta_master()) {
+ auto md_log = meta_mgr->get_log(current_period.get_id());
+ meta_notifier = new RGWMetaNotifier(this, md_log);
+ meta_notifier->start();
+ }
+
+ /* init it anyway, might run sync through radosgw-admin explicitly */
+ sync_tracer = new RGWSyncTraceManager(cct, cct->_conf->rgw_sync_trace_history_size);
+ sync_tracer->init(this);
+ ret = sync_tracer->hook_to_admin_command();
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (run_sync_thread) {
+ for (const auto &pt: zonegroup.placement_targets) {
+ if (zone_params.placement_pools.find(pt.second.name)
+ == zone_params.placement_pools.end()){
+ ldout(cct, 0) << "WARNING: This zone does not contain the placement target "
+ << pt.second.name << " present in zonegroup" << dendl;
+ }
+ }
+ Mutex::Locker l(meta_sync_thread_lock);
+ meta_sync_processor_thread = new RGWMetaSyncProcessorThread(this, async_rados);
+ ret = meta_sync_processor_thread->init();
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to initialize meta sync thread" << dendl;
+ return ret;
+ }
+ meta_sync_processor_thread->start();
+
+ // configure the bucket trim manager
+ rgw::BucketTrimConfig config;
+ rgw::configure_bucket_trim(cct, config);
+
+ bucket_trim.emplace(this, config);
+ ret = bucket_trim->init();
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to start bucket trim manager" << dendl;
+ return ret;
+ }
+ data_log->set_observer(&*bucket_trim);
+
+ Mutex::Locker dl(data_sync_thread_lock);
+ for (auto source_zone : svc.zone->get_data_sync_source_zones()) {
+ ldout(cct, 5) << "starting data sync thread for zone " << source_zone->name << dendl;
+ auto *thread = new RGWDataSyncProcessorThread(this, async_rados, source_zone);
+ ret = thread->init();
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to initialize data sync thread" << dendl;
+ return ret;
+ }
+ thread->start();
+ data_sync_processor_threads[source_zone->id] = thread;
+ }
+ auto interval = cct->_conf->rgw_sync_log_trim_interval;
+ if (interval > 0) {
+ sync_log_trimmer = new RGWSyncLogTrimThread(this, &*bucket_trim, interval);
+ ret = sync_log_trimmer->init();
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to initialize sync log trim thread" << dendl;
+ return ret;
+ }
+ sync_log_trimmer->start();
+ }
+ }
+ data_notifier = new RGWDataNotifier(this);
+ data_notifier->start();
+
+ binfo_cache = new RGWChainedCacheImpl<bucket_info_entry>;
+ binfo_cache->init(svc.cache);
+
+ lc = new RGWLC();
+ lc->initialize(cct, this);
+
+ if (use_lc_thread)
+ lc->start_processor();
+
+ quota_handler = RGWQuotaHandler::generate_handler(this, quota_threads);
+
+ bucket_index_max_shards = (cct->_conf->rgw_override_bucket_index_max_shards ? cct->_conf->rgw_override_bucket_index_max_shards :
+ zone.bucket_index_max_shards);
+ if (bucket_index_max_shards > get_max_bucket_shards()) {
+ bucket_index_max_shards = get_max_bucket_shards();
+ ldout(cct, 1) << __func__ << " bucket index max shards is too large, reset to value: "
+ << get_max_bucket_shards() << dendl;
+ }
+ ldout(cct, 20) << __func__ << " bucket index max shards: " << bucket_index_max_shards << dendl;
+
+ bool need_tombstone_cache = !svc.zone->get_zone_data_notify_to_map().empty(); /* have zones syncing from us */
+
+ if (need_tombstone_cache) {
+ obj_tombstone_cache = new tombstone_cache_t(cct->_conf->rgw_obj_tombstone_cache_size);
+ }
+
+ reshard_wait = std::make_shared<RGWReshardWait>();
+
+ reshard = new RGWReshard(this);
+
+ /* only the master zone in the zonegroup reshards buckets */
+ run_reshard_thread = run_reshard_thread && (zonegroup.master_zone == zone.id);
+ if (run_reshard_thread) {
+ reshard->start_processor();
+ }
+
+ index_completion_manager = new RGWIndexCompletionManager(this);
+ ret = index_completion_manager->start();
+
+ return ret;
+}
+
+ /*
+  * Brings up the service layer: svc.init_raw() when `raw` is set,
+  * svc.init() otherwise. Returns whatever the chosen call returns.
+  */
+ int RGWRados::init_svc(bool raw)
+ {
+   return raw ? svc.init_raw(cct, use_cache)
+              : svc.init(cct, use_cache);
+ }
+
+/**
+ * Initialize the RADOS instance and prepare to do other ops
+ * Returns 0 on success, -ERR# on failure.
+ */
+int RGWRados::initialize()
+{
+ int ret;
+
+ inject_notify_timeout_probability =
+ cct->_conf.get_val<double>("rgw_inject_notify_timeout_probability");
+ max_notify_retries = cct->_conf.get_val<uint64_t>("rgw_max_notify_retries");
+
+ ret = init_svc(false);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
+ return ret;
+ }
+
+ host_id = svc.zone_utils->gen_host_id();
+
+ ret = init_rados();
+ if (ret < 0)
+ return ret;
+
+ return init_complete();
+}
+
+/**
+ * Open the pool used as root for this gateway
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWRados::open_root_pool_ctx()
+{
+ return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().domain_root, root_pool_ctx, true, true);
+}
+
+ // Opens gc_pool_ctx on the zone's gc_pool; returns rgw_init_ioctx()'s result.
+ int RGWRados::open_gc_pool_ctx()
+ {
+   // flag names follow open_pool_ctx()'s call to the same helper
+   constexpr bool create = true;
+   constexpr bool mostly_omap = true;
+   const auto& pool = svc.zone->get_zone_params().gc_pool;
+   return rgw_init_ioctx(get_rados_handle(), pool, gc_pool_ctx, create, mostly_omap);
+ }
+
+ // Opens lc_pool_ctx on the zone's lc_pool; returns rgw_init_ioctx()'s result.
+ int RGWRados::open_lc_pool_ctx()
+ {
+   // flag names follow open_pool_ctx()'s call to the same helper
+   constexpr bool create = true;
+   constexpr bool mostly_omap = true;
+   const auto& pool = svc.zone->get_zone_params().lc_pool;
+   return rgw_init_ioctx(get_rados_handle(), pool, lc_pool_ctx, create, mostly_omap);
+ }
+
+ // Opens objexp_pool_ctx; note that it is opened on the zone's log_pool.
+ // Returns rgw_init_ioctx()'s result.
+ int RGWRados::open_objexp_pool_ctx()
+ {
+   // flag names follow open_pool_ctx()'s call to the same helper
+   constexpr bool create = true;
+   constexpr bool mostly_omap = true;
+   const auto& pool = svc.zone->get_zone_params().log_pool;
+   return rgw_init_ioctx(get_rados_handle(), pool, objexp_pool_ctx, create, mostly_omap);
+ }
+
+ // Opens reshard_pool_ctx on the zone's reshard_pool; returns
+ // rgw_init_ioctx()'s result.
+ int RGWRados::open_reshard_pool_ctx()
+ {
+   // flag names follow open_pool_ctx()'s call to the same helper
+   constexpr bool create = true;
+   constexpr bool mostly_omap = true;
+   const auto& pool = svc.zone->get_zone_params().reshard_pool;
+   return rgw_init_ioctx(get_rados_handle(), pool, reshard_pool_ctx, create, mostly_omap);
+ }
+
+ // Opens io_ctx on `pool`, always asking for the pool to be created if it
+ // doesn't exist; `mostly_omap` is forwarded unchanged.
+ int RGWRados::open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
+                             bool mostly_omap)
+ {
+   return rgw_init_ioctx(get_rados_handle(), pool, io_ctx,
+                         true /* create the pool if it doesn't exist */,
+                         mostly_omap);
+ }
+
+ // Writes "<shard_id_str><KEY_VALUE_SEPARATOR><shard_marker>" into *marker.
+ // Does nothing when marker is null.
+ void RGWRados::build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
+                                          string *marker) {
+   if (!marker) {
+     return;
+   }
+   *marker = shard_id_str;
+   marker->append(BucketIndexShardsManager::KEY_VALUE_SEPARATOR);
+   marker->append(shard_marker);
+ }
+
+ /*
+  * Opens index_ctx on the pool holding the bucket's index. An explicit
+  * index pool on the bucket wins; otherwise the bucket's placement rule
+  * (or the zonegroup default when the rule is empty) is looked up in the
+  * zone's placement pools. Returns -EINVAL when the rule is unknown, a
+  * negative error from open_pool_ctx(), or 0.
+  */
+ int RGWRados::open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx)
+ {
+   const rgw_pool& explicit_pool = bucket_info.bucket.explicit_placement.index_pool;
+   if (!explicit_pool.empty()) {
+     return open_pool_ctx(explicit_pool, index_ctx, false);
+   }
+
+   auto& zonegroup = svc.zone->get_zonegroup();
+   auto& zone_params = svc.zone->get_zone_params();
+
+   const rgw_placement_rule *rule = &bucket_info.placement_rule;
+   if (rule->empty()) {
+     rule = &zonegroup.default_placement;
+   }
+
+   auto placement = zone_params.placement_pools.find(rule->name);
+   if (placement == zone_params.placement_pools.end()) {
+     ldout(cct, 0) << "could not find placement rule " << *rule << " within zonegroup " << dendl;
+     return -EINVAL;
+   }
+
+   const int r = open_pool_ctx(placement->second.index_pool, index_ctx, true);
+   return r < 0 ? r : 0;
+ }
+
+/**** logs ****/
+
+ struct log_list_state { // iteration state behind the handle returned by log_list_init()
+ string prefix; // when non-empty, log_list_next() only reports object names starting with this
+ librados::IoCtx io_ctx; // opened on the zone's log_pool by log_list_init()
+ librados::NObjectIterator obit; // current position in the pool listing
+ };
+
+ /*
+  * Starts a listing of the objects in the zone's log pool. On success the
+  * newly allocated state is stored in *handle and 0 is returned; on
+  * failure the state is freed and the negative error is returned.
+  */
+ int RGWRados::log_list_init(const string& prefix, RGWAccessHandle *handle)
+ {
+   auto *state = new log_list_state;
+   const int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, state->io_ctx);
+   if (r >= 0) {
+     state->prefix = prefix;
+     state->obit = state->io_ctx.nobjects_begin();
+     *handle = (RGWAccessHandle)state;
+     return 0;
+   }
+   delete state;
+   return r;
+ }
+
+ /*
+  * Stores the next object name matching the listing prefix in *name and
+  * returns 0. When the listing is exhausted the state behind `handle` is
+  * deleted and -ENOENT is returned; the handle must not be used afterwards.
+  */
+ int RGWRados::log_list_next(RGWAccessHandle handle, string *name)
+ {
+   auto *state = static_cast<log_list_state *>(handle);
+   const bool filter_by_prefix = state->prefix.length() != 0;
+
+   for (; !(state->obit == state->io_ctx.nobjects_end()); state->obit++) {
+     if (filter_by_prefix &&
+         state->obit->get_oid().find(state->prefix) != 0) {
+       continue;  // name doesn't start with the prefix
+     }
+     *name = state->obit->get_oid();
+     state->obit++;
+     return 0;
+   }
+
+   delete state;
+   return -ENOENT;
+ }
+
+int RGWRados::log_remove(const string& name)
+{
+ librados::IoCtx io_ctx;
+ int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx);
+ if (r < 0)
+ return r;
+ return io_ctx.remove(name);
+}
+
+ // State behind the handle returned by log_show_init() and consumed by
+ // log_show_next().
+ struct log_show_state {
+   librados::IoCtx io_ctx;          // opened on the zone's log pool
+   bufferlist bl;                   // data read from the object but not yet fully decoded
+   bufferlist::const_iterator p;    // decode cursor into bl
+   string name;                     // object being read
+   uint64_t pos = 0;                // next read offset within the object
+   bool eof = false;                // set once a short read is seen
+   log_show_state() = default;
+ };
+
+ /*
+  * Prepares to read the log object `name` from the zone's log pool. On
+  * success the newly allocated state is stored in *handle and 0 is
+  * returned; on failure the state is freed and the error is returned.
+  */
+ int RGWRados::log_show_init(const string& name, RGWAccessHandle *handle)
+ {
+   auto *state = new log_show_state;
+   const int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, state->io_ctx);
+   if (r >= 0) {
+     state->name = name;
+     *handle = (RGWAccessHandle)state;
+     return 0;
+   }
+   delete state;
+   return r;
+ }
+
+ int RGWRados::log_show_next(RGWAccessHandle handle, rgw_log_entry *entry) // returns 1 with *entry decoded, 0 at end of data, negative on read/decode error
+ {
+ log_show_state *state = static_cast<log_show_state *>(handle);
+ off_t off = state->p.get_off(); // offset of the decode cursor within state->bl
+
+ ldout(cct, 10) << "log_show_next pos " << state->pos << " bl " << state->bl.length()
+ << " off " << off
+ << " eof " << (int)state->eof
+ << dendl;
+ // refill: read another chunk when less than half a chunk is left undecoded and eof was not seen
+ unsigned chunk = 1024*1024;
+ if ((state->bl.length() - off) < chunk/2 && !state->eof) {
+ bufferlist more;
+ int r = state->io_ctx.read(state->name, more, chunk, state->pos);
+ if (r < 0)
+ return r;
+ state->pos += r; // advance the object read offset by the bytes just read
+ bufferlist old;
+ try {
+ old.substr_of(state->bl, off, state->bl.length() - off); // keep only the not-yet-decoded tail
+ } catch (buffer::error& err) {
+ return -EINVAL;
+ }
+ state->bl.clear();
+ state->bl.claim(old);
+ state->bl.claim_append(more); // bl = undecoded tail followed by the new data
+ state->p = state->bl.cbegin(); // restart the cursor at the front of the rebuilt buffer
+ if ((unsigned)r < chunk) // short read: no further reads will be attempted
+ state->eof = true;
+ ldout(cct, 10) << " read " << r << dendl;
+ }
+
+ if (state->p.end())
+ return 0; // end of file
+ try {
+ decode(*entry, state->p);
+ }
+ catch (const buffer::error &e) {
+ return -EINVAL;
+ }
+ return 1;
+ }
+
+/**
+ * usage_log_hash: get usage log key hash, based on name and index
+ *
+ * Get the usage object name. Since a user may have more than 1
+ * object holding that info (multiple shards), we use index to
+ * specify that shard number. Once index exceeds max shards it
+ * wraps.
+ * If name is not being set, results for all users will be returned
+ * and index will wrap only after total shards number.
+ *
+ * @param cct [in] ceph context
+ * @param name [in] user name
+ * @param hash [out] hash value
+ * @param index [in] shard index number
+ */
+static void usage_log_hash(CephContext *cct, const string& name, string& hash, uint32_t index)
+{
+ uint32_t val = index;
+
+ if (!name.empty()) {
+ int max_user_shards = cct->_conf->rgw_usage_max_user_shards;
+ val %= max_user_shards;
+ val += ceph_str_hash_linux(name.c_str(), name.size());
+ }
+ char buf[17];
+ int max_shards = cct->_conf->rgw_usage_max_shards;
+ snprintf(buf, sizeof(buf), RGW_USAGE_OBJ_PREFIX "%u", (unsigned)(val % max_shards));
+ hash = buf;
+}
+
+ /*
+  * Groups the usage entries by usage log object and appends each group to
+  * its object. Entries with an empty user are skipped with a warning.
+  * Returns 0, or the first negative result of cls_obj_usage_log_add().
+  */
+ int RGWRados::log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info)
+ {
+   uint32_t index = 0;
+   string hash;
+   string last_user;
+   map<string, rgw_usage_log_info> log_objs;
+
+   /* restructure usage map, zone by object hash */
+   for (auto& kv : usage_info) {
+     const rgw_user_bucket& ub = kv.first;
+     RGWUsageBatch& info = kv.second;
+
+     if (ub.user.empty()) {
+       ldout(cct, 0) << "WARNING: RGWRados::log_usage(): user name empty (bucket=" << ub.bucket << "), skipping" << dendl;
+       continue;
+     }
+
+     if (ub.user != last_user) {
+       /* index *should* be random, but why waste extra cycles
+          in most cases max user shards is not going to exceed 1,
+          so just incrementing it */
+       usage_log_hash(cct, ub.user, hash, index++);
+     }
+     last_user = ub.user;
+
+     auto& entries = log_objs[hash].entries;
+     for (const auto& item : info.m) {
+       entries.push_back(item.second);
+     }
+   }
+
+   for (auto& obj : log_objs) {
+     const int r = cls_obj_usage_log_add(obj.first, obj.second);
+     if (r < 0) {
+       return r;
+     }
+   }
+   return 0;
+ }
+
+ /*
+  * Reads up to max_entries usage records for `user`, walking the user's
+  * usage log objects starting from the position stored in usage_iter and
+  * aggregating the records into `usage`. A missing object (-ENOENT) is
+  * skipped. Returns 0, or the first other negative read result.
+  */
+ int RGWRados::read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
+                          uint32_t max_entries, bool *is_truncated, RGWUsageIter& usage_iter, map<rgw_user_bucket,
+                          rgw_usage_log_entry>& usage)
+ {
+   uint32_t num = max_entries;
+   string hash, first_hash;
+   string user_str = user.to_str();
+   usage_log_hash(cct, user_str, first_hash, 0);
+
+   if (usage_iter.index) {
+     usage_log_hash(cct, user_str, hash, usage_iter.index);
+   } else {
+     hash = first_hash;
+   }
+
+   usage.clear();
+
+   do {
+     map<rgw_user_bucket, rgw_usage_log_entry> ret_usage;
+
+     int ret = cls_obj_usage_log_read(hash, user_str, bucket_name, start_epoch, end_epoch, num,
+                                      usage_iter.read_iter, ret_usage, is_truncated);
+     if (ret != -ENOENT) {
+       if (ret < 0)
+         return ret;
+
+       num -= ret_usage.size();
+
+       for (auto& rec : ret_usage) {
+         usage[rec.first].aggregate(rec.second);
+       }
+     }
+
+     // this object is done: move on to the next shard
+     if (!*is_truncated) {
+       usage_iter.read_iter.clear();
+       usage_log_hash(cct, user_str, hash, ++usage_iter.index);
+     }
+   } while (num && !*is_truncated && hash != first_hash);
+   return 0;
+ }
+
+ /*
+  * Trims the usage records of `user` in every usage log object the user
+  * maps to, stopping once the object names wrap around to the first one.
+  * A missing object (-ENOENT) is ignored; any other negative result is
+  * returned immediately.
+  */
+ int RGWRados::trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch)
+ {
+   string user_str = user.to_str();
+
+   string first_hash;
+   usage_log_hash(cct, user_str, first_hash, 0);
+
+   string hash = first_hash;
+   uint32_t index = 0;
+   do {
+     const int ret = cls_obj_usage_log_trim(hash, user_str, bucket_name, start_epoch, end_epoch);
+     if (ret < 0 && ret != -ENOENT) {
+       return ret;
+     }
+     usage_log_hash(cct, user_str, hash, ++index);
+   } while (hash != first_hash);
+
+   return 0;
+ }
+
+
+ /*
+  * Clears every usage log object, i.e. RGW_USAGE_OBJ_PREFIX followed by
+  * each shard number below rgw_usage_max_shards. Stops at, and returns,
+  * the first negative result; returns 0 if all clears succeed.
+  */
+ int RGWRados::clear_usage()
+ {
+   auto max_shards = cct->_conf->rgw_usage_max_shards;
+   int ret=0;
+   for (unsigned i=0; i < max_shards; i++){
+     string oid = RGW_USAGE_OBJ_PREFIX + to_string(i);
+     ret = cls_obj_usage_log_clear(oid);
+     if (ret < 0){
+       // a space now separates the oid from "failed" (it used to print "oid=<oid>failed")
+       ldout(cct,0) << "usage clear on oid="<< oid << " failed with ret=" << ret << dendl;
+       return ret;
+     }
+   }
+   return ret;
+ }
+
+ int RGWRados::key_to_shard_id(const string& key, int max_shards) // thin wrapper: forwards both arguments to rgw_shard_id()
+ {
+ return rgw_shard_id(key, max_shards);
+ }
+
+ /*
+  * Sets `name` to prefix + (hash(key) % max_shards) in decimal. When
+  * shard_id is non-null it also receives the shard number.
+  */
+ void RGWRados::shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id)
+ {
+   const uint32_t key_hash = ceph_str_hash_linux(key.c_str(), key.size());
+   const unsigned shard = (unsigned)(key_hash % max_shards);
+   if (shard_id) {
+     *shard_id = shard;
+   }
+   name = prefix + std::to_string(shard);
+ }
+
+ /*
+  * Sets `name` to prefix + ((hash(key) ^ hash(section)) % max_shards) in
+  * decimal.
+  */
+ void RGWRados::shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name)
+ {
+   const uint32_t key_hash = ceph_str_hash_linux(key.c_str(), key.size());
+   const uint32_t mixed = key_hash ^ ceph_str_hash_linux(section.c_str(), section.size());
+   name = prefix + std::to_string((unsigned)(mixed % max_shards));
+ }
+
+ // Sets `name` to prefix followed by shard_id in decimal.
+ void RGWRados::shard_name(const string& prefix, unsigned shard_id, string& name)
+ {
+   name = prefix + std::to_string(shard_id);
+ }
+
+ void RGWRados::time_log_prepare_entry(cls_log_entry& entry, const real_time& ut, const string& section, const string& key, bufferlist& bl) // thin wrapper over cls_log_add_prepare_entry()
+ {
+ cls_log_add_prepare_entry(entry, utime_t(ut), section, key, bl); // the real_time stamp is converted to utime_t for the cls call
+ }
+
+ int RGWRados::time_log_add_init(librados::IoCtx& io_ctx) // opens io_ctx on the zone's log_pool for the time_log_add() overloads
+ {
+ return rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx, true); // 4th argument is the `create` flag, going by how open_pool_ctx() names it
+
+ }
+
+ /*
+  * Synchronously appends one entry (timestamp, section, key, payload) to
+  * the time log object `oid` in the zone's log pool. Returns the error
+  * from opening the pool context, or the result of the write operation.
+  */
+ int RGWRados::time_log_add(const string& oid, const real_time& ut, const string& section, const string& key, bufferlist& bl)
+ {
+   librados::IoCtx io_ctx;
+   if (int r = time_log_add_init(io_ctx); r < 0) {
+     return r;
+   }
+
+   utime_t stamp(ut);
+   ObjectWriteOperation op;
+   cls_log_add(op, stamp, section, key, bl);
+
+   return io_ctx.operate(oid, &op);
+ }
+
+ /*
+  * Appends a batch of entries to the time log object `oid`. With a
+  * non-null completion the write is submitted asynchronously, otherwise it
+  * is executed synchronously. Returns the error from opening the pool
+  * context, or the result of submitting/executing the operation.
+  */
+ int RGWRados::time_log_add(const string& oid, list<cls_log_entry>& entries,
+                            librados::AioCompletion *completion, bool monotonic_inc)
+ {
+   librados::IoCtx io_ctx;
+   if (int r = time_log_add_init(io_ctx); r < 0) {
+     return r;
+   }
+
+   ObjectWriteOperation op;
+   cls_log_add(op, entries, monotonic_inc);
+
+   if (completion) {
+     return io_ctx.aio_operate(oid, completion, &op);
+   }
+   return io_ctx.operate(oid, &op);
+ }
+
+ /*
+  * Lists entries of the time log object `oid` within [start_time,
+  * end_time], starting at `marker`. Results are delivered through
+  * entries / out_marker / truncated by the cls call. Returns a negative
+  * error code on failure, 0 otherwise.
+  */
+ int RGWRados::time_log_list(const string& oid, const real_time& start_time, const real_time& end_time,
+                             int max_entries, list<cls_log_entry>& entries,
+                             const string& marker,
+                             string *out_marker,
+                             bool *truncated)
+ {
+   librados::IoCtx io_ctx;
+   if (int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); r < 0) {
+     return r;
+   }
+
+   utime_t st(start_time);
+   utime_t et(end_time);
+
+   librados::ObjectReadOperation op;
+   cls_log_list(op, st, et, marker, max_entries, entries,
+                out_marker, truncated);
+
+   bufferlist obl;
+   const int ret = io_ctx.operate(oid, &op, &obl);
+   return ret < 0 ? ret : 0;
+ }
+
+ /*
+  * Synchronously fetches the header of the time log object `oid` into
+  * *header. Returns a negative error code on failure, 0 otherwise.
+  */
+ int RGWRados::time_log_info(const string& oid, cls_log_header *header)
+ {
+   librados::IoCtx io_ctx;
+   if (int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); r < 0) {
+     return r;
+   }
+
+   librados::ObjectReadOperation op;
+   cls_log_info(op, header);
+
+   bufferlist obl;
+   const int ret = io_ctx.operate(oid, &op, &obl);
+   return ret < 0 ? ret : 0;
+ }
+
+ /*
+  * Asynchronous variant of time_log_info(): opens the caller's io_ctx on
+  * the zone's log pool and submits the header read with `completion`.
+  * Returns a negative error code if opening or submitting fails, else 0.
+  */
+ int RGWRados::time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion)
+ {
+   if (int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); r < 0) {
+     return r;
+   }
+
+   librados::ObjectReadOperation op;
+   cls_log_info(op, header);
+
+   const int ret = io_ctx.aio_operate(oid, completion, &op, NULL);
+   return ret < 0 ? ret : 0;
+ }
+
+ /*
+  * Trims the time log object `oid` by time range and marker range. With a
+  * non-null completion the trim is submitted asynchronously, otherwise it
+  * is executed synchronously. Returns the error from opening the pool
+  * context, or the result of submitting/executing the operation.
+  */
+ int RGWRados::time_log_trim(const string& oid, const real_time& start_time, const real_time& end_time,
+                             const string& from_marker, const string& to_marker,
+                             librados::AioCompletion *completion)
+ {
+   librados::IoCtx io_ctx;
+   if (int r = rgw_init_ioctx(get_rados_handle(), svc.zone->get_zone_params().log_pool, io_ctx); r < 0) {
+     return r;
+   }
+
+   utime_t st(start_time);
+   utime_t et(end_time);
+
+   ObjectWriteOperation op;
+   cls_log_trim(op, st, et, from_marker, to_marker);
+
+   if (completion) {
+     return io_ctx.aio_operate(oid, completion, &op);
+   }
+   return io_ctx.operate(oid, &op);
+ }
+
+ // Returns "obj_delete_at_hint." followed by shard_num as a zero-padded,
+ // 10-digit unsigned decimal.
+ string RGWRados::objexp_hint_get_shardname(int shard_num)
+ {
+   char suffix[32];
+   snprintf(suffix, sizeof(suffix), "%010u", (unsigned)shard_num);
+   return string("obj_delete_at_hint.") + suffix;
+ }
+
+ // Maps an object key (name concatenated with instance) onto one of the
+ // rgw_objexp_hints_num_shards expiration-hint shards.
+ int RGWRados::objexp_key_shard(const rgw_obj_index_key& key)
+ {
+   const int num_shards = cct->_conf->rgw_objexp_hints_num_shards;
+   string shard_key = key.name;
+   shard_key += key.instance;
+   return rgw_bucket_shard_index(shard_key, num_shards);
+ }
+
+ // Builds "[tenant:]bucket_name:bucket_id:obj_name:obj_instance"; the
+ // tenant part (and its colon) is omitted when tenant_name is empty.
+ static string objexp_hint_get_keyext(const string& tenant_name,
+                                      const string& bucket_name,
+                                      const string& bucket_id,
+                                      const rgw_obj_key& obj_key)
+ {
+   string keyext;
+   if (!tenant_name.empty()) {
+     keyext = tenant_name;
+     keyext += ":";
+   }
+   keyext += bucket_name;
+   keyext += ":";
+   keyext += bucket_id;
+   keyext += ":";
+   keyext += obj_key.name;
+   keyext += ":";
+   keyext += obj_key.instance;
+   return keyext;
+ }
+
+ /*
+  * Records an expiration hint for an object: encodes an objexp_hint_entry
+  * and adds it, keyed by delete_at and the object's key extension, to the
+  * hint shard object chosen by objexp_key_shard(). Returns the result of
+  * the write operation.
+  */
+ int RGWRados::objexp_hint_add(const ceph::real_time& delete_at,
+                               const string& tenant_name,
+                               const string& bucket_name,
+                               const string& bucket_id,
+                               const rgw_obj_index_key& obj_key)
+ {
+   const string keyext = objexp_hint_get_keyext(tenant_name, bucket_name,
+                                                bucket_id, obj_key);
+
+   objexp_hint_entry he;
+   he.tenant = tenant_name;
+   he.bucket_name = bucket_name;
+   he.bucket_id = bucket_id;
+   he.obj_key = obj_key;
+   he.exp_time = delete_at;
+
+   bufferlist hebl;
+   encode(he, hebl);
+
+   ObjectWriteOperation op;
+   cls_timeindex_add(op, utime_t(delete_at), keyext, hebl);
+
+   string hint_oid = objexp_hint_get_shardname(objexp_key_shard(obj_key));
+   return objexp_pool_ctx.operate(hint_oid, &op);
+ }
+
+ void RGWRados::objexp_get_shard(int shard_num, // thin wrapper: stores objexp_hint_get_shardname(shard_num) in `shard`
+ string& shard) /* out */
+ {
+ shard = objexp_hint_get_shardname(shard_num);
+ }
+
+ /*
+  * Lists expiration hints stored in the hint object `oid` within
+  * [start_time, end_time], starting at `marker`. A missing object is not
+  * an error: 0 is returned and *truncated (when given) is set to false.
+  * Any other negative result is returned as is.
+  */
+ int RGWRados::objexp_hint_list(const string& oid,
+                                const ceph::real_time& start_time,
+                                const ceph::real_time& end_time,
+                                const int max_entries,
+                                const string& marker,
+                                list<cls_timeindex_entry>& entries, /* out */
+                                string *out_marker, /* out */
+                                bool *truncated) /* out */
+ {
+   librados::ObjectReadOperation op;
+   cls_timeindex_list(op, utime_t(start_time), utime_t(end_time), marker, max_entries, entries,
+                      out_marker, truncated);
+
+   bufferlist obl;
+   const int ret = objexp_pool_ctx.operate(oid, &op, &obl);
+
+   if (ret == -ENOENT) {
+     if (truncated) {
+       *truncated = false;
+     }
+     return 0;
+   }
+   return ret < 0 ? ret : 0;
+ }
+
+ /*
+  * Decodes the payload of a time-index entry into an objexp_hint_entry.
+  *
+  * Returns 0 on success and -EIO when the payload cannot be decoded.
+  * Previously a decode failure was only logged (with a message copied from
+  * unrelated code) and 0 was returned, so callers went on to use a hint
+  * entry that had not been filled in.
+  */
+ int RGWRados::objexp_hint_parse(cls_timeindex_entry &ti_entry, /* in */
+                                 objexp_hint_entry& hint_entry) /* out */
+ {
+   try {
+     auto iter = ti_entry.value.cbegin();
+     decode(hint_entry, iter);
+   } catch (buffer::error& err) {
+     ldout(cct, 0) << "ERROR: couldn't decode removal hint entry" << dendl;
+     return -EIO;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Trims expiration hints from the hint object `oid` by time range and
+  * marker range. A missing object (-ENOENT) counts as success; any other
+  * negative result is returned.
+  */
+ int RGWRados::objexp_hint_trim(const string& oid,
+                                const ceph::real_time& start_time,
+                                const ceph::real_time& end_time,
+                                const string& from_marker,
+                                const string& to_marker)
+ {
+   const int ret = cls_timeindex_trim(objexp_pool_ctx, oid, utime_t(start_time), utime_t(end_time),
+                                      from_marker, to_marker);
+   const bool hard_error = (ret < 0) && (ret != -ENOENT);
+   return hard_error ? ret : 0;
+ }
+
+ /*
+  * Takes the exclusive, renewable log lock on object `oid` in `pool` for
+  * `duration`, using zone_id as the lock tag and owner_id as the cookie.
+  * Returns the error from opening the pool context, or the result of the
+  * lock call.
+  */
+ int RGWRados::lock_exclusive(const rgw_pool& pool, const string& oid, timespan& duration,
+                              string& zone_id, string& owner_id) {
+   librados::IoCtx io_ctx;
+
+   int r = rgw_init_ioctx(get_rados_handle(), pool, io_ctx);
+   if (r < 0) {
+     return r;
+   }
+   uint64_t msec = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
+   // utime_t is built from (seconds, nanoseconds): scale the millisecond
+   // remainder to nanoseconds instead of passing it through unscaled, which
+   // shortened e.g. 1500ms to 1s + 500ns.
+   utime_t ut(msec / 1000, (msec % 1000) * 1000000);
+
+   rados::cls::lock::Lock l(log_lock_name);
+   l.set_duration(ut);
+   l.set_cookie(owner_id);
+   l.set_tag(zone_id);
+   l.set_may_renew(true);
+
+   return l.lock_exclusive(&io_ctx, oid);
+ }
+
+ /*
+  * Releases the log lock on object `oid` in `pool`, identified by zone_id
+  * (tag) and owner_id (cookie). Returns the error from opening the pool
+  * context, or the result of the unlock call.
+  */
+ int RGWRados::unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id) {
+   librados::IoCtx io_ctx;
+   if (int r = rgw_init_ioctx(get_rados_handle(), pool, io_ctx); r < 0) {
+     return r;
+   }
+
+   rados::cls::lock::Lock log_lock(log_lock_name);
+   log_lock.set_tag(zone_id);
+   log_lock.set_cookie(owner_id);
+
+   return log_lock.unlock(&io_ctx, oid);
+ }
+
+ /*
+  * Decodes only the owner part of an encoded access control policy and
+  * stores it in *owner. Returns 0 on success, -EIO if decoding fails.
+  */
+ int RGWRados::decode_policy(bufferlist& bl, ACLOwner *owner)
+ {
+   RGWAccessControlPolicy policy(cct);
+   auto pos = bl.cbegin();
+   try {
+     policy.decode_owner(pos);
+   } catch (buffer::error& err) {
+     ldout(cct, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+     return -EIO;
+   }
+
+   *owner = policy.get_owner();
+   return 0;
+ }
+
+ /*
+  * Decodes the access control policy stored under RGW_ATTR_ACL in attrset
+  * into *policy. Returns -EIO when the attribute is missing or cannot be
+  * decoded, 0 otherwise. When rgw logging at level 15 is enabled the
+  * decoded policy is also dumped as XML.
+  */
+ int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy)
+ {
+   auto aiter = attrset.find(RGW_ATTR_ACL);
+   if (aiter == attrset.end()) {
+     return -EIO;
+   }
+
+   auto iter = aiter->second.cbegin();
+   try {
+     policy->decode(iter);
+   } catch (buffer::error& err) {
+     ldout(cct, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
+     return -EIO;
+   }
+
+   if (!cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+     return 0;
+   }
+
+   auto *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy);
+   ldout(cct, 15) << __func__ << " Read AccessControlPolicy";
+   s3policy->to_xml(*_dout);
+   *_dout << dendl;
+   return 0;
+ }
+
+
+ /*
+  * Reloads bucket_info from the bucket instance identified by
+  * new_bucket_id. The cached object version tracker is cleared before the
+  * reload. Returns a negative error code on failure, 0 otherwise.
+  */
+ int RGWRados::Bucket::update_bucket_id(const string& new_bucket_id)
+ {
+   rgw_bucket target = bucket_info.bucket;
+   target.update_bucket_id(new_bucket_id);
+
+   auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+   bucket_info.objv_tracker.clear();
+   const int ret = store->get_bucket_instance_info(obj_ctx, target, bucket_info, nullptr, nullptr);
+   return ret < 0 ? ret : 0;
+ }
+
+
+ // Returns a copy of delim with one extra byte of value 255 appended.
+ static inline std::string after_delim(std::string_view delim)
+ {
+   // assert: ! delim.empty()
+   std::string bound;
+   bound.reserve(delim.length() + 1);
+   bound.append(delim.data(), delim.length());
+   bound.push_back(char(255));
+   return bound;
+ }
+
+
+/**
+ * Get ordered listing of the objects in a bucket.
+ *
+ * The bucket and shard come from the List's target; prefix, delimiter,
+ * markers, namespace and filter come from this->params.
+ *
+ * max_p: maximum number of results to return; it is clamped to the range
+ * [0, bucket_list_objects_absolute_max]
+ * params.prefix: only return results that match this prefix
+ * params.delim: do not include results that match this string.
+ * Any skipped results will have the matching portion of their name
+ * inserted in common_prefixes with a "true" mark.
+ * params.marker: if filled in, begin the listing with this object.
+ * params.end_marker: if filled in, end the listing with this object.
+ * result: the objects are put in here; it is cleared first.
+ * common_prefixes: if delim is filled in, any matching prefixes are
+ * placed here. May be null, in which case prefixes are skipped
+ * without being recorded.
+ * is_truncated: if number of objects in the bucket is bigger than
+ * max, then truncated. May be null.
+ *
+ * Side effects: params.marker and next_marker are updated while entries
+ * are examined.
+ *
+ * Returns 0 on success, or the negative error returned by
+ * cls_bucket_list_ordered().
+ */
+int RGWRados::Bucket::List::list_objects_ordered(
+ int64_t max_p,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated)
+{
+ RGWRados *store = target->get_store();
+ CephContext *cct = store->ctx();
+ int shard_id = target->get_shard_id();
+
+ int count = 0; // entries plus common prefixes accepted so far
+ bool truncated = true;
+ const int64_t max = // protect against memory issues and negative vals
+ std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p));
+ int read_ahead = std::max(cct->_conf->rgw_list_bucket_min_readahead, max);
+
+ result->clear();
+
+ // use a local marker; either the marker will have a previous entry
+ // or it will be empty; either way it's OK to copy
+ rgw_obj_key marker_obj(params.marker.name,
+ params.marker.instance,
+ params.ns.empty() ? params.marker.ns : params.ns);
+ rgw_obj_index_key cur_marker;
+ marker_obj.get_index_key(&cur_marker);
+
+ rgw_obj_key end_marker_obj(params.end_marker.name,
+ params.end_marker.instance,
+ params.ns.empty() ? params.end_marker.ns : params.ns);
+ rgw_obj_index_key cur_end_marker;
+ end_marker_obj.get_index_key(&cur_end_marker);
+ const bool cur_end_marker_valid = !params.end_marker.empty();
+
+ rgw_obj_key prefix_obj(params.prefix);
+ prefix_obj.set_ns(params.ns);
+ string cur_prefix = prefix_obj.get_index_key_name();
+ string after_delim_s; /* needed in !params.delim.empty() AND later */
+
+ if (!params.delim.empty()) {
+ after_delim_s = after_delim(params.delim);
+ /* if marker points at a common prefix, fast forward it into its
+ * upper bound string */
+ int delim_pos = cur_marker.name.find(params.delim, cur_prefix.size()); // npos becomes negative after the conversion to int
+ if (delim_pos >= 0) {
+ string s = cur_marker.name.substr(0, delim_pos);
+ s.append(after_delim_s);
+ cur_marker = s;
+ }
+ }
+
+ // allows us to skip over entries in two conditions: 1) when using a
+ // delimiter and we can skip over "subdirectories" and 2) when
+ // searching for elements in the empty namespace we can skip over
+ // namespaced elements
+ rgw_obj_index_key marker_skip_ahead;
+
+ rgw_obj_index_key prev_marker;
+ for (uint16_t attempt = 1; /* empty */; ++attempt) {
+ ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
+ " starting attempt " << attempt << dendl;
+
+ if (attempt > 1 && !(prev_marker < cur_marker)) {
+ // we've failed to make forward progress
+ ldout(cct, 0) << "RGWRados::Bucket::List::" << __func__ <<
+ ": ERROR marker failed to make forward progress; attempt=" << attempt <<
+ ", prev_marker=" << prev_marker <<
+ ", cur_marker=" << cur_marker << dendl;
+ break;
+ }
+ prev_marker = cur_marker;
+
+ // see whether we found a way to skip ahead in the previous
+ // iteration
+ if (marker_skip_ahead > cur_marker) {
+ cur_marker = marker_skip_ahead;
+ ldout(cct, 20) << "advancing cur_marker=" << cur_marker << dendl;
+ }
+
+ std::map<string, rgw_bucket_dir_entry> ent_map;
+ const size_t num_requested = read_ahead + 1 - count;
+ int r = store->cls_bucket_list_ordered(target->get_bucket_info(),
+ shard_id,
+ cur_marker,
+ cur_prefix,
+ num_requested,
+ params.list_versions,
+ attempt,
+ ent_map,
+ &truncated,
+ &cur_marker);
+ if (r < 0) {
+ return r; // note: is_truncated is left untouched on this path
+ }
+
+ for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
+ const std::string& key = eiter->first;
+ rgw_bucket_dir_entry& entry = eiter->second;
+ rgw_obj_index_key index_key = entry.key;
+ rgw_obj_key obj(index_key); // NB: why is this re-set below? can't be const
+
+ ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
+ " considering entry " << entry.key << dendl;
+
+ /* note that parse_raw_oid() here will not set the correct
+ * object's instance, as rgw_obj_index_key encodes that
+ * separately. We don't need to set the instance because it's
+ * not needed for the checks here and we end up using the raw
+ * entry for the return vector
+ */
+ bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
+ if (!valid) {
+ ldout(cct, 0) << "ERROR: could not parse object name: " << obj.name << dendl;
+ continue;
+ }
+
+ if (!params.list_versions && !entry.is_visible()) {
+ continue;
+ }
+
+ const bool matched_ns = (obj.ns == params.ns);
+ if (params.enforce_ns && !matched_ns) {
+ if (!params.ns.empty()) {
+ /* we've iterated past the namespace we're searching -- done now */
+ truncated = false;
+ goto done;
+ } else {
+ // we're enforcing an empty namespace, so we need to skip
+ // past the namespace block
+ marker_skip_ahead = rgw_obj_key::after_namespace_marker(key);
+ continue;
+ }
+ }
+
+ if (cur_end_marker_valid && cur_end_marker <= index_key) {
+ truncated = false;
+ goto done;
+ }
+
+ if (count < max) { // only track the position while still under the limit
+ params.marker = index_key;
+ next_marker = index_key;
+ }
+
+ if (params.filter && !params.filter->filter(obj.name, index_key.name)) {
+ continue;
+ }
+
+ if (params.prefix.size() &&
+ (obj.name.compare(0, params.prefix.size(), params.prefix) != 0)) {
+ continue;
+ }
+
+ if (!params.delim.empty()) {
+ int delim_pos = obj.name.find(params.delim, params.prefix.size());
+
+ if (delim_pos >= 0) {
+ /* extract key *with* trailing delimiter for CommonPrefix */
+ const std::string prefix_key =
+ obj.name.substr(0, delim_pos + params.delim.length());
+
+ if (common_prefixes &&
+ common_prefixes->find(prefix_key) == common_prefixes->end()) {
+ if (count >= max) {
+ truncated = true;
+ goto done;
+ }
+ next_marker = prefix_key;
+ (*common_prefixes)[prefix_key] = true;
+
+ // setting marker_skip_ahead allows the next call to
+ // cls_bucket_list_ordered to skip over unlisted entries;
+ // NOTE: after_delim_s
+ const std::string skip_name = obj.name.substr(0, delim_pos) + after_delim_s;
+ const rgw_obj_key skip_key(skip_name, "" /* empty instance*/ , obj.ns);
+ skip_key.get_index_key(&marker_skip_ahead);
+ ldout(cct, 20) << "marker_skip_ahead=" << marker_skip_ahead << dendl;
+
+ count++; // a new common prefix counts against max like an entry
+ }
+
+ continue; // entries under a common prefix are never added to result
+ }
+ }
+
+ if (count >= max) {
+ truncated = true;
+ goto done;
+ }
+
+ ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
+ " adding entry " << entry.key << " to result" << dendl;
+
+ result->emplace_back(std::move(entry));
+ count++;
+ } // eiter for loop
+
+ ldout(cct, 20) << "RGWRados::Bucket::List::" << __func__ <<
+ " INFO end of outer loop, truncated=" << truncated <<
+ ", count=" << count << ", attempt=" << attempt << dendl;
+
+ if (!truncated || count >= (max + 1) / 2) {
+ // if we finished listing, or if we're returning at least half the
+ // requested entries, that's enough; S3 and swift protocols allow
+ // returning fewer than max entries
+ break;
+ } else if (attempt > 8 && count >= 1) {
+ // if we've made at least 8 attempts and we have some, but very
+ // few, results, return with what we have
+ break;
+ }
+ } // for (uint16_t attempt...
+
+done:
+
+ auto csz = (common_prefixes) ? common_prefixes->size() : 0;
+ ldout(cct, 10) << "RGWRados::Bucket::List::" << __func__ <<
+ " INFO returning " << result->size() << " entries and "
+ << csz << " common prefixes" << dendl;
+
+ if (is_truncated) {
+ *is_truncated = truncated;
+ }
+
+ return 0;
+} // list_objects_ordered
+
+
+/**
+ * Get listing of the objects in a bucket and allow the results to be out
+ * of order.
+ *
+ * Even though there are key differences with the ordered counterpart,
+ * the parameters are the same to maintain some compatibility.
+ *
+ * The bucket and shard come from the List's target; prefix, markers,
+ * namespace and filter come from this->params.
+ *
+ * max_p: maximum number of results to return; it is clamped to the range
+ * [0, bucket_list_objects_absolute_max]
+ * params.prefix: only return results that match this prefix
+ * params.delim: should not be set; if it is we should have indicated an
+ * error (this function never reads it)
+ * params.marker: if filled in, begin the listing with this object.
+ * params.end_marker: if filled in, entries at or past this object are
+ * dropped.
+ * result: the objects are put in here; it is cleared first.
+ * common_prefixes: this is never filled with an unordered list; the param
+ * is maintained for compatibility
+ * is_truncated: if number of objects in the bucket is bigger than max, then
+ * truncated. May be null.
+ *
+ * Side effects: params.marker and next_marker are updated while entries
+ * are examined.
+ *
+ * Returns 0 on success, or the negative error returned by
+ * cls_bucket_list_unordered().
+ */
+int RGWRados::Bucket::List::list_objects_unordered(int64_t max_p,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated)
+{
+ RGWRados *store = target->get_store();
+ CephContext *cct = store->ctx();
+ int shard_id = target->get_shard_id();
+
+ int count = 0; // entries accepted into result so far
+ bool truncated = true;
+
+ const int64_t max = // protect against memory issues and negative vals
+ std::min(bucket_list_objects_absolute_max, std::max(int64_t(0), max_p));
+
+ // read a few extra in each call to cls_bucket_list_unordered in
+ // case some are filtered out due to namespace matching, versioning,
+ // filtering, etc.
+ const int64_t max_read_ahead = 100;
+ const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead));
+
+ result->clear();
+
+ // use a local marker; either the marker will have a previous entry
+ // or it will be empty; either way it's OK to copy
+ rgw_obj_key marker_obj(params.marker.name,
+ params.marker.instance,
+ params.ns.empty() ? params.marker.ns : params.ns);
+ rgw_obj_index_key cur_marker;
+ marker_obj.get_index_key(&cur_marker);
+
+ rgw_obj_key end_marker_obj(params.end_marker.name,
+ params.end_marker.instance,
+ params.ns.empty() ? params.end_marker.ns : params.ns);
+ rgw_obj_index_key cur_end_marker;
+ end_marker_obj.get_index_key(&cur_end_marker);
+ const bool cur_end_marker_valid = !params.end_marker.empty();
+
+ rgw_obj_key prefix_obj(params.prefix);
+ prefix_obj.set_ns(params.ns);
+ string cur_prefix = prefix_obj.get_index_key_name();
+
+ while (truncated && count <= max) {
+ std::vector<rgw_bucket_dir_entry> ent_list;
+ ent_list.reserve(read_ahead);
+
+ int r = store->cls_bucket_list_unordered(target->get_bucket_info(),
+ shard_id,
+ cur_marker,
+ cur_prefix,
+ read_ahead,
+ params.list_versions,
+ ent_list,
+ &truncated,
+ &cur_marker);
+ if (r < 0)
+ return r; // note: is_truncated is left untouched on this path
+
+ // NB: while regions of ent_list will be sorted, we have no
+ // guarantee that all items will be sorted since they can cross
+ // shard boundaries
+
+ for (auto& entry : ent_list) {
+ rgw_obj_index_key index_key = entry.key;
+ rgw_obj_key obj(index_key);
+
+ if (count < max) { // position is recorded before any filtering below
+ params.marker.set(index_key);
+ next_marker.set(index_key);
+ }
+
+ /* note that parse_raw_oid() here will not set the correct
+ * object's instance, as rgw_obj_index_key encodes that
+ * separately. We don't need to set the instance because it's
+ * not needed for the checks here and we end up using the raw
+ * entry for the return vector
+ */
+ bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
+ if (!valid) {
+ ldout(cct, 0) << "ERROR: could not parse object name: " <<
+ obj.name << dendl;
+ continue;
+ }
+
+ if (!params.list_versions && !entry.is_visible()) {
+ continue;
+ }
+
+ if (params.enforce_ns && obj.ns != params.ns) {
+ continue;
+ }
+
+ if (cur_end_marker_valid && cur_end_marker <= index_key) {
+ // we're not guaranteed items will come in order, so we have
+ // to loop through all
+ continue;
+ }
+
+ if (params.filter && !params.filter->filter(obj.name, index_key.name))
+ continue;
+
+ if (params.prefix.size() &&
+ (0 != obj.name.compare(0, params.prefix.size(), params.prefix)))
+ continue;
+
+ if (count >= max) { // one more acceptable entry exists than we may return
+ truncated = true;
+ goto done;
+ }
+
+ result->emplace_back(std::move(entry));
+ count++;
+ } // for (auto& entry : ent_list)
+ } // while (truncated && count <= max)
+
+done:
+ if (is_truncated)
+ *is_truncated = truncated;
+
+ return 0;
+} // list_objects_unordered
+
+
+/**
+ * create a rados pool, associated meta info
+ *
+ * An IoCtx is initialised on @pool with the create flag set and is then
+ * discarded.
+ * returns 0 on success, -ERR# otherwise.
+ */
+int RGWRados::create_pool(const rgw_pool& pool)
+{
+  constexpr bool create_flag = true;
+  librados::IoCtx scratch_ctx;
+  return rgw_init_ioctx(get_rados_handle(), pool, scratch_ctx, create_flag);
+}
+
+/**
+ * Initialise the bucket index objects of a bucket instance.
+ *
+ * bucket_info: bucket whose index pool is opened and whose bucket_id names
+ *   the index objects
+ * num_shards: shard count handed to get_bucket_index_objects()
+ *
+ * Returns the negative error from open_bucket_index_ctx() if the index pool
+ * cannot be opened, otherwise the result of the CLSRGWIssueBucketIndexInit
+ * operation.
+ */
+int RGWRados::init_bucket_index(RGWBucketInfo& bucket_info, int num_shards)
+{
+  librados::IoCtx index_ctx;
+
+  const int r = open_bucket_index_ctx(bucket_info, index_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  // base name of the index objects: prefix followed by the instance id
+  string index_oid_base(dir_oid_prefix);
+  index_oid_base += bucket_info.bucket.bucket_id;
+
+  map<int, string> shard_oids;
+  get_bucket_index_objects(index_oid_base, num_shards, shard_oids);
+
+  const auto max_aio = cct->_conf->rgw_bucket_index_max_aio;
+  return CLSRGWIssueBucketIndexInit(index_ctx, shard_oids, max_aio)();
+}
+
+/**
+ * Issue the bucket-index clean operation on the index objects of a bucket
+ * instance.
+ *
+ * bucket_info: bucket whose index pool is opened and whose bucket_id names
+ *   the index objects
+ * num_shards: shard count handed to get_bucket_index_objects()
+ *
+ * Returns the negative error from open_bucket_index_ctx() if the index pool
+ * cannot be opened, otherwise the result of the CLSRGWIssueBucketIndexClean
+ * operation.
+ */
+int RGWRados::clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards)
+{
+  librados::IoCtx index_ctx;
+
+  const int r = open_bucket_index_ctx(bucket_info, index_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  // base name of the index objects: prefix followed by the instance id
+  std::string index_oid_base(dir_oid_prefix);
+  index_oid_base += bucket_info.bucket.bucket_id;
+
+  std::map<int, std::string> shard_oids;
+  get_bucket_index_objects(index_oid_base, num_shards, shard_oids);
+
+  const auto max_aio = cct->_conf->rgw_bucket_index_max_aio;
+  return CLSRGWIssueBucketIndexClean(index_ctx, shard_oids, max_aio)();
+}
+
+/**
+ * Generate a new bucket id of the form "<zone id>.<instance id>.<counter>".
+ *
+ * bucket_id: out; receives the generated id. Must not be null.
+ *
+ * The id is assembled with std::string instead of snprintf() into a
+ * variable-length stack array, which is a compiler extension and not
+ * standard C++.
+ */
+void RGWRados::create_bucket_id(string *bucket_id)
+{
+  const uint64_t iid = instance_id();
+  const uint64_t bid = next_bucket_id();
+  const std::string& zone_id = svc.zone->get_zone_params().get_id();
+
+  *bucket_id = zone_id + "." + std::to_string(iid) + "." + std::to_string(bid);
+}
+
+int RGWRados::create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
+ const string& zonegroup_id,
+ const rgw_placement_rule& placement_rule,
+ const string& swift_ver_location,
+ const RGWQuotaInfo * pquota_info,
+ map<std::string, bufferlist>& attrs,
+ RGWBucketInfo& info,
+ obj_version *pobjv,
+ obj_version *pep_objv,
+ real_time creation_time,
+ rgw_bucket *pmaster_bucket,
+ uint32_t *pmaster_num_shards,
+ bool exclusive)
+{ /*
+ * Create a bucket: select placement, fill in `info`, initialise the
+ * bucket index and write the linked bucket info.
+ *
+ * owner: user recorded as info.owner
+ * bucket: in/out; marker and bucket_id are set here, either freshly
+ * generated or copied from pmaster_bucket
+ * pquota_info: optional quota copied into info.quota
+ * pobjv: optional write version; a new one is generated when null
+ * creation_time: used as-is unless it is zero, then "now" is used
+ * pmaster_num_shards: optional shard count; bucket_index_max_shards is
+ * used when null
+ *
+ * Returns the result of put_linked_bucket_info() (including -EEXIST when
+ * the bucket already exists, in which case `info` holds the re-read
+ * existing bucket info), an earlier negative error, or -ENOENT when all
+ * MAX_CREATE_RETRIES attempts raced with a concurrent removal.
+ */
+#define MAX_CREATE_RETRIES 20 /* need to bound retries */
+ rgw_placement_rule selected_placement_rule;
+ RGWZonePlacementInfo rule_info;
+
+ for (int i = 0; i < MAX_CREATE_RETRIES; i++) {
+ int ret = 0;
+ ret = svc.zone->select_bucket_placement(owner, zonegroup_id, placement_rule,
+ &selected_placement_rule, &rule_info);
+ if (ret < 0)
+ return ret;
+
+ if (!pmaster_bucket) {
+ create_bucket_id(&bucket.marker);
+ bucket.bucket_id = bucket.marker;
+ } else {
+ bucket.marker = pmaster_bucket->marker;
+ bucket.bucket_id = pmaster_bucket->bucket_id;
+ }
+
+ RGWObjVersionTracker& objv_tracker = info.objv_tracker;
+
+ if (pobjv) {
+ objv_tracker.write_version = *pobjv;
+ } else {
+ objv_tracker.generate_new_write_ver(cct);
+ }
+
+ info.bucket = bucket;
+ info.owner = owner.user_id;
+ info.zonegroup = zonegroup_id;
+ info.placement_rule = selected_placement_rule;
+ info.index_type = rule_info.index_type;
+ info.swift_ver_location = swift_ver_location;
+ info.swift_versioning = (!swift_ver_location.empty());
+ if (pmaster_num_shards) {
+ info.num_shards = *pmaster_num_shards;
+ } else {
+ info.num_shards = bucket_index_max_shards;
+ }
+ info.bucket_index_shard_hash_type = RGWBucketInfo::MOD;
+ info.requester_pays = false;
+ if (real_clock::is_zero(creation_time)) {
+ info.creation_time = ceph::real_clock::now();
+ } else {
+ info.creation_time = creation_time;
+ }
+ if (pquota_info) {
+ info.quota = *pquota_info;
+ }
+
+ int r = init_bucket_index(info, info.num_shards);
+ if (r < 0) {
+ return r;
+ }
+
+ ret = put_linked_bucket_info(info, exclusive, ceph::real_time(), pep_objv, &attrs, true);
+ if (ret == -EEXIST) {
+ librados::IoCtx index_ctx;
+ map<int, string> bucket_objs;
+ int r = open_bucket_index(info, index_ctx, bucket_objs); // NB: shadows the outer r
+ if (r < 0)
+ return r;
+
+ /* we need to reread the info and return it, caller will have a use for it */
+ RGWObjVersionTracker instance_ver = info.objv_tracker;
+ info.objv_tracker.clear();
+ auto obj_ctx = svc.sysobj->init_obj_ctx();
+ r = get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL, NULL);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ continue; // the existing bucket vanished in the meantime; retry the creation
+ }
+ ldout(cct, 0) << "get_bucket_info returned " << r << dendl;
+ return r;
+ }
+
+ /* only remove it if it's a different bucket instance */
+ if (info.bucket.bucket_id != bucket.bucket_id) {
+ /* remove bucket meta instance */
+ r = rgw_bucket_instance_remove_entry(this,
+ bucket.get_key(),
+ &instance_ver);
+ if (r < 0)
+ return r;
+
+ /* remove bucket index objects asynchronously by best effort */
+ (void) CLSRGWIssueBucketIndexClean(index_ctx,
+ bucket_objs,
+ cct->_conf->rgw_bucket_index_max_aio)();
+ }
+ /* ret == -EEXIST here */
+ }
+ return ret;
+ }
+
+ /* this is highly unlikely */
+ ldout(cct, 0) << "ERROR: could not create bucket, continuously raced with bucket creation and removal" << dendl;
+ return -ENOENT;
+}
+
+/**
+ * Resolve the data pool for @obj under @placement_rule using this zone's
+ * zonegroup and zone params. The answer is stored in *pool.
+ *
+ * Returns whatever rgw_get_obj_data_pool() returns.
+ */
+bool RGWRados::get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool)
+{
+  const auto& zonegroup = svc.zone->get_zonegroup();
+  const auto& zone_params = svc.zone->get_zone_params();
+  return rgw_get_obj_data_pool(zonegroup, zone_params, placement_rule, obj, pool);
+}
+
+/**
+ * Fill *raw_obj for @obj: oid and locator come from
+ * get_obj_bucket_and_oid_loc(), the pool from get_obj_data_pool().
+ *
+ * Returns the result of get_obj_data_pool().
+ */
+bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj)
+{
+  get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc);
+
+  auto *pool_out = &raw_obj->pool;
+  return get_obj_data_pool(placement_rule, obj, pool_out);
+}
+
+/**
+ * Open *ioctx on the data pool of @obj's head and set its locator key.
+ *
+ * Returns 0 on success, -EIO when no data pool can be determined for the
+ * bucket's placement rule, or the negative error from open_pool_ctx().
+ */
+int RGWRados::get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx)
+{
+  string head_oid, head_loc;
+  get_obj_bucket_and_oid_loc(obj, head_oid, head_loc);
+
+  rgw_pool data_pool;
+  const bool have_pool = get_obj_data_pool(bucket_info.placement_rule, obj, &data_pool);
+  if (!have_pool) {
+    ldout(cct, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl;
+    return -EIO;
+  }
+
+  const int r = open_pool_ctx(data_pool, *ioctx, false);
+  if (r >= 0) {
+    ioctx->locator_set_key(head_loc);
+    return 0;
+  }
+
+  return r;
+}
+
+/**
+ * Fill *ref for the head of @obj: oid and locator, plus an IoCtx opened on
+ * the object's data pool with the locator key set.
+ *
+ * Returns 0 on success, -EIO when no data pool can be determined for the
+ * bucket's placement rule, or the negative error from open_pool_ctx().
+ */
+int RGWRados::get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref)
+{
+  get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc);
+
+  rgw_pool data_pool;
+  const bool have_pool = get_obj_data_pool(bucket_info.placement_rule, obj, &data_pool);
+  if (!have_pool) {
+    ldout(cct, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl;
+    return -EIO;
+  }
+
+  const int r = open_pool_ctx(data_pool, ref->ioctx, false);
+  if (r >= 0) {
+    ref->ioctx.locator_set_key(ref->obj.loc);
+    return 0;
+  }
+
+  return r;
+}
+
+/**
+ * Fill *ref for a raw object and open its IoCtx.
+ *
+ * When @obj has an empty oid, the pool name is used as the oid and the
+ * zone's domain_root pool is used as the pool.
+ *
+ * Returns 0 on success or the negative error from open_pool_ctx().
+ */
+int RGWRados::get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref)
+{
+  ref->obj = obj;
+
+  const bool oid_missing = ref->obj.oid.empty();
+  if (oid_missing) {
+    ref->obj.oid = obj.pool.to_str();
+    ref->obj.pool = svc.zone->get_zone_params().domain_root;
+  }
+
+  const int r = open_pool_ctx(ref->obj.pool, ref->ioctx, false);
+  if (r < 0) {
+    return r;
+  }
+
+  ref->ioctx.locator_set_key(ref->obj.loc);
+  return 0;
+}
+
+/**
+ * Resolve a system object reference; forwards to get_raw_obj_ref() and
+ * returns its result unchanged.
+ */
+int RGWRados::get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref)
+{
+  const int r = get_raw_obj_ref(obj, ref);
+  return r;
+}
+
+/*
+ * fixes an issue where head objects were supposed to have a locator created, but ended
+ * up without one
+ *
+ * The head object is read through an empty locator (xattrs, mtime and up to
+ * HEAD_SIZE bytes of data).
+ *
+ * copy_obj: rewrite the object (data, xattrs, mtime) under the proper locator
+ * remove_bad: remove the object stored under the empty locator
+ * key: key of the object inside bucket_info.bucket
+ *
+ * Returns 0 on success (also when the object needs no locator), -EIO when
+ * the object is larger than HEAD_SIZE or the read came back short, or the
+ * negative error of the failing rados operation. A failed rewrite is
+ * reported immediately, before the removal step is attempted.
+ */
+int RGWRados::fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key)
+{
+  const rgw_bucket& bucket = bucket_info.bucket;
+  string oid;
+  string locator;
+
+  rgw_obj obj(bucket, key);
+
+  get_obj_bucket_and_oid_loc(obj, oid, locator);
+
+  if (locator.empty()) {
+    ldout(cct, 20) << "object does not have a locator, nothing to fix" << dendl;
+    return 0;
+  }
+
+  librados::IoCtx ioctx;
+
+  int ret = get_obj_head_ioctx(bucket_info, obj, &ioctx);
+  if (ret < 0) {
+    cerr << "ERROR: get_obj_head_ioctx() returned ret=" << ret << std::endl;
+    return ret;
+  }
+  ioctx.locator_set_key(string()); /* override locator for this object, use empty locator */
+
+  uint64_t size;
+  bufferlist data;
+
+  struct timespec mtime_ts;
+  map<string, bufferlist> attrs;
+  librados::ObjectReadOperation op;
+  op.getxattrs(&attrs, NULL);
+  op.stat2(&size, &mtime_ts, NULL);
+#define HEAD_SIZE 512 * 1024
+  op.read(0, HEAD_SIZE, &data, NULL);
+
+  ret = ioctx.operate(oid, &op, NULL);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: ioctx.operate(oid=" << oid << ") returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  if (size > HEAD_SIZE) {
+    lderr(cct) << "ERROR: returned object size (" << size << ") > HEAD_SIZE (" << HEAD_SIZE << ")" << dendl;
+    return -EIO;
+  }
+
+  if (size != data.length()) {
+    lderr(cct) << "ERROR: returned object size (" << size << ") != data.length() (" << data.length() << ")" << dendl;
+    return -EIO;
+  }
+
+  if (copy_obj) {
+    librados::ObjectWriteOperation wop;
+
+    wop.mtime2(&mtime_ts);
+
+    for (auto& attr : attrs) {
+      wop.setxattr(attr.first.c_str(), attr.second);
+    }
+
+    wop.write(0, data);
+
+    ioctx.locator_set_key(locator);
+    ret = ioctx.operate(oid, &wop);
+    if (ret < 0) {
+      /* never fall through to the removal below when the copy failed:
+       * the badly located object would be the only remaining copy */
+      lderr(cct) << "ERROR: failed to write object with correct locator, oid=" << oid << " ret=" << ret << dendl;
+      return ret;
+    }
+  }
+
+  if (remove_bad) {
+    ioctx.locator_set_key(string());
+
+    ret = ioctx.remove(oid);
+    if (ret < 0) {
+      lderr(cct) << "ERROR: failed to remove original bad object" << dendl;
+      return ret;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Move a rados object by copying it in COPY_BUF_SIZE chunks and removing
+ * the source afterwards.
+ *
+ * src_ioctx / src_oid / src_locator: where the object currently lives
+ * dst_ioctx / dst_oid / dst_locator: where it should end up
+ *
+ * The destination is created exclusively and gets the source's mtime.
+ * Only object data is copied; no xattr operations are issued. A
+ * zero-length source still gets its (empty) destination created before
+ * the source is removed.
+ *
+ * Returns 0 on success (also when source and destination are identical),
+ * the negative error of the failing rados operation, or -EIO when the
+ * number of bytes copied does not match the size reported by stat. The
+ * result of the final source removal is not checked.
+ */
+int RGWRados::move_rados_obj(librados::IoCtx& src_ioctx,
+                             const string& src_oid, const string& src_locator,
+                             librados::IoCtx& dst_ioctx,
+                             const string& dst_oid, const string& dst_locator)
+{
+
+#define COPY_BUF_SIZE (4 * 1024 * 1024)
+  bool done = false;
+  uint64_t chunk_size = COPY_BUF_SIZE;
+  uint64_t ofs = 0;
+  int ret = 0;
+  struct timespec mtime_ts;
+  uint64_t size = 0;
+
+  if (src_oid == dst_oid && src_locator == dst_locator) {
+    return 0;
+  }
+
+  src_ioctx.locator_set_key(src_locator);
+  dst_ioctx.locator_set_key(dst_locator);
+
+  do {
+    bufferlist data;
+    ObjectReadOperation rop;
+    ObjectWriteOperation wop;
+    const bool first_chunk = (ofs == 0);
+
+    if (first_chunk) {
+      /* size and mtime_ts are only valid once the operation has run */
+      rop.stat2(&size, &mtime_ts, NULL);
+    }
+    rop.read(ofs, chunk_size, &data, NULL);
+    ret = src_ioctx.operate(src_oid, &rop, NULL);
+    if (ret < 0) {
+      goto done_err;
+    }
+
+    if (data.length() == 0 && !first_chunk) {
+      break;
+    }
+
+    if (first_chunk) {
+      wop.create(true); /* make it exclusive */
+      wop.mtime2(&mtime_ts);
+    }
+    if (data.length() > 0) {
+      wop.write(ofs, data);
+    }
+    /* for an empty source this still creates the destination object */
+    ret = dst_ioctx.operate(dst_oid, &wop);
+    if (ret < 0) {
+      goto done_err;
+    }
+    ofs += data.length();
+    done = data.length() != chunk_size;
+  } while (!done);
+
+  if (ofs != size) {
+    lderr(cct) << "ERROR: " << __func__ << ": copying " << src_oid << " -> " << dst_oid
+               << ": expected " << size << " bytes to copy, ended up with " << ofs << dendl;
+    ret = -EIO;
+    goto done_err;
+  }
+
+  src_ioctx.remove(src_oid);
+
+  return 0;
+
+done_err:
+  // TODO: clean up dst_oid if we created it
+  lderr(cct) << "ERROR: failed to copy " << src_oid << " -> " << dst_oid << dendl;
+  return ret;
+}
+
+/*
+ * fixes an issue where tail objects were written under a wrong locator
+ *
+ * Walks the manifest of the object named by @key. For every tail object
+ * (non-empty namespace) that cannot be found under its expected locator,
+ * the object is looked up under the "bad" locator built by
+ * prepend_bucket_marker() from the tail object's name.
+ *
+ * fix: when set, a tail object found under the bad locator is moved to the
+ *   expected one with move_rados_obj()
+ * need_fix: optional out; set to true when at least one misplaced tail
+ *   object was found, false otherwise
+ *
+ * Returns 0, or a negative error when the head reference or the object
+ * state cannot be obtained. A failing move is only logged.
+ */
+int RGWRados::fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix)
+{
+  const rgw_bucket& bucket = bucket_info.bucket;
+  rgw_obj obj(bucket, key);
+
+  if (need_fix) {
+    *need_fix = false;
+  }
+
+  rgw_rados_ref ref;
+  int r = get_obj_head_ref(bucket_info, obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  RGWObjState *astate = NULL;
+  RGWObjectCtx rctx(this);
+  r = get_obj_state(&rctx, bucket_info, obj, &astate, false);
+  if (r < 0)
+    return r;
+
+  if (astate->has_manifest) {
+    RGWObjManifest::obj_iterator miter;
+    RGWObjManifest& manifest = astate->manifest;
+    for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) {
+      rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(this);
+      rgw_obj loc;
+      string oid;
+      string locator;
+
+      rgw_raw_obj_to_obj(manifest.get_tail_placement().bucket, raw_loc, &loc);
+
+      if (loc.key.ns.empty()) {
+        /* continue, we're only interested in tail objects */
+        continue;
+      }
+
+      get_obj_bucket_and_oid_loc(loc, oid, locator);
+      ref.ioctx.locator_set_key(locator);
+
+      ldout(cct, 20) << __func__ << ": key=" << key << " oid=" << oid << " locator=" << locator << dendl;
+
+      r = ref.ioctx.stat(oid, NULL, NULL);
+      if (r != -ENOENT) {
+        /* found under the expected locator, or stat failed some other way */
+        continue;
+      }
+
+      string bad_loc;
+      prepend_bucket_marker(bucket, loc.key.name, bad_loc);
+
+      /* create a new ioctx with the bad locator */
+      librados::IoCtx src_ioctx;
+      src_ioctx.dup(ref.ioctx);
+      src_ioctx.locator_set_key(bad_loc);
+
+      r = src_ioctx.stat(oid, NULL, NULL);
+      if (r != 0) {
+        /* cannot find a broken part */
+        continue;
+      }
+      ldout(cct, 20) << __func__ << ": found bad object part: " << loc << dendl;
+      if (need_fix) {
+        *need_fix = true;
+      }
+      if (fix) {
+        r = move_rados_obj(src_ioctx, oid, bad_loc, ref.ioctx, oid, locator);
+        if (r < 0) {
+          /* best effort: keep going with the remaining tail objects */
+          lderr(cct) << "ERROR: move_rados_obj() on oid=" << oid << " returned r=" << r << dendl;
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Initialise the shard handle for the index shard that @obj hashes to.
+ *
+ * The bucket instance info is read from the store, into *bucket_info_out
+ * when the caller supplies it and into a scratch local otherwise.
+ *
+ * Returns 0 on success or the negative error from
+ * get_bucket_instance_info() / open_bucket_index_shard().
+ */
+int RGWRados::BucketShard::init(const rgw_bucket& _bucket,
+                                const rgw_obj& obj,
+                                RGWBucketInfo* bucket_info_out)
+{
+  bucket = _bucket;
+
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  RGWBucketInfo scratch_info;
+  RGWBucketInfo& info = bucket_info_out ? *bucket_info_out : scratch_info;
+
+  int ret = store->get_bucket_instance_info(sysobj_ctx, bucket, info, NULL, NULL);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = store->open_bucket_index_shard(info, index_ctx, obj.get_hash_object(), &bucket_obj, &shard_id);
+  if (ret >= 0) {
+    ldout(store->ctx(), 20) << " bucket index object: " << bucket_obj << dendl;
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
+  return ret;
+}
+
+/**
+ * Initialise the shard handle for an explicitly given shard id.
+ *
+ * The bucket instance info is read from the store, into *bucket_info_out
+ * when the caller supplies it and into a scratch local otherwise.
+ *
+ * Returns 0 on success or the negative error from
+ * get_bucket_instance_info() / open_bucket_index_shard().
+ */
+int RGWRados::BucketShard::init(const rgw_bucket& _bucket,
+                                int sid,
+                                RGWBucketInfo* bucket_info_out)
+{
+  bucket = _bucket;
+  shard_id = sid;
+
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  RGWBucketInfo scratch_info;
+  RGWBucketInfo& info = bucket_info_out ? *bucket_info_out : scratch_info;
+
+  int ret = store->get_bucket_instance_info(sysobj_ctx, bucket, info, NULL, NULL);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = store->open_bucket_index_shard(info, index_ctx, shard_id, &bucket_obj);
+  if (ret >= 0) {
+    ldout(store->ctx(), 20) << " bucket index object: " << bucket_obj << dendl;
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
+  return ret;
+}
+
+/**
+ * Initialise the shard handle from already loaded bucket info, for the
+ * index shard that @obj hashes to.
+ *
+ * Returns 0 on success or the negative error from
+ * open_bucket_index_shard().
+ */
+int RGWRados::BucketShard::init(const RGWBucketInfo& bucket_info,
+                                const rgw_obj& obj)
+{
+  bucket = bucket_info.bucket;
+
+  const int ret = store->open_bucket_index_shard(bucket_info, index_ctx,
+                                                 obj.get_hash_object(),
+                                                 &bucket_obj, &shard_id);
+  if (ret >= 0) {
+    ldout(store->ctx(), 20) << " bucket index object: " << bucket_obj << dendl;
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
+  return ret;
+}
+
+/**
+ * Initialise the shard handle from already loaded bucket info, for an
+ * explicitly given shard id.
+ *
+ * Returns 0 on success or the negative error from
+ * open_bucket_index_shard().
+ */
+int RGWRados::BucketShard::init(const RGWBucketInfo& bucket_info, int sid)
+{
+  bucket = bucket_info.bucket;
+  shard_id = sid;
+
+  const int ret = store->open_bucket_index_shard(bucket_info, index_ctx, shard_id, &bucket_obj);
+  if (ret >= 0) {
+    ldout(store->ctx(), 20) << " bucket index object: " << bucket_obj << dendl;
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
+  return ret;
+}
+
+
+/* Execute @handler on last item in bucket listing for bucket specified
+ * in @bucket_info. @obj_prefix and @obj_delim narrow down the listing
+ * to objects matching these criteria.
+ *
+ * The bucket is listed in chunks of MAX_LIST_OBJS until the listing is no
+ * longer truncated; the last entry of the last non-empty chunk is kept.
+ *
+ * Returns the negative error of a failed list_objects() call, the value
+ * returned by @handler when an entry was found, or 0 for an empty
+ * listing (the handler is not invoked then). */
+int RGWRados::on_last_entry_in_listing(RGWBucketInfo& bucket_info,
+                                       const std::string& obj_prefix,
+                                       const std::string& obj_delim,
+                                       std::function<int(const rgw_bucket_dir_entry&)> handler)
+{
+  RGWRados::Bucket target(this, bucket_info);
+  RGWRados::Bucket::List list_op(&target);
+
+  list_op.params.prefix = obj_prefix;
+  list_op.params.delim = obj_delim;
+
+  ldout(cct, 20) << "iterating listing for bucket=" << bucket_info.bucket.name
+                 << ", obj_prefix=" << obj_prefix
+                 << ", obj_delim=" << obj_delim
+                 << dendl;
+
+  bool is_truncated = false;
+
+  /* List bucket entries in chunks. */
+  static constexpr int MAX_LIST_OBJS = 100;
+
+  /* One buffer reused for every chunk. It starts out empty: pre-filling it
+   * with default-constructed entries would only be wasted work (the
+   * list_objects_ordered/unordered implementations clear the result first)
+   * and could hand a bogus entry to the handler if a listing ever returned
+   * without touching it. */
+  std::vector<rgw_bucket_dir_entry> entries;
+  entries.reserve(MAX_LIST_OBJS);
+
+  boost::optional<rgw_bucket_dir_entry> last_entry;
+  /* We need to rewind to the last object in a listing. */
+  do {
+    entries.clear();
+
+    int ret = list_op.list_objects(MAX_LIST_OBJS, &entries, nullptr,
+                                   &is_truncated);
+    if (ret < 0) {
+      return ret;
+    } else if (!entries.empty()) {
+      /* the buffer is cleared before its next use, so moving out is safe */
+      last_entry = std::move(entries.back());
+    }
+  } while (is_truncated);
+
+  if (last_entry) {
+    return handler(*last_entry);
+  }
+
+  /* Empty listing - no items we can run handler on. */
+  return 0;
+}
+
+
+/**
+ * Archive the current version of @obj into the bucket named by
+ * bucket_info.swift_ver_location (Swift object versioning).
+ *
+ * The archived copy is named "<3 hex digit name length><name>/<sec>.<usec>"
+ * where the timestamp is the source object's mtime.
+ *
+ * Returns 0 when swift versioning is not enabled for the bucket, when the
+ * object does not exist, when the copy succeeded, or when copy_obj()
+ * reported -ECANCELED / -ENOENT. Returns -ERR_PRECONDITION_FAILED when the
+ * archive bucket does not exist or has a different owner, and any other
+ * negative error as-is.
+ */
+int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx,
+                                    const rgw_user& user,
+                                    RGWBucketInfo& bucket_info,
+                                    rgw_obj& obj)
+{
+  if (! swift_versioning_enabled(bucket_info)) {
+    return 0;
+  }
+
+  obj_ctx.set_atomic(obj);
+
+  RGWObjState * state = nullptr;
+  int r = get_obj_state(&obj_ctx, bucket_info, obj, &state, false);
+  if (r < 0) {
+    return r;
+  }
+
+  if (!state->exists) {
+    return 0;
+  }
+
+  const string& src_name = obj.get_oid();
+  /* heap buffer instead of a variable-length array, which is a compiler
+   * extension and not standard C++; the size bound is unchanged */
+  std::vector<char> buf(src_name.size() + 32);
+  struct timespec ts = ceph::real_clock::to_timespec(state->mtime);
+  snprintf(buf.data(), buf.size(), "%03x%s/%lld.%06ld", (int)src_name.size(),
+           src_name.c_str(), (long long)ts.tv_sec, ts.tv_nsec / 1000);
+
+  RGWBucketInfo dest_bucket_info;
+
+  auto sysobj_ctx = svc.sysobj->init_obj_ctx();
+
+  r = get_bucket_info(sysobj_ctx, bucket_info.bucket.tenant, bucket_info.swift_ver_location, dest_bucket_info, NULL, NULL);
+  if (r < 0) {
+    ldout(cct, 10) << "failed to read dest bucket info: r=" << r << dendl;
+    if (r == -ENOENT) {
+      return -ERR_PRECONDITION_FAILED;
+    }
+    return r;
+  }
+
+  if (dest_bucket_info.owner != bucket_info.owner) {
+    return -ERR_PRECONDITION_FAILED;
+  }
+
+  rgw_obj dest_obj(dest_bucket_info.bucket, buf.data());
+
+  if (dest_bucket_info.versioning_enabled()){
+    gen_rand_obj_instance_name(&dest_obj);
+  }
+
+  obj_ctx.set_atomic(dest_obj);
+
+  string no_zone;
+
+  r = copy_obj(obj_ctx,
+               user,
+               NULL, /* req_info *info */
+               no_zone,
+               dest_obj,
+               obj,
+               dest_bucket_info,
+               bucket_info,
+               bucket_info.placement_rule,
+               NULL, /* time_t *src_mtime */
+               NULL, /* time_t *mtime */
+               NULL, /* const time_t *mod_ptr */
+               NULL, /* const time_t *unmod_ptr */
+               false, /* bool high_precision_time */
+               NULL, /* const char *if_match */
+               NULL, /* const char *if_nomatch */
+               RGWRados::ATTRSMOD_NONE,
+               true, /* bool copy_if_newer */
+               state->attrset,
+               RGWObjCategory::Main,
+               0, /* uint64_t olh_epoch */
+               real_time(), /* time_t delete_at */
+               NULL, /* string *version_id */
+               NULL, /* string *ptag */
+               NULL, /* string *petag */
+               NULL, /* void (*progress_cb)(off_t, void *) */
+               NULL); /* void *progress_data */
+  if (r == -ECANCELED || r == -ENOENT) {
+    /* Has already been overwritten, meaning another rgw process already
+     * copied it out */
+    return 0;
+  }
+
+  return r;
+}
+
+int RGWRados::swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
+ RGWObjectCtx& obj_ctx,
+ const rgw_user& user,
+ RGWBucketInfo& bucket_info,
+ rgw_obj& obj,
+ bool& restored) /* out */
+{ /*
+ * Restore the most recent archived version of @obj from the bucket named
+ * by bucket_info.swift_ver_location (Swift object versioning).
+ *
+ * The archive bucket is listed with the prefix
+ * "<3 hex digit name length><name>"; the last entry of that listing is
+ * copied back over @obj and then deleted from the archive.
+ *
+ * restored: set to true once the copy back succeeded, and to false when
+ * the archive bucket is versioned; it is not written on any other
+ * path, including the early return when swift versioning is disabled.
+ *
+ * Returns 0 when swift versioning is not enabled, when nothing is
+ * archived, or when copy_obj() reported -ECANCELED / -ENOENT; -EPERM when
+ * the archive bucket has a different owner; -ERR_PRECONDITION_FAILED when
+ * the archive bucket is versioned; otherwise the result of the failing
+ * call or of the final delete_obj().
+ */
+ if (! swift_versioning_enabled(bucket_info)) {
+ return 0;
+ }
+
+ /* Bucket info of the bucket that stores previous versions of our object. */
+ RGWBucketInfo archive_binfo;
+
+ int ret = get_bucket_info(sysobj_ctx, bucket_info.bucket.tenant,
+ bucket_info.swift_ver_location, archive_binfo,
+ nullptr, nullptr);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Abort the operation if the bucket storing our archive belongs to someone
+ * else. This is a limitation in comparison to Swift as we aren't taking ACLs
+ * into consideration. For now we can live with that.
+ *
+ * TODO: delegate this check to an upper layer and compare with ACLs. */
+ if (bucket_info.owner != archive_binfo.owner) {
+ return -EPERM;
+ }
+
+ /* This code will be executed on latest version of the object. */
+ const auto handler = [&](const rgw_bucket_dir_entry& entry) -> int {
+ std::string no_zone;
+
+ /* We don't support object versioning of Swift API on those buckets that
+ * are already versioned using the S3 mechanism. This affects also bucket
+ * storing archived objects. Otherwise the delete operation would create
+ * a deletion marker. */
+ if (archive_binfo.versioned()) {
+ restored = false;
+ return -ERR_PRECONDITION_FAILED;
+ }
+
+ /* We are requesting ATTRSMOD_NONE so the attr attribute is perfectly
+ * irrelevant and may be safely skipped. */
+ std::map<std::string, ceph::bufferlist> no_attrs;
+
+ rgw_obj archive_obj(archive_binfo.bucket, entry.key);
+
+ if (bucket_info.versioning_enabled()){
+ gen_rand_obj_instance_name(&obj); // note: modifies the caller's obj
+ }
+
+ obj_ctx.set_atomic(archive_obj);
+ obj_ctx.set_atomic(obj);
+
+ int ret = copy_obj(obj_ctx,
+ user,
+ nullptr, /* req_info *info */
+ no_zone,
+ obj, /* dest obj */
+ archive_obj, /* src obj */
+ bucket_info, /* dest bucket info */
+ archive_binfo, /* src bucket info */
+ bucket_info.placement_rule, /* placement_rule */
+ nullptr, /* time_t *src_mtime */
+ nullptr, /* time_t *mtime */
+ nullptr, /* const time_t *mod_ptr */
+ nullptr, /* const time_t *unmod_ptr */
+ false, /* bool high_precision_time */
+ nullptr, /* const char *if_match */
+ nullptr, /* const char *if_nomatch */
+ RGWRados::ATTRSMOD_NONE,
+ true, /* bool copy_if_newer */
+ no_attrs,
+ RGWObjCategory::Main,
+ 0, /* uint64_t olh_epoch */
+ real_time(), /* time_t delete_at */
+ nullptr, /* string *version_id */
+ nullptr, /* string *ptag */
+ nullptr, /* string *petag */
+ nullptr, /* void (*progress_cb)(off_t, void *) */
+ nullptr); /* void *progress_data */
+ if (ret == -ECANCELED || ret == -ENOENT) {
+ /* Has already been overwritten, meaning another rgw process already
+ * copied it out */
+ return 0;
+ } else if (ret < 0) {
+ return ret;
+ } else {
+ restored = true;
+ }
+
+ /* Need to remove the archived copy. */
+ ret = delete_obj(obj_ctx, archive_binfo, archive_obj,
+ archive_binfo.versioning_status());
+
+ return ret;
+ };
+
+ const std::string& obj_name = obj.get_oid();
+ const auto prefix = boost::str(boost::format("%03x%s") % obj_name.size()
+ % obj_name);
+
+ return on_last_entry_in_listing(archive_binfo, prefix, std::string(),
+ handler);
+}
+
+/* Write the metadata of the target's head object and record it in the index.
+ *
+ * Builds one ObjectWriteOperation for the head object: the xattrs from
+ * `attrs`, optional manifest / storage-class / retention / source-zone
+ * attributes, the mtime and, when meta.data is set, a full data write. The
+ * operation is executed and the bucket index operation is then completed.
+ *
+ * size, accounted_size: forwarded to index_op->complete(); accounted_size is
+ *   also reported to the quota handler unless meta.completeMultipart is set.
+ * attrs: xattrs to store; entries with an empty value are skipped. The
+ *   RGW_ATTR_MANIFEST entry is erased from this map when meta.manifest is set.
+ * assume_noent: forwarded to get_state(). When it is true and the write fails
+ *   with -EEXIST, the cached state is invalidated and -EEXIST is returned
+ *   without cancelling the index operation.
+ * modify_tail: forwarded to prepare_atomic_modification().
+ * _index_op: type-erased RGWRados::Bucket::UpdateIndex pointer.
+ *
+ * Returns 0 on success (also when nothing was queued on the operation),
+ * otherwise a negative error code. On the done_cancel path the index
+ * operation is cancelled, meta.canceled is set and race errors are mapped
+ * according to meta.if_match / meta.if_nomatch.
+ */ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_size,
+ map<string, bufferlist>& attrs,
+ bool assume_noent, bool modify_tail,
+ void *_index_op)
+{
+ RGWRados::Bucket::UpdateIndex *index_op = static_cast<RGWRados::Bucket::UpdateIndex *>(_index_op);
+ RGWRados *store = target->get_store();
+
+ ObjectWriteOperation op;
+#ifdef WITH_LTTNG
+ const struct req_state* s = get_req_state();
+ string req_id;
+ if (!s) {
+ // fake req_id
+ req_id = store->svc.zone_utils->unique_id(store->get_new_req_id());
+ } else {
+ req_id = s->req_id;
+ }
+#endif
+
+ RGWObjState *state;
+ int r = target->get_state(&state, false, assume_noent);
+ if (r < 0)
+ return r;
+
+ rgw_obj& obj = target->get_obj();
+
+ if (obj.get_oid().empty()) {
+ ldout(store->ctx(), 0) << "ERROR: " << __func__ << "(): cannot write object with empty name" << dendl;
+ return -EIO;
+ }
+
+ rgw_rados_ref ref;
+ r = store->get_obj_head_ref(target->get_bucket_info(), obj, &ref);
+ if (r < 0)
+ return r;
+
+ bool is_olh = state->is_olh;
+
+ bool reset_obj = (meta.flags & PUT_OBJ_CREATE) != 0;
+
+ const string *ptag = meta.ptag;
+ if (!ptag && !index_op->get_optag()->empty()) { // fall back to the index op's tag when the caller gave none
+ ptag = index_op->get_optag();
+ }
+ r = target->prepare_atomic_modification(op, reset_obj, ptag, meta.if_match, meta.if_nomatch, false, modify_tail);
+ if (r < 0)
+ return r;
+
+ if (real_clock::is_zero(meta.set_mtime)) {
+ meta.set_mtime = real_clock::now();
+ }
+
+ // NOTE(review): exact equality on meta.flags here, but a bit test for reset_obj above -- confirm this is intended
+ if (target->bucket_info.obj_lock_enabled() && target->bucket_info.obj_lock.has_rule() && meta.flags == PUT_OBJ_CREATE) {
+ auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+ if (iter == attrs.end()) { // only add the bucket's default retention when the caller supplied none
+ real_time lock_until_date = target->bucket_info.obj_lock.get_lock_until_date(meta.set_mtime);
+ string mode = target->bucket_info.obj_lock.get_mode();
+ RGWObjectRetention obj_retention(mode, lock_until_date);
+ bufferlist bl;
+ obj_retention.encode(bl);
+ op.setxattr(RGW_ATTR_OBJECT_RETENTION, bl);
+ }
+ }
+
+ if (state->is_olh) {
+ op.setxattr(RGW_ATTR_OLH_ID_TAG, state->olh_tag);
+ }
+
+ struct timespec mtime_ts = real_clock::to_timespec(meta.set_mtime);
+ op.mtime2(&mtime_ts);
+
+ if (meta.data) {
+ /* if we want to overwrite the data, we also want to overwrite the
+ xattrs, so just remove the object */
+ op.write_full(*meta.data);
+ }
+
+ string etag;
+ string content_type;
+ bufferlist acl_bl;
+ string storage_class;
+
+ map<string, bufferlist>::iterator iter;
+ if (meta.rmattrs) {
+ for (iter = meta.rmattrs->begin(); iter != meta.rmattrs->end(); ++iter) {
+ const string& name = iter->first;
+ op.rmxattr(name.c_str());
+ }
+ }
+
+ if (meta.manifest) {
+ storage_class = meta.manifest->get_tail_placement().placement_rule.storage_class;
+
+ /* remove existing manifest attr */
+ iter = attrs.find(RGW_ATTR_MANIFEST);
+ if (iter != attrs.end())
+ attrs.erase(iter);
+
+ bufferlist bl;
+ encode(*meta.manifest, bl);
+ op.setxattr(RGW_ATTR_MANIFEST, bl);
+ }
+
+ for (iter = attrs.begin(); iter != attrs.end(); ++iter) {
+ const string& name = iter->first;
+ bufferlist& bl = iter->second;
+
+ if (!bl.length()) // attributes with an empty value are not written
+ continue;
+
+ op.setxattr(name.c_str(), bl);
+
+ // remember the values that are later passed to index_op->complete()
+ if (name.compare(RGW_ATTR_ETAG) == 0) {
+ etag = rgw_bl_str(bl);
+ } else if (name.compare(RGW_ATTR_CONTENT_TYPE) == 0) {
+ content_type = rgw_bl_str(bl);
+ } else if (name.compare(RGW_ATTR_ACL) == 0) {
+ acl_bl = bl;
+ }
+ }
+ if (attrs.find(RGW_ATTR_PG_VER) == attrs.end()) {
+ cls_rgw_obj_store_pg_ver(op, RGW_ATTR_PG_VER);
+ }
+
+ if (attrs.find(RGW_ATTR_SOURCE_ZONE) == attrs.end()) {
+ bufferlist bl;
+ encode(store->svc.zone->get_zone_short_id(), bl);
+ op.setxattr(RGW_ATTR_SOURCE_ZONE, bl);
+ }
+
+ if (!storage_class.empty()) {
+ bufferlist bl;
+ bl.append(storage_class);
+ op.setxattr(RGW_ATTR_STORAGE_CLASS, bl);
+ }
+
+ if (!op.size()) // nothing was queued on the operation
+ return 0;
+
+ uint64_t epoch;
+ int64_t poolid;
+ bool orig_exists;
+ uint64_t orig_size;
+
+ if (!reset_obj) { //Multipart upload, it has immutable head.
+ orig_exists = false;
+ orig_size = 0;
+ } else {
+ orig_exists = state->exists;
+ orig_size = state->accounted_size;
+ }
+
+ bool versioned_target = (meta.olh_epoch && *meta.olh_epoch > 0) ||
+ !obj.key.instance.empty();
+
+ bool versioned_op = (target->versioning_enabled() || is_olh || versioned_target);
+
+ if (versioned_op) {
+ index_op->set_bilog_flags(RGW_BILOG_FLAG_VERSIONED_OP);
+ }
+
+ if (!index_op->is_prepared()) { // write_meta() may call this function twice with the same index op
+ tracepoint(rgw_rados, prepare_enter, req_id.c_str());
+ r = index_op->prepare(CLS_RGW_OP_ADD, &state->write_tag);
+ tracepoint(rgw_rados, prepare_exit, req_id.c_str());
+ if (r < 0)
+ return r;
+ }
+
+ tracepoint(rgw_rados, operate_enter, req_id.c_str());
+ r = ref.ioctx.operate(ref.obj.oid, &op);
+ tracepoint(rgw_rados, operate_exit, req_id.c_str());
+ if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under,
+ or -ENOENT if was removed, or -EEXIST if it did not exist
+ before and now it does */
+ if (r == -EEXIST && assume_noent) {
+ target->invalidate_state();
+ return r; // the index op is left prepared, not cancelled
+ }
+ goto done_cancel;
+ }
+
+ epoch = ref.ioctx.get_last_version();
+ poolid = ref.ioctx.get_id();
+
+ r = target->complete_atomic_modification();
+ if (r < 0) { // logged only; r is overwritten by the index completion below
+ ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned r=" << r << dendl;
+ }
+
+ tracepoint(rgw_rados, complete_enter, req_id.c_str());
+ r = index_op->complete(poolid, epoch, size, accounted_size,
+ meta.set_mtime, etag, content_type,
+ storage_class, &acl_bl,
+ meta.category, meta.remove_objs, meta.user_data, meta.appendable);
+ tracepoint(rgw_rados, complete_exit, req_id.c_str());
+ if (r < 0)
+ goto done_cancel;
+
+ if (meta.mtime) {
+ *meta.mtime = meta.set_mtime;
+ }
+
+ /* note that index_op was using state so we couldn't invalidate it earlier */
+ target->invalidate_state();
+ state = NULL;
+
+ if (versioned_op && meta.olh_epoch) {
+ r = store->set_olh(target->get_ctx(), target->get_bucket_info(), obj, false, NULL, *meta.olh_epoch, real_time(), false, meta.zones_trace);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (!real_clock::is_zero(meta.delete_at)) {
+ rgw_obj_index_key obj_key;
+ obj.key.get_index_key(&obj_key);
+
+ r = store->objexp_hint_add(meta.delete_at,
+ obj.bucket.tenant, obj.bucket.name, obj.bucket.bucket_id, obj_key);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: objexp_hint_add() returned r=" << r << ", object will not get removed" << dendl;
+ /* ignoring error, nothing we can do at this point */
+ }
+ }
+ meta.canceled = false;
+
+ /* update quota cache */
+ if (meta.completeMultipart){
+ store->quota_handler->update_stats(meta.owner, obj.bucket, (orig_exists ? 0 : 1),
+ 0, orig_size);
+ }
+ else {
+ store->quota_handler->update_stats(meta.owner, obj.bucket, (orig_exists ? 0 : 1),
+ accounted_size, orig_size);
+ }
+ return 0;
+
+done_cancel:
+ int ret = index_op->cancel(); // separate variable so the original error in r is preserved
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: index_op.cancel()() returned ret=" << ret << dendl;
+ }
+
+ meta.canceled = true;
+
+ /* we lost in a race. There are a few options:
+ * - existing object was rewritten (ECANCELED)
+ * - non existing object was created (EEXIST)
+ * - object was removed (ENOENT)
+ * should treat it as a success
+ */
+ if (meta.if_match == NULL && meta.if_nomatch == NULL) {
+ if (r == -ECANCELED || r == -ENOENT || r == -EEXIST) {
+ r = 0;
+ }
+ } else {
+ if (meta.if_match != NULL) {
+ // only overwrite existing object
+ if (strcmp(meta.if_match, "*") == 0) {
+ if (r == -ENOENT) {
+ r = -ERR_PRECONDITION_FAILED;
+ } else if (r == -ECANCELED) {
+ r = 0;
+ }
+ }
+ }
+
+ if (meta.if_nomatch != NULL) {
+ // only create a new object
+ if (strcmp(meta.if_nomatch, "*") == 0) {
+ if (r == -EEXIST) {
+ r = -ERR_PRECONDITION_FAILED;
+ } else if (r == -ENOENT) {
+ r = 0;
+ }
+ }
+ }
+ }
+
+ return r;
+}
+
+/* Public entry point for writing head-object metadata.
+ *
+ * Creates the bucket index update for the target object and delegates to
+ * _do_write_meta(). When the write is unconditional (neither if_match nor
+ * if_nomatch is set) the first attempt assumes the object does not exist; if
+ * that attempt reports -EEXIST, a second attempt is made without the
+ * assumption, reusing the same index operation.
+ *
+ * Returns the result of the last _do_write_meta() call.
+ */
+int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size,
+                                        map<string, bufferlist>& attrs)
+{
+  RGWBucketInfo& binfo = target->get_bucket_info();
+
+  RGWRados::Bucket bucket_op(target->get_store(), binfo);
+  RGWRados::Bucket::UpdateIndex index_op(&bucket_op, target->get_obj());
+  index_op.set_zones_trace(meta.zones_trace);
+
+  const bool unconditional = (meta.if_match == NULL && meta.if_nomatch == NULL);
+  if (unconditional) {
+    /* optimistic attempt: assume the object does not exist yet */
+    const int first = _do_write_meta(size, accounted_size, attrs, true,
+                                     meta.modify_tail, (void *)&index_op);
+    if (first != -EEXIST) {
+      return first;
+    }
+  }
+
+  /* conditional write, or the optimistic attempt found an existing object */
+  return _do_write_meta(size, accounted_size, attrs, false,
+                        meta.modify_tail, (void *)&index_op);
+}
+
+/* Receive callback that feeds the body of an incoming HTTP stream into a
+ * rgw::putobj::DataProcessor chain.
+ *
+ * The first extra_data_left bytes of the stream are collected into
+ * extra_data_bl and parsed by process_attrs() as JSON carrying an "attrs" map.
+ * Everything after that is handed to filter->process() together with a
+ * running offset (data_len). process_attrs() may put a compressor (behind a
+ * buffering stage) and an etag verifier in front of the object processor.
+ */ class RGWRadosPutObj : public RGWHTTPStreamRWRequest::ReceiveCB
+{
+ CephContext* cct;
+ rgw_obj obj; // not referenced anywhere in this class
+ rgw::putobj::DataProcessor *filter; // head of the processing chain; starts as the object processor
+ boost::optional<RGWPutObj_Compress>& compressor;
+ bool try_etag_verify;
+ rgw::putobj::etag_verifier_ptr etag_verifier;
+ boost::optional<rgw::putobj::ChunkProcessor> buffering;
+ CompressorRef& plugin;
+ rgw::putobj::ObjectProcessor *processor;
+ void (*progress_cb)(off_t, void *);
+ void *progress_data;
+ bufferlist extra_data_bl, manifest_bl;
+ std::optional<RGWCompressionInfo> compression_info;
+ uint64_t extra_data_left{0};
+ bool need_to_process_attrs{true};
+ uint64_t data_len{0}; // payload bytes handed to the filter chain so far
+ map<string, bufferlist> src_attrs;
+ uint64_t ofs{0}; // bytes seen so far, extra data included
+ uint64_t lofs{0}; /* logical ofs; shadowed by a local of the same name in handle_data() */
+ std::function<int(const map<string, bufferlist>&)> attrs_handler;
+public:
+ RGWRadosPutObj(CephContext* cct,
+ CompressorRef& plugin,
+ boost::optional<RGWPutObj_Compress>& compressor,
+ rgw::putobj::ObjectProcessor *p,
+ void (*_progress_cb)(off_t, void *),
+ void *_progress_data,
+ std::function<int(const map<string, bufferlist>&)> _attrs_handler) :
+ cct(cct),
+ filter(p),
+ compressor(compressor),
+ try_etag_verify(cct->_conf->rgw_sync_obj_etag_verify),
+ plugin(plugin),
+ processor(p),
+ progress_cb(_progress_cb),
+ progress_data(_progress_data),
+ attrs_handler(_attrs_handler) {}
+
+ /* Decode the collected extra data and set up the filter chain.
+ *
+ * If extra data was received it is parsed as JSON and its "attrs" entry is
+ * decoded into src_attrs. The compression attribute is removed (and kept as
+ * compression_info when etag verification is enabled), the manifest is
+ * moved into manifest_bl and all attributes starting with
+ * RGW_ATTR_OLH_PREFIX are erased. attrs_handler is then called with the
+ * resulting map.
+ *
+ * Returns -EIO when the JSON cannot be parsed, the handler's error when it
+ * fails, and 0 otherwise. A failure to create the etag verifier is only
+ * logged.
+ */ int process_attrs(void) {
+ if (extra_data_bl.length()) {
+ JSONParser jp;
+ if (!jp.parse(extra_data_bl.c_str(), extra_data_bl.length())) {
+ ldout(cct, 0) << "failed to parse response extra data. len=" << extra_data_bl.length() << " data=" << extra_data_bl.c_str() << dendl;
+ return -EIO;
+ }
+
+ JSONDecoder::decode_json("attrs", src_attrs, &jp);
+
+ auto iter = src_attrs.find(RGW_ATTR_COMPRESSION);
+ if (iter != src_attrs.end()) {
+ const bufferlist bl = std::move(iter->second);
+ src_attrs.erase(iter); // don't preserve source compression info
+
+ if (try_etag_verify) {
+ // if we're trying to verify etags, we need to convert compressed
+ // ranges in the manifest back into logical multipart part offsets
+ RGWCompressionInfo info;
+ bool compressed = false;
+ int r = rgw_compression_info_from_attr(bl, compressed, info);
+ if (r < 0) {
+ ldout(cct, 4) << "failed to decode compression info, "
+ "disabling etag verification" << dendl;
+ try_etag_verify = false;
+ } else if (compressed) {
+ compression_info = std::move(info);
+ }
+ }
+ }
+ /* We need the manifest to recompute the ETag for verification */
+ iter = src_attrs.find(RGW_ATTR_MANIFEST);
+ if (iter != src_attrs.end()) {
+ manifest_bl = std::move(iter->second);
+ src_attrs.erase(iter);
+ }
+
+ // filter out olh attributes
+ iter = src_attrs.lower_bound(RGW_ATTR_OLH_PREFIX);
+ while (iter != src_attrs.end()) {
+ if (!boost::algorithm::starts_with(iter->first, RGW_ATTR_OLH_PREFIX)) {
+ break;
+ }
+ iter = src_attrs.erase(iter);
+ }
+ }
+
+ int ret = attrs_handler(src_attrs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (plugin && src_attrs.find(RGW_ATTR_CRYPT_MODE) == src_attrs.end()) {
+ //do not compress if object is encrypted
+ compressor = boost::in_place(cct, plugin, filter);
+ // add a filter that buffers data so we don't try to compress tiny blocks.
+ // libcurl reads in 16k at a time, and we need at least 64k to get a good
+ // compression ratio
+ constexpr unsigned buffer_size = 512 * 1024;
+ buffering = boost::in_place(&*compressor, buffer_size);
+ filter = &*buffering;
+ }
+
+ /*
+ * Presently we don't support ETag based verification if encryption is
+ * requested. We can enable simultaneous support once we have a mechanism
+ * to know the sequence in which the filters must be applied.
+ */
+ if (try_etag_verify && src_attrs.find(RGW_ATTR_CRYPT_MODE) == src_attrs.end()) {
+ ret = rgw::putobj::create_etag_verifier(cct, filter, manifest_bl,
+ compression_info,
+ etag_verifier);
+ if (ret < 0) { // not fatal: the transfer continues without verification
+ ldout(cct, 4) << "failed to initial etag verifier, "
+ "disabling etag verification" << dendl;
+ } else {
+ filter = etag_verifier.get();
+ }
+ }
+
+ need_to_process_attrs = false;
+
+ return 0;
+ }
+
+ /* Consume one chunk of the incoming stream.
+ *
+ * While extra data is still expected, the leading bytes of bl are spliced
+ * into extra_data_bl; process_attrs() runs once the last extra byte has
+ * arrived. Remaining bytes are passed to the filter chain at the offset
+ * given by the number of payload bytes seen before this chunk.
+ *
+ * Returns 0 when the chunk held only extra data, an error from
+ * process_attrs(), or the result of filter->process().
+ */ int handle_data(bufferlist& bl, bool *pause) override {
+ if (progress_cb) {
+ progress_cb(data_len, progress_data); // reports the count from before this chunk
+ }
+ if (extra_data_left) {
+ uint64_t extra_len = bl.length();
+ if (extra_len > extra_data_left)
+ extra_len = extra_data_left;
+
+ bufferlist extra;
+ bl.splice(0, extra_len, &extra);
+ extra_data_bl.append(extra);
+
+ extra_data_left -= extra_len;
+ if (extra_data_left == 0) {
+ int res = process_attrs();
+ if (res < 0)
+ return res;
+ }
+ ofs += extra_len;
+ if (bl.length() == 0) {
+ return 0;
+ }
+ }
+ if (need_to_process_attrs) {
+ /* need to call process_attrs() even if we don't get any attrs,
+ * need it to call attrs_handler().
+ */
+ int res = process_attrs();
+ if (res < 0) {
+ return res;
+ }
+ }
+
+ ceph_assert(uint64_t(ofs) >= extra_data_len);
+
+ uint64_t size = bl.length();
+ ofs += size;
+
+ const uint64_t lofs = data_len; // local; hides the member of the same name
+ data_len += size;
+
+ return filter->process(std::move(bl), lofs);
+ }
+
+ /* Pass an empty buffer at offset data_len down the filter chain. */ int flush() {
+ return filter->process({}, data_len);
+ }
+
+ bufferlist& get_extra_data() { return extra_data_bl; }
+
+ map<string, bufferlist>& get_attrs() { return src_attrs; }
+
+ /* Record how many leading bytes are extra data, then notify the base class. */ void set_extra_data_len(uint64_t len) override {
+ extra_data_left = len;
+ RGWHTTPStreamRWRequest::ReceiveCB::set_extra_data_len(len);
+ }
+
+ uint64_t get_data_len() {
+ return data_len;
+ }
+
+ /* Etag computed by the verifier, or "" when no verifier was installed. */ std::string get_verifier_etag() {
+ if (etag_verifier) {
+ etag_verifier->calculate_etag();
+ return etag_verifier->get_calculated_etag();
+ } else {
+ return "";
+ }
+ }
+};
+
+/*
+ * prepare attrset depending on attrs_mod.
+ */
+static void set_copy_attrs(map<string, bufferlist>& src_attrs,
+                           map<string, bufferlist>& attrs,
+                           RGWRados::AttrsMod attrs_mod)
+{
+  /* ATTRSMOD_NONE: the destination gets an exact copy of the source set. */
+  if (attrs_mod == RGWRados::ATTRSMOD_NONE) {
+    attrs = src_attrs;
+    return;
+  }
+
+  /* ATTRSMOD_MERGE: add every source attribute the caller did not provide;
+   * insert() leaves keys that are already present untouched. */
+  if (attrs_mod == RGWRados::ATTRSMOD_MERGE) {
+    for (const auto& src_entry : src_attrs) {
+      attrs.insert(src_entry);
+    }
+    return;
+  }
+
+  if (attrs_mod != RGWRados::ATTRSMOD_REPLACE) {
+    return;
+  }
+
+  /* ATTRSMOD_REPLACE: keep the caller's attributes, but fall back to the
+   * source etag and tail tag when the caller's values are empty. Both
+   * lookups use operator[], so missing keys are created with empty values. */
+  bufferlist& dest_etag = attrs[RGW_ATTR_ETAG];
+  if (dest_etag.length() == 0) {
+    dest_etag = src_attrs[RGW_ATTR_ETAG];
+  }
+
+  bufferlist& dest_tail_tag = attrs[RGW_ATTR_TAIL_TAG];
+  if (dest_tail_tag.length() == 0) {
+    auto src_tail_tag = src_attrs.find(RGW_ATTR_TAIL_TAG);
+    if (src_tail_tag != src_attrs.end()) {
+      dest_tail_tag = src_tail_tag->second;
+    }
+  }
+}
+
+/* Rewrite an object in place: read its attributes, mtime and size, then copy
+ * its data back onto the same object using the bucket's placement rule.
+ * The id tag and tail tag attributes are dropped before the copy.
+ *
+ * Returns the error from preparing the read, otherwise the result of
+ * copy_obj_data().
+ */
+int RGWRados::rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj)
+{
+  map<string, bufferlist> head_attrs;
+
+  real_time last_modified;
+  uint64_t total_size;
+  RGWObjectCtx octx(this);
+
+  RGWRados::Object target_obj(this, dest_bucket_info, octx, obj);
+  RGWRados::Object::Read reader(&target_obj);
+
+  /* outputs filled in by prepare() */
+  reader.params.obj_size = &total_size;
+  reader.params.lastmod = &last_modified;
+  reader.params.attrs = &head_attrs;
+
+  const int r = reader.prepare();
+  if (r < 0) {
+    return r;
+  }
+
+  head_attrs.erase(RGW_ATTR_TAIL_TAG);
+  head_attrs.erase(RGW_ATTR_ID_TAG);
+
+  return copy_obj_data(octx, dest_bucket_info, dest_bucket_info.placement_rule,
+                       reader, total_size - 1, obj, NULL, last_modified,
+                       head_attrs, 0, real_time(), NULL);
+}
+
+/* Ordering key used to decide which of two versions of an object is newer.
+ *
+ * Versions are ordered by mtime first. When the mtimes are equal and both
+ * sides carry a non-zero zone_short_id, the zone id and then pg_ver break the
+ * tie. Unless both sides have high_precision set, the comparison ignores the
+ * sub-second part of mtime.
+ */
+struct obj_time_weight {
+  real_time mtime;
+  uint32_t zone_short_id;
+  uint64_t pg_ver;
+  bool high_precision;
+
+  obj_time_weight() : zone_short_id(0), pg_ver(0), high_precision(false) {}
+
+  /* Tie-break shared by both precisions, used once the mtimes compare equal.
+   * Returns true when *this orders before rhs. */
+  bool tie_break_less(const obj_time_weight& rhs) const {
+    if (!zone_short_id || !rhs.zone_short_id) {
+      /* don't compare zone ids, if one wasn't provided */
+      return false;
+    }
+    if (zone_short_id != rhs.zone_short_id) {
+      return (zone_short_id < rhs.zone_short_id);
+    }
+    return (pg_ver < rhs.pg_ver);
+  }
+
+  /* Less-than with the nanosecond part of both mtimes cleared. */
+  bool compare_low_precision(const obj_time_weight& rhs) const {
+    struct timespec l = ceph::real_clock::to_timespec(mtime);
+    struct timespec r = ceph::real_clock::to_timespec(rhs.mtime);
+    l.tv_nsec = 0;
+    r.tv_nsec = 0;
+    if (l > r) {
+      return false;
+    }
+    if (l < r) {
+      return true;
+    }
+    return tie_break_less(rhs);
+  }
+
+  /* Less-than; uses the full mtime only when both sides are high precision. */
+  bool operator<(const obj_time_weight& rhs) const {
+    if (!high_precision || !rhs.high_precision) {
+      return compare_low_precision(rhs);
+    }
+    if (mtime > rhs.mtime) {
+      return false;
+    }
+    if (mtime < rhs.mtime) {
+      return true;
+    }
+    return tie_break_less(rhs);
+  }
+
+  /* Set the three weight fields; high_precision is left unchanged. */
+  void init(const real_time& _mtime, uint32_t _short_id, uint64_t _pg_ver) {
+    mtime = _mtime;
+    zone_short_id = _short_id;
+    pg_ver = _pg_ver;
+  }
+
+  /* Set the three weight fields from an object state; high_precision is
+   * left unchanged. */
+  void init(RGWObjState *state) {
+    mtime = state->mtime;
+    zone_short_id = state->zone_short_id;
+    pg_ver = state->pg_ver;
+  }
+};
+
+/* Print the mtime, followed by "[zid=..., pgv=...]" when either the zone
+ * short id or the pg version is non-zero. */
+inline ostream& operator<<(ostream& out, const obj_time_weight &o) {
+  out << o.mtime;
+
+  const bool no_weight = (o.zone_short_id == 0 && o.pg_ver == 0);
+  if (no_weight) {
+    return out;
+  }
+
+  out << "[zid=" << o.zone_short_id << ", pgv=" << o.pg_ver << "]";
+  return out;
+}
+
+/* Receive callback that keeps only the leading "extra data" part of a
+ * response: up to extra_data_len bytes are moved out of the incoming buffers
+ * into extra_data. */
+class RGWGetExtraDataCB : public RGWHTTPStreamRWRequest::ReceiveCB {
+  bufferlist extra_data;
+public:
+  RGWGetExtraDataCB() {}
+
+  /* Returns the length bl had on entry. */
+  int handle_data(bufferlist& bl, bool *pause) override {
+    const int received = (int)bl.length();
+    if (extra_data.length() >= extra_data_len) {
+      /* all the extra data has been collected already */
+      return received;
+    }
+
+    const off_t missing = extra_data_len - extra_data.length();
+    const off_t take = (missing > received) ? received : missing;
+    bl.splice(0, take, &extra_data);
+    return received;
+  }
+
+  bufferlist& get_extra_data() {
+    return extra_data;
+  }
+};
+
+/* Query a remote zone or zonegroup for an object's metadata.
+ *
+ * Sends a GET with the rgwx_stat flag set and decodes the "attrs" entry of
+ * the extra data that precedes the response body.
+ *
+ * source_zone: when non-empty, the connection to that zone is used. When
+ *   empty, the connection to src_bucket_info.zonegroup is used, or the master
+ *   connection if that zonegroup name is empty.
+ * src_mtime, psize, pheaders: optional outputs filled from the response.
+ * mod_ptr, unmod_ptr: forwarded to the remote request.
+ * pattrs: optional output; receives the decoded attributes with
+ *   RGW_ATTR_MANIFEST removed.
+ * petag: optional output; receives the RGW_ATTR_ETAG value with trailing NUL
+ *   bytes stripped. It is left untouched when the attribute is absent.
+ * obj_ctx, high_precision_time, if_match, if_nomatch, version_id and ptag are
+ *   not used by this function.
+ *
+ * Returns 0 on success, -ENOENT when no matching connection exists, -EIO when
+ * the extra data is not valid JSON, or the error from the remote request.
+ */
+int RGWRados::stat_remote_obj(RGWObjectCtx& obj_ctx,
+               const rgw_user& user_id,
+               req_info *info,
+               const string& source_zone,
+               rgw_obj& src_obj,
+               RGWBucketInfo& src_bucket_info,
+               real_time *src_mtime,
+               uint64_t *psize,
+               const real_time *mod_ptr,
+               const real_time *unmod_ptr,
+               bool high_precision_time,
+               const char *if_match,
+               const char *if_nomatch,
+               map<string, bufferlist> *pattrs,
+               map<string, string> *pheaders,
+               string *version_id,
+               string *ptag,
+               string *petag)
+{
+  /* source is in a different zonegroup, copy from there */
+
+  RGWRESTStreamRWRequest *in_stream_req;
+  map<string, bufferlist> src_attrs;
+
+  RGWRESTConn *conn;
+  if (source_zone.empty()) {
+    if (src_bucket_info.zonegroup.empty()) {
+      /* source is in the master zonegroup */
+      conn = svc.zone->get_master_conn();
+    } else {
+      auto& zonegroup_conn_map = svc.zone->get_zonegroup_conn_map();
+      map<string, RGWRESTConn *>::iterator iter = zonegroup_conn_map.find(src_bucket_info.zonegroup);
+      if (iter == zonegroup_conn_map.end()) {
+        /* source_zone is empty in this branch, so report the zonegroup that
+         * was actually looked up */
+        ldout(cct, 0) << "could not find zonegroup connection to zonegroup: " << src_bucket_info.zonegroup << dendl;
+        return -ENOENT;
+      }
+      conn = iter->second;
+    }
+  } else {
+    auto& zone_conn_map = svc.zone->get_zone_conn_map();
+    map<string, RGWRESTConn *>::iterator iter = zone_conn_map.find(source_zone);
+    if (iter == zone_conn_map.end()) {
+      ldout(cct, 0) << "could not find zone connection to zone: " << source_zone << dendl;
+      return -ENOENT;
+    }
+    conn = iter->second;
+  }
+
+  RGWGetExtraDataCB cb;
+  real_time set_mtime;
+
+  const real_time *pmod = mod_ptr;
+
+  /* default-constructed: zone_short_id and pg_ver are both sent as 0 */
+  obj_time_weight dest_mtime_weight;
+
+  constexpr bool prepend_meta = true;
+  constexpr bool get_op = true;
+  constexpr bool rgwx_stat = true;
+  constexpr bool sync_manifest = true;
+  constexpr bool skip_decrypt = true;
+  int ret = conn->get_obj(user_id, info, src_obj, pmod, unmod_ptr,
+                          dest_mtime_weight.zone_short_id, dest_mtime_weight.pg_ver,
+                          prepend_meta, get_op, rgwx_stat,
+                          sync_manifest, skip_decrypt,
+                          true, &cb, &in_stream_req);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = conn->complete_request(in_stream_req, nullptr, &set_mtime, psize, nullptr, pheaders);
+  if (ret < 0) {
+    return ret;
+  }
+
+  bufferlist& extra_data_bl = cb.get_extra_data();
+  if (extra_data_bl.length()) {
+    JSONParser jp;
+    if (!jp.parse(extra_data_bl.c_str(), extra_data_bl.length())) {
+      ldout(cct, 0) << "failed to parse response extra data. len=" << extra_data_bl.length() << " data=" << extra_data_bl.c_str() << dendl;
+      return -EIO;
+    }
+
+    JSONDecoder::decode_json("attrs", src_attrs, &jp);
+
+    src_attrs.erase(RGW_ATTR_MANIFEST); // not interested in original object layout
+  }
+
+  if (src_mtime) {
+    *src_mtime = set_mtime;
+  }
+
+  if (petag) {
+    map<string, bufferlist>::iterator iter = src_attrs.find(RGW_ATTR_ETAG);
+    if (iter != src_attrs.end()) {
+      *petag = iter->second.to_str();
+      /* strip trailing NUL bytes in place */
+      while (!petag->empty() && petag->back() == '\0') {
+        petag->pop_back();
+      }
+    }
+  }
+
+  if (pattrs) {
+    *pattrs = std::move(src_attrs);
+  }
+
+  return 0;
+}
+
+/* Fetch an object from a remote zone or zonegroup and store it as dest_obj.
+ *
+ * The remote GET is streamed through an RGWRadosPutObj callback into an
+ * AtomicObjectProcessor. Once the attributes that precede the body arrive,
+ * the tail placement and compression plugin are chosen and the processor is
+ * prepared. After the transfer the size and, when a verifier was installed,
+ * the etag are checked before the write is completed.
+ *
+ * source_zone: when non-empty, the connection to that zone is used and the
+ *   source attributes are stored as received. When empty, a zonegroup or the
+ *   master connection is used and the attributes are combined with `attrs`
+ *   according to attrs_mod.
+ * dest_placement_rule: optional tail placement. Without it, the rule comes
+ *   from the source's RGW_ATTR_STORAGE_CLASS or the bucket's placement rule.
+ * src_mtime: optional output, the mtime reported by the source.
+ * mtime: forwarded to processor.complete().
+ * mod_ptr, unmod_ptr: forwarded to the remote request. With copy_if_newer and
+ *   an existing destination, the destination's mtime replaces mod_ptr.
+ * attrs: in/out; on success holds the attributes that were written.
+ * delete_at: replaced by the source's RGW_ATTR_DELETE_AT when source_zone is
+ *   non-empty and the attribute decodes.
+ * petag: optional output, the source's RGW_ATTR_ETAG value.
+ * bytes_transferred: optional output, set on success.
+ * if_match, if_nomatch, category and ptag are not used by this function.
+ *
+ * Returns 0 on success; -ENOENT when no matching connection exists; -EIO on
+ * a size mismatch, an etag mismatch or too many completion retries; other
+ * negative errors from the calls made. With copy_if_newer, a result of
+ * -ERR_NOT_MODIFIED becomes 0, or the result of set_olh() when olh_epoch is
+ * set and greater than zero.
+ *
+ * The function jumps to set_err_state with goto, so no variable with an
+ * initializer may be declared at function scope after the first goto; the
+ * request flags below are `static` for that reason.
+ */
+int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx,
+               const rgw_user& user_id,
+               req_info *info,
+               const string& source_zone,
+               const rgw_obj& dest_obj,
+               const rgw_obj& src_obj,
+               RGWBucketInfo& dest_bucket_info,
+               RGWBucketInfo& src_bucket_info,
+               std::optional<rgw_placement_rule> dest_placement_rule,
+               real_time *src_mtime,
+               real_time *mtime,
+               const real_time *mod_ptr,
+               const real_time *unmod_ptr,
+               bool high_precision_time,
+               const char *if_match,
+               const char *if_nomatch,
+               AttrsMod attrs_mod,
+               bool copy_if_newer,
+               map<string, bufferlist>& attrs,
+               RGWObjCategory category,
+               std::optional<uint64_t> olh_epoch,
+               real_time delete_at,
+               string *ptag,
+               string *petag,
+               void (*progress_cb)(off_t, void *),
+               void *progress_data,
+               rgw_zone_set *zones_trace,
+               std::optional<uint64_t>* bytes_transferred)
+{
+  /* source is in a different zonegroup, copy from there */
+
+  RGWRESTStreamRWRequest *in_stream_req;
+  string tag;
+  int i;
+  append_rand_alpha(cct, tag, tag, 32);
+  obj_time_weight set_mtime_weight;
+  set_mtime_weight.high_precision = high_precision_time;
+  int ret;
+
+  rgw::AioThrottle aio(cct->_conf->rgw_put_obj_min_window_size);
+  using namespace rgw::putobj;
+  const rgw_placement_rule *ptail_rule = (dest_placement_rule ? &(*dest_placement_rule) : nullptr);
+  AtomicObjectProcessor processor(&aio, this, dest_bucket_info, ptail_rule, user_id,
+                                  obj_ctx, dest_obj, olh_epoch, tag);
+  RGWRESTConn *conn;
+  auto& zone_conn_map = svc.zone->get_zone_conn_map();
+  auto& zonegroup_conn_map = svc.zone->get_zonegroup_conn_map();
+  if (source_zone.empty()) {
+    /* NOTE(review): this tests the destination bucket's zonegroup while the
+     * lookup below uses the source bucket's zonegroup -- confirm which one
+     * is intended. Behavior is left unchanged here. */
+    if (dest_bucket_info.zonegroup.empty()) {
+      /* source is in the master zonegroup */
+      conn = svc.zone->get_master_conn();
+    } else {
+      map<string, RGWRESTConn *>::iterator iter = zonegroup_conn_map.find(src_bucket_info.zonegroup);
+      if (iter == zonegroup_conn_map.end()) {
+        /* source_zone is empty in this branch, so report the zonegroup that
+         * was actually looked up */
+        ldout(cct, 0) << "could not find zonegroup connection to zonegroup: " << src_bucket_info.zonegroup << dendl;
+        return -ENOENT;
+      }
+      conn = iter->second;
+    }
+  } else {
+    map<string, RGWRESTConn *>::iterator iter = zone_conn_map.find(source_zone);
+    if (iter == zone_conn_map.end()) {
+      ldout(cct, 0) << "could not find zone connection to zone: " << source_zone << dendl;
+      return -ENOENT;
+    }
+    conn = iter->second;
+  }
+
+  boost::optional<RGWPutObj_Compress> compressor;
+  CompressorRef plugin;
+
+  rgw_placement_rule dest_rule;
+  /* The handler runs once the source attributes are known: it chooses the
+   * tail placement and compression plugin, then prepares the processor. */
+  RGWRadosPutObj cb(cct, plugin, compressor, &processor, progress_cb, progress_data,
+                    [&](const map<string, bufferlist>& obj_attrs) {
+                      if (!ptail_rule) {
+                        auto iter = obj_attrs.find(RGW_ATTR_STORAGE_CLASS);
+                        if (iter != obj_attrs.end()) {
+                          dest_rule.storage_class = iter->second.to_str();
+                          dest_rule.inherit_from(dest_bucket_info.placement_rule);
+                          /* NOTE(review): dest_rule is still read through
+                           * ptail_rule after this std::move -- confirm that
+                           * set_tail_placement() copies rather than moves. */
+                          processor.set_tail_placement(std::move(dest_rule));
+                          ptail_rule = &dest_rule;
+                        } else {
+                          ptail_rule = &dest_bucket_info.placement_rule;
+                        }
+                      }
+                      const auto& compression_type = svc.zone->get_zone_params().get_compression_type(*ptail_rule);
+                      if (compression_type != "none") {
+                        plugin = Compressor::create(cct, compression_type);
+                        if (!plugin) {
+                          ldout(cct, 1) << "Cannot load plugin for compression type "
+                                        << compression_type << dendl;
+                        }
+                      }
+
+                      int ret = processor.prepare();
+                      if (ret < 0) {
+                        return ret;
+                      }
+                      return 0;
+                    });
+
+  string etag;
+  real_time set_mtime;
+  uint64_t expected_size = 0;
+
+  RGWObjState *dest_state = NULL;
+
+  const real_time *pmod = mod_ptr;
+
+  obj_time_weight dest_mtime_weight;
+
+  if (copy_if_newer) {
+    /* need to get mtime for destination */
+    ret = get_obj_state(&obj_ctx, dest_bucket_info, dest_obj, &dest_state, false);
+    if (ret < 0)
+      goto set_err_state;
+
+    if (!real_clock::is_zero(dest_state->mtime)) {
+      dest_mtime_weight.init(dest_state);
+      pmod = &dest_mtime_weight.mtime;
+    }
+  }
+
+  /* static so that the goto statements above may legally jump past them */
+  static constexpr bool prepend_meta = true;
+  static constexpr bool get_op = true;
+  static constexpr bool rgwx_stat = false;
+  static constexpr bool sync_manifest = true;
+  static constexpr bool skip_decrypt = true;
+  ret = conn->get_obj(user_id, info, src_obj, pmod, unmod_ptr,
+                      dest_mtime_weight.zone_short_id, dest_mtime_weight.pg_ver,
+                      prepend_meta, get_op, rgwx_stat,
+                      sync_manifest, skip_decrypt,
+                      true,
+                      &cb, &in_stream_req);
+  if (ret < 0) {
+    goto set_err_state;
+  }
+
+  ret = conn->complete_request(in_stream_req, &etag, &set_mtime,
+                               &expected_size, nullptr, nullptr);
+  if (ret < 0) {
+    goto set_err_state;
+  }
+  ret = cb.flush();
+  if (ret < 0) {
+    goto set_err_state;
+  }
+  if (cb.get_data_len() != expected_size) {
+    ret = -EIO;
+    ldout(cct, 0) << "ERROR: object truncated during fetching, expected "
+                  << expected_size << " bytes but received " << cb.get_data_len() << dendl;
+    goto set_err_state;
+  }
+  if (compressor && compressor->is_compressed()) {
+    bufferlist tmp;
+    RGWCompressionInfo cs_info;
+    cs_info.compression_type = plugin->get_type_name();
+    cs_info.orig_size = cb.get_data_len();
+    cs_info.blocks = std::move(compressor->get_compression_blocks());
+    encode(cs_info, tmp);
+    cb.get_attrs()[RGW_ATTR_COMPRESSION] = tmp;
+  }
+
+  if (source_zone.empty()) {
+    /* no source zone given: drop the source's expiration attribute and keep
+     * the delete_at passed in by the caller */
+    cb.get_attrs().erase(RGW_ATTR_DELETE_AT);
+  } else {
+    /* otherwise take the expiration time from the source, when it decodes */
+    map<string, bufferlist>::iterator iter = cb.get_attrs().find(RGW_ATTR_DELETE_AT);
+    if (iter != cb.get_attrs().end()) {
+      try {
+        decode(delete_at, iter->second);
+      } catch (buffer::error& err) {
+        ldout(cct, 0) << "ERROR: failed to decode delete_at field in intra zone copy" << dendl;
+      }
+    }
+  }
+
+  if (src_mtime) {
+    *src_mtime = set_mtime;
+  }
+
+  if (petag) {
+    const auto iter = cb.get_attrs().find(RGW_ATTR_ETAG);
+    if (iter != cb.get_attrs().end()) {
+      *petag = iter->second.to_str();
+    }
+  }
+
+  //erase the append attr
+  cb.get_attrs().erase(RGW_ATTR_APPEND_PART_NUM);
+
+  if (source_zone.empty()) {
+    set_copy_attrs(cb.get_attrs(), attrs, attrs_mod);
+  } else {
+    attrs = cb.get_attrs();
+  }
+
+  if (copy_if_newer) {
+    uint64_t pg_ver = 0;
+    auto pg_ver_attr = attrs.find(RGW_ATTR_PG_VER);
+    if (pg_ver_attr != attrs.end() && pg_ver_attr->second.length() > 0) {
+      auto iter = pg_ver_attr->second.cbegin();
+      try {
+        decode(pg_ver, iter);
+      } catch (buffer::error& err) {
+        ldout(ctx(), 0) << "ERROR: failed to decode pg ver attribute, ignoring" << dendl;
+        /* non critical error */
+      }
+    }
+    set_mtime_weight.init(set_mtime, svc.zone->get_zone_short_id(), pg_ver);
+  }
+
+  /* Perform ETag verification if we have computed the object's MD5 sum at our end */
+  if (const auto& verifier_etag = cb.get_verifier_etag();
+      !verifier_etag.empty()) {
+    string trimmed_etag = etag;
+
+    /* Remove all double quotes from the etag */
+    trimmed_etag.erase(std::remove(trimmed_etag.begin(), trimmed_etag.end(),'\"'),
+                       trimmed_etag.end());
+
+    if (verifier_etag != trimmed_etag) {
+      ret = -EIO;
+      ldout(cct, 0) << "ERROR: source and destination objects don't match. Expected etag:"
+                    << trimmed_etag << " Computed etag:" << verifier_etag << dendl;
+      goto set_err_state;
+    }
+  }
+
+#define MAX_COMPLETE_RETRY 100
+  for (i = 0; i < MAX_COMPLETE_RETRY; i++) {
+    bool canceled = false;
+    ret = processor.complete(cb.get_data_len(), etag, mtime, set_mtime,
+                             attrs, delete_at, nullptr, nullptr, nullptr,
+                             zones_trace, &canceled);
+    if (ret < 0) {
+      goto set_err_state;
+    }
+
+    if (copy_if_newer && canceled) {
+      /* Another writer won the race. Retry only if the destination is gone
+       * or still older than the copy being written. */
+      ldout(cct, 20) << "raced with another write of obj: " << dest_obj << dendl;
+      obj_ctx.invalidate(dest_obj); /* object was overwritten */
+      ret = get_obj_state(&obj_ctx, dest_bucket_info, dest_obj, &dest_state, false);
+      if (ret < 0) {
+        ldout(cct, 0) << "ERROR: " << __func__ << ": get_obj_state() returned ret=" << ret << dendl;
+        goto set_err_state;
+      }
+      dest_mtime_weight.init(dest_state);
+      dest_mtime_weight.high_precision = high_precision_time;
+      if (!dest_state->exists ||
+          dest_mtime_weight < set_mtime_weight) {
+        ldout(cct, 20) << "retrying writing object mtime=" << set_mtime << " dest_state->mtime=" << dest_state->mtime << " dest_state->exists=" << dest_state->exists << dendl;
+        continue;
+      } else {
+        ldout(cct, 20) << "not retrying writing object mtime=" << set_mtime << " dest_state->mtime=" << dest_state->mtime << " dest_state->exists=" << dest_state->exists << dendl;
+      }
+    }
+    break;
+  }
+
+  if (i == MAX_COMPLETE_RETRY) {
+    ldout(cct, 0) << "ERROR: retried object completion too many times, something is wrong!" << dendl;
+    ret = -EIO;
+    goto set_err_state;
+  }
+
+  if (bytes_transferred) {
+    *bytes_transferred = cb.get_data_len();
+  }
+  return 0;
+set_err_state:
+  if (copy_if_newer && ret == -ERR_NOT_MODIFIED) {
+    // we may have already fetched during sync of OP_ADD, but were waiting
+    // for OP_LINK_OLH to call set_olh() with a real olh_epoch
+    if (olh_epoch && *olh_epoch > 0) {
+      constexpr bool log_data_change = true;
+      ret = set_olh(obj_ctx, dest_bucket_info, dest_obj, false, nullptr,
+                    *olh_epoch, real_time(), false, zones_trace, log_data_change);
+    } else {
+      // we already have the latest copy
+      ret = 0;
+    }
+  }
+  return ret;
+}
+
+
+/* Upload a local object to dest_obj through the master connection.
+ *
+ * Starts an asynchronous PUT carrying src_attrs, streams the bytes
+ * [0, astate->size - 1] from read_op into it and then completes the request.
+ * mtime is handed to complete_request().
+ *
+ * Returns 0 on success, otherwise the first negative error encountered.
+ */
+int RGWRados::copy_obj_to_remote_dest(RGWObjState *astate,
+                                      map<string, bufferlist>& src_attrs,
+                                      RGWRados::Object::Read& read_op,
+                                      const rgw_user& user_id,
+                                      rgw_obj& dest_obj,
+                                      real_time *mtime)
+{
+  string etag;
+
+  RGWRESTStreamS3PutObj *put_req;
+
+  auto master_conn = svc.zone->get_master_conn();
+
+  int r = master_conn->put_obj_async(user_id, dest_obj, astate->size, src_attrs, true, &put_req);
+  if (r < 0) {
+    return r;
+  }
+
+  r = read_op.iterate(0, astate->size - 1, put_req->get_out_cb());
+  if (r < 0) {
+    /* the request is never completed on this path, so free it here */
+    delete put_req;
+    return r;
+  }
+
+  r = master_conn->complete_request(put_req, etag, mtime);
+  return (r < 0) ? r : 0;
+}
+
+/**
+ * Copy an object.
+ * dest_obj: the object to copy into
+ * src_obj: the object to copy from
+ * source_zone: when non-empty, the copy is delegated to fetch_remote_obj()
+ * dest_placement: placement rule used to locate the destination data pool
+ * src_mtime: handed to the source read op as params.lastmod
+ * mtime: handed to the write path (write_op.meta.mtime / copy_obj_data())
+ * mod_ptr, unmod_ptr, high_precision_time, if_match, if_nomatch: conditions
+ * set on the read of the source object
+ * attrs: usage depends on attrs_mod parameter
+ * attrs_mod: the modification mode of the attrs, may have the following values:
+ * ATTRSMOD_NONE - the attributes of the source object will be
+ * copied without modifications, attrs parameter is ignored;
+ * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
+ * parameter, source object attributes are not copied;
+ * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
+ * are overwritten by values contained in attrs parameter.
+ * ptag: optional tag for the new object; a random tag is generated when it is
+ * null or empty
+ * petag: if non-null, receives the RGW_ATTR_ETAG value found in attrs
+ * version_id: not referenced in the body of this function
+ *
+ * Flow, as implemented below:
+ * - both source and destination buckets in another zonegroup: -EINVAL
+ * - remote source, or non-empty source_zone: fetch_remote_obj()
+ * - source carries RGW_ATTR_CRYPT_MODE: -ERR_NOT_IMPLEMENTED
+ * - remote destination: copy_obj_to_remote_dest()
+ * - data must be physically copied (copy_data): copy_obj_data()
+ * - otherwise a reference is taken on each tail object with cls_refcount_get()
+ * and a new head is written that points at the shared tail; on failure the
+ * references taken so far are dropped again (done_ret).
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
+ const rgw_user& user_id,
+ req_info *info,
+ const string& source_zone,
+ rgw_obj& dest_obj,
+ rgw_obj& src_obj,
+ RGWBucketInfo& dest_bucket_info,
+ RGWBucketInfo& src_bucket_info,
+ const rgw_placement_rule& dest_placement,
+ real_time *src_mtime,
+ real_time *mtime,
+ const real_time *mod_ptr,
+ const real_time *unmod_ptr,
+ bool high_precision_time,
+ const char *if_match,
+ const char *if_nomatch,
+ AttrsMod attrs_mod,
+ bool copy_if_newer,
+ map<string, bufferlist>& attrs,
+ RGWObjCategory category,
+ uint64_t olh_epoch,
+ real_time delete_at,
+ string *version_id,
+ string *ptag,
+ string *petag,
+ void (*progress_cb)(off_t, void *),
+ void *progress_data)
+{
+ int ret;
+ uint64_t obj_size;
+ rgw_obj shadow_obj = dest_obj; // NOTE(review): only initialized below, not otherwise referenced in this function
+ string shadow_oid;
+
+ bool remote_src;
+ bool remote_dest;
+
+ append_rand_alpha(cct, dest_obj.get_oid(), shadow_oid, 32);
+ shadow_obj.init_ns(dest_obj.bucket, shadow_oid, shadow_ns);
+
+ auto& zonegroup = svc.zone->get_zonegroup();
+
+ remote_dest = !zonegroup.equals(dest_bucket_info.zonegroup); // "remote": bucket's zonegroup differs from the local one
+ remote_src = !zonegroup.equals(src_bucket_info.zonegroup);
+
+ if (remote_src && remote_dest) {
+ ldout(cct, 0) << "ERROR: can't copy object when both src and dest buckets are remote" << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 5) << "Copy object " << src_obj.bucket << ":" << src_obj.get_oid() << " => " << dest_obj.bucket << ":" << dest_obj.get_oid() << dendl;
+
+ if (remote_src || !source_zone.empty()) { // remote source or explicit source zone: delegate the whole copy
+ return fetch_remote_obj(obj_ctx, user_id, info, source_zone,
+ dest_obj, src_obj, dest_bucket_info, src_bucket_info,
+ dest_placement, src_mtime, mtime, mod_ptr,
+ unmod_ptr, high_precision_time,
+ if_match, if_nomatch, attrs_mod, copy_if_newer, attrs, category,
+ olh_epoch, delete_at, ptag, petag, progress_cb, progress_data);
+ }
+
+ map<string, bufferlist> src_attrs;
+ RGWRados::Object src_op_target(this, src_bucket_info, obj_ctx, src_obj);
+ RGWRados::Object::Read read_op(&src_op_target);
+
+ read_op.conds.mod_ptr = mod_ptr;
+ read_op.conds.unmod_ptr = unmod_ptr;
+ read_op.conds.high_precision_time = high_precision_time;
+ read_op.conds.if_match = if_match;
+ read_op.conds.if_nomatch = if_nomatch;
+ read_op.params.attrs = &src_attrs;
+ read_op.params.lastmod = src_mtime;
+ read_op.params.obj_size = &obj_size;
+
+ ret = read_op.prepare();
+ if (ret < 0) {
+ return ret;
+ }
+ if (src_attrs.count(RGW_ATTR_CRYPT_MODE)) {
+ // Current implementation does not follow S3 spec and even
+ // may result in data corruption silently when copying
+ // multipart objects across pools. So reject COPY operations
+ // on encrypted objects before it is fully functional.
+ ldout(cct, 0) << "ERROR: copy op for encrypted object " << src_obj
+ << " has not been implemented." << dendl;
+ return -ERR_NOT_IMPLEMENTED;
+ }
+
+ src_attrs[RGW_ATTR_ACL] = attrs[RGW_ATTR_ACL]; // the ACL always comes from attrs, replacing the source object's ACL
+ src_attrs.erase(RGW_ATTR_DELETE_AT);
+
+ set_copy_attrs(src_attrs, attrs, attrs_mod);
+ attrs.erase(RGW_ATTR_ID_TAG); // these three attrs are never carried over to the copy
+ attrs.erase(RGW_ATTR_PG_VER);
+ attrs.erase(RGW_ATTR_SOURCE_ZONE);
+ map<string, bufferlist>::iterator cmp = src_attrs.find(RGW_ATTR_COMPRESSION);
+ if (cmp != src_attrs.end())
+ attrs[RGW_ATTR_COMPRESSION] = cmp->second; // the source's compression attr, when present, is carried over
+
+ RGWObjManifest manifest;
+ RGWObjState *astate = NULL;
+
+ ret = get_obj_state(&obj_ctx, src_bucket_info, src_obj, &astate);
+ if (ret < 0) {
+ return ret;
+ }
+
+ vector<rgw_raw_obj> ref_objs; // tail objects on which a reference was taken; used for rollback at done_ret
+
+ if (remote_dest) {
+ /* dest is in a different zonegroup, copy it there */
+ return copy_obj_to_remote_dest(astate, attrs, read_op, user_id, dest_obj, mtime);
+ }
+ uint64_t max_chunk_size;
+
+ ret = get_max_chunk_size(dest_bucket_info.placement_rule, dest_obj, &max_chunk_size);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for bucket " << dest_obj.bucket << dendl;
+ return ret;
+ }
+
+ rgw_pool src_pool;
+ rgw_pool dest_pool;
+
+ const rgw_placement_rule *src_rule{nullptr};
+
+ if (astate->has_manifest) {
+ src_rule = &astate->manifest.get_tail_placement().placement_rule;
+ ldout(cct, 20) << __func__ << "(): manifest src_rule=" << src_rule->to_str() << dendl;
+ }
+
+ if (!src_rule || src_rule->empty()) { // fall back to the source bucket's placement rule
+ src_rule = &src_bucket_info.placement_rule;
+ }
+
+ if (!get_obj_data_pool(*src_rule, src_obj, &src_pool)) {
+ ldout(cct, 0) << "ERROR: failed to locate data pool for " << src_obj << dendl;
+ return -EIO;
+ }
+
+ if (!get_obj_data_pool(dest_placement, dest_obj, &dest_pool)) {
+ ldout(cct, 0) << "ERROR: failed to locate data pool for " << dest_obj << dendl;
+ return -EIO;
+ }
+
+ ldout(cct, 20) << __func__ << "(): src_rule=" << src_rule->to_str() << " src_pool=" << src_pool
+ << " dest_rule=" << dest_placement.to_str() << " dest_pool=" << dest_pool << dendl;
+
+ bool copy_data = !astate->has_manifest || // copy_data: the data is copied instead of sharing the tail by refcount
+ (*src_rule != dest_placement) ||
+ (src_pool != dest_pool);
+
+ bool copy_first = false; // copy_first: head data is re-read and written into the new head, tail is shared
+ if (astate->has_manifest) {
+ if (!astate->manifest.has_tail()) {
+ copy_data = true;
+ } else {
+ uint64_t head_size = astate->manifest.get_head_size();
+
+ if (head_size > 0) {
+ if (head_size > max_chunk_size) {
+ copy_data = true;
+ } else {
+ copy_first = true;
+ }
+ }
+ }
+ }
+
+ if (petag) {
+ const auto iter = attrs.find(RGW_ATTR_ETAG);
+ if (iter != attrs.end()) {
+ *petag = iter->second.to_str();
+ }
+ }
+
+ if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */
+ attrs.erase(RGW_ATTR_TAIL_TAG);
+ return copy_obj_data(obj_ctx, dest_bucket_info, dest_placement, read_op, obj_size - 1, dest_obj,
+ mtime, real_time(), attrs, olh_epoch, delete_at, petag);
+ }
+
+ RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin();
+
+ if (copy_first) { // we need to copy first chunk, not increase refcount
+ ++miter;
+ }
+
+ rgw_rados_ref ref;
+ ret = get_raw_obj_ref(miter.get_location().get_raw_obj(this), &ref);
+ if (ret < 0) {
+ return ret;
+ }
+
+ bufferlist first_chunk;
+
+ bool copy_itself = (dest_obj == src_obj);
+ RGWObjManifest *pmanifest;
+ ldout(cct, 20) << "dest_obj=" << dest_obj << " src_obj=" << src_obj << " copy_itself=" << (int)copy_itself << dendl;
+
+ RGWRados::Object dest_op_target(this, dest_bucket_info, obj_ctx, dest_obj);
+ RGWRados::Object::Write write_op(&dest_op_target);
+
+ string tag;
+
+ if (ptag) {
+ tag = *ptag;
+ }
+
+ if (tag.empty()) {
+ append_rand_alpha(cct, tag, tag, 32);
+ }
+
+ if (!copy_itself) {
+ attrs.erase(RGW_ATTR_TAIL_TAG);
+ manifest = astate->manifest; // work on a private copy of the source manifest
+ const rgw_bucket_placement& tail_placement = manifest.get_tail_placement();
+ if (tail_placement.bucket.name.empty()) {
+ manifest.set_tail_placement(tail_placement.placement_rule, src_obj.bucket);
+ }
+ string ref_tag;
+ for (; miter != astate->manifest.obj_end(); ++miter) {
+ ObjectWriteOperation op;
+ ref_tag = tag + '\0'; // the refcount tag is the object tag with a trailing NUL byte
+ cls_refcount_get(op, ref_tag, true);
+ const rgw_raw_obj& loc = miter.get_location().get_raw_obj(this);
+ ref.ioctx.locator_set_key(loc.loc);
+
+ ret = ref.ioctx.operate(loc.oid, &op);
+ if (ret < 0) {
+ goto done_ret;
+ }
+
+ ref_objs.push_back(loc);
+ }
+
+ pmanifest = &manifest;
+ } else {
+ pmanifest = &astate->manifest;
+ /* don't send the object's tail for garbage collection */
+ astate->keep_tail = true;
+ }
+
+ if (copy_first) {
+ ret = read_op.read(0, max_chunk_size, first_chunk);
+ if (ret < 0) {
+ goto done_ret;
+ }
+
+ pmanifest->set_head(dest_bucket_info.placement_rule, dest_obj, first_chunk.length());
+ } else {
+ pmanifest->set_head(dest_bucket_info.placement_rule, dest_obj, 0);
+ }
+
+ write_op.meta.data = &first_chunk;
+ write_op.meta.manifest = pmanifest;
+ write_op.meta.ptag = &tag;
+ write_op.meta.owner = dest_bucket_info.owner;
+ write_op.meta.mtime = mtime;
+ write_op.meta.flags = PUT_OBJ_CREATE;
+ write_op.meta.category = category;
+ write_op.meta.olh_epoch = olh_epoch;
+ write_op.meta.delete_at = delete_at;
+ write_op.meta.modify_tail = !copy_itself;
+
+ ret = write_op.write_meta(obj_size, astate->accounted_size, attrs);
+ if (ret < 0) {
+ goto done_ret;
+ }
+
+ return 0;
+
+done_ret:
+ if (!copy_itself) {
+ vector<rgw_raw_obj>::iterator riter;
+
+ /* rollback reference */
+ string ref_tag = tag + '\0';
+ for (riter = ref_objs.begin(); riter != ref_objs.end(); ++riter) {
+ ObjectWriteOperation op;
+ cls_refcount_put(op, ref_tag, true);
+
+ ref.ioctx.locator_set_key(riter->loc);
+
+ int r = ref.ioctx.operate(riter->oid, &op);
+ if (r < 0) { // best effort: a failed rollback is only logged, the original error is returned
+ ldout(cct, 0) << "ERROR: cleanup after error failed to drop reference on obj=" << *riter << dendl;
+ }
+ }
+ }
+ return ret;
+}
+
+
+ /**
+  * Copy object data by reading it through read_op and writing it to dest_obj
+  * with an AtomicObjectProcessor.
+  * read_op: prepared read operation on the source object
+  * end: last offset passed to read_op.read(); the loop runs while ofs <= end
+  * mtime, set_mtime, delete_at: forwarded to processor.complete()
+  * attrs: attributes written with the new object; also consulted for
+  *        RGW_ATTR_ETAG and for compression info
+  * petag: if non-null and attrs holds RGW_ATTR_ETAG, receives that etag
+  * Returns: the result of processor.complete(), or a negative error code
+  *          from an earlier step.
+  */
+ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx,
+                             RGWBucketInfo& dest_bucket_info,
+                             const rgw_placement_rule& dest_placement,
+                             RGWRados::Object::Read& read_op, off_t end,
+                             const rgw_obj& dest_obj,
+                             real_time *mtime,
+                             real_time set_mtime,
+                             map<string, bufferlist>& attrs,
+                             uint64_t olh_epoch,
+                             real_time delete_at,
+                             string *petag)
+ {
+   string tag;
+   append_rand_alpha(cct, tag, tag, 32);
+
+   rgw::AioThrottle aio(cct->_conf->rgw_put_obj_min_window_size);
+   using namespace rgw::putobj;
+   AtomicObjectProcessor processor(&aio, this, dest_bucket_info, &dest_placement,
+                                   dest_bucket_info.owner, obj_ctx,
+                                   dest_obj, olh_epoch, tag);
+   int ret = processor.prepare();
+   if (ret < 0) {
+     return ret;
+   }
+
+   /* pump the source data into the processor, one read at a time */
+   off_t ofs = 0;
+   do {
+     bufferlist chunk;
+     ret = read_op.read(ofs, end, chunk);
+     if (ret < 0) {
+       ldout(cct, 0) << "ERROR: fail to read object data, ret = " << ret << dendl;
+       return ret;
+     }
+     const uint64_t read_len = ret;
+
+     ret = processor.process(std::move(chunk), ofs);
+     if (ret < 0) {
+       return ret;
+     }
+
+     ofs += read_len;
+   } while (ofs <= end);
+
+   /* an empty buffer flushes whatever the processor still holds */
+   ret = processor.process({}, ofs);
+   if (ret < 0) {
+     return ret;
+   }
+
+   string etag;
+   auto etag_it = attrs.find(RGW_ATTR_ETAG);
+   if (etag_it != attrs.end()) {
+     etag = etag_it->second.to_str();
+     if (petag) {
+       *petag = etag;
+     }
+   }
+
+   bool compressed{false};
+   RGWCompressionInfo cs_info;
+   ret = rgw_compression_info_from_attrset(attrs, compressed, cs_info);
+   if (ret < 0) {
+     ldout(cct, 0) << "ERROR: failed to read compression info" << dendl;
+     return ret;
+   }
+   /* account the original size when the data is compressed */
+   const uint64_t accounted_size = compressed ? cs_info.orig_size : ofs;
+
+   return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at,
+                             nullptr, nullptr, nullptr, nullptr, nullptr);
+ }
+
+ /**
+  * Rewrite an object's data under a different placement rule.
+  * obj: the object to rewrite (used as both source and destination)
+  * placement_rule: placement rule passed to copy_obj_data() for the new data
+  * mtime: expected current mtime of the object; it is also passed to
+  *        copy_obj_data() as set_mtime
+  * Returns: 0 on success, -ECANCELED if the object's mtime no longer matches,
+  *          other negative error codes on failure.
+  */
+ int RGWRados::transition_obj(RGWObjectCtx& obj_ctx,
+                              RGWBucketInfo& bucket_info,
+                              rgw_obj& obj,
+                              const rgw_placement_rule& placement_rule,
+                              const real_time& mtime,
+                              uint64_t olh_epoch)
+ {
+   map<string, bufferlist> attrs;
+   real_time read_mtime;
+   uint64_t obj_size;
+
+   RGWRados::Object op_target(this, bucket_info, obj_ctx, obj);
+   RGWRados::Object::Read read_op(&op_target);
+   read_op.params.obj_size = &obj_size;
+   read_op.params.lastmod = &read_mtime;
+   read_op.params.attrs = &attrs;
+
+   int ret = read_op.prepare();
+   if (ret < 0) {
+     return ret;
+   }
+
+   /* the object changed since the caller looked at it: we raced */
+   if (read_mtime != mtime) {
+     return -ECANCELED;
+   }
+
+   ret = copy_obj_data(obj_ctx, bucket_info, placement_rule, read_op,
+                       obj_size - 1, obj,
+                       nullptr /* pmtime */, mtime, attrs, olh_epoch,
+                       real_time(), nullptr /* petag */);
+   return (ret < 0) ? ret : 0;
+ }
+
+ /**
+  * Check whether a bucket's index lists any entry whose name parses as a key
+  * in the empty namespace.
+  * The index is listed unordered in batches of NUM_ENTRIES until the listing
+  * is no longer truncated.
+  * Returns: 0 if no such entry is found, -ENOTEMPTY if one is found, or the
+  *          negative error returned by the listing call.
+  */
+ int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info)
+ {
+   constexpr uint NUM_ENTRIES = 1000u;
+
+   rgw_obj_index_key marker;
+   string prefix;
+   string ns;
+   bool is_truncated;
+
+   do {
+     std::vector<rgw_bucket_dir_entry> entries;
+     entries.reserve(NUM_ENTRIES);
+
+     const int r = cls_bucket_list_unordered(bucket_info, RGW_NO_SHARD,
+                                             marker, prefix, NUM_ENTRIES,
+                                             true, entries,
+                                             &is_truncated, &marker);
+     if (r < 0) {
+       return r;
+     }
+
+     for (auto const& entry : entries) {
+       rgw_obj_key parsed;
+       const bool in_ns = rgw_obj_key::oid_to_key_in_ns(entry.key.name, &parsed, ns);
+       if (in_ns) {
+         return -ENOTEMPTY;
+       }
+     }
+   } while (is_truncated);
+
+   return 0;
+ }
+
+ /**
+  * Delete a bucket.
+  * bucket_info: info of the bucket to delete
+  * objv_tracker: version tracker passed to the removal of the bucket entry
+  * check_empty: when true, fail if check_bucket_empty() reports an error
+  *              (including -ENOTEMPTY)
+  * Returns 0 on success, -ERR# otherwise.
+  */
+ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty)
+ {
+   const rgw_bucket& bucket = bucket_info.bucket;
+
+   librados::IoCtx index_ctx;
+   map<int, string> bucket_objs;
+   int r = open_bucket_index(bucket_info, index_ctx, bucket_objs);
+   if (r < 0) {
+     return r;
+   }
+
+   if (check_empty) {
+     r = check_bucket_empty(bucket_info);
+     if (r < 0) {
+       return r;
+     }
+   }
+
+   r = rgw_bucket_delete_bucket_obj(this, bucket.tenant, bucket.name, objv_tracker);
+   if (r < 0) {
+     return r;
+   }
+
+   /* a bucket whose metadata is being synced keeps its meta file */
+   if (svc.zone->is_syncing_bucket_meta(bucket)) {
+     return 0;
+   }
+
+   /* the instance entry is removed with its own, fresh version tracker */
+   RGWObjVersionTracker instance_objv;
+   r = rgw_bucket_instance_remove_entry(this, bucket.get_key(), &instance_objv);
+   if (r < 0) {
+     return r;
+   }
+
+   /* remove bucket index objects asynchronously by best effort */
+   (void) CLSRGWIssueBucketIndexClean(index_ctx,
+                                      bucket_objs,
+                                      cct->_conf->rgw_bucket_index_max_aio)();
+
+   return 0;
+ }
+
+ /**
+  * Change the owner recorded in a bucket's instance info.
+  * bucket: the bucket to update; looked up by tenant/name when its bucket_id
+  *         is empty, by instance otherwise
+  * owner: the new owner; its id is stored in the bucket info
+  * Returns: 0 on success, negative error code otherwise.
+  */
+ int RGWRados::set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner)
+ {
+   RGWBucketInfo info;
+   map<string, bufferlist> attrs;
+   auto obj_ctx = svc.sysobj->init_obj_ctx();
+
+   int r = bucket.bucket_id.empty()
+     ? get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL, &attrs)
+     : get_bucket_instance_info(obj_ctx, bucket, info, nullptr, &attrs);
+   if (r < 0) {
+     ldout(cct, 0) << "NOTICE: get_bucket_info on bucket=" << bucket.name << " returned err=" << r << dendl;
+     return r;
+   }
+
+   info.owner = owner.get_id();
+
+   r = put_bucket_instance_info(info, false, real_time(), &attrs);
+   if (r < 0) {
+     ldout(cct, 0) << "NOTICE: put_bucket_info on bucket=" << bucket.name << " returned err=" << r << dendl;
+     return r;
+   }
+
+   return 0;
+ }
+
+
+ /**
+  * Set or clear the BUCKET_SUSPENDED flag on a list of buckets.
+  * buckets: the buckets to update
+  * enabled: true clears the suspended flag, false sets it
+  * A failure on one bucket is logged and the remaining buckets are still
+  * processed.
+  * Returns: 0 if every bucket was updated, otherwise the error of the last
+  *          bucket that failed.
+  */
+ int RGWRados::set_buckets_enabled(vector<rgw_bucket>& buckets, bool enabled)
+ {
+   int ret = 0;
+
+   for (rgw_bucket& bucket : buckets) {
+     if (enabled) {
+       ldout(cct, 20) << "enabling bucket name=" << bucket.name << dendl;
+     } else {
+       ldout(cct, 20) << "disabling bucket name=" << bucket.name << dendl;
+     }
+
+     RGWBucketInfo info;
+     map<string, bufferlist> attrs;
+     auto obj_ctx = svc.sysobj->init_obj_ctx();
+     int r = get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL, &attrs);
+     if (r < 0) {
+       ldout(cct, 0) << "NOTICE: get_bucket_info on bucket=" << bucket.name << " returned err=" << r << ", skipping bucket" << dendl;
+       ret = r;
+       continue;
+     }
+
+     if (enabled) {
+       info.flags &= ~BUCKET_SUSPENDED;
+     } else {
+       info.flags |= BUCKET_SUSPENDED;
+     }
+
+     r = put_bucket_instance_info(info, false, real_time(), &attrs);
+     if (r < 0) {
+       ldout(cct, 0) << "NOTICE: put_bucket_info on bucket=" << bucket.name << " returned err=" << r << ", skipping bucket" << dendl;
+       ret = r;
+     }
+   }
+
+   return ret;
+ }
+
+ /**
+  * Report whether a bucket has the BUCKET_SUSPENDED flag set.
+  * bucket: the bucket to look up (by tenant and name)
+  * suspended: receives true when the flag is set, false otherwise
+  * Returns: 0 on success, negative error code if the bucket info cannot be read.
+  */
+ int RGWRados::bucket_suspended(rgw_bucket& bucket, bool *suspended)
+ {
+   RGWBucketInfo bucket_info;
+   auto obj_ctx = svc.sysobj->init_obj_ctx();
+
+   const int ret = get_bucket_info(obj_ctx, bucket.tenant, bucket.name, bucket_info, NULL);
+   if (ret < 0) {
+     return ret;
+   }
+
+   const auto suspended_bits = bucket_info.flags & BUCKET_SUSPENDED;
+   *suspended = (suspended_bits != 0);
+   return 0;
+ }
+
+ /**
+  * Hand the tail of the object state's manifest over to garbage collection.
+  * Nothing is done when the state has no manifest, when keep_tail is set, or
+  * when the resulting chain is empty.
+  * The chain is tagged with the state's tail_tag when it is non-empty,
+  * otherwise with its obj_tag.
+  * Returns: 0 when there is nothing to send, otherwise the result of
+  *          gc->send_chain().
+  */
+ int RGWRados::Object::complete_atomic_modification()
+ {
+   const bool tail_is_disposable = state->has_manifest && !state->keep_tail;
+   if (!tail_is_disposable) {
+     return 0;
+   }
+
+   cls_rgw_obj_chain chain;
+   store->update_gc_chain(obj, state->manifest, &chain);
+   if (chain.empty()) {
+     return 0;
+   }
+
+   string tag;
+   if (state->tail_tag.length() > 0) {
+     tag = state->tail_tag.to_str();
+   } else {
+     tag = state->obj_tag.to_str();
+   }
+
+   return store->gc->send_chain(chain, tag, false); // do it async
+ }
+
+ /**
+  * Append every raw object of a manifest, except the head, to a gc chain.
+  * head_obj: the head object; its raw location is skipped
+  * manifest: manifest whose objects are walked
+  * chain: chain that receives one entry (pool, key, locator) per object
+  */
+ void RGWRados::update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain)
+ {
+   rgw_raw_obj raw_head;
+   obj_to_raw(manifest.get_head_placement_rule(), head_obj, &raw_head);
+
+   for (RGWObjManifest::obj_iterator it = manifest.obj_begin();
+        it != manifest.obj_end(); ++it) {
+     const rgw_raw_obj& part = it.get_location().get_raw_obj(this);
+     if (!(part == raw_head)) {
+       cls_rgw_obj_key key(part.oid);
+       chain->push_obj(part.pool.to_str(), key, part.loc);
+     }
+   }
+ }
+
+ /**
+  * Forward a chain of objects to the garbage collector.
+  * chain, tag, sync: passed unchanged to gc->send_chain()
+  * Returns: whatever gc->send_chain() returns.
+  */
+ int RGWRados::send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync)
+ {
+   const int r = gc->send_chain(chain, tag, sync);
+   return r;
+ }
+
+ /**
+  * Open the index io context of a bucket and build its index object id.
+  * bucket_info: bucket whose index is opened
+  * index_ctx: receives the opened index io context
+  * bucket_oid: receives dir_oid_prefix followed by the bucket id
+  * Returns: 0 on success, -EIO if the bucket id is empty, or the error from
+  *          open_bucket_index_ctx().
+  */
+ int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+                                 librados::IoCtx& index_ctx,
+                                 string& bucket_oid)
+ {
+   const int r = open_bucket_index_ctx(bucket_info, index_ctx);
+   if (r < 0) {
+     return r;
+   }
+
+   const rgw_bucket& bucket = bucket_info.bucket;
+   if (bucket.bucket_id.empty()) {
+     ldout(cct, 0) << "ERROR: empty bucket id for bucket operation" << dendl;
+     return -EIO;
+   }
+
+   bucket_oid = dir_oid_prefix;
+   bucket_oid += bucket.bucket_id;
+   return 0;
+ }
+
+ /**
+  * Open the index io context of a bucket and build the base name from which
+  * index object ids are derived.
+  * bucket_info: bucket whose index is opened
+  * index_ctx: receives the opened index io context
+  * bucket_oid_base: receives dir_oid_prefix followed by the bucket id
+  * Returns: 0 on success, -EIO if the bucket id is empty, or the error from
+  *          open_bucket_index_ctx().
+  */
+ int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info,
+                                      librados::IoCtx& index_ctx,
+                                      string& bucket_oid_base)
+ {
+   const int r = open_bucket_index_ctx(bucket_info, index_ctx);
+   if (r < 0) {
+     return r;
+   }
+
+   const rgw_bucket& bucket = bucket_info.bucket;
+   if (bucket.bucket_id.empty()) {
+     ldout(cct, 0) << "ERROR: empty bucket_id for bucket operation" << dendl;
+     return -EIO;
+   }
+
+   bucket_oid_base = dir_oid_prefix;
+   bucket_oid_base += bucket.bucket_id;
+   return 0;
+ }
+
+ /**
+  * Open a bucket's index and collect its index object ids.
+  * bucket_objs: filled by get_bucket_index_objects() for the given shard_id
+  * shard_id: shard selector forwarded to the helpers
+  * bucket_instance_ids: optional; when non-null it is filled by
+  *                      get_bucket_instance_ids()
+  * Returns: 0 on success, or the error from open_bucket_index_base().
+  */
+ int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+                                 librados::IoCtx& index_ctx,
+                                 map<int, string>& bucket_objs,
+                                 int shard_id,
+                                 map<int, string> *bucket_instance_ids)
+ {
+   string oid_base;
+   const int ret = open_bucket_index_base(bucket_info, index_ctx, oid_base);
+   if (ret < 0) {
+     return ret;
+   }
+
+   get_bucket_index_objects(oid_base, bucket_info.num_shards, bucket_objs, shard_id);
+
+   if (bucket_instance_ids != nullptr) {
+     get_bucket_instance_ids(bucket_info, shard_id, bucket_instance_ids);
+   }
+   return 0;
+ }
+
+ /**
+  * Open a bucket's index and prepare one default-constructed result slot per
+  * index object.
+  * oids: receives the index object ids, keyed as filled by the non-template
+  *       open_bucket_index() overload
+  * bucket_objs: for every key in oids, set to a value-initialized T
+  * Returns: 0 on success, or the error from the underlying open_bucket_index().
+  */
+ template<typename T>
+ int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+                                 map<int, string>& oids, map<int, T>& bucket_objs,
+                                 int shard_id, map<int, string> *bucket_instance_ids)
+ {
+   const int ret = open_bucket_index(bucket_info, index_ctx, oids, shard_id, bucket_instance_ids);
+   if (ret < 0) {
+     return ret;
+   }
+
+   for (const auto& oid_entry : oids) {
+     bucket_objs[oid_entry.first] = T();
+   }
+   return 0;
+ }
+
+ /**
+  * Open a bucket's index and resolve the index shard object for an object key.
+  * bucket_info: bucket whose index is opened; its num_shards and
+  *              bucket_index_shard_hash_type select the shard
+  * index_ctx: receives the opened index io context
+  * obj_key: object key used to pick the shard
+  * bucket_obj: receives the index object id of the shard
+  * shard_id: receives the shard id
+  * Returns: 0 on success, negative error code otherwise.
+  *
+  * Note: an RGWObjectCtx local that was constructed here but never used has
+  * been removed.
+  */
+ int RGWRados::open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+                                       const string& obj_key, string *bucket_obj, int *shard_id)
+ {
+   string bucket_oid_base;
+   int ret = open_bucket_index_base(bucket_info, index_ctx, bucket_oid_base);
+   if (ret < 0) {
+     return ret;
+   }
+
+   ret = get_bucket_index_object(bucket_oid_base, obj_key, bucket_info.num_shards,
+                                 static_cast<RGWBucketInfo::BIShardsHashType>(bucket_info.bucket_index_shard_hash_type),
+                                 bucket_obj, shard_id);
+   if (ret < 0) {
+     ldout(cct, 10) << "get_bucket_index_object() returned ret=" << ret << dendl;
+     return ret;
+   }
+   return 0;
+ }
+
+ /**
+  * Open a bucket's index and resolve the index object id of a given shard.
+  * bucket_info: bucket whose index is opened; its num_shards is passed to
+  *              get_bucket_index_object()
+  * index_ctx: receives the opened index io context
+  * shard_id: the shard to resolve
+  * bucket_obj: receives the index object id of that shard
+  * Returns: 0 on success, or the error from open_bucket_index_base().
+  *
+  * Note: an RGWObjectCtx local that was constructed here but never used has
+  * been removed.
+  */
+ int RGWRados::open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+                                       int shard_id, string *bucket_obj)
+ {
+   string bucket_oid_base;
+   const int ret = open_bucket_index_base(bucket_info, index_ctx, bucket_oid_base);
+   if (ret < 0) {
+     return ret;
+   }
+
+   get_bucket_index_object(bucket_oid_base, bucket_info.num_shards,
+                           shard_id, bucket_obj);
+   return 0;
+ }
+
+ /**
+  * Add the per-category stats of one bucket index header to a running total.
+  * header: index header whose stats map is read
+  * stats: per-category totals; entries are created on first use and the
+  *        header's sizes and entry counts are added to them
+  */
+ static void accumulate_raw_stats(const rgw_bucket_dir_header& header,
+                                  map<RGWObjCategory, RGWStorageStats>& stats)
+ {
+   for (const auto& entry : header.stats) {
+     const auto category = static_cast<RGWObjCategory>(entry.first);
+     const rgw_bucket_category_stats& from = entry.second;
+
+     RGWStorageStats& total = stats[category];
+     total.category = category;
+     total.size += from.total_size;
+     total.size_rounded += from.total_size_rounded;
+     total.size_utilized += from.actual_size;
+     total.num_objects += from.num_entries;
+   }
+ }
+
+ /**
+  * Run an index check on every index object of a bucket and aggregate the
+  * results.
+  * existing_stats: receives the totals accumulated from each result's
+  *                 existing_header
+  * calculated_stats: receives the totals accumulated from each result's
+  *                   calculated_header
+  * Returns: 0 on success, negative error code otherwise.
+  */
+ int RGWRados::bucket_check_index(RGWBucketInfo& bucket_info,
+                                  map<RGWObjCategory, RGWStorageStats> *existing_stats,
+                                  map<RGWObjCategory, RGWStorageStats> *calculated_stats)
+ {
+   librados::IoCtx index_ctx;
+   /* oids: index object ids; check_results: per-object result of the check op,
+    * both keyed as filled by open_bucket_index() */
+   map<int, string> oids;
+   map<int, struct rgw_cls_check_index_ret> check_results;
+
+   int ret = open_bucket_index(bucket_info, index_ctx, oids, check_results);
+   if (ret < 0) {
+     return ret;
+   }
+
+   ret = CLSRGWIssueBucketCheck(index_ctx, oids, check_results, cct->_conf->rgw_bucket_index_max_aio)();
+   if (ret < 0) {
+     return ret;
+   }
+
+   /* fold the results of all index objects into the two output maps */
+   for (auto& result : check_results) {
+     accumulate_raw_stats(result.second.existing_header, *existing_stats);
+     accumulate_raw_stats(result.second.calculated_header, *calculated_stats);
+   }
+
+   return 0;
+ }
+
+ /**
+  * Issue a rebuild operation on every index object of a bucket.
+  * Returns: the error from open_bucket_index() if it fails, otherwise the
+  *          result of CLSRGWIssueBucketRebuild.
+  */
+ int RGWRados::bucket_rebuild_index(RGWBucketInfo& bucket_info)
+ {
+   librados::IoCtx index_ctx;
+   map<int, string> index_oids;
+
+   const int r = open_bucket_index(bucket_info, index_ctx, index_oids);
+   if (r < 0) {
+     return r;
+   }
+
+   return CLSRGWIssueBucketRebuild(index_ctx, index_oids, cct->_conf->rgw_bucket_index_max_aio)();
+ }
+
+ /**
+  * Issue a set-resharding operation with the given entry on every index
+  * object of a bucket.
+  * Returns: the error from open_bucket_index() if it fails, otherwise the
+  *          result of CLSRGWIssueSetBucketResharding.
+  */
+ int RGWRados::bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry)
+ {
+   librados::IoCtx index_ctx;
+   map<int, string> index_oids;
+
+   const int r = open_bucket_index(bucket_info, index_ctx, index_oids);
+   if (r < 0) {
+     return r;
+   }
+
+   return CLSRGWIssueSetBucketResharding(index_ctx, index_oids, entry, cct->_conf->rgw_bucket_index_max_aio)();
+ }
+
+ /**
+  * Ask the garbage collector to defer the chain tagged with an object's tag.
+  * ctx: an RGWObjectCtx passed as void*; when null nothing is done
+  * bucket_info, obj: identify the object whose state is looked up
+  * The tag is the state's tail_tag when non-empty, otherwise its obj_tag.
+  * Returns: 0 when ctx is null, -EINVAL when the state is not atomic or has
+  *          no tag, the error from get_obj_state(), or the result of
+  *          gc->defer_chain().
+  *
+  * Note: the oid/locator strings that used to be computed at the top of this
+  * function were never read, so that dead computation has been removed.
+  */
+ int RGWRados::defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj)
+ {
+   RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
+   if (!rctx) {
+     return 0;
+   }
+
+   RGWObjState *state = nullptr;
+   int r = get_obj_state(rctx, bucket_info, obj, &state, false);
+   if (r < 0) {
+     return r;
+   }
+
+   if (!state->is_atomic) {
+     ldout(cct, 20) << "state for obj=" << obj << " is not atomic, not deferring gc operation" << dendl;
+     return -EINVAL;
+   }
+
+   string tag;
+   if (state->tail_tag.length() > 0) {
+     tag = state->tail_tag.c_str();
+   } else if (state->obj_tag.length() > 0) {
+     tag = state->obj_tag.c_str();
+   } else {
+     ldout(cct, 20) << "state->obj_tag is empty, not deferring gc operation" << dendl;
+     return -EINVAL;
+   }
+
+   ldout(cct, 0) << "defer chain tag=" << tag << dendl;
+
+   return gc->defer_chain(tag, false);
+ }
+
+ /**
+  * Add a cls_rgw_remove_obj() call to a write operation, passing
+  * RGW_ATTR_OLH_PREFIX as the only attribute prefix.
+  * op: the write operation to extend
+  */
+ void RGWRados::remove_rgw_head_obj(ObjectWriteOperation& op)
+ {
+   list<string> keep_prefixes;
+   keep_prefixes.emplace_back(RGW_ATTR_OLH_PREFIX);
+
+   cls_rgw_remove_obj(op, keep_prefixes);
+ }
+
+ /**
+  * Add an attribute-prefix check to an object operation.
+  * op, prefix, fail_if_exist: passed unchanged to
+  * cls_rgw_obj_check_attrs_prefix()
+  */
+ void RGWRados::cls_obj_check_prefix_exist(ObjectOperation& op,
+                                           const string& prefix,
+                                           bool fail_if_exist)
+ {
+   cls_rgw_obj_check_attrs_prefix(op,
+                                  prefix,
+                                  fail_if_exist);
+ }
+
+ /**
+  * Add an mtime check to an object operation.
+  * op, mtime, high_precision_time, type: passed unchanged to
+  * cls_rgw_obj_check_mtime()
+  */
+ void RGWRados::cls_obj_check_mtime(ObjectOperation& op,
+                                    const real_time& mtime,
+                                    bool high_precision_time,
+                                    RGWCheckMTimeType type)
+ {
+   cls_rgw_obj_check_mtime(op,
+                           mtime,
+                           high_precision_time,
+                           type);
+ }
+
+
+/**
+ * Delete an object.
+ * The object, its bucket info and its context come from this op's target;
+ * the behaviour is driven by the fields of params and results are reported
+ * through result (version_id, delete_marker).
+ *
+ * Two paths, as implemented below:
+ * - versioned path (params.versioning_status has BUCKET_VERSIONED, or
+ * params.marker_version_id is set): either set_olh() is called for a marker
+ * object (result.delete_marker = true), or the given instance is unlinked
+ * with unlink_obj_instance(); afterwards a data log entry is added when the
+ * bucket has datasync enabled.
+ * - plain path: params.unmod_since and params.expiration_time are checked,
+ * the head object is removed in one rados operation, the bucket index is
+ * updated (or the prepared index op is cancelled on failure), the tail is
+ * handed to complete_atomic_modification() and the quota stats are updated.
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int RGWRados::Object::Delete::delete_obj()
+{
+ RGWRados *store = target->get_store();
+ rgw_obj& src_obj = target->get_obj();
+ const string& instance = src_obj.key.instance;
+ rgw_obj obj = src_obj;
+
+ if (instance == "null") { // the "null" instance is addressed with an empty instance name
+ obj.key.instance.clear();
+ }
+
+ bool explicit_marker_version = (!params.marker_version_id.empty());
+
+ if (params.versioning_status & BUCKET_VERSIONED || explicit_marker_version) {
+ if (instance.empty() || explicit_marker_version) { // no instance requested, or caller supplied the marker's version id
+ rgw_obj marker = obj;
+
+ if (!params.marker_version_id.empty()) {
+ if (params.marker_version_id != "null") {
+ marker.key.set_instance(params.marker_version_id);
+ }
+ } else if ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == 0) {
+ store->gen_rand_obj_instance_name(&marker);
+ }
+
+ result.version_id = marker.key.instance;
+ if (result.version_id.empty())
+ result.version_id = "null";
+ result.delete_marker = true;
+
+ struct rgw_bucket_dir_entry_meta meta;
+
+ meta.owner = params.obj_owner.get_id().to_str();
+ meta.owner_display_name = params.obj_owner.get_display_name();
+
+ if (real_clock::is_zero(params.mtime)) { // no mtime supplied: stamp the marker with the current time
+ meta.mtime = real_clock::now();
+ } else {
+ meta.mtime = params.mtime;
+ }
+
+ int r = store->set_olh(target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, params.zones_trace);
+ if (r < 0) {
+ return r;
+ }
+ } else { // a specific instance was requested: unlink just that instance
+ rgw_bucket_dir_entry dirent;
+
+ int r = store->bi_get_instance(target->get_bucket_info(), obj, &dirent);
+ if (r < 0) {
+ return r;
+ }
+ result.delete_marker = dirent.is_delete_marker();
+ r = store->unlink_obj_instance(target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, params.zones_trace);
+ if (r < 0) {
+ return r;
+ }
+ result.version_id = instance;
+ }
+
+ BucketShard *bs;
+ int r = target->get_bucket_shard(&bs);
+ if (r < 0) {
+ ldout(store->ctx(), 5) << "failed to get BucketShard object: r=" << r << dendl;
+ return r;
+ }
+
+ if (target->bucket_info.datasync_flag_enabled()) {
+ r = store->data_log->add_entry(bs->bucket, bs->shard_id);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+ return r;
+ }
+ }
+
+ return 0; // the versioned path ends here
+ }
+
+ rgw_rados_ref ref;
+ int r = store->get_obj_head_ref(target->get_bucket_info(), obj, &ref);
+ if (r < 0) {
+ return r;
+ }
+
+ RGWObjState *state;
+ r = target->get_state(&state, false);
+ if (r < 0)
+ return r;
+
+ ObjectWriteOperation op;
+
+ if (!real_clock::is_zero(params.unmod_since)) {
+ struct timespec ctime = ceph::real_clock::to_timespec(state->mtime);
+ struct timespec unmod = ceph::real_clock::to_timespec(params.unmod_since);
+ if (!params.high_precision_time) { // compare at one-second resolution
+ ctime.tv_nsec = 0;
+ unmod.tv_nsec = 0;
+ }
+
+ ldout(store->ctx(), 10) << "If-UnModified-Since: " << params.unmod_since << " Last-Modified: " << ctime << dendl;
+ if (ctime > unmod) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+
+ /* only delete object if mtime is less than or equal to params.unmod_since */
+ store->cls_obj_check_mtime(op, params.unmod_since, params.high_precision_time, CLS_RGW_CHECK_TIME_MTIME_LE);
+ }
+ uint64_t obj_accounted_size = state->accounted_size;
+
+ if(params.abortmp) { // use the caller-supplied parts_accounted_size for the quota update
+ obj_accounted_size = params.parts_accounted_size;
+ }
+
+ if (!real_clock::is_zero(params.expiration_time)) { // the stored RGW_ATTR_DELETE_AT must exist and equal expiration_time
+ bufferlist bl;
+ real_time delete_at;
+
+ if (state->get_attr(RGW_ATTR_DELETE_AT, bl)) {
+ try {
+ auto iter = bl.cbegin();
+ decode(delete_at, iter);
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0) << "ERROR: couldn't decode RGW_ATTR_DELETE_AT" << dendl;
+ return -EIO;
+ }
+
+ if (params.expiration_time != delete_at) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ } else {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+
+ if (!state->exists) {
+ target->invalidate_state();
+ return -ENOENT;
+ }
+
+ r = target->prepare_atomic_modification(op, false, NULL, NULL, NULL, true, false);
+ if (r < 0)
+ return r;
+
+ RGWBucketInfo& bucket_info = target->get_bucket_info();
+
+ RGWRados::Bucket bop(store, bucket_info);
+ RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
+
+ index_op.set_zones_trace(params.zones_trace);
+ index_op.set_bilog_flags(params.bilog_flags);
+
+ r = index_op.prepare(CLS_RGW_OP_DEL, &state->write_tag);
+ if (r < 0)
+ return r;
+
+ store->remove_rgw_head_obj(op);
+ r = ref.ioctx.operate(ref.obj.oid, &op);
+
+ /* raced with another operation, object state is indeterminate */
+ const bool need_invalidate = (r == -ECANCELED);
+
+ int64_t poolid = ref.ioctx.get_id();
+ if (r >= 0) {
+ tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache();
+ if (obj_tombstone_cache) {
+ tombstone_entry entry{*state};
+ obj_tombstone_cache->add(obj, entry);
+ }
+ r = index_op.complete_del(poolid, ref.ioctx.get_last_version(), state->mtime, params.remove_objs);
+
+ int ret = target->complete_atomic_modification();
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned ret=" << ret << dendl;
+ }
+ /* other than that, no need to propagate error */
+ } else {
+ int ret = index_op.cancel(); // the head removal failed: cancel the prepared index operation
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl;
+ }
+ }
+
+ if (need_invalidate) {
+ target->invalidate_state();
+ }
+
+ if (r < 0)
+ return r;
+
+ /* update quota cache */
+ store->quota_handler->update_stats(params.bucket_owner, obj.bucket, -1, 0, obj_accounted_size);
+
+ return 0;
+}
+
+ /**
+  * Convenience wrapper that builds an Object::Delete operation and runs it.
+  * bucket_info: bucket holding the object; its owner becomes the op's
+  *              bucket_owner
+  * obj: the object to delete
+  * versioning_status, bilog_flags, expiration_time, zones_trace: copied into
+  *              the op's params
+  * Returns: the result of Object::Delete::delete_obj().
+  */
+ int RGWRados::delete_obj(RGWObjectCtx& obj_ctx,
+                          const RGWBucketInfo& bucket_info,
+                          const rgw_obj& obj,
+                          int versioning_status,
+                          uint16_t bilog_flags,
+                          const real_time& expiration_time,
+                          rgw_zone_set *zones_trace)
+ {
+   RGWRados::Object del_target(this, bucket_info, obj_ctx, obj);
+   RGWRados::Object::Delete del_op(&del_target);
+
+   auto& p = del_op.params;
+   p.bucket_owner = bucket_info.owner;
+   p.versioning_status = versioning_status;
+   p.bilog_flags = bilog_flags;
+   p.expiration_time = expiration_time;
+   p.zones_trace = zones_trace;
+
+   return del_op.delete_obj();
+ }
+
+ /**
+  * Remove a raw rados object.
+  * obj: the raw object to remove
+  * Returns: 0 on success, negative error code otherwise.
+  */
+ int RGWRados::delete_raw_obj(const rgw_raw_obj& obj)
+ {
+   rgw_rados_ref ref;
+   int r = get_raw_obj_ref(obj, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   ObjectWriteOperation rm_op;
+   rm_op.remove();
+
+   r = ref.ioctx.operate(ref.obj.oid, &rm_op);
+   return (r < 0) ? r : 0;
+ }
+
+ /**
+  * Complete a delete for an object in its bucket index only.
+  * obj: the object whose index entry is affected; obj.bucket is used to load
+  *      the bucket instance info
+  * mtime: modification time passed to the index operation
+  * Returns: the error from get_bucket_instance_info() if it fails, otherwise
+  *          the result of UpdateIndex::complete_del().
+  *
+  * Note: the oid/locator strings that used to be computed at the top of this
+  * function were never read, so that dead computation has been removed.
+  */
+ int RGWRados::delete_obj_index(const rgw_obj& obj, ceph::real_time mtime)
+ {
+   auto obj_ctx = svc.sysobj->init_obj_ctx();
+
+   RGWBucketInfo bucket_info;
+   int ret = get_bucket_instance_info(obj_ctx, obj.bucket, bucket_info, NULL, NULL);
+   if (ret < 0) {
+     ldout(cct, 0) << "ERROR: " << __func__ << "() get_bucket_instance_info(bucket=" << obj.bucket << ") returned ret=" << ret << dendl;
+     return ret;
+   }
+
+   RGWRados::Bucket bop(this, bucket_info);
+   RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
+
+   return index_op.complete_del(-1 /* pool */, 0, mtime, NULL);
+ }
+
+ /**
+  * Build a tag for an object from its manifest and etag.
+  * The tag is "<oid>_" of a manifest object (the second one when the manifest
+  * has a tail, the first one otherwise; omitted for an empty manifest),
+  * followed by the hex MD5 of manifest_bl and, when present, the
+  * RGW_ATTR_ETAG attribute.
+  * tag_bl: the tag is appended to it including its terminating NUL byte
+  */
+ static void generate_fake_tag(RGWRados *store, map<string, bufferlist>& attrset, RGWObjManifest& manifest, bufferlist& manifest_bl, bufferlist& tag_bl)
+ {
+   string tag;
+
+   RGWObjManifest::obj_iterator mi = manifest.obj_begin();
+   if (mi != manifest.obj_end()) {
+     if (manifest.has_tail()) { // first object usually points at the head, let's skip to a more unique part
+       ++mi;
+     }
+     tag = mi.get_location().get_raw_obj(store).oid;
+     tag += "_";
+   }
+
+   unsigned char digest[CEPH_CRYPTO_MD5_DIGESTSIZE];
+   char digest_hex[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+
+   MD5 hash;
+   hash.Update(reinterpret_cast<const unsigned char *>(manifest_bl.c_str()), manifest_bl.length());
+
+   auto etag_it = attrset.find(RGW_ATTR_ETAG);
+   if (etag_it != attrset.end()) {
+     bufferlist& etag_bl = etag_it->second;
+     hash.Update(reinterpret_cast<const unsigned char *>(etag_bl.c_str()), etag_bl.length());
+   }
+
+   hash.Final(digest);
+   buf_to_hex(digest, CEPH_CRYPTO_MD5_DIGESTSIZE, digest_hex);
+   tag.append(digest_hex);
+
+   ldout(store->ctx(), 10) << "generate_fake_tag new tag=" << tag << dendl;
+
+   tag_bl.append(tag.c_str(), tag.size() + 1);
+ }
+
+ /**
+  * Returns true when the attribute set contains RGW_ATTR_OLH_INFO.
+  */
+ static bool is_olh(map<string, bufferlist>& attrs)
+ {
+   return attrs.find(RGW_ATTR_OLH_INFO) != attrs.end();
+ }
+
+ /**
+  * Returns true when the attribute set contains RGW_ATTR_OLH_ID_TAG.
+  */
+ static bool has_olh_tag(map<string, bufferlist>& attrs)
+ {
+   return attrs.find(RGW_ATTR_OLH_ID_TAG) != attrs.end();
+ }
+
+/*
+ * Resolve an OLH to the object it points at and fetch that object's state.
+ * olh_state: state of the OLH itself; must have is_olh set (asserted)
+ * target_state: out, state of the resolved target
+ * Returns: 0 on success, negative error code otherwise.
+ */
+int RGWRados::get_olh_target_state(RGWObjectCtx& obj_ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                                   RGWObjState *olh_state, RGWObjState **target_state)
+{
+  ceph_assert(olh_state->is_olh);
+
+  rgw_obj olh_target;
+  const int follow_ret = RGWRados::follow_olh(bucket_info, obj_ctx, olh_state, obj, &olh_target); /* might return -EAGAIN */
+  if (follow_ret < 0) {
+    return follow_ret;
+  }
+
+  const int state_ret = get_obj_state(&obj_ctx, bucket_info, olh_target, target_state, false);
+  return (state_ret < 0) ? state_ret : 0;
+}
+
+/*
+ * Load (or return the already cached) RGWObjState for obj within rctx.
+ * rctx: object context holding the per-object state entries
+ * bucket_info: bucket of the object; its placement rule is used to locate the head
+ * obj: object to look up; an empty obj yields -EINVAL
+ * state: out, set to the state entry (or to the OLH target's state when the OLH is followed)
+ * follow_olh: resolve an OLH to its target, but only when obj has no explicit instance
+ * assume_noent: skip the stat and treat the object as nonexistent
+ * Returns: 0 on success, including the "does not exist" case which is reported
+ * through (*state)->exists == false; negative error code otherwise. Following
+ * an OLH might return -EAGAIN.
+ */
+int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ RGWObjState **state, bool follow_olh, bool assume_noent)
+{
+ if (obj.empty()) {
+ return -EINVAL;
+ }
+
+ bool need_follow_olh = follow_olh && obj.key.instance.empty();
+
+ RGWObjState *s = rctx->get_state(obj);
+ ldout(cct, 20) << "get_obj_state: rctx=" << (void *)rctx << " obj=" << obj << " state=" << (void *)s << " s->prefetch_data=" << s->prefetch_data << dendl;
+ *state = s;
+ // state already populated by an earlier call: only the OLH redirect is left to do
+ if (s->has_attrs) {
+ if (s->is_olh && need_follow_olh) {
+ return get_olh_target_state(*rctx, bucket_info, obj, s, state);
+ }
+ return 0;
+ }
+
+ s->obj = obj;
+
+ rgw_raw_obj raw_obj;
+ obj_to_raw(bucket_info.placement_rule, obj, &raw_obj);
+
+ int r = -ENOENT;
+
+ if (!assume_noent) {
+ r = RGWRados::raw_obj_stat(raw_obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : NULL), NULL);
+ }
+
+ // a missing object is not an error here: record it as nonexistent, taking
+ // mtime/zone/pg version from the tombstone cache when it has an entry
+ if (r == -ENOENT) {
+ s->exists = false;
+ s->has_attrs = true;
+ tombstone_entry entry;
+ if (obj_tombstone_cache && obj_tombstone_cache->find(obj, entry)) {
+ s->mtime = entry.mtime;
+ s->zone_short_id = entry.zone_short_id;
+ s->pg_ver = entry.pg_ver;
+ ldout(cct, 20) << __func__ << "(): found obj in tombstone cache: obj=" << obj
+ << " mtime=" << s->mtime << " pgv=" << s->pg_ver << dendl;
+ } else {
+ s->mtime = real_time();
+ }
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ s->exists = true;
+ s->has_attrs = true;
+ s->accounted_size = s->size;
+
+ auto iter = s->attrset.find(RGW_ATTR_ETAG);
+ if (iter != s->attrset.end()) {
+ /* get rid of extra null character at the end of the etag, as we used to store it like that */
+ bufferlist& bletag = iter->second;
+ if (bletag.length() > 0 && bletag[bletag.length() - 1] == '\0') {
+ bufferlist newbl;
+ bletag.splice(0, bletag.length() - 1, &newbl);
+ bletag.claim(newbl);
+ }
+ }
+
+ iter = s->attrset.find(RGW_ATTR_COMPRESSION);
+ const bool compressed = (iter != s->attrset.end());
+ if (compressed) {
+ // use uncompressed size for accounted_size
+ try {
+ RGWCompressionInfo info;
+ auto p = iter->second.cbegin();
+ decode(info, p);
+ s->accounted_size = info.orig_size;
+ } catch (buffer::error&) {
+ dout(0) << "ERROR: could not decode compression info for object: " << obj << dendl;
+ return -EIO;
+ }
+ }
+
+ iter = s->attrset.find(RGW_ATTR_SHADOW_OBJ);
+ if (iter != s->attrset.end()) {
+ bufferlist bl = iter->second;
+ bufferlist::iterator it = bl.begin();
+ it.copy(bl.length(), s->shadow_obj);
+ s->shadow_obj[bl.length()] = '\0';
+ }
+ // NOTE(review): operator[] inserts an empty RGW_ATTR_ID_TAG entry into attrset when the attr is absent
+ s->obj_tag = s->attrset[RGW_ATTR_ID_TAG];
+ auto ttiter = s->attrset.find(RGW_ATTR_TAIL_TAG);
+ if (ttiter != s->attrset.end()) {
+ s->tail_tag = s->attrset[RGW_ATTR_TAIL_TAG];
+ }
+
+ // NOTE(review): same operator[] insertion applies to RGW_ATTR_MANIFEST
+ bufferlist manifest_bl = s->attrset[RGW_ATTR_MANIFEST];
+ if (manifest_bl.length()) {
+ auto miter = manifest_bl.cbegin();
+ try {
+ decode(s->manifest, miter);
+ s->has_manifest = true;
+ s->manifest.set_head(bucket_info.placement_rule, obj, s->size); /* patch manifest to reflect the head we just read, some manifests might be
+ broken due to old bugs */
+ // from here on the size is the one recorded in the manifest, not the head object's stat size
+ s->size = s->manifest.get_obj_size();
+ if (!compressed)
+ s->accounted_size = s->size;
+ } catch (buffer::error& err) {
+ ldout(cct, 0) << "ERROR: couldn't decode manifest" << dendl;
+ return -EIO;
+ }
+ ldout(cct, 10) << "manifest: total_size = " << s->manifest.get_obj_size() << dendl;
+ // the per-part dump is only produced when rgw logging at level 20 is being gathered
+ if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>() && \
+ s->manifest.has_explicit_objs()) {
+ RGWObjManifest::obj_iterator mi;
+ for (mi = s->manifest.obj_begin(); mi != s->manifest.obj_end(); ++mi) {
+ ldout(cct, 20) << "manifest: ofs=" << mi.get_ofs() << " loc=" << mi.get_location().get_raw_obj(this) << dendl;
+ }
+ }
+
+ if (!s->obj_tag.length()) {
+ /*
+ * Uh oh, something's wrong, object with manifest should have tag. Let's
+ * create one out of the manifest, would be unique
+ */
+ generate_fake_tag(this, s->attrset, s->manifest, manifest_bl, s->obj_tag);
+ s->fake_tag = true;
+ }
+ }
+ // pg version and source zone attrs are optional; decode failures are logged and ignored
+ map<string, bufferlist>::iterator aiter = s->attrset.find(RGW_ATTR_PG_VER);
+ if (aiter != s->attrset.end()) {
+ bufferlist& pg_ver_bl = aiter->second;
+ if (pg_ver_bl.length()) {
+ auto pgbl = pg_ver_bl.cbegin();
+ try {
+ decode(s->pg_ver, pgbl);
+ } catch (buffer::error& err) {
+ ldout(cct, 0) << "ERROR: couldn't decode pg ver attr for object " << s->obj << ", non-critical error, ignoring" << dendl;
+ }
+ }
+ }
+ aiter = s->attrset.find(RGW_ATTR_SOURCE_ZONE);
+ if (aiter != s->attrset.end()) {
+ bufferlist& zone_short_id_bl = aiter->second;
+ if (zone_short_id_bl.length()) {
+ auto zbl = zone_short_id_bl.cbegin();
+ try {
+ decode(s->zone_short_id, zbl);
+ } catch (buffer::error& err) {
+ ldout(cct, 0) << "ERROR: couldn't decode zone short id attr for object " << s->obj << ", non-critical error, ignoring" << dendl;
+ }
+ }
+ }
+ if (s->obj_tag.length())
+ ldout(cct, 20) << "get_obj_state: setting s->obj_tag to " << s->obj_tag.c_str() << dendl;
+ else
+ ldout(cct, 20) << "get_obj_state: s->obj_tag was set empty" << dendl;
+
+ /* an object might not be olh yet, but could have olh id tag, so we should set it anyway if
+ * it exist, and not only if is_olh() returns true
+ */
+ iter = s->attrset.find(RGW_ATTR_OLH_ID_TAG);
+ if (iter != s->attrset.end()) {
+ s->olh_tag = iter->second;
+ }
+
+ if (is_olh(s->attrset)) {
+ s->is_olh = true;
+
+ ldout(cct, 20) << __func__ << ": setting s->olh_tag to " << string(s->olh_tag.c_str(), s->olh_tag.length()) << dendl;
+
+ if (need_follow_olh) {
+ return get_olh_target_state(*rctx, bucket_info, obj, s, state);
+ } else if (obj.key.have_null_instance() && !s->has_manifest) {
+ // read null version, and the head object only have olh info
+ s->exists = false;
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Fetch the state of obj, repeating get_obj_state_impl() for as long as it
+ * answers -EAGAIN. All arguments are passed through unchanged.
+ * Returns: the first result of get_obj_state_impl() that is not -EAGAIN.
+ */
+int RGWRados::get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
+                            bool follow_olh, bool assume_noent)
+{
+  for (;;) {
+    const int ret = get_obj_state_impl(rctx, bucket_info, obj, state, follow_olh, assume_noent);
+    if (ret != -EAGAIN) {
+      return ret;
+    }
+  }
+}
+
+/*
+ * Hand out a pointer to the manifest stored in this object's state
+ * (fetched with follow_olh = true).
+ * Returns: 0 on success, the error from get_state() otherwise.
+ */
+int RGWRados::Object::get_manifest(RGWObjManifest **pmanifest)
+{
+  RGWObjState *obj_state;
+  const int r = get_state(&obj_state, true);
+  if (r >= 0) {
+    *pmanifest = &obj_state->manifest;
+    return 0;
+  }
+
+  return r;
+}
+
+/*
+ * Copy a single attr of the source object into dest.
+ * Returns: 0 on success, -ENOENT if the object does not exist, -ENODATA if
+ * the state's get_attr() does not find the attr, or the error from get_state().
+ */
+int RGWRados::Object::Read::get_attr(const char *name, bufferlist& dest)
+{
+  RGWObjState *obj_state;
+  const int r = source->get_state(&obj_state, true);
+  if (r < 0) {
+    return r;
+  }
+  if (!obj_state->exists) {
+    return -ENOENT;
+  }
+
+  return obj_state->get_attr(name, dest) ? 0 : -ENODATA;
+}
+
+
+/*
+ * Start an asynchronous stat of the source object.
+ * If the object context already holds the attrs, `result` is filled from the
+ * cached state and no request is sent (state.completion stays unset).
+ * Otherwise a stat2 + getxattrs read op is submitted; the caller collects the
+ * outcome with wait().
+ * Returns: 0 on success, negative error code otherwise. On a submit failure
+ * the completion is released and the error is also stored in state.ret, so a
+ * subsequent wait() reports it instead of touching a dead completion.
+ */
+int RGWRados::Object::Stat::stat_async()
+{
+  RGWObjectCtx& ctx = source->get_ctx();
+  rgw_obj& obj = source->get_obj();
+  RGWRados *store = source->get_store();
+
+  RGWObjState *s = ctx.get_state(obj); /* calling this one directly because otherwise a sync request will be sent */
+  result.obj = obj;
+  if (s->has_attrs) {
+    state.ret = 0;
+    result.size = s->size;
+    result.mtime = ceph::real_clock::to_timespec(s->mtime);
+    result.attrs = s->attrset;
+    result.has_manifest = s->has_manifest;
+    result.manifest = s->manifest;
+    return 0;
+  }
+
+  string oid;
+  string loc;
+  get_obj_bucket_and_oid_loc(obj, oid, loc);
+
+  int r = store->get_obj_head_ioctx(source->get_bucket_info(), obj, &state.io_ctx);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectReadOperation op;
+  op.stat2(&result.size, &result.mtime, nullptr);
+  op.getxattrs(&result.attrs, nullptr);
+  state.completion = librados::Rados::aio_create_completion(nullptr, nullptr, nullptr);
+  state.io_ctx.locator_set_key(loc);
+  r = state.io_ctx.aio_operate(oid, state.completion, &op, nullptr);
+  if (r < 0) {
+    ldout(store->ctx(), 5) << __func__
+                           << ": ERROR: aio_operate() returned ret=" << r
+                           << dendl;
+    // nothing was submitted: drop our reference and make sure wait() cannot
+    // block on (or release again) a completion that will never fire
+    state.completion->release();
+    state.completion = nullptr;
+    state.ret = r;
+    return r;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Wait for the request started by stat_async() and post-process its result.
+ * When no request is outstanding, the stored state.ret is returned as is.
+ * Returns: 0 on success, negative error code otherwise.
+ */
+int RGWRados::Object::Stat::wait()
+{
+  if (!state.completion) {
+    return state.ret;
+  }
+
+  state.completion->wait_for_safe();
+  state.ret = state.completion->get_return_value();
+  state.completion->release();
+  // the completion is gone after release(); clear the pointer so that a
+  // repeated wait() returns state.ret instead of using freed memory
+  state.completion = nullptr;
+
+  if (state.ret != 0) {
+    return state.ret;
+  }
+
+  return finish();
+}
+
+/*
+ * Decode the manifest attr (if any) out of the fetched attrs into `result`.
+ * Returns: 0 on success or when there is no manifest attr, -EIO when the
+ * manifest cannot be decoded.
+ */
+int RGWRados::Object::Stat::finish()
+{
+  auto manifest_it = result.attrs.find(RGW_ATTR_MANIFEST);
+  if (manifest_it == result.attrs.end()) {
+    return 0;
+  }
+
+  bufferlist& manifest_bl = manifest_it->second;
+  auto pos = manifest_bl.cbegin();
+  try {
+    decode(result.manifest, pos);
+  } catch (buffer::error& err) {
+    RGWRados *store = source->get_store();
+    ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode manifest" << dendl;
+    return -EIO;
+  }
+
+  result.has_manifest = true;
+  return 0;
+}
+
+/*
+ * Look up the state of obj in rctx and append the atomic guard for it to op.
+ * Without an object context nothing is done and *pstate is left untouched.
+ * Returns: 0 on success, the error from get_obj_state() otherwise.
+ */
+int RGWRados::append_atomic_test(RGWObjectCtx *rctx,
+                                 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                                 ObjectOperation& op, RGWObjState **pstate)
+{
+  if (rctx == nullptr) {
+    return 0;
+  }
+
+  const int r = get_obj_state(rctx, bucket_info, obj, pstate, false);
+  return (r < 0) ? r : append_atomic_test(*pstate, op);
+}
+
+/*
+ * Append a comparison of the id tag xattr against state->obj_tag to op, so
+ * that op only succeeds while the object still carries that tag.
+ * Nothing is appended for non-atomic state, an empty tag or a fake tag.
+ * Returns: always 0.
+ */
+int RGWRados::append_atomic_test(const RGWObjState* state,
+                                 librados::ObjectOperation& op)
+{
+  if (!state->is_atomic) {
+    ldout(cct, 20) << "state for obj=" << state->obj << " is not atomic, not appending atomic test" << dendl;
+    return 0;
+  }
+
+  // a generated (fake) tag is not stored on the object, so it cannot be compared against
+  const bool has_stored_tag = state->obj_tag.length() > 0 && !state->fake_tag; // check for backward compatibility
+  if (!has_stored_tag) {
+    ldout(cct, 20) << "state->obj_tag is empty, not appending atomic test" << dendl;
+    return 0;
+  }
+
+  op.cmpxattr(RGW_ATTR_ID_TAG, LIBRADOS_CMPXATTR_OP_EQ, state->obj_tag);
+  return 0;
+}
+
+/*
+ * Fetch this object's state through the store, using the object's own
+ * context, bucket info and key. follow_olh and assume_noent are forwarded
+ * unchanged to RGWRados::get_obj_state(); the result is written to *pstate.
+ * Returns: whatever RGWRados::get_obj_state() returns.
+ */
+int RGWRados::Object::get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent)
+{
+ return store->get_obj_state(&ctx, bucket_info, obj, pstate, follow_olh, assume_noent);
+}
+
+/* Invalidate the entry kept for this object in its object context. */
+void RGWRados::Object::invalidate_state()
+{
+ ctx.invalidate(obj);
+}
+
+/*
+ * Add the guards and tag updates needed for an atomic write to op.
+ * op: write operation being assembled
+ * reset_obj: recreate the head (create + remove_rgw_head_obj), or create it
+ *            exclusively when the state says it does not exist
+ * ptag: tag to use as write_tag; when null, append_rand_alpha() derives one
+ *       from the current write_tag
+ * if_match / if_nomatch: optional etag preconditions ("*" tests existence)
+ * removal_op: the object is being removed, so no new tag is written
+ * modify_tail: also store the new tag under RGW_ATTR_TAIL_TAG
+ * Returns: 0 on success, -ERR_PRECONDITION_FAILED when a precondition fails,
+ * or the error from get_state().
+ */
+int RGWRados::Object::prepare_atomic_modification(ObjectWriteOperation& op, bool reset_obj, const string *ptag,
+ const char *if_match, const char *if_nomatch, bool removal_op,
+ bool modify_tail)
+{
+ int r = get_state(&state, false);
+ if (r < 0)
+ return r;
+
+ // a guard is wanted when there is something to compare against or the caller
+ // passed a precondition, but never when the tag was generated locally
+ bool need_guard = (state->has_manifest || (state->obj_tag.length() != 0) ||
+ if_match != NULL || if_nomatch != NULL) &&
+ (!state->fake_tag);
+
+ if (!state->is_atomic) {
+ ldout(store->ctx(), 20) << "prepare_atomic_modification: state is not atomic. state=" << (void *)state << dendl;
+
+ if (reset_obj) {
+ op.create(false);
+ store->remove_rgw_head_obj(op); // we're not dropping reference here, actually removing object
+ }
+
+ return 0;
+ }
+
+ if (need_guard) {
+ /* first verify that the object wasn't replaced under */
+ if (if_nomatch == NULL || strcmp(if_nomatch, "*") != 0) {
+ op.cmpxattr(RGW_ATTR_ID_TAG, LIBRADOS_CMPXATTR_OP_EQ, state->obj_tag);
+ // FIXME: need to add FAIL_NOTEXIST_OK for racing deletion
+ }
+
+ if (if_match) {
+ if (strcmp(if_match, "*") == 0) {
+ // test the object is existing
+ if (!state->exists) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ } else {
+ bufferlist bl;
+ // NOTE(review): only the first bl.length() characters are compared, so a longer if_match sharing that prefix also passes
+ if (!state->get_attr(RGW_ATTR_ETAG, bl) ||
+ strncmp(if_match, bl.c_str(), bl.length()) != 0) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+ }
+
+ if (if_nomatch) {
+ if (strcmp(if_nomatch, "*") == 0) {
+ // test the object is NOT existing
+ if (state->exists) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ } else {
+ bufferlist bl;
+ if (!state->get_attr(RGW_ATTR_ETAG, bl) ||
+ strncmp(if_nomatch, bl.c_str(), bl.length()) == 0) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+ }
+ }
+
+ if (reset_obj) {
+ if (state->exists) {
+ op.create(false);
+ store->remove_rgw_head_obj(op);
+ } else {
+ op.create(true);
+ }
+ }
+
+ if (removal_op) {
+ /* the object is being removed, no need to update its tag */
+ return 0;
+ }
+
+ if (ptag) {
+ state->write_tag = *ptag;
+ } else {
+ append_rand_alpha(store->ctx(), state->write_tag, state->write_tag, 32);
+ }
+ bufferlist bl;
+ // the tag is stored together with its trailing NUL
+ bl.append(state->write_tag.c_str(), state->write_tag.size() + 1);
+
+ ldout(store->ctx(), 10) << "setting object write_tag=" << state->write_tag << dendl;
+
+ op.setxattr(RGW_ATTR_ID_TAG, bl);
+ if (modify_tail) {
+ op.setxattr(RGW_ATTR_TAIL_TAG, bl);
+ }
+
+ return 0;
+}
+
+/**
+ * Set a single attr on an object.
+ * ctx: object context (an RGWObjectCtx passed as void *), handed to set_attrs()
+ * bucket_info: info of the bucket holding the object
+ * obj: the object to set the attr on
+ * name: the attr to set
+ * bl: the contents of the attr
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWRados::set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl)
+{
+  // wrap the single name/value pair and reuse the bulk path; nothing is removed
+  map<string, bufferlist> single_attr;
+  single_attr.emplace(name, bl);
+  return set_attrs(ctx, bucket_info, obj, single_attr, nullptr);
+}
+
+/**
+ * Set and/or remove attrs on an object's head and update its bucket index entry.
+ * ctx: object context (an RGWObjectCtx passed as void *); may be null, in
+ *      which case no atomic guard, index update or cache refresh is done
+ * bucket_info: info of the bucket holding the object
+ * src_obj: the object to modify; instance "null" addresses the null version
+ * attrs: attrs to set; entries with empty contents are not written to the object
+ * rmattrs: optional, attrs to remove (only the keys are used)
+ * Returns: 0 on success, -ENOENT when the null version was requested but the
+ * head has no manifest, other negative error code otherwise.
+ *
+ * The caller's attrs map is not modified: acl/etag/content type are looked up
+ * with find(), so missing ones are reported to the index as empty without
+ * leaking empty entries into attrs or into the cached state->attrset.
+ */
+int RGWRados::set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& src_obj,
+                        map<string, bufferlist>& attrs,
+                        map<string, bufferlist>* rmattrs)
+{
+  rgw_obj obj = src_obj;
+  if (obj.key.instance == "null") {
+    obj.key.instance.clear();
+  }
+
+  rgw_rados_ref ref;
+  int r = get_obj_head_ref(bucket_info, obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+  RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
+
+  ObjectWriteOperation op;
+  RGWObjState *state = nullptr;
+
+  r = append_atomic_test(rctx, bucket_info, obj, op, &state);
+  if (r < 0)
+    return r;
+
+  // ensure null version object exist; state is only available when a ctx was
+  // passed in, so the check is skipped (rather than crashing) without one
+  if (src_obj.key.instance == "null" && state && !state->has_manifest) {
+    return -ENOENT;
+  }
+
+  if (rmattrs) {
+    for (const auto& rm : *rmattrs) {
+      op.rmxattr(rm.first.c_str());
+    }
+  }
+
+  const rgw_bucket& bucket = obj.bucket;
+
+  for (auto& attr : attrs) {
+    const string& name = attr.first;
+    bufferlist& bl = attr.second;
+
+    if (!bl.length())
+      continue;
+
+    op.setxattr(name.c_str(), bl);
+
+    // an expiration attr additionally registers a hint for the object expirer
+    if (name.compare(RGW_ATTR_DELETE_AT) == 0) {
+      real_time ts;
+      try {
+        decode(ts, bl);
+
+        rgw_obj_index_key obj_key;
+        obj.key.get_index_key(&obj_key);
+
+        objexp_hint_add(ts, bucket.tenant, bucket.name, bucket.bucket_id, obj_key);
+      } catch (buffer::error& err) {
+        ldout(cct, 0) << "ERROR: failed to decode " RGW_ATTR_DELETE_AT << " attr" << dendl;
+      }
+    }
+  }
+
+  if (!op.size())
+    return 0;
+
+  bufferlist bl;
+  RGWRados::Bucket bop(this, bucket_info);
+  RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
+
+  if (state) {
+    string tag;
+    append_rand_alpha(cct, tag, tag, 32);
+    state->write_tag = tag;
+    r = index_op.prepare(CLS_RGW_OP_ADD, &state->write_tag);
+
+    if (r < 0)
+      return r;
+
+    // the tag is stored together with its trailing NUL
+    bl.append(tag.c_str(), tag.size() + 1);
+    op.setxattr(RGW_ATTR_ID_TAG, bl);
+  }
+
+
+  real_time mtime = real_clock::now();
+  struct timespec mtime_ts = real_clock::to_timespec(mtime);
+  op.mtime2(&mtime_ts);
+  r = ref.ioctx.operate(ref.obj.oid, &op);
+  if (state) {
+    if (r >= 0) {
+      // use find(), not operator[]: the latter would insert empty entries into
+      // attrs, and the cache refresh below would copy them over state->attrset
+      bufferlist acl_bl;
+      auto found = attrs.find(RGW_ATTR_ACL);
+      if (found != attrs.end()) {
+        acl_bl = found->second;
+      }
+      string etag;
+      found = attrs.find(RGW_ATTR_ETAG);
+      if (found != attrs.end()) {
+        etag = rgw_bl_str(found->second);
+      }
+      string content_type;
+      found = attrs.find(RGW_ATTR_CONTENT_TYPE);
+      if (found != attrs.end()) {
+        content_type = rgw_bl_str(found->second);
+      }
+      string storage_class;
+      found = attrs.find(RGW_ATTR_STORAGE_CLASS);
+      if (found != attrs.end()) {
+        storage_class = rgw_bl_str(found->second);
+      }
+      uint64_t epoch = ref.ioctx.get_last_version();
+      int64_t poolid = ref.ioctx.get_id();
+      r = index_op.complete(poolid, epoch, state->size, state->accounted_size,
+                            mtime, etag, content_type, storage_class, &acl_bl,
+                            RGWObjCategory::Main, nullptr);
+    } else {
+      int ret = index_op.cancel();
+      if (ret < 0) {
+        ldout(cct, 0) << "ERROR: complete_update_index_cancel() returned ret=" << ret << dendl;
+      }
+    }
+  }
+  if (r < 0)
+    return r;
+
+  // bring the cached state in line with what was just written
+  if (state) {
+    state->obj_tag.swap(bl);
+    if (rmattrs) {
+      for (const auto& rm : *rmattrs) {
+        state->attrset.erase(rm.first);
+      }
+    }
+
+    for (const auto& attr : attrs) {
+      state->attrset[attr.first] = attr.second;
+    }
+
+    auto tag_it = state->attrset.find(RGW_ATTR_ID_TAG);
+    if (tag_it != state->attrset.end()) {
+      tag_it->second = state->obj_tag;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Prepare a read: load the object state, set up the head object and ioctx in
+ * `state`, fill the optional outputs in `params` and evaluate the conditions
+ * in `conds`.
+ * Returns: 0 on success, -ENOENT if the object does not exist,
+ * -ERR_NOT_MODIFIED / -ERR_PRECONDITION_FAILED when a condition is not met,
+ * or another negative error code.
+ */
+int RGWRados::Object::Read::prepare()
+{
+ RGWRados *store = source->get_store();
+ CephContext *cct = store->ctx();
+
+ bufferlist etag;
+
+ map<string, bufferlist>::iterator iter;
+
+ RGWObjState *astate;
+ int r = source->get_state(&astate, true);
+ if (r < 0)
+ return r;
+
+ if (!astate->exists) {
+ return -ENOENT;
+ }
+
+ const RGWBucketInfo& bucket_info = source->get_bucket_info();
+
+ // astate->obj is used from here on rather than the object the read was created for
+ state.obj = astate->obj;
+ store->obj_to_raw(bucket_info.placement_rule, state.obj, &state.head_obj);
+
+ state.cur_pool = state.head_obj.pool;
+ state.cur_ioctx = &state.io_ctxs[state.cur_pool];
+
+ r = store->get_obj_head_ioctx(bucket_info, state.obj, state.cur_ioctx);
+ if (r < 0) {
+ return r;
+ }
+ if (params.target_obj) {
+ *params.target_obj = state.obj;
+ }
+ if (params.attrs) {
+ *params.attrs = astate->attrset;
+ if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+ for (iter = params.attrs->begin(); iter != params.attrs->end(); ++iter) {
+ ldout(cct, 20) << "Read xattr: " << iter->first << dendl;
+ }
+ }
+ }
+
+ /* Convert all times to GMT to make them compatible */
+ if (conds.mod_ptr || conds.unmod_ptr) {
+ obj_time_weight src_weight;
+ src_weight.init(astate);
+ src_weight.high_precision = conds.high_precision_time;
+
+ obj_time_weight dest_weight;
+ dest_weight.high_precision = conds.high_precision_time;
+
+ if (conds.mod_ptr) {
+ dest_weight.init(*conds.mod_ptr, conds.mod_zone_id, conds.mod_pg_ver);
+ ldout(cct, 10) << "If-Modified-Since: " << dest_weight << " Last-Modified: " << src_weight << dendl;
+ if (!(dest_weight < src_weight)) {
+ return -ERR_NOT_MODIFIED;
+ }
+ }
+
+ if (conds.unmod_ptr) {
+ dest_weight.init(*conds.unmod_ptr, conds.mod_zone_id, conds.mod_pg_ver);
+ ldout(cct, 10) << "If-UnModified-Since: " << dest_weight << " Last-Modified: " << src_weight << dendl;
+ if (dest_weight < src_weight) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+ }
+ if (conds.if_match || conds.if_nomatch) {
+ r = get_attr(RGW_ATTR_ETAG, etag);
+ if (r < 0)
+ return r;
+
+
+
+ // NOTE(review): both comparisons below look only at the first etag.length()
+ // characters of the unquoted header value
+ if (conds.if_match) {
+ string if_match_str = rgw_string_unquote(conds.if_match);
+ ldout(cct, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-Match: " << if_match_str << dendl;
+ if (if_match_str.compare(0, etag.length(), etag.c_str(), etag.length()) != 0) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+
+ if (conds.if_nomatch) {
+ string if_nomatch_str = rgw_string_unquote(conds.if_nomatch);
+ ldout(cct, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-NoMatch: " << if_nomatch_str << dendl;
+ if (if_nomatch_str.compare(0, etag.length(), etag.c_str(), etag.length()) == 0) {
+ return -ERR_NOT_MODIFIED;
+ }
+ }
+ }
+
+ if (params.obj_size)
+ *params.obj_size = astate->size;
+ if (params.lastmod)
+ *params.lastmod = astate->mtime;
+
+ return 0;
+}
+
+/*
+ * Turn a requested byte range into absolute, inclusive offsets.
+ * obj_size: size of the object
+ * ofs: in/out; a negative value counts from the end of the object
+ * end: in/out; a negative value means "up to the last byte"
+ * Returns: 0 on success, -ERANGE when ofs lies at or beyond the end of a
+ * non-empty object.
+ */
+int RGWRados::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end)
+{
+  // remember the suffix form before ofs gets rewritten
+  const bool suffix_range = (ofs < 0);
+  if (suffix_range) {
+    ofs += obj_size;
+    if (ofs < 0) {
+      ofs = 0;
+    }
+  }
+  if (suffix_range || end < 0) {
+    end = obj_size - 1;
+  }
+
+  // bounds are only enforced for non-empty objects
+  if (obj_size == 0) {
+    return 0;
+  }
+
+  const off_t size = static_cast<off_t>(obj_size);
+  if (ofs >= size) {
+    return -ERANGE;
+  }
+  if (end >= size) {
+    end = obj_size - 1;
+  }
+  return 0;
+}
+
+/*
+ * Run `call` against this index op's bucket shard, waiting and retrying
+ * while the call reports -ERR_BUSY_RESHARDING.
+ * pbs: optional out; receives the shard used by the last call. It is only
+ *      written on the success path, never when an error is returned.
+ * call: operation to run on the shard
+ * Returns: 0 on success, negative error code otherwise.
+ */
+int RGWRados::Bucket::UpdateIndex::guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call)
+{
+ RGWRados *store = target->get_store();
+ BucketShard *bs;
+ int r;
+
+#define NUM_RESHARD_RETRIES 10
+ for (int i = 0; i < NUM_RESHARD_RETRIES; ++i) {
+ int ret = get_bucket_shard(&bs);
+ if (ret < 0) {
+ ldout(store->ctx(), 5) << "failed to get BucketShard object: ret=" << ret << dendl;
+ return ret;
+ }
+ r = call(bs);
+ // anything other than "busy resharding" (success included) ends the loop
+ if (r != -ERR_BUSY_RESHARDING) {
+ break;
+ }
+ ldout(store->ctx(), 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+ string new_bucket_id;
+ r = store->block_while_resharding(bs, &new_bucket_id,
+ target->bucket_info, null_yield);
+ // still resharding: this attempt is counted against the retry budget
+ if (r == -ERR_BUSY_RESHARDING) {
+ continue;
+ }
+ if (r < 0) {
+ return r;
+ }
+ ldout(store->ctx(), 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
+ i = 0; /* resharding is finished, make sure we can retry */
+ r = target->update_bucket_id(new_bucket_id);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: update_bucket_id() new_bucket_id=" << new_bucket_id << " returned r=" << r << dendl;
+ return r;
+ }
+ // drop the shard handle so the next iteration fetches it again
+ invalidate_bs();
+ } // for loop
+
+ if (r < 0) {
+ return r;
+ }
+
+ if (pbs) {
+ *pbs = bs;
+ }
+
+ return 0;
+}
+
+/*
+ * Announce an upcoming modification of the object to the bucket index.
+ * op: kind of modification
+ * write_tag: optional tag for the operation; when absent or empty the
+ *            existing optag is kept, or a random one is generated if empty
+ * Returns: 0 on success (always for blind index ops), negative error code
+ * otherwise. `prepared` is set on success.
+ */
+int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op, const string *write_tag)
+{
+  if (blind) {
+    return 0;
+  }
+  RGWRados *store = target->get_store();
+
+  const bool caller_supplied_tag = write_tag && write_tag->length();
+  if (caller_supplied_tag) {
+    optag = string(write_tag->c_str(), write_tag->length());
+  } else if (optag.empty()) {
+    append_rand_alpha(store->ctx(), optag, optag, 32);
+  }
+
+  auto prepare_on_shard = [&](BucketShard *shard) -> int {
+    return store->cls_obj_prepare_op(*shard, op, optag, obj, bilog_flags, zones_trace);
+  };
+
+  const int r = guard_reshard(nullptr, prepare_on_shard);
+  if (r < 0) {
+    return r;
+  }
+
+  prepared = true;
+  return 0;
+}
+
+/*
+ * Complete an add/modify operation on the bucket index.
+ * Builds the directory entry from the arguments (the owner is taken from
+ * acl_bl when it decodes), hands it to cls_obj_complete_add() and, when data
+ * sync is enabled for the bucket, adds a data log entry for the shard.
+ * Returns: 0 for blind index ops, otherwise the result of
+ * cls_obj_complete_add() or the error from get_bucket_shard(). A failure to
+ * decode the policy or to write the data log is only logged.
+ */
+int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch,
+                                            uint64_t size, uint64_t accounted_size,
+                                            ceph::real_time& ut, const string& etag,
+                                            const string& content_type, const string& storage_class,
+                                            bufferlist *acl_bl,
+                                            RGWObjCategory category,
+                                            list<rgw_obj_index_key> *remove_objs, const string *user_data,
+                                            bool appendable)
+{
+  if (blind) {
+    return 0;
+  }
+  RGWRados *store = target->get_store();
+
+  BucketShard *bs;
+  int ret = get_bucket_shard(&bs);
+  if (ret < 0) {
+    ldout(store->ctx(), 5) << "failed to get BucketShard object: ret=" << ret << dendl;
+    return ret;
+  }
+
+  // owner stays default-constructed when there is no (decodable) acl
+  ACLOwner owner;
+  if (acl_bl && acl_bl->length()) {
+    const int decode_ret = store->decode_policy(*acl_bl, &owner);
+    if (decode_ret < 0) {
+      ldout(store->ctx(), 0) << "WARNING: could not decode policy ret=" << decode_ret << dendl;
+    }
+  }
+
+  rgw_bucket_dir_entry ent;
+  obj.key.get_index_key(&ent.key);
+  ent.meta.size = size;
+  ent.meta.accounted_size = accounted_size;
+  ent.meta.mtime = ut;
+  ent.meta.etag = etag;
+  ent.meta.storage_class = storage_class;
+  if (user_data) {
+    ent.meta.user_data = *user_data;
+  }
+  ent.meta.owner = owner.get_id().to_str();
+  ent.meta.owner_display_name = owner.get_display_name();
+  ent.meta.content_type = content_type;
+  ent.meta.appendable = appendable;
+
+  ret = store->cls_obj_complete_add(*bs, obj, optag, poolid, epoch, ent, category, remove_objs, bilog_flags, zones_trace);
+
+  if (target->bucket_info.datasync_flag_enabled()) {
+    const int log_ret = store->data_log->add_entry(bs->bucket, bs->shard_id);
+    if (log_ret < 0) {
+      lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+    }
+  }
+
+  return ret;
+}
+
+/*
+ * Complete a delete operation on the bucket index and, when data sync is
+ * enabled for the bucket, add a data log entry for the shard.
+ * Returns: 0 for blind index ops, otherwise the result of
+ * cls_obj_complete_del() or the error from get_bucket_shard(). A failure to
+ * write the data log is only logged.
+ */
+int RGWRados::Bucket::UpdateIndex::complete_del(int64_t poolid, uint64_t epoch,
+                                                real_time& removed_mtime,
+                                                list<rgw_obj_index_key> *remove_objs)
+{
+  if (blind) {
+    return 0;
+  }
+  RGWRados *store = target->get_store();
+
+  BucketShard *bs;
+  const int shard_ret = get_bucket_shard(&bs);
+  if (shard_ret < 0) {
+    ldout(store->ctx(), 5) << "failed to get BucketShard object: ret=" << shard_ret << dendl;
+    return shard_ret;
+  }
+
+  const int del_ret = store->cls_obj_complete_del(*bs, optag, poolid, epoch, obj, removed_mtime, remove_objs, bilog_flags, zones_trace);
+
+  if (target->bucket_info.datasync_flag_enabled()) {
+    const int log_ret = store->data_log->add_entry(bs->bucket, bs->shard_id);
+    if (log_ret < 0) {
+      lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+    }
+  }
+
+  return del_ret;
+}
+
+
+/*
+ * Cancel a prepared bucket index operation and, when data sync is enabled
+ * for the bucket, add a data log entry for the shard.
+ * Returns: 0 for blind index ops, otherwise the result of the guarded
+ * cls_obj_complete_cancel() call. A failure to write the data log is only
+ * logged.
+ */
+int RGWRados::Bucket::UpdateIndex::cancel()
+{
+  if (blind) {
+    return 0;
+  }
+  RGWRados *store = target->get_store();
+  // guard_reshard() hands the shard back only on success, so start from a
+  // known value instead of reading an indeterminate pointer afterwards
+  BucketShard *bs = nullptr;
+
+  int ret = guard_reshard(&bs, [&](BucketShard *shard) -> int {
+    return store->cls_obj_complete_cancel(*shard, optag, obj, bilog_flags, zones_trace);
+  });
+
+  // the cancel failed and left us without a shard: still try to obtain one,
+  // since the data log is meant to be updated regardless of the outcome
+  if (!bs && get_bucket_shard(&bs) < 0) {
+    bs = nullptr;
+  }
+
+  /*
+   * need to update data log anyhow, so that whoever follows needs to update its internal markers
+   * for following the specific bucket shard log. Otherwise they end up staying behind, and users
+   * have no way to tell that they're all caught up
+   */
+  if (bs && target->bucket_info.datasync_flag_enabled()) {
+    int r = store->data_log->add_entry(bs->bucket, bs->shard_id);
+    if (r < 0) {
+      lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
+    }
+  }
+
+  return ret;
+}
+
+/*
+ * Synchronously read part of the object into bl.
+ * ofs, end: inclusive logical byte range requested; end is clamped to the object size
+ * bl: receives the data
+ * Returns: number of bytes in bl (bl.length()) on success, negative error code otherwise.
+ * A single call reads from one stripe only and at most the pool's max chunk
+ * size, so it can return fewer bytes than requested.
+ */
+int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl)
+{
+ RGWRados *store = source->get_store();
+ CephContext *cct = store->ctx();
+
+ rgw_raw_obj read_obj;
+ uint64_t read_ofs = ofs;
+ uint64_t len, read_len;
+ bool reading_from_head = true;
+ ObjectReadOperation op;
+
+ // when part of the range is served from prefetched data, the rados read
+ // goes into read_bl and is appended to bl afterwards
+ bool merge_bl = false;
+ bufferlist *pbl = &bl;
+ bufferlist read_bl;
+ uint64_t max_chunk_size;
+
+ RGWObjState *astate;
+ int r = source->get_state(&astate, true);
+ if (r < 0)
+ return r;
+
+ if (astate->size == 0) {
+ end = 0;
+ } else if (end >= (int64_t)astate->size) {
+ end = astate->size - 1;
+ }
+
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ if (astate->has_manifest && astate->manifest.has_tail()) {
+ /* now get the relevant object part */
+ RGWObjManifest::obj_iterator iter = astate->manifest.obj_find(ofs);
+
+ uint64_t stripe_ofs = iter.get_stripe_ofs();
+ read_obj = iter.get_location().get_raw_obj(store);
+ // never read past the end of the stripe that contains ofs
+ len = std::min(len, iter.get_stripe_size() - (ofs - stripe_ofs));
+ read_ofs = iter.location_ofs() + (ofs - stripe_ofs);
+ reading_from_head = (read_obj == state.head_obj);
+ } else {
+ read_obj = state.head_obj;
+ }
+
+ r = store->get_max_chunk_size(read_obj.pool, &max_chunk_size);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: failed to get max_chunk_size() for pool " << read_obj.pool << dendl;
+ return r;
+ }
+
+ if (len > max_chunk_size)
+ len = max_chunk_size;
+
+
+ read_len = len;
+
+ if (reading_from_head) {
+ /* only when reading from the head object do we need to do the atomic test */
+ r = store->append_atomic_test(&source->get_ctx(), source->get_bucket_info(), state.obj, op, &astate);
+ if (r < 0)
+ return r;
+
+ if (astate && astate->prefetch_data) {
+ // the whole request is covered by the prefetched data: no rados read needed
+ if (!ofs && astate->data.length() >= len) {
+ bl = astate->data;
+ return bl.length();
+ }
+
+ // partially covered: copy what is there and read only the remainder
+ if (ofs < astate->data.length()) {
+ unsigned copy_len = std::min((uint64_t)astate->data.length() - ofs, len);
+ astate->data.copy(ofs, copy_len, bl);
+ read_len -= copy_len;
+ read_ofs += copy_len;
+ if (!read_len)
+ return bl.length();
+
+ merge_bl = true;
+ pbl = &read_bl;
+ }
+ }
+ }
+
+ ldout(cct, 20) << "rados->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl;
+ op.read(read_ofs, read_len, pbl, NULL);
+
+ // switch to (and if needed open) the ioctx of the pool holding read_obj
+ if (state.cur_pool != read_obj.pool) {
+ auto iter = state.io_ctxs.find(read_obj.pool);
+ if (iter == state.io_ctxs.end()) {
+ state.cur_ioctx = &state.io_ctxs[read_obj.pool];
+ r = store->open_pool_ctx(read_obj.pool, *state.cur_ioctx, false);
+ if (r < 0) {
+ ldout(cct, 20) << "ERROR: failed to open pool context for pool=" << read_obj.pool << " r=" << r << dendl;
+ return r;
+ }
+ } else {
+ state.cur_ioctx = &iter->second;
+ }
+ state.cur_pool = read_obj.pool;
+ }
+
+ state.cur_ioctx->locator_set_key(read_obj.loc);
+
+ r = state.cur_ioctx->operate(read_obj.oid, &op, NULL);
+ ldout(cct, 20) << "rados->read r=" << r << " bl.length=" << bl.length() << dendl;
+
+ if (r < 0) {
+ return r;
+ }
+
+ if (merge_bl) {
+ bl.append(read_bl);
+ }
+
+ return bl.length();
+}
+
+/*
+ * Bookkeeping for one streamed object read: collects completed async reads
+ * and passes their data to the client callback in offset order.
+ */
+struct get_obj_data {
+  RGWRados* store;
+  RGWGetDataCB* client_cb;
+  rgw::Aio* aio;
+  uint64_t offset; // next offset to write to client
+  rgw::AioResultList completed; // completed read results, sorted by offset
+
+  get_obj_data(RGWRados* store, RGWGetDataCB* cb, rgw::Aio* aio, uint64_t offset)
+    : store(store), client_cb(cb), aio(aio), offset(offset) {}
+
+  /*
+   * Take over a batch of finished reads and deliver every buffer that
+   * continues the stream at `offset`; later ones stay queued in `completed`.
+   * Returns 0, or the first error found in the batch or reported by the client.
+   */
+  int flush(rgw::AioResultList&& results) {
+    const int batch_err = rgw::check_for_errors(results);
+    if (batch_err < 0) {
+      return batch_err;
+    }
+
+    auto by_id = [](const auto& a, const auto& b) { return a.id < b.id; };
+    results.sort(by_id); // merge() requires results to be sorted first
+    completed.merge(results, by_id); // merge results in sorted order
+
+    while (!completed.empty() && completed.front().id == offset) {
+      auto chunk = std::move(completed.front().data);
+      completed.pop_front_and_dispose(std::default_delete<rgw::AioResultEntry>{});
+
+      offset += chunk.length();
+      const int cb_ret = client_cb->handle_data(chunk, 0, chunk.length());
+      if (cb_ret < 0) {
+        return cb_ret;
+      }
+    }
+    return 0;
+  }
+
+  void cancel() {
+    // wait for all completions to drain and ignore the results
+    aio->drain();
+  }
+
+  /*
+   * Keep waiting for completions and flushing them until wait() comes back
+   * empty; on a flush error the remaining completions are drained and dropped.
+   */
+  int drain() {
+    for (;;) {
+      auto batch = aio->wait();
+      if (batch.empty()) {
+        return flush(std::move(batch));
+      }
+      const int r = flush(std::move(batch));
+      if (r < 0) {
+        cancel();
+        return r;
+      }
+    }
+  }
+};
+
+/*
+ * Plain-function trampoline for iterate_obj(): recovers the get_obj_data
+ * passed as arg and forwards the call to its store's get_obj_iterate_cb().
+ */
+static int _get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
+                               off_t read_ofs, off_t len, bool is_head_obj,
+                               RGWObjState *astate, void *arg)
+{
+  auto *data = static_cast<struct get_obj_data *>(arg);
+  RGWRados *store = data->store;
+
+  return store->get_obj_iterate_cb(read_obj, obj_ofs, read_ofs, len,
+                                   is_head_obj, astate, arg);
+}
+
+/*
+ * Per-chunk callback used while iterating over an object's data.
+ * read_obj: raw object holding the chunk
+ * obj_ofs: logical offset of the chunk within the whole object
+ * read_ofs: offset of the chunk within read_obj
+ * len: chunk length
+ * is_head_obj: the chunk lives in the head object
+ * astate: state of the object being read
+ * arg: the get_obj_data of the read in progress
+ * For the head object, bytes already present in astate->data are passed to
+ * the client directly; the rest is submitted as an async read and whatever
+ * completions submit() hands back are flushed.
+ * Returns: 0 on success, negative error code otherwise.
+ */
+int RGWRados::get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
+ off_t read_ofs, off_t len, bool is_head_obj,
+ RGWObjState *astate, void *arg)
+{
+ ObjectReadOperation op;
+ struct get_obj_data *d = (struct get_obj_data *)arg;
+ string oid, key;
+
+ if (is_head_obj) {
+ /* only when reading from the head object do we need to do the atomic test */
+ // NOTE(review): append_atomic_test() dereferences astate, so the null check below comes too late to protect it
+ int r = append_atomic_test(astate, op);
+ if (r < 0)
+ return r;
+
+ if (astate &&
+ obj_ofs < astate->data.length()) {
+ unsigned chunk_len = std::min((uint64_t)astate->data.length() - obj_ofs, (uint64_t)len);
+
+ r = d->client_cb->handle_data(astate->data, obj_ofs, chunk_len);
+ if (r < 0)
+ return r;
+
+ // account for the bytes served from astate->data; d->offset is the next offset expected by flush()
+ len -= chunk_len;
+ d->offset += chunk_len;
+ read_ofs += chunk_len;
+ obj_ofs += chunk_len;
+ if (!len)
+ return 0;
+ }
+ }
+
+ auto obj = d->store->svc.rados->obj(read_obj);
+ int r = obj.open();
+ if (r < 0) {
+ ldout(cct, 4) << "failed to open rados context for " << read_obj << dendl;
+ return r;
+ }
+
+ ldout(cct, 20) << "rados->get_obj_iterate_cb oid=" << read_obj.oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl;
+ op.read(read_ofs, len, nullptr, nullptr);
+
+ const uint64_t cost = len;
+ const uint64_t id = obj_ofs; // use logical object offset for sorting replies
+
+ auto completed = d->aio->submit(obj, &op, cost, id);
+
+ return d->flush(std::move(completed));
+}
+
+/*
+ * Stream the byte range [ofs, end] of the object to cb.
+ * Reads are issued in chunks of rgw_get_obj_max_req_size through an
+ * AioThrottle sized by rgw_get_obj_window_size.
+ * Returns: 0 on success, negative error code otherwise.
+ */
+int RGWRados::Object::Read::iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb)
+{
+  RGWRados *store = source->get_store();
+  CephContext *cct = store->ctx();
+  RGWObjectCtx& obj_ctx = source->get_ctx();
+  const uint64_t chunk_size = cct->_conf->rgw_get_obj_max_req_size;
+  const uint64_t window_size = cct->_conf->rgw_get_obj_window_size;
+
+  rgw::AioThrottle aio(window_size);
+  get_obj_data data(store, cb, &aio, ofs);
+
+  const int r = store->iterate_obj(obj_ctx, source->get_bucket_info(), state.obj,
+                                   ofs, end, chunk_size, _get_obj_iterate_cb, &data);
+  if (r >= 0) {
+    // everything was submitted; wait for the outstanding reads and deliver them
+    return data.drain();
+  }
+
+  ldout(cct, 0) << "iterate_obj() failed with " << r << dendl;
+  data.cancel(); // drain completions without writing back to client
+  return r;
+}
+
+/*
+ * Walk the byte range [ofs, end] of obj and invoke cb once per chunk.
+ * A chunk never crosses a manifest stripe and is at most max_chunk_size
+ * bytes; objects without a manifest are read from the head object only.
+ * cb: called with the raw object, the logical offset, the offset within the
+ *     raw object, the chunk length, whether the raw object is the head, the
+ *     object state and arg
+ * Returns: 0 on success, otherwise the first negative value returned by
+ * get_obj_state() or cb.
+ */
+int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx,
+ const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ off_t ofs, off_t end, uint64_t max_chunk_size,
+ iterate_obj_cb cb, void *arg)
+{
+ rgw_raw_obj head_obj;
+ rgw_raw_obj read_obj;
+ uint64_t read_ofs = ofs;
+ uint64_t len;
+ bool reading_from_head = true;
+ RGWObjState *astate = NULL;
+
+ obj_to_raw(bucket_info.placement_rule, obj, &head_obj);
+
+ int r = get_obj_state(&obj_ctx, bucket_info, obj, &astate, false);
+ if (r < 0) {
+ return r;
+ }
+
+ // len tracks the number of bytes still to be handed to cb
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ if (astate->has_manifest) {
+ /* now get the relevant object stripe */
+ RGWObjManifest::obj_iterator iter = astate->manifest.obj_find(ofs);
+
+ RGWObjManifest::obj_iterator obj_end = astate->manifest.obj_end();
+
+ for (; iter != obj_end && ofs <= end; ++iter) {
+ off_t stripe_ofs = iter.get_stripe_ofs();
+ off_t next_stripe_ofs = stripe_ofs + iter.get_stripe_size();
+
+ // a stripe larger than max_chunk_size is consumed in several callbacks
+ while (ofs < next_stripe_ofs && ofs <= end) {
+ read_obj = iter.get_location().get_raw_obj(this);
+ uint64_t read_len = std::min(len, iter.get_stripe_size() - (ofs - stripe_ofs));
+ read_ofs = iter.location_ofs() + (ofs - stripe_ofs);
+
+ if (read_len > max_chunk_size) {
+ read_len = max_chunk_size;
+ }
+
+ reading_from_head = (read_obj == head_obj);
+ r = cb(read_obj, ofs, read_ofs, read_len, reading_from_head, astate, arg);
+ if (r < 0) {
+ return r;
+ }
+
+ len -= read_len;
+ ofs += read_len;
+ }
+ }
+ } else {
+ // no manifest: logical offset and offset within the head object are the same
+ while (ofs <= end) {
+ read_obj = head_obj;
+ uint64_t read_len = std::min(len, max_chunk_size);
+
+ r = cb(read_obj, ofs, ofs, read_len, reading_from_head, astate, arg);
+ if (r < 0) {
+ return r;
+ }
+
+ len -= read_len;
+ ofs += read_len;
+ }
+ }
+
+ return 0;
+}
+
+ /*
+  * Run a write operation against the head object of obj.
+  *
+  * Returns the error from get_obj_head_ref() if the head reference cannot be
+  * resolved, otherwise the result of the rados operate call.
+  */
+ int RGWRados::obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, ObjectWriteOperation *op)
+ {
+   rgw_rados_ref head_ref;
+   const int rc = get_obj_head_ref(bucket_info, obj, &head_ref);
+   if (rc < 0) {
+     return rc;
+   }
+
+   return head_ref.ioctx.operate(head_ref.obj.oid, op);
+ }
+
+ /*
+  * Run a read operation against the head object of obj.
+  *
+  * The bufferlist passed to operate() is a local scratch buffer that is not
+  * returned; results reach the caller only through the outputs already
+  * registered on op.
+  *
+  * Returns the error from get_obj_head_ref() if the head reference cannot be
+  * resolved, otherwise the result of the rados operate call.
+  */
+ int RGWRados::obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, ObjectReadOperation *op)
+ {
+   rgw_rados_ref head_ref;
+   const int rc = get_obj_head_ref(bucket_info, obj, &head_ref);
+   if (rc < 0) {
+     return rc;
+   }
+
+   bufferlist scratch;
+   return head_ref.ioctx.operate(head_ref.obj.oid, op, &scratch);
+ }
+
+ /*
+  * Register a pending modification on the olh object and return its tag.
+  *
+  * One write op is built and applied to olh_obj:
+  *  - create the object exclusively if state says it does not exist,
+  *    otherwise assert that it exists and set its mtime from state;
+  *  - if it already carries an olh tag, guard on that tag; if not, generate
+  *    and set fresh obj/olh id tags and an empty olh version attr;
+  *  - add a pending xattr named RGW_ATTR_OLH_PENDING_PREFIX + *op_tag.
+  *
+  * On success state is updated to mirror what was written. Returns 0 or the
+  * negative error from obj_operate().
+  */
+ int RGWRados::olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag)
+ {
+   ObjectWriteOperation op;
+
+   ceph_assert(olh_obj.key.instance.empty());
+
+   const bool has_tag = (state.exists && has_olh_tag(state.attrset));
+
+   if (state.exists) {
+     op.assert_exists();
+     struct timespec mtime_ts = real_clock::to_timespec(state.mtime);
+     op.mtime2(&mtime_ts);
+   } else {
+     op.create(true);
+   }
+
+   /*
+    * 3 possible cases: olh object doesn't exist, it exists as an olh, it exists as a regular object.
+    * If it exists as a regular object we'll need to transform it into an olh. We'll do it in two
+    * steps, first change its tag and set the olh pending attrs. Once write is done we'll need to
+    * truncate it, remove extra attrs, and send it to the garbage collection. The bucket index olh
+    * log will reflect that.
+    *
+    * Need to generate separate olh and obj tags, as olh can be colocated with object data. obj_tag
+    * is used for object data instance, olh_tag for olh instance.
+    */
+   if (has_tag) {
+     /* guard against racing writes */
+     bucket_index_guard_olh_op(state, op);
+   } else {
+     /* obj tag */
+     const string obj_tag = gen_rand_alphanumeric_lower(cct, 32);
+
+     bufferlist obj_tag_bl;
+     obj_tag_bl.append(obj_tag.c_str(), obj_tag.size());
+     op.setxattr(RGW_ATTR_ID_TAG, obj_tag_bl);
+
+     state.attrset[RGW_ATTR_ID_TAG] = obj_tag_bl;
+     state.obj_tag = obj_tag_bl;
+
+     /* olh tag */
+     const string olh_tag = gen_rand_alphanumeric_lower(cct, 32);
+
+     bufferlist olh_tag_bl;
+     olh_tag_bl.append(olh_tag.c_str(), olh_tag.size());
+     op.setxattr(RGW_ATTR_OLH_ID_TAG, olh_tag_bl);
+
+     state.attrset[RGW_ATTR_OLH_ID_TAG] = olh_tag_bl;
+     state.olh_tag = olh_tag_bl;
+     state.is_olh = true;
+
+     /* the olh version attr starts out empty */
+     bufferlist empty_ver;
+     op.setxattr(RGW_ATTR_OLH_VER, empty_ver);
+   }
+
+   bufferlist pending_bl;
+   RGWOLHPendingInfo pending_info;
+   pending_info.time = real_clock::now();
+   encode(pending_info, pending_bl);
+
+ #define OLH_PENDING_TAG_LEN 32
+   /* tag will start with current time epoch, this so that entries are sorted by time */
+   char epoch_hex[32];
+   utime_t ut(pending_info.time);
+   snprintf(epoch_hex, sizeof(epoch_hex), "%016llx", (unsigned long long)ut.sec());
+   *op_tag = epoch_hex;
+
+   /* pad the tag with random characters up to OLH_PENDING_TAG_LEN */
+   op_tag->append(gen_rand_alphanumeric_lower(cct, OLH_PENDING_TAG_LEN - op_tag->size()));
+
+   string attr_name = RGW_ATTR_OLH_PENDING_PREFIX;
+   attr_name.append(*op_tag);
+
+   op.setxattr(attr_name.c_str(), pending_bl);
+
+   const int ret = obj_operate(bucket_info, olh_obj, &op);
+   if (ret < 0) {
+     return ret;
+   }
+
+   state.exists = true;
+   state.attrset[attr_name] = pending_bl;
+
+   return 0;
+ }
+
+ /*
+  * Wrapper around olh_init_modification_impl() that reports -EEXIST as
+  * -ECANCELED; every other result is passed through unchanged.
+  */
+ int RGWRados::olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj, string *op_tag)
+ {
+   const int ret = olh_init_modification_impl(bucket_info, state, obj, op_tag);
+   return (ret == -EEXIST) ? -ECANCELED : ret;
+ }
+
+ /*
+  * Run call against the bucket index shard for obj_instance, retrying while
+  * the bucket index reports -ERR_BUSY_RESHARDING.
+  *
+  * Each attempt (re)initialises bs, then invokes call(bs). Any result other
+  * than -ERR_BUSY_RESHARDING ends the loop. Otherwise the function blocks in
+  * block_while_resharding(); once that reports completion, the object's
+  * bucket id is switched to the new bucket id and the attempt counter is
+  * reset.
+  *
+  * Returns 0 on success, or the negative error of the last step that failed.
+  */
+ int RGWRados::guard_reshard(BucketShard *bs,
+ const rgw_obj& obj_instance,
+ const RGWBucketInfo& bucket_info,
+ std::function<int(BucketShard *)> call)
+ {
+ rgw_obj obj;
+ // pobj points at the caller's object until a reshard completes, then at the
+ // local copy that carries the new bucket id
+ const rgw_obj *pobj = &obj_instance;
+ int r;
+
+ for (int i = 0; i < NUM_RESHARD_RETRIES; ++i) {
+ r = bs->init(pobj->bucket, *pobj, nullptr /* no RGWBucketInfo */);
+ if (r < 0) {
+ ldout(cct, 5) << "bs.init() returned ret=" << r << dendl;
+ return r;
+ }
+ r = call(bs);
+ if (r != -ERR_BUSY_RESHARDING) {
+ break;
+ }
+ ldout(cct, 0) << "NOTICE: resharding operation on bucket index detected, blocking" << dendl;
+ string new_bucket_id;
+ r = block_while_resharding(bs, &new_bucket_id, bucket_info, null_yield);
+ if (r == -ERR_BUSY_RESHARDING) {
+ // still resharding: consumes one retry
+ continue;
+ }
+ if (r < 0) {
+ return r;
+ }
+ ldout(cct, 20) << "reshard completion identified, new_bucket_id=" << new_bucket_id << dendl;
+ i = 0; /* resharding is finished, make sure we can retry */
+
+ obj = *pobj;
+ obj.bucket.update_bucket_id(new_bucket_id);
+ pobj = &obj;
+ } // for loop
+
+ if (r < 0) {
+ return r;
+ }
+
+ return 0;
+ }
+
+ /*
+  * Wait until the bucket index shard bs is no longer marked as resharding and
+  * report the bucket id to use afterwards.
+  *
+  * Up to num_retries times the resharding entry of the shard is read:
+  *  - -ENOENT, or an entry that is no longer resharding: refresh the bucket
+  *    info and return its bucket id through new_bucket_id;
+  *  - any other read error is returned;
+  *  - still resharding: try to take the bucket reshard lock. If it can be
+  *    taken, the resharding flags are cleared and, when that succeeds, the
+  *    state is re-read immediately. Otherwise wait on reshard_wait before
+  *    the next attempt.
+  *
+  * Returns 0 with *new_bucket_id set, a negative error from one of the steps
+  * above, or -ERR_BUSY_RESHARDING when the retries are exhausted.
+  */
+ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
+                                      string *new_bucket_id,
+                                      const RGWBucketInfo& bucket_info,
+                                      optional_yield y)
+ {
+   int ret = 0;
+   cls_rgw_bucket_instance_entry entry;
+
+   // since we want to run this recovery code from two distinct places,
+   // let's just put it in a lambda so we can easily re-use; if the
+   // lambda successfully fetches a new bucket id, it sets
+   // new_bucket_id and returns 0, otherwise it returns a negative
+   // error code
+   //
+   // bucket_info is captured by reference: the lambda never leaves this
+   // function, and the only copy needed is the one it refreshes below
+   auto fetch_new_bucket_id =
+     [this, &bucket_info](const std::string& log_tag,
+                          std::string* new_bucket_id) -> int {
+       RGWBucketInfo fresh_bucket_info = bucket_info;
+       int ret = try_refresh_bucket_info(fresh_bucket_info, nullptr);
+       if (ret < 0) {
+         ldout(cct, 0) << __func__ <<
+           " ERROR: failed to refresh bucket info after reshard at " <<
+           log_tag << ": " << cpp_strerror(-ret) << dendl;
+         return ret;
+       }
+       *new_bucket_id = fresh_bucket_info.bucket.bucket_id;
+       return 0;
+     };
+
+   constexpr int num_retries = 10;
+   for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
+     ret = cls_rgw_get_bucket_resharding(bs->index_ctx, bs->bucket_obj, &entry);
+     if (ret == -ENOENT) {
+       return fetch_new_bucket_id("get_bucket_resharding_failed", new_bucket_id);
+     } else if (ret < 0) {
+       ldout(cct, 0) << __func__ <<
+         " ERROR: failed to get bucket resharding : " << cpp_strerror(-ret) <<
+         dendl;
+       return ret;
+     }
+
+     if (!entry.resharding_in_progress()) {
+       return fetch_new_bucket_id("get_bucket_resharding_succeeded",
+                                  new_bucket_id);
+     }
+
+     ldout(cct, 20) << "NOTICE: reshard still in progress; " <<
+       (i < num_retries ? "retrying" : "too many retries") << dendl;
+
+     if (i == num_retries) {
+       break;
+     }
+
+     // If bucket is erroneously marked as resharding (e.g., crash or
+     // other error) then fix it. If we can take the bucket reshard
+     // lock then it means no other resharding should be taking place,
+     // and we're free to clear the flags.
+     {
+       // since we expect to do this rarely, we'll do our work in a
+       // block and erase our work after each try
+
+       RGWObjectCtx obj_ctx(this);
+       const rgw_bucket& b = bs->bucket;
+       std::string bucket_id = b.get_key();
+       RGWBucketReshardLock reshard_lock(this, bucket_info, true);
+       ret = reshard_lock.lock();
+       if (ret < 0) {
+         ldout(cct, 20) << __func__ <<
+           " INFO: failed to take reshard lock for bucket " <<
+           bucket_id << "; expected if resharding underway" << dendl;
+       } else {
+         ldout(cct, 10) << __func__ <<
+           " INFO: was able to take reshard lock for bucket " <<
+           bucket_id << dendl;
+         ret = RGWBucketReshard::clear_resharding(this, bucket_info);
+         // the lock is released whether or not clearing succeeded
+         reshard_lock.unlock();
+         if (ret < 0) {
+           ldout(cct, 0) << __func__ <<
+             " ERROR: failed to clear resharding flags for bucket " <<
+             bucket_id << dendl;
+         } else {
+           ldout(cct, 5) << __func__ <<
+             " INFO: apparently successfully cleared resharding flags for "
+             "bucket " << bucket_id << dendl;
+           continue; // if we apparently succeed immediately test again
+         } // if clear resharding succeeded
+       } // if taking of lock succeeded
+     } // block to encapsulate recovery from incomplete reshard
+
+     ret = reshard_wait->wait(y);
+     if (ret < 0) {
+       ldout(cct, 0) << __func__ <<
+         " ERROR: bucket is still resharding, please retry" << dendl;
+       return ret;
+     }
+   } // for loop
+
+   ldout(cct, 0) << __func__ <<
+     " ERROR: bucket is still resharding, please retry" << dendl;
+   return -ERR_BUSY_RESHARDING;
+ }
+
+ /*
+  * Issue a link-olh call for obj_instance on its bucket index shard.
+  *
+  * The call is wrapped in guard_reshard() and guarded with
+  * cls_rgw_guard_bucket_resharding(), so it is retried while the index
+  * reports -ERR_BUSY_RESHARDING. The local zone id is added to a copy of the
+  * caller's zones trace before the call.
+  *
+  * After a successful link, a data log entry is added when log_data_change is
+  * set and bucket_info.datasync_flag_enabled() is true.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state, const rgw_obj& obj_instance,
+                                     bool delete_marker,
+                                     const string& op_tag,
+                                     struct rgw_bucket_dir_entry_meta *meta,
+                                     uint64_t olh_epoch,
+                                     real_time unmod_since, bool high_precision_time,
+                                     rgw_zone_set *_zones_trace, bool log_data_change)
+ {
+   rgw_rados_ref ref;
+   int r = get_obj_head_ref(bucket_info, obj_instance, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   rgw_zone_set zones_trace;
+   if (_zones_trace) {
+     zones_trace = *_zones_trace;
+   }
+   zones_trace.insert(svc.zone->get_zone().id);
+
+   BucketShard bs(this);
+   cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
+
+   auto link_on_shard = [&](BucketShard *pbs) -> int {
+     librados::ObjectWriteOperation op;
+     cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
+     return cls_rgw_bucket_link_olh(pbs->index_ctx, op,
+                                    pbs->bucket_obj, key, olh_state.olh_tag, delete_marker, op_tag, meta, olh_epoch,
+                                    unmod_since, high_precision_time,
+                                    svc.zone->get_zone().log_data, zones_trace);
+   };
+
+   r = guard_reshard(&bs, obj_instance, bucket_info, link_on_shard);
+   if (r < 0) {
+     ldout(cct, 20) << "cls_rgw_bucket_link_olh() returned r=" << r << dendl;
+     return r;
+   }
+
+   if (log_data_change && bucket_info.datasync_flag_enabled()) {
+     data_log->add_entry(bs.bucket, bs.shard_id);
+   }
+
+   return 0;
+ }
+
+ /*
+  * Add a guard to op that compares the object's RGW_ATTR_OLH_ID_TAG xattr for
+  * equality (CEPH_OSD_CMPXATTR_OP_EQ) with olh_state.olh_tag. The tag is
+  * also written to the debug log at level 20.
+  */
+ void RGWRados::bucket_index_guard_olh_op(RGWObjState& olh_state, ObjectOperation& op)
+ {
+ ldout(cct, 20) << __func__ << "(): olh_state.olh_tag=" << string(olh_state.olh_tag.c_str(), olh_state.olh_tag.length()) << dendl;
+ op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_state.olh_tag);
+ }
+
+ /*
+  * Issue an unlink-instance call for obj_instance on its bucket index shard.
+  *
+  * The call is wrapped in guard_reshard() and guarded with
+  * cls_rgw_guard_bucket_resharding(), so it is retried while the index
+  * reports -ERR_BUSY_RESHARDING. The local zone id is added to a copy of the
+  * caller's zones trace before the call.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance,
+                                            const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *_zones_trace)
+ {
+   rgw_rados_ref ref;
+   int r = get_obj_head_ref(bucket_info, obj_instance, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   rgw_zone_set zones_trace;
+   if (_zones_trace) {
+     zones_trace = *_zones_trace;
+   }
+   zones_trace.insert(svc.zone->get_zone().id);
+
+   BucketShard bs(this);
+
+   cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
+   r = guard_reshard(&bs, obj_instance, bucket_info,
+                     [&](BucketShard *pbs) -> int {
+                       librados::ObjectWriteOperation op;
+                       cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
+                       return cls_rgw_bucket_unlink_instance(pbs->index_ctx, op, pbs->bucket_obj, key, op_tag,
+                                                             olh_tag, olh_epoch, svc.zone->get_zone().log_data, zones_trace);
+                     });
+   if (r < 0) {
+     // name the call that actually failed (the message used to say link_olh)
+     ldout(cct, 20) << "cls_rgw_bucket_unlink_instance() returned r=" << r << dendl;
+     return r;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Read olh log entries for obj_instance from its bucket index shard.
+  *
+  * Entries are requested starting from ver_marker for the olh tag held in
+  * state; they are returned through log, and is_truncated is filled in by
+  * cls_rgw_get_olh_log(). The read is wrapped in guard_reshard() so it is
+  * retried while the index reports -ERR_BUSY_RESHARDING.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state,
+                                         const rgw_obj& obj_instance, uint64_t ver_marker,
+                                         map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log,
+                                         bool *is_truncated)
+ {
+   rgw_rados_ref ref;
+   int r = get_obj_head_ref(bucket_info, obj_instance, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   BucketShard bs(this);
+   int ret =
+     bs.init(obj_instance.bucket, obj_instance, nullptr /* no RGWBucketInfo */);
+   if (ret < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << ret << dendl;
+     return ret;
+   }
+
+   string olh_tag(state.olh_tag.c_str(), state.olh_tag.length());
+
+   // the olh log is keyed by the object name with an empty instance
+   cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), string());
+
+   ret = guard_reshard(&bs, obj_instance, bucket_info,
+                       [&](BucketShard *pbs) -> int {
+                         ObjectReadOperation op;
+                         cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
+                         return cls_rgw_get_olh_log(pbs->index_ctx, pbs->bucket_obj, op,
+                                                    key, ver_marker, olh_tag, log, is_truncated);
+                       });
+   if (ret < 0) {
+     // report the failing call's result; r is always 0 by this point
+     ldout(cct, 20) << "cls_rgw_get_olh_log() returned r=" << ret << dendl;
+     return ret;
+   }
+
+   return 0;
+ }
+
+ // a multisite sync bug resulted in the OLH head attributes being overwritten by
+ // the attributes from another zone, causing link_olh() to fail endlessly due to
+ // olh_tag mismatch. this attempts to detect this case and reconstruct the OLH
+ // attributes from the bucket index. see http://tracker.ceph.com/issues/37792
+ //
+ // returns 0 when the tags already match or the attributes were rewritten,
+ // otherwise the negative error of the step that failed.
+ int RGWRados::repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
+                          const rgw_obj& obj)
+ {
+   // the bucket index holds the olh entry we rebuild from
+   rgw_bucket_olh_entry index_olh;
+   int rc = bi_get_olh(bucket_info, obj, &index_olh);
+   if (rc < 0) {
+     ldout(cct, 0) << "repair_olh failed to read olh entry for " << obj << dendl;
+     return rc;
+   }
+   if (index_olh.tag == rgw_bl_str(state->olh_tag)) { // mismatch already resolved?
+     return 0;
+   }
+
+   ldout(cct, 4) << "repair_olh setting olh_tag=" << index_olh.tag
+       << " key=" << index_olh.key << " delete_marker=" << index_olh.delete_marker << dendl;
+
+   // rewrite OLH_ID_TAG and OLH_INFO from current olh
+   ObjectWriteOperation op;
+   // assert this is the same olh tag we think we're fixing
+   bucket_index_guard_olh_op(*state, op);
+   // preserve existing mtime
+   struct timespec mtime_ts = ceph::real_clock::to_timespec(state->mtime);
+   op.mtime2(&mtime_ts);
+
+   bufferlist tag_bl;
+   tag_bl.append(index_olh.tag.c_str(), index_olh.tag.size());
+   op.setxattr(RGW_ATTR_OLH_ID_TAG, tag_bl);
+
+   RGWOLHInfo info;
+   info.target = rgw_obj(bucket_info.bucket, index_olh.key);
+   info.removed = index_olh.delete_marker;
+   bufferlist info_bl;
+   encode(info, info_bl);
+   op.setxattr(RGW_ATTR_OLH_INFO, info_bl);
+
+   rgw_rados_ref ref;
+   rc = get_obj_head_ref(bucket_info, obj, &ref);
+   if (rc < 0) {
+     return rc;
+   }
+   rc = ref.ioctx.operate(ref.obj.oid, &op);
+   if (rc < 0) {
+     ldout(cct, 0) << "repair_olh failed to write olh attributes with "
+         << cpp_strerror(rc) << dendl;
+     return rc;
+   }
+   return 0;
+ }
+
+ /*
+  * Issue a trim of the olh log for obj_instance on its bucket index shard,
+  * passing version ver and the olh tag held in state.
+  *
+  * The trim is wrapped in guard_reshard() so it is retried while the index
+  * reports -ERR_BUSY_RESHARDING.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver)
+ {
+   rgw_rados_ref ref;
+   const int r = get_obj_head_ref(bucket_info, obj_instance, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   BucketShard bs(this);
+   int ret =
+     bs.init(obj_instance.bucket, obj_instance, nullptr /* no RGWBucketInfo */);
+   if (ret < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << ret << dendl;
+     return ret;
+   }
+
+   string olh_tag(state.olh_tag.c_str(), state.olh_tag.length());
+
+   // the olh log is keyed by the object name with an empty instance
+   cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), string());
+
+   auto trim_on_shard = [&](BucketShard *pbs) -> int {
+     ObjectWriteOperation op;
+     cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
+     cls_rgw_trim_olh_log(op, key, ver, olh_tag);
+     return pbs->index_ctx.operate(pbs->bucket_obj, &op);
+   };
+
+   ret = guard_reshard(&bs, obj_instance, bucket_info, trim_on_shard);
+   if (ret < 0) {
+     ldout(cct, 20) << "cls_rgw_trim_olh_log() returned r=" << ret << dendl;
+     return ret;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Issue cls_rgw_clear_olh() for obj_instance on its bucket index shard,
+  * passing the olh tag held in state.
+  *
+  * The call is wrapped in guard_reshard() so it is retried while the index
+  * reports -ERR_BUSY_RESHARDING.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance)
+ {
+   rgw_rados_ref ref;
+   const int r = get_obj_head_ref(bucket_info, obj_instance, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   BucketShard bs(this);
+
+   string olh_tag(state.olh_tag.c_str(), state.olh_tag.length());
+
+   // the olh entry is keyed by the object name with an empty instance
+   cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), string());
+
+   auto clear_on_shard = [&](BucketShard *pbs) -> int {
+     ObjectWriteOperation op;
+     cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
+     return cls_rgw_clear_olh(pbs->index_ctx, op, pbs->bucket_obj, key, olh_tag);
+   };
+
+   const int ret = guard_reshard(&bs, obj_instance, bucket_info, clear_on_shard);
+   if (ret < 0) {
+     ldout(cct, 5) << "cls_rgw_clear_olh() returned ret=" << ret << dendl;
+     return ret;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Decode an RGWOLHInfo from bl into *olh.
+  *
+  * Returns 0 on success, or -EIO (after logging) if decoding throws
+  * buffer::error.
+  */
+ static int decode_olh_info(CephContext* cct, const bufferlist& bl, RGWOLHInfo *olh)
+ {
+   try {
+     auto pos = bl.cbegin();
+     decode(*olh, pos);
+   } catch (buffer::error& err) {
+     ldout(cct, 0) << "ERROR: failed to decode olh info" << dendl;
+     return -EIO;
+   }
+   return 0;
+ }
+
+ /*
+  * Apply a batch of olh log entries (keyed by epoch) to the olh object obj.
+  *
+  * Steps, in order:
+  *  1. build a write op guarded on the olh id tag and on RGW_ATTR_OLH_VER,
+  *     which sets RGW_ATTR_OLH_VER to the highest epoch in the log and keeps
+  *     the mtime held in state;
+  *  2. walk the log to decide whether the olh must be (re)linked or removed,
+  *     collect instances to delete, and queue removal of each entry's
+  *     pending xattr;
+  *  3. delete the collected object instances;
+  *  4. apply the write op to the olh object (-ECANCELED is tolerated);
+  *  5. trim the bucket index olh log up to the highest epoch;
+  *  6. if the final decision is "remove", remove the olh object and clear the
+  *     olh entries in the bucket index.
+  *
+  * *plast_ver receives the highest epoch in log. Returns 0 on success (or when
+  * log is empty), a negative error code otherwise.
+  */
+ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ bufferlist& olh_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
+ uint64_t *plast_ver, rgw_zone_set* zones_trace)
+ {
+ if (log.empty()) {
+ return 0;
+ }
+
+ librados::ObjectWriteOperation op;
+
+ // the map is ordered by epoch, so the last key is the newest one
+ uint64_t last_ver = log.rbegin()->first;
+ *plast_ver = last_ver;
+
+ map<uint64_t, vector<rgw_bucket_olh_log_entry> >::iterator iter = log.begin();
+
+ // guards: the olh tag must match, and RGW_ATTR_OLH_VER is compared against
+ // last_ver with CEPH_OSD_CMPXATTR_OP_GTE
+ op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_tag);
+ op.cmpxattr(RGW_ATTR_OLH_VER, CEPH_OSD_CMPXATTR_OP_GTE, last_ver);
+
+ // the version is stored as its decimal string
+ bufferlist ver_bl;
+ string last_ver_s = to_string(last_ver);
+ ver_bl.append(last_ver_s.c_str(), last_ver_s.size());
+ op.setxattr(RGW_ATTR_OLH_VER, ver_bl);
+
+ struct timespec mtime_ts = real_clock::to_timespec(state.mtime);
+ op.mtime2(&mtime_ts);
+
+ bool need_to_link = false;
+ uint64_t link_epoch = 0;
+ cls_rgw_obj_key key;
+ bool delete_marker = false;
+ list<cls_rgw_obj_key> remove_instances;
+ bool need_to_remove = false;
+
+ // decode current epoch and instance
+ auto olh_ver = state.attrset.find(RGW_ATTR_OLH_VER);
+ if (olh_ver != state.attrset.end()) {
+ std::string str = olh_ver->second.to_str();
+ std::string err;
+ // NOTE(review): err is not inspected after the parse - confirm that
+ // ignoring a parse failure here is intended
+ link_epoch = strict_strtoll(str.c_str(), 10, &err);
+ }
+ auto olh_info = state.attrset.find(RGW_ATTR_OLH_INFO);
+ if (olh_info != state.attrset.end()) {
+ RGWOLHInfo info;
+ int r = decode_olh_info(cct, olh_info->second, &info);
+ if (r < 0) {
+ return r;
+ }
+ info.target.key.get_index_key(&key);
+ delete_marker = info.removed;
+ }
+
+ // replay the log in epoch order; the last link/unlink decision wins
+ for (iter = log.begin(); iter != log.end(); ++iter) {
+ vector<rgw_bucket_olh_log_entry>::iterator viter = iter->second.begin();
+ for (; viter != iter->second.end(); ++viter) {
+ rgw_bucket_olh_log_entry& entry = *viter;
+
+ ldout(cct, 20) << "olh_log_entry: epoch=" << iter->first << " op=" << (int)entry.op
+ << " key=" << entry.key.name << "[" << entry.key.instance << "] "
+ << (entry.delete_marker ? "(delete)" : "") << dendl;
+ switch (entry.op) {
+ case CLS_RGW_OLH_OP_REMOVE_INSTANCE:
+ remove_instances.push_back(entry.key);
+ break;
+ case CLS_RGW_OLH_OP_LINK_OLH:
+ // only overwrite a link of the same epoch if its key sorts before
+ if (link_epoch < iter->first || key.instance.empty() ||
+ key.instance > entry.key.instance) {
+ ldout(cct, 20) << "apply_olh_log applying key=" << entry.key << " epoch=" << iter->first << " delete_marker=" << entry.delete_marker
+ << " over current=" << key << " epoch=" << link_epoch << " delete_marker=" << delete_marker << dendl;
+ need_to_link = true;
+ need_to_remove = false;
+ key = entry.key;
+ delete_marker = entry.delete_marker;
+ } else {
+ ldout(cct, 20) << "apply_olh skipping key=" << entry.key<< " epoch=" << iter->first << " delete_marker=" << entry.delete_marker
+ << " before current=" << key << " epoch=" << link_epoch << " delete_marker=" << delete_marker << dendl;
+ }
+ break;
+ case CLS_RGW_OLH_OP_UNLINK_OLH:
+ need_to_remove = true;
+ need_to_link = false;
+ break;
+ default:
+ ldout(cct, 0) << "ERROR: apply_olh_log: invalid op: " << (int)entry.op << dendl;
+ return -EIO;
+ }
+ // every processed entry has its pending xattr removed by the same op
+ string attr_name = RGW_ATTR_OLH_PENDING_PREFIX;
+ attr_name.append(entry.op_tag);
+ op.rmxattr(attr_name.c_str());
+ }
+ }
+
+ rgw_rados_ref ref;
+ int r = get_obj_head_ref(bucket_info, obj, &ref);
+ if (r < 0) {
+ return r;
+ }
+
+ const rgw_bucket& bucket = obj.bucket;
+
+ if (need_to_link) {
+ rgw_obj target(bucket, key);
+ RGWOLHInfo info;
+ info.target = target;
+ info.removed = delete_marker;
+ bufferlist bl;
+ encode(info, bl);
+ op.setxattr(RGW_ATTR_OLH_INFO, bl);
+ }
+
+ /* first remove object instances */
+ for (list<cls_rgw_obj_key>::iterator liter = remove_instances.begin();
+ liter != remove_instances.end(); ++liter) {
+ cls_rgw_obj_key& key = *liter;
+ rgw_obj obj_instance(bucket, key);
+ int ret = delete_obj(obj_ctx, bucket_info, obj_instance, 0, RGW_BILOG_FLAG_VERSIONED_OP, ceph::real_time(), zones_trace);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(cct, 0) << "ERROR: delete_obj() returned " << ret << " obj_instance=" << obj_instance << dendl;
+ return ret;
+ }
+ }
+
+ /* update olh object */
+ r = ref.ioctx.operate(ref.obj.oid, &op);
+ // a failed guard (-ECANCELED) is not treated as an error
+ if (r == -ECANCELED) {
+ r = 0;
+ }
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: could not apply olh update, r=" << r << dendl;
+ return r;
+ }
+
+ r = bucket_index_trim_olh_log(bucket_info, state, obj, last_ver);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: could not trim olh log, r=" << r << dendl;
+ return r;
+ }
+
+ if (need_to_remove) {
+ ObjectWriteOperation rm_op;
+
+ rm_op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_tag);
+ rm_op.cmpxattr(RGW_ATTR_OLH_VER, CEPH_OSD_CMPXATTR_OP_EQ, last_ver);
+ cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true); /* fail if found one of these, pending modification */
+ rm_op.remove();
+
+ r = ref.ioctx.operate(ref.obj.oid, &rm_op);
+ if (r == -ECANCELED) {
+ return 0; /* someone else won this race */
+ } else {
+ /*
+ * only clear if was successful, otherwise we might clobber pending operations on this object
+ */
+ // NOTE(review): this branch runs for every result other than -ECANCELED,
+ // including other negative errors from the remove - confirm that matches
+ // the "only clear if was successful" intent stated above
+ r = bucket_index_clear_olh(bucket_info, state, obj);
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: could not clear bucket index olh entries r=" << r << dendl;
+ return r;
+ }
+ }
+ }
+
+ return 0;
+ }
+
+/*
+ * read olh log and apply it
+ */
+int RGWRados::update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace)
+{
+ map<uint64_t, vector<rgw_bucket_olh_log_entry> > log;
+ bool is_truncated;
+ uint64_t ver_marker = 0;
+
+ do {
+ int ret = bucket_index_read_olh_log(bucket_info, *state, obj, ver_marker, &log, &is_truncated);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = apply_olh_log(obj_ctx, *state, bucket_info, obj, state->olh_tag, log, &ver_marker, zones_trace);
+ if (ret < 0) {
+ return ret;
+ }
+ } while (is_truncated);
+
+ return 0;
+}
+
+ /*
+  * Link target_obj through its olh object (target_obj with the instance
+  * cleared).
+  *
+  * Up to MAX_ECANCELED_RETRY attempts are made to:
+  *  1. load the olh object state (without following the olh),
+  *  2. register a pending modification via olh_init_modification(),
+  *  3. link the target in the bucket index.
+  * An attempt that ends with -ECANCELED is retried after invalidating the
+  * cached olh state; for a cancelled index link, repair_olh() is tried first.
+  * Running out of attempts yields -EIO.
+  *
+  * Once linked, the olh log is applied with update_olh(); -ECANCELED from that
+  * step is treated as success.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
+ uint64_t olh_epoch, real_time unmod_since, bool high_precision_time,
+ rgw_zone_set *zones_trace, bool log_data_change)
+ {
+ string op_tag;
+
+ // the olh object shares the target's key but has no instance
+ rgw_obj olh_obj = target_obj;
+ olh_obj.key.instance.clear();
+
+ RGWObjState *state = NULL;
+
+ int ret = 0;
+ int i;
+
+ #define MAX_ECANCELED_RETRY 100
+ for (i = 0; i < MAX_ECANCELED_RETRY; i++) {
+ // ret still holds the previous attempt's result here
+ if (ret == -ECANCELED) {
+ obj_ctx.invalidate(olh_obj);
+ }
+
+ ret = get_obj_state(&obj_ctx, bucket_info, olh_obj, &state, false); /* don't follow olh */
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = olh_init_modification(bucket_info, *state, olh_obj, &op_tag);
+ if (ret < 0) {
+ ldout(cct, 20) << "olh_init_modification() target_obj=" << target_obj << " delete_marker=" << (int)delete_marker << " returned " << ret << dendl;
+ if (ret == -ECANCELED) {
+ continue;
+ }
+ return ret;
+ }
+ ret = bucket_index_link_olh(bucket_info, *state, target_obj, delete_marker,
+ op_tag, meta, olh_epoch, unmod_since, high_precision_time,
+ zones_trace, log_data_change);
+ if (ret < 0) {
+ ldout(cct, 20) << "bucket_index_link_olh() target_obj=" << target_obj << " delete_marker=" << (int)delete_marker << " returned " << ret << dendl;
+ if (ret == -ECANCELED) {
+ // the bucket index rejected the link_olh() due to olh tag mismatch;
+ // attempt to reconstruct olh head attributes based on the bucket index
+ int r2 = repair_olh(state, bucket_info, olh_obj);
+ if (r2 < 0 && r2 != -ECANCELED) {
+ return r2;
+ }
+ continue;
+ }
+ return ret;
+ }
+ break;
+ }
+
+ // the loop only reaches the limit when every attempt was cancelled
+ if (i == MAX_ECANCELED_RETRY) {
+ ldout(cct, 0) << "ERROR: exceeded max ECANCELED retries, aborting (EIO)" << dendl;
+ return -EIO;
+ }
+
+ // NOTE(review): zones_trace is not passed to update_olh() here, while
+ // unlink_obj_instance() does pass it - confirm this is intended
+ ret = update_olh(obj_ctx, state, bucket_info, olh_obj);
+ if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */
+ ret = 0;
+ }
+ if (ret < 0) {
+ ldout(cct, 20) << "update_olh() target_obj=" << target_obj << " returned " << ret << dendl;
+ return ret;
+ }
+
+ return 0;
+ }
+
+ /*
+  * Unlink the instance target_obj from its olh object (target_obj with the
+  * instance cleared).
+  *
+  * Up to MAX_ECANCELED_RETRY attempts are made to load the olh state,
+  * register a pending modification and unlink the instance in the bucket
+  * index; an attempt that ends with -ECANCELED is retried after invalidating
+  * the cached olh state. Running out of attempts yields -EIO.
+  *
+  * Afterwards the olh log is applied with update_olh(); -ECANCELED from that
+  * step is treated as success.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
+                                   uint64_t olh_epoch, rgw_zone_set *zones_trace)
+ {
+   string op_tag;
+
+   rgw_obj olh_obj = target_obj;
+   olh_obj.key.instance.clear();
+
+   RGWObjState *state = NULL;
+
+   int ret = 0;
+   int attempt;
+
+   for (attempt = 0; attempt < MAX_ECANCELED_RETRY; attempt++) {
+     /* a cancelled previous attempt means the cached state is stale */
+     if (ret == -ECANCELED) {
+       obj_ctx.invalidate(olh_obj);
+     }
+
+     ret = get_obj_state(&obj_ctx, bucket_info, olh_obj, &state, false); /* don't follow olh */
+     if (ret < 0) {
+       return ret;
+     }
+
+     ret = olh_init_modification(bucket_info, *state, olh_obj, &op_tag);
+     if (ret < 0) {
+       ldout(cct, 20) << "olh_init_modification() target_obj=" << target_obj << " returned " << ret << dendl;
+       if (ret != -ECANCELED) {
+         return ret;
+       }
+       continue;
+     }
+
+     string olh_tag(state->olh_tag.c_str(), state->olh_tag.length());
+
+     ret = bucket_index_unlink_instance(bucket_info, target_obj, op_tag, olh_tag, olh_epoch, zones_trace);
+     if (ret < 0) {
+       ldout(cct, 20) << "bucket_index_unlink_instance() target_obj=" << target_obj << " returned " << ret << dendl;
+       if (ret != -ECANCELED) {
+         return ret;
+       }
+       continue;
+     }
+     break;
+   }
+
+   if (attempt == MAX_ECANCELED_RETRY) {
+     ldout(cct, 0) << "ERROR: exceeded max ECANCELED retries, aborting (EIO)" << dendl;
+     return -EIO;
+   }
+
+   ret = update_olh(obj_ctx, state, bucket_info, olh_obj, zones_trace);
+   /* -ECANCELED: already did what we needed, no need to retry, raced with another user */
+   if (ret < 0 && ret != -ECANCELED) {
+     ldout(cct, 20) << "update_olh() target_obj=" << target_obj << " returned " << ret << dendl;
+     return ret;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Set a freshly generated random instance name on target_key.
+  */
+ void RGWRados::gen_rand_obj_instance_name(rgw_obj_key *target_key)
+ {
+ #define OBJ_INSTANCE_LEN 32
+   char instance_name[OBJ_INSTANCE_LEN + 1];
+
+   /* don't want it to get url escaped, no underscore for instance name due
+      to the way we encode the raw keys */
+   gen_rand_alphanumeric_no_underscore(cct, instance_name, OBJ_INSTANCE_LEN);
+
+   target_key->set_instance(instance_name);
+ }
+
+ /*
+  * Convenience overload: set a random instance name on target_obj's key.
+  */
+ void RGWRados::gen_rand_obj_instance_name(rgw_obj *target_obj)
+ {
+   rgw_obj_key *target_key = &target_obj->key;
+   gen_rand_obj_instance_name(target_key);
+ }
+
+ /*
+  * Read the xattrs of obj's head object and decode its RGW_ATTR_OLH_INFO
+  * attribute into *olh.
+  *
+  * Returns 0 on success, the negative error from obj_operate(), -EINVAL if
+  * the attribute is missing (not an olh), or the decode_olh_info() result.
+  */
+ int RGWRados::get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh)
+ {
+   map<string, bufferlist> xattrs;
+
+   ObjectReadOperation op;
+   op.getxattrs(&xattrs, nullptr);
+
+   const int rc = obj_operate(bucket_info, obj, &op);
+   if (rc < 0) {
+     return rc;
+   }
+
+   const auto info_attr = xattrs.find(RGW_ATTR_OLH_INFO);
+   if (info_attr != xattrs.end()) {
+     return decode_olh_info(cct, info_attr->second, olh);
+   }
+
+   /* not an olh */
+   return -EINVAL;
+ }
+
+ /*
+  * Move expired pending olh entries from pending_entries to
+  * *rm_pending_entries.
+  *
+  * Entries are visited in key order. An entry counts as expired when its
+  * recorded time is at least rgw_olh_pending_timeout_sec in the past. The
+  * scan stops at the first entry that has not expired; entries that fail to
+  * decode are logged and left in place.
+  */
+ void RGWRados::check_pending_olh_entries(map<string, bufferlist>& pending_entries,
+                                          map<string, bufferlist> *rm_pending_entries)
+ {
+   const real_time now = real_clock::now();
+
+   for (auto entry = pending_entries.begin(); entry != pending_entries.end(); ) {
+     RGWOLHPendingInfo pending_info;
+     try {
+       auto pos = entry->second.cbegin();
+       decode(pending_info, pos);
+     } catch (buffer::error& err) {
+       /* skipping bad entry, we could remove it but it might hide a bug */
+       ldout(cct, 0) << "ERROR: failed to decode pending entry " << entry->first << dendl;
+       ++entry;
+       continue;
+     }
+
+     if (now - pending_info.time < make_timespan(cct->_conf->rgw_olh_pending_timeout_sec)) {
+       /* entries names are sorted by time (rounded to a second) */
+       break;
+     }
+
+     (*rm_pending_entries)[entry->first] = entry->second;
+     entry = pending_entries.erase(entry);
+   }
+ }
+
+ /*
+  * Remove the xattrs named by the keys of pending_attrs from the olh object.
+  *
+  * Removals are sent in batches of at most 1000 per write op, each guarded on
+  * the olh tag held in state. -ENOENT or -ECANCELED from a batch ends the
+  * work early and is reported as success.
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs)
+ {
+   rgw_rados_ref ref;
+   int r = get_obj_head_ref(bucket_info, olh_obj, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   // trim no more than 1000 entries per osd op
+   constexpr int max_entries = 1000;
+
+   auto next = pending_attrs.begin();
+   const auto last = pending_attrs.end();
+   while (next != last) {
+     ObjectWriteOperation op;
+     bucket_index_guard_olh_op(state, op);
+
+     int batched = 0;
+     while (batched < max_entries && next != last) {
+       op.rmxattr(next->first.c_str());
+       ++batched;
+       ++next;
+     }
+
+     r = ref.ioctx.operate(ref.obj.oid, &op);
+     if (r == -ENOENT || r == -ECANCELED) {
+       /* raced with some other change, shouldn't sweat about it */
+       return 0;
+     }
+     if (r < 0) {
+       ldout(cct, 0) << "ERROR: could not apply olh update, r=" << r << dendl;
+       return r;
+     }
+   }
+   return 0;
+ }
+
+ /*
+  * Resolve the olh object to the object it currently points to.
+  *
+  * Pending olh xattrs found in state are examined first: expired ones are
+  * removed from the olh object, and if any remain, update_olh() is run. The
+  * RGW_ATTR_OLH_INFO attribute in state is then decoded and its target
+  * stored in *target.
+  *
+  * Returns 0 on success, -EINVAL if the olh info attribute is missing,
+  * -ENOENT if the olh is marked removed, or another negative error code.
+  */
+ int RGWRados::follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& obj_ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target)
+ {
+   map<string, bufferlist> pending;
+   rgw_filter_attrset(state->attrset, RGW_ATTR_OLH_PENDING_PREFIX, &pending);
+
+   /* split off the entries that have timed out */
+   map<string, bufferlist> expired;
+   check_pending_olh_entries(pending, &expired);
+
+   if (!expired.empty()) {
+     const int rm_ret = remove_olh_pending_entries(bucket_info, *state, olh_obj, expired);
+     if (rm_ret < 0) {
+       ldout(cct, 20) << "ERROR: rm_pending_entries returned ret=" << rm_ret << dendl;
+       return rm_ret;
+     }
+   }
+
+   if (!pending.empty()) {
+     ldout(cct, 20) << __func__ << "(): found pending entries, need to update_olh() on bucket=" << olh_obj.bucket << dendl;
+
+     const int update_ret = update_olh(obj_ctx, state, bucket_info, olh_obj);
+     if (update_ret < 0) {
+       return update_ret;
+     }
+   }
+
+   const auto info_attr = state->attrset.find(RGW_ATTR_OLH_INFO);
+   if (info_attr == state->attrset.end()) {
+     return -EINVAL;
+   }
+
+   RGWOLHInfo olh;
+   const int ret = decode_olh_info(cct, info_attr->second, &olh);
+   if (ret < 0) {
+     return ret;
+   }
+
+   if (olh.removed) {
+     return -ENOENT;
+   }
+
+   *target = olh.target;
+   return 0;
+ }
+
+ /*
+  * Stat a raw object with a single read operation.
+  *
+  * Each output pointer is optional; only the requested pieces are added to
+  * the op:
+  *  - attrs: xattrs, filtered by RGW_ATTR_PREFIX
+  *  - psize / pmtime: object size and modification time
+  *  - first_chunk: up to rgw_max_chunk_size bytes from offset 0
+  *  - objv_tracker: lets the tracker prepare the op for a versioned read
+  *  - epoch: the ioctx's last version, stored even when the read fails
+  *
+  * Returns 0 on success or a negative error code.
+  */
+ int RGWRados::raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+                            map<string, bufferlist> *attrs, bufferlist *first_chunk,
+                            RGWObjVersionTracker *objv_tracker)
+ {
+   rgw_rados_ref ref;
+   int r = get_raw_obj_ref(obj, &ref);
+   if (r < 0) {
+     return r;
+   }
+
+   map<string, bufferlist> raw_attrs;
+   uint64_t size = 0;
+   struct timespec mtime_ts;
+
+   const bool want_stat = (psize || pmtime);
+
+   ObjectReadOperation op;
+   if (objv_tracker) {
+     objv_tracker->prepare_op_for_read(&op);
+   }
+   if (attrs) {
+     op.getxattrs(&raw_attrs, nullptr);
+   }
+   if (want_stat) {
+     op.stat2(&size, &mtime_ts, nullptr);
+   }
+   if (first_chunk) {
+     op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr);
+   }
+
+   bufferlist outbl;
+   r = ref.ioctx.operate(ref.obj.oid, &op, &outbl);
+
+   /* the version is reported regardless of the outcome of the read */
+   if (epoch) {
+     *epoch = ref.ioctx.get_last_version();
+   }
+
+   if (r < 0) {
+     return r;
+   }
+
+   if (psize) {
+     *psize = size;
+   }
+   if (pmtime) {
+     *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+   }
+   if (attrs) {
+     rgw_filter_attrset(raw_attrs, RGW_ATTR_PREFIX, attrs);
+   }
+
+   return 0;
+ }
+
+ /*
+  * Gather bucket stats and version/marker strings from the bucket index
+  * header(s) returned by cls_bucket_head() for shard_id.
+  *
+  * For every header the raw stats are accumulated into stats, and the header's
+  * ver and master_ver are recorded per shard; the combined strings are
+  * written to *bucket_ver and *master_ver. When shard_id >= 0, *max_marker
+  * receives the header's max_marker directly; otherwise it receives the
+  * combined per-shard marker string. *syncstopped, if given, ends up holding
+  * the value from the last header visited.
+  *
+  * Returns 0 on success or the negative error from cls_bucket_head().
+  */
+ int RGWRados::get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
+                                map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool *syncstopped)
+ {
+   vector<rgw_bucket_dir_header> headers;
+   map<int, string> bucket_instance_ids;
+   int r = cls_bucket_head(bucket_info, shard_id, headers, &bucket_instance_ids);
+   if (r < 0) {
+     return r;
+   }
+
+   ceph_assert(headers.size() == bucket_instance_ids.size());
+
+   auto iter = headers.begin();
+   map<int, string>::iterator viter = bucket_instance_ids.begin();
+   BucketIndexShardsManager ver_mgr;
+   BucketIndexShardsManager master_ver_mgr;
+   BucketIndexShardsManager marker_mgr;
+   for(; iter != headers.end(); ++iter, ++viter) {
+     accumulate_raw_stats(*iter, stats);
+     // format at full 64-bit width; the previous "%lu" + (unsigned long) cast
+     // truncated the value on ABIs where long is 32 bits
+     ver_mgr.add(viter->first,
+                 std::to_string(static_cast<unsigned long long>(iter->ver)));
+     master_ver_mgr.add(viter->first,
+                        std::to_string(static_cast<unsigned long long>(iter->master_ver)));
+     if (shard_id >= 0) {
+       *max_marker = iter->max_marker;
+     } else {
+       marker_mgr.add(viter->first, iter->max_marker);
+     }
+     if (syncstopped != NULL)
+       *syncstopped = iter->syncstopped;
+   }
+   ver_mgr.to_string(bucket_ver);
+   master_ver_mgr.to_string(master_ver);
+   if (shard_id < 0) {
+     marker_mgr.to_string(max_marker);
+   }
+   return 0;
+ }
+
+ /**
+  * Collect the max_marker of each bucket index header into markers.
+  *
+  * With shard_id >= 0 the marker is stored under shard_id itself; otherwise
+  * it is stored under the key reported by cls_bucket_head().
+  *
+  * @return 0 on success, negative error from cls_bucket_head() otherwise.
+  */
+ int RGWRados::get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id,
+                                 map<int, string>& markers)
+ {
+   vector<rgw_bucket_dir_header> headers;
+   map<int, string> bucket_instance_ids;
+   const int rc = cls_bucket_head(bucket_info, shard_id, headers, &bucket_instance_ids);
+   if (rc < 0) {
+     return rc;
+   }
+
+   ceph_assert(headers.size() == bucket_instance_ids.size());
+
+   auto id_it = bucket_instance_ids.begin();
+   for (auto hdr_it = headers.begin(); hdr_it != headers.end(); ++hdr_it, ++id_it) {
+     const int slot = (shard_id >= 0) ? shard_id : id_it->first;
+     markers[slot] = hdr_it->max_marker;
+   }
+   return 0;
+ }
+
+ /**
+  * Fan-in helper for asynchronous bucket stats: accumulates the dir header
+  * of every pending response and, once the pending counter reaches zero,
+  * forwards the aggregate (or the last recorded error) to the user callback
+  * and drops the reference held on it.
+  */
+ class RGWGetBucketStatsContext : public RGWGetDirHeader_CB {
+   RGWGetBucketStats_CB *cb;
+   uint32_t pendings;
+   map<RGWObjCategory, RGWStorageStats> stats;
+   int ret_code;
+   bool should_cb;
+   Mutex lock;
+
+ public:
+   RGWGetBucketStatsContext(RGWGetBucketStats_CB *_cb, uint32_t _pendings)
+     : cb(_cb),
+       pendings(_pendings),
+       stats(),
+       ret_code(0),
+       should_cb(true),
+       lock("RGWGetBucketStatsContext") {}
+
+   /// Handle one response; r < 0 is remembered as the overall result.
+   void handle_response(int r, rgw_bucket_dir_header& header) override {
+     Mutex::Locker guard(lock);
+     if (!should_cb) {
+       return;  // callback was disarmed via unset_cb()
+     }
+
+     if (r < 0) {
+       ret_code = r;
+     } else {
+       accumulate_raw_stats(header, stats);
+     }
+
+     --pendings;
+     if (pendings != 0) {
+       return;  // still waiting for other responses
+     }
+
+     // last response: deliver the result
+     if (ret_code == 0) {
+       cb->set_response(&stats);
+     }
+     cb->handle_response(ret_code);
+     cb->put();
+   }
+
+   /// Disarm the context so later responses never reach the user callback.
+   void unset_cb() {
+     Mutex::Locker guard(lock);
+     should_cb = false;
+   }
+ };
+
+ /**
+  * Start an asynchronous bucket stats read; ctx is notified through an
+  * RGWGetBucketStatsContext sized for num_shards (or 1 when that is zero).
+  *
+  * On failure the reference on ctx is dropped here, and if some requests
+  * were already issued the context is disarmed so ctx is not called later.
+  *
+  * @return result of cls_bucket_head_async().
+  */
+ int RGWRados::get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *ctx)
+ {
+   const uint32_t expected = bucket_info.num_shards ? bucket_info.num_shards : 1;
+   RGWGetBucketStatsContext *get_ctx = new RGWGetBucketStatsContext(ctx, expected);
+   ceph_assert(get_ctx);
+
+   int issued = 0;
+   const int rc = cls_bucket_head_async(bucket_info, shard_id, get_ctx, &issued);
+   if (rc < 0) {
+     ctx->put();
+     if (issued != 0) {
+       get_ctx->unset_cb();
+     }
+   }
+
+   get_ctx->put();
+   return rc;
+ }
+
+ /**
+  * Adapter that converts a cls_user_header response into RGWStorageStats
+  * for the wrapped RGWGetUserStats_CB, then drops the reference on it.
+  */
+ class RGWGetUserStatsContext : public RGWGetUserHeader_CB {
+   RGWGetUserStats_CB *cb;
+
+ public:
+   explicit RGWGetUserStatsContext(RGWGetUserStats_CB * const cb)
+     : cb(cb) {}
+
+   void handle_response(int r, cls_user_header& header) override {
+     if (r >= 0) {
+       // only a successful read carries meaningful totals
+       const cls_user_stats& totals = header.stats;
+
+       RGWStorageStats converted;
+       converted.size = totals.total_bytes;
+       converted.size_rounded = totals.total_bytes_rounded;
+       converted.num_objects = totals.total_entries;
+
+       cb->set_response(converted);
+     }
+
+     cb->handle_response(r);
+     cb->put();
+   }
+ };
+
+ /**
+  * Read the user's cls_user header and copy its totals into stats.
+  *
+  * @return 0 on success, negative error from cls_user_get_header() otherwise
+  *         (stats is left untouched in that case).
+  */
+ int RGWRados::get_user_stats(const rgw_user& user, RGWStorageStats& stats)
+ {
+   const string uid = user.to_str();
+
+   cls_user_header header;
+   const int rc = cls_user_get_header(uid, &header);
+   if (rc < 0) {
+     return rc;
+   }
+
+   const cls_user_stats& totals = header.stats;
+   stats.size = totals.total_bytes;
+   stats.size_rounded = totals.total_bytes_rounded;
+   stats.num_objects = totals.total_entries;
+   return 0;
+ }
+
+ /**
+  * Start an asynchronous read of the user's header; ctx is notified via an
+  * RGWGetUserStatsContext. If the request cannot be issued, the reference
+  * on ctx is dropped and the helper context is deleted here.
+  *
+  * @return 0 when the request was issued, negative error otherwise.
+  */
+ int RGWRados::get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *ctx)
+ {
+   const string uid = user.to_str();
+
+   RGWGetUserStatsContext *get_ctx = new RGWGetUserStatsContext(ctx);
+   const int rc = cls_user_get_header_async(uid, get_ctx);
+   if (rc >= 0) {
+     return 0;
+   }
+
+   // nothing in flight: clean up both objects ourselves
+   ctx->put();
+   delete get_ctx;
+   return rc;
+ }
+
+ /// Build the bucket instance metadata oid: RGW_BUCKET_INSTANCE_MD_PREFIX
+ /// followed by the bucket key rendered with ':' as the delimiter.
+ void RGWRados::get_bucket_meta_oid(const rgw_bucket& bucket, string& oid)
+ {
+   const string bucket_key = bucket.get_key(':');
+   oid = RGW_BUCKET_INSTANCE_MD_PREFIX + bucket_key;
+ }
+
+ /// Point obj at the bucket instance object in the zone's domain_root pool,
+ /// using bucket.oid when set and the derived meta oid otherwise.
+ void RGWRados::get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj)
+ {
+   if (bucket.oid.empty()) {
+     string derived_oid;
+     get_bucket_meta_oid(bucket, derived_oid);
+     obj.init(svc.zone->get_zone_params().domain_root, derived_oid);
+     return;
+   }
+
+   obj.init(svc.zone->get_zone_params().domain_root, bucket.oid);
+ }
+
+ /**
+  * Load bucket instance info addressed by a metadata key.
+  *
+  * The key must contain a ':'; it is prefixed with
+  * RGW_BUCKET_INSTANCE_MD_PREFIX and converted in place by
+  * rgw_bucket_instance_key_to_oid() before the object is read.
+  *
+  * @return -EINVAL for a key without ':', otherwise the result of
+  *         get_bucket_instance_from_oid().
+  */
+ int RGWRados::get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info,
+                                        real_time *pmtime, map<string, bufferlist> *pattrs)
+ {
+   if (meta_key.find(':') == string::npos) {
+     return -EINVAL;
+   }
+
+   string instance_oid = RGW_BUCKET_INSTANCE_MD_PREFIX + meta_key;
+   rgw_bucket_instance_key_to_oid(instance_oid);
+
+   return get_bucket_instance_from_oid(obj_ctx, instance_oid, info, pmtime, pattrs);
+ }
+
+ /**
+  * Load bucket instance info for a bucket, reading bucket.oid when it is
+  * set and the oid derived by get_bucket_meta_oid() otherwise.
+  *
+  * @return result of get_bucket_instance_from_oid().
+  */
+ int RGWRados::get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info,
+                                        real_time *pmtime, map<string, bufferlist> *pattrs)
+ {
+   string instance_oid = bucket.oid;
+   if (instance_oid.empty()) {
+     get_bucket_meta_oid(bucket, instance_oid);
+   }
+
+   return get_bucket_instance_from_oid(obj_ctx, instance_oid, info, pmtime, pattrs);
+ }
+
+ /**
+  * Read and decode a bucket instance object from the domain_root pool.
+  *
+  * info.objv_tracker is used as the version tracker for the read. On
+  * success info.bucket.oid is set to the oid that was read.
+  *
+  * @return 0 on success, the read error, or -EIO when decoding fails.
+  */
+ int RGWRados::get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info,
+                                            real_time *pmtime, map<string, bufferlist> *pattrs,
+                                            rgw_cache_entry_info *cache_info,
+                                            boost::optional<obj_version> refresh_version)
+ {
+   auto& root_pool = svc.zone->get_zone_params().domain_root;
+
+   ldout(cct, 20) << "reading from " << root_pool << ":" << oid << dendl;
+
+   bufferlist raw;
+   const int rc = rgw_get_system_obj(this, obj_ctx, root_pool,
+                                     oid, raw, &info.objv_tracker, pmtime, pattrs,
+                                     cache_info, refresh_version);
+   if (rc < 0) {
+     return rc;
+   }
+
+   try {
+     auto pos = raw.cbegin();
+     decode(info, pos);
+   } catch (buffer::error&) {
+     ldout(cct, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl;
+     return -EIO;
+   }
+
+   info.bucket.oid = oid;
+   return 0;
+ }
+
+ /**
+  * Read and decode the bucket entry point object for tenant/bucket from the
+  * domain_root pool.
+  *
+  * @return 0 on success, the read error, or -EIO when decoding fails.
+  */
+ int RGWRados::get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx,
+                                          const string& tenant_name,
+                                          const string& bucket_name,
+                                          RGWBucketEntryPoint& entry_point,
+                                          RGWObjVersionTracker *objv_tracker,
+                                          real_time *pmtime,
+                                          map<string, bufferlist> *pattrs,
+                                          rgw_cache_entry_info *cache_info,
+                                          boost::optional<obj_version> refresh_version)
+ {
+   string entry_name;
+   rgw_make_bucket_entry_name(tenant_name, bucket_name, entry_name);
+
+   bufferlist raw;
+   const int rc = rgw_get_system_obj(this, obj_ctx, svc.zone->get_zone_params().domain_root,
+                                     entry_name, raw, objv_tracker, pmtime, pattrs,
+                                     cache_info, refresh_version);
+   if (rc < 0) {
+     return rc;
+   }
+
+   try {
+     auto pos = raw.cbegin();
+     decode(entry_point, pos);
+   } catch (buffer::error&) {
+     ldout(cct, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl;
+     return -EIO;
+   }
+   return 0;
+ }
+
+ /**
+  * Convert a bucket whose entry point still embeds the bucket info: the
+  * embedded info is written out via put_linked_bucket_info() with a newly
+  * generated entry point write version.
+  *
+  * @return 0 on success or when has_bucket_info is not set on the entry
+  *         point, negative error otherwise.
+  */
+ int RGWRados::convert_old_bucket_info(RGWSysObjectCtx& obj_ctx,
+                                       const string& tenant_name,
+                                       const string& bucket_name)
+ {
+   ldout(cct, 10) << "RGWRados::convert_old_bucket_info(): bucket=" << bucket_name << dendl;
+
+   RGWBucketEntryPoint entry_point;
+   real_time ep_mtime;
+   RGWObjVersionTracker ep_tracker;
+   map<string, bufferlist> attrs;
+
+   int rc = get_bucket_entrypoint_info(obj_ctx, tenant_name, bucket_name, entry_point, &ep_tracker, &ep_mtime, &attrs);
+   if (rc < 0) {
+     ldout(cct, 0) << "ERROR: get_bucket_entrypoint_info() returned " << rc << " bucket=" << bucket_name << dendl;
+     return rc;
+   }
+
+   if (!entry_point.has_bucket_info) {
+     /* already converted! */
+     return 0;
+   }
+
+   RGWBucketInfo info = entry_point.old_bucket_info;
+   info.bucket.oid = bucket_name;
+   info.ep_objv = ep_tracker.read_version;
+
+   ep_tracker.generate_new_write_ver(cct);
+
+   rc = put_linked_bucket_info(info, false, ep_mtime, &ep_tracker.write_version, &attrs, true);
+   if (rc < 0) {
+     ldout(cct, 0) << "ERROR: failed to put_linked_bucket_info(): " << rc << dendl;
+     return rc;
+   }
+   return 0;
+ }
+ /*
+  * Look up bucket info for (tenant, bucket_name).
+  *
+  * The binfo_cache is consulted first; on a miss the bucket entry point is
+  * read and then the bucket instance object it refers to, and the result is
+  * stored in the cache chained to both objects.
+  *
+  * On success returns 0 with info filled in, and *pmtime / *pattrs filled
+  * when non-null (except on the old-format path, see below). When either
+  * read fails the negative error is returned and info.bucket.tenant/name
+  * are still set.
+  *
+  * refresh_version: when given, a cached entry whose read_version compare()s
+  * true against it (presumably: is equal, i.e. stale) is invalidated instead
+  * of being returned, and the value is forwarded to both reads.
+  */
+ int RGWRados::_get_bucket_info(RGWSysObjectCtx& obj_ctx,
+ const string& tenant,
+ const string& bucket_name,
+ RGWBucketInfo& info,
+ real_time *pmtime,
+ map<string, bufferlist> *pattrs,
+ boost::optional<obj_version> refresh_version)
+ {
+ string bucket_entry;
+ rgw_make_bucket_entry_name(tenant, bucket_name, bucket_entry);
+
+ // Fast path: try the bucket info cache before touching any object.
+ if (auto e = binfo_cache->find(bucket_entry)) {
+ if (refresh_version &&
+ e->info.objv_tracker.read_version.compare(&(*refresh_version))) {
+ lderr(cct) << "WARNING: The bucket info cache is inconsistent. This is "
+ << "a failure that should be debugged. I am a nice machine, "
+ << "so I will try to recover." << dendl;
+ binfo_cache->invalidate(bucket_entry);
+ } else {
+ info = e->info;
+ if (pattrs)
+ *pattrs = e->attrs;
+ if (pmtime)
+ *pmtime = e->mtime;
+ return 0;
+ }
+ }
+ // Cache miss, or the cached entry was just invalidated: read the entry point.
+ bucket_info_entry e;
+ RGWBucketEntryPoint entry_point;
+ real_time ep_mtime;
+ RGWObjVersionTracker ot;
+ rgw_cache_entry_info entry_cache_info;
+ int ret = get_bucket_entrypoint_info(obj_ctx, tenant, bucket_name,
+ entry_point, &ot, &ep_mtime, pattrs,
+ &entry_cache_info, refresh_version);
+ if (ret < 0) {
+ /* only init these fields */
+ info.bucket.tenant = tenant;
+ info.bucket.name = bucket_name;
+ return ret;
+ }
+ // Old format: the entry point embeds the bucket info. It is returned as is; this path neither caches the result nor sets *pmtime.
+ if (entry_point.has_bucket_info) {
+ info = entry_point.old_bucket_info;
+ info.bucket.oid = bucket_name;
+ info.bucket.tenant = tenant;
+ info.ep_objv = ot.read_version;
+ ldout(cct, 20) << "rgw_get_bucket_info: old bucket info, bucket=" << info.bucket << " owner " << info.owner << dendl;
+ return 0;
+ }
+
+ /* data is in the bucket instance object, we need to get attributes from there, clear everything
+ * that we got
+ */
+ if (pattrs) {
+ pattrs->clear();
+ }
+
+ ldout(cct, 20) << "rgw_get_bucket_info: bucket instance: " << entry_point.bucket << dendl;
+
+
+ /* read bucket instance info */
+
+ string oid;
+ get_bucket_meta_oid(entry_point.bucket, oid);
+
+ rgw_cache_entry_info cache_info;
+
+ ret = get_bucket_instance_from_oid(obj_ctx, oid, e.info, &e.mtime, &e.attrs,
+ &cache_info, refresh_version);
+ e.info.ep_objv = ot.read_version;
+ info = e.info; // NOTE: copied out before ret is checked, so info is overwritten even on failure
+ if (ret < 0) {
+ lderr(cct) << "ERROR: get_bucket_instance_from_oid failed: " << ret << dendl;
+ info.bucket.tenant = tenant;
+ info.bucket.name = bucket_name;
+ // XXX and why return anything in case of an error anyway?
+ return ret;
+ }
+
+ if (pmtime)
+ *pmtime = e.mtime;
+ if (pattrs)
+ *pattrs = e.attrs;
+
+ /* chain to both bucket entry point and bucket instance */
+ if (!binfo_cache->put(svc.cache, bucket_entry, &e, {&entry_cache_info, &cache_info})) {
+ ldout(cct, 20) << "couldn't put binfo cache entry, might have raced with data changes" << dendl;
+ }
+ // A refresh was requested but the object just read still matches refresh_version: warn only, the call still succeeds.
+ if (refresh_version &&
+ refresh_version->compare(&info.objv_tracker.read_version)) {
+ lderr(cct) << "WARNING: The OSD has the same version I have. Something may "
+ << "have gone squirrelly. An administrator may have forced a "
+ << "change; otherwise there is a problem somewhere." << dendl;
+ }
+
+ return 0;
+ }
+
+ /// Look up bucket info by tenant and name; forwards to _get_bucket_info()
+ /// without a refresh version.
+ int RGWRados::get_bucket_info(RGWSysObjectCtx& obj_ctx,
+                               const string& tenant, const string& bucket_name,
+                               RGWBucketInfo& info,
+                               real_time *pmtime, map<string, bufferlist> *pattrs)
+ {
+   const int rc = _get_bucket_info(obj_ctx, tenant, bucket_name, info, pmtime,
+                                   pattrs, boost::none);
+   return rc;
+ }
+
+ /// Re-read info in place through _get_bucket_info() on a fresh sysobj
+ /// context, passing info's current read_version as the refresh version.
+ int RGWRados::try_refresh_bucket_info(RGWBucketInfo& info,
+                                       ceph::real_time *pmtime,
+                                       map<string, bufferlist> *pattrs)
+ {
+   RGWSysObjectCtx sysobj_ctx = svc.sysobj->init_obj_ctx();
+
+   const int rc = _get_bucket_info(sysobj_ctx, info.bucket.tenant, info.bucket.name,
+                                   info, pmtime, pattrs, info.objv_tracker.read_version);
+   return rc;
+ }
+
+ /// Encode entry_point and store it under the tenant/bucket entry name via
+ /// rgw_bucket_store_info(); returns that call's result.
+ int RGWRados::put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
+                                          bool exclusive, RGWObjVersionTracker& objv_tracker, real_time mtime,
+                                          map<string, bufferlist> *pattrs)
+ {
+   string entry_name;
+   rgw_make_bucket_entry_name(tenant_name, bucket_name, entry_name);
+
+   bufferlist encoded;
+   encode(entry_point, encoded);
+
+   return rgw_bucket_store_info(this, entry_name, encoded, exclusive, pattrs, &objv_tracker, mtime);
+ }
+
+ /**
+  * Mark info as having an instance object, encode it and store it through
+  * rgw_bucket_instance_store_info().
+  *
+  * @return 0 on success or when the store reports -EEXIST, negative error
+  *         otherwise.
+  */
+ int RGWRados::put_bucket_instance_info(RGWBucketInfo& info, bool exclusive,
+                                        real_time mtime, map<string, bufferlist> *pattrs)
+ {
+   info.has_instance_obj = true;
+
+   bufferlist encoded;
+   encode(info, encoded);
+
+   /* when we go through meta api, we don't use oid directly */
+   const string meta_key = info.bucket.get_key();
+   const int rc = rgw_bucket_instance_store_info(this, meta_key, encoded, exclusive, pattrs, &info.objv_tracker, mtime);
+
+   /* If the write was exclusive we must not overwrite an existing object, as
+    * we might race with another operation on this specific bucket (e.g. it
+    * being synced from the master). However, the bucket instance meta object
+    * is unique to this specific bucket instance, so -EEXIST is not an error.
+    * One scenario that produces -EEXIST: in a multi-zone config, on a
+    * non-master zone, a bucket creation is forwarded to the master and the
+    * bucket is created locally while the sync thread syncs the new bucket.
+    */
+   return (rc == -EEXIST) ? 0 : rc;
+ }
+
+ /**
+  * Store the bucket instance info and, when needed, a linked entry point.
+  *
+  * The entry point is written when info had no instance object yet or when
+  * create_entry_point is set. Its write version is *pep_objv when that has a
+  * non-empty tag; otherwise a new version is generated and, if pep_objv is
+  * non-null, reported back through it.
+  *
+  * @return 0 on success, negative error otherwise.
+  */
+ int RGWRados::put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, real_time mtime, obj_version *pep_objv,
+                                      map<string, bufferlist> *pattrs, bool create_entry_point)
+ {
+   // sample this first: put_bucket_instance_info() sets has_instance_obj
+   const bool need_entry_point = create_entry_point || !info.has_instance_obj;
+
+   int rc = put_bucket_instance_info(info, exclusive, mtime, pattrs);
+   if (rc < 0) {
+     return rc;
+   }
+   if (!need_entry_point) {
+     return 0; /* done! */
+   }
+
+   RGWObjVersionTracker ep_tracker;
+   const bool caller_has_version = (pep_objv != nullptr) && !pep_objv->tag.empty();
+   if (caller_has_version) {
+     ep_tracker.write_version = *pep_objv;
+   } else {
+     ep_tracker.generate_new_write_ver(cct);
+     if (pep_objv) {
+       *pep_objv = ep_tracker.write_version;
+     }
+   }
+
+   RGWBucketEntryPoint entry_point;
+   entry_point.bucket = info.bucket;
+   entry_point.owner = info.owner;
+   entry_point.creation_time = info.creation_time;
+   entry_point.linked = true;
+
+   rc = put_bucket_entrypoint_info(info.bucket.tenant, info.bucket.name, entry_point, exclusive, ep_tracker, mtime, nullptr);
+   return (rc < 0) ? rc : 0;
+ }
+
+ /**
+  * Refresh count / size / size_rounded of every entry in m from the
+  * main_category stats of its bucket index headers, and copy the bucket
+  * instance's placement_rule into the entry.
+  *
+  * Processing stops at the first bucket whose instance info or index
+  * headers cannot be read.
+  *
+  * @return m.size() on success, negative error otherwise.
+  */
+ int RGWRados::update_containers_stats(map<string, RGWBucketEnt>& m)
+ {
+   auto sysobj_ctx = svc.sysobj->init_obj_ctx();
+
+   for (auto& name_and_ent : m) {
+     RGWBucketEnt& ent = name_and_ent.second;
+     rgw_bucket& bucket = ent.bucket;
+     ent.count = 0;
+     ent.size = 0;
+     ent.size_rounded = 0;
+
+     RGWBucketInfo bucket_info;
+     const int info_rc = get_bucket_instance_info(sysobj_ctx, bucket, bucket_info, nullptr, nullptr);
+     if (info_rc < 0) {
+       return info_rc;
+     }
+
+     vector<rgw_bucket_dir_header> headers;
+     const int head_rc = cls_bucket_head(bucket_info, RGW_NO_SHARD, headers);
+     if (head_rc < 0) {
+       return head_rc;
+     }
+
+     const RGWObjCategory category = main_category;
+     for (auto& header : headers) {
+       auto found = header.stats.find(category);
+       if (found == header.stats.end()) {
+         continue;
+       }
+       struct rgw_bucket_category_stats& cat_stats = found->second;
+       ent.count += cat_stats.num_entries;
+       ent.size += cat_stats.total_size;
+       ent.size_rounded += cat_stats.total_size_rounded;
+     }
+
+     // fill in placement_rule from the bucket instance for use in swift's
+     // per-storage policy statistics
+     ent.placement_rule = std::move(bucket_info.placement_rule);
+   }
+
+   return m.size();
+ }
+
+ /// Issue an asynchronous append of bl (size bytes) to obj. The completion
+ /// is released right after submission, so the outcome is not awaited here;
+ /// the return value is that of aio_append() or of resolving obj.
+ int RGWRados::append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl)
+ {
+   rgw_rados_ref obj_ref;
+   int rc = get_raw_obj_ref(obj, &obj_ref);
+   if (rc < 0) {
+     return rc;
+   }
+
+   librados::Rados *rados_handle = get_rados_handle();
+   librados::AioCompletion *aio = rados_handle->aio_create_completion(nullptr, nullptr, nullptr);
+
+   rc = obj_ref.ioctx.aio_append(obj_ref.obj.oid, aio, bl, size);
+   aio->release();
+   return rc;
+ }
+
+ /**
+  * Open the pool and position ctx.iter at the first object.
+  *
+  * Exceptions thrown while creating the iterator are translated into error
+  * codes, matching the cursor-taking overload and pool_iterate(), so callers
+  * only ever see an integer result.
+  *
+  * @param pool pool to iterate over
+  * @param ctx  iteration context; its io_ctx and iter are (re)initialized
+  * @return 0 on success, a negative error code otherwise (-EIO for
+  *         exceptions that carry no error code).
+  */
+ int RGWRados::pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx)
+ {
+   librados::IoCtx& io_ctx = ctx.io_ctx;
+   librados::NObjectIterator& iter = ctx.iter;
+
+   int r = open_pool_ctx(pool, io_ctx, false);
+   if (r < 0)
+     return r;
+
+   try {
+     iter = io_ctx.nobjects_begin();
+     return 0;
+   } catch (const std::system_error& e) {
+     r = -e.code().value();
+     ldout(cct, 10) << "nobjects_begin threw " << e.what()
+                    << ", returning " << r << dendl;
+     return r;
+   } catch (const std::exception& e) {
+     ldout(cct, 10) << "nobjects_begin threw " << e.what()
+                    << ", returning -5" << dendl;
+     return -EIO;
+   }
+ }
+
+ /**
+  * Open the pool and position ctx.iter at the object cursor parsed from
+  * cursor.
+  *
+  * @return 0 on success, -EINVAL when the cursor cannot be parsed, the
+  *         negated error code of a std::system_error thrown by
+  *         nobjects_begin(), or -EIO for any other std::exception.
+  */
+ int RGWRados::pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx)
+ {
+   int rc = open_pool_ctx(pool, ctx.io_ctx, false);
+   if (rc < 0) {
+     return rc;
+   }
+
+   librados::ObjectCursor start;
+   if (!start.from_str(cursor)) {
+     ldout(cct, 10) << "failed to parse cursor: " << cursor << dendl;
+     return -EINVAL;
+   }
+
+   try {
+     ctx.iter = ctx.io_ctx.nobjects_begin(start);
+   } catch (const std::system_error& e) {
+     rc = -e.code().value();
+     ldout(cct, 10) << "nobjects_begin threw " << e.what()
+                    << ", returning " << rc << dendl;
+     return rc;
+   } catch (const std::exception& e) {
+     ldout(cct, 10) << "nobjects_begin threw " << e.what()
+                    << ", returning -5" << dendl;
+     return -EIO;
+   }
+   return 0;
+ }
+
+ /// Return the string form of the iterator's current cursor.
+ string RGWRados::pool_iterate_get_cursor(RGWPoolIterCtx& ctx)
+ {
+   auto position = ctx.iter.get_cursor();
+   return position.to_str();
+ }
+
+ /**
+  * Advance ctx.iter over at most num objects, appending to objs an entry
+  * for each oid that passes filter (or for every oid when filter is null).
+  * Objects rejected by the filter still count against num.
+  *
+  * @return -ENOENT if the iterator is already at the end, otherwise
+  *         objs.size(); *is_truncated (when non-null) tells whether more
+  *         objects remain.
+  */
+ static int do_pool_iterate(CephContext* cct, RGWPoolIterCtx& ctx, uint32_t num,
+                            vector<rgw_bucket_dir_entry>& objs,
+                            bool *is_truncated, RGWAccessListFilter *filter)
+ {
+   librados::IoCtx& io_ctx = ctx.io_ctx;
+   librados::NObjectIterator& iter = ctx.iter;
+
+   if (iter == io_ctx.nobjects_end()) {
+     return -ENOENT;
+   }
+
+   for (uint32_t visited = 0; visited < num && iter != io_ctx.nobjects_end(); ++visited, ++iter) {
+     string oid = iter->get_oid();
+     ldout(cct, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
+
+     // the filter receives oid as both name and key
+     if (!filter || filter->filter(oid, oid)) {
+       rgw_bucket_dir_entry entry;
+       entry.key = oid;
+       objs.push_back(entry);
+     }
+   }
+
+   if (is_truncated) {
+     *is_truncated = (iter != io_ctx.nobjects_end());
+   }
+   return objs.size();
+ }
+
+ /**
+  * Exception-safe wrapper around do_pool_iterate(): a std::system_error is
+  * mapped to its negated error code, any other std::exception to -EIO.
+  */
+ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
+                            bool *is_truncated, RGWAccessListFilter *filter)
+ {
+   int rc;
+   // catch exceptions from NObjectIterator::operator++()
+   try {
+     rc = do_pool_iterate(cct, ctx, num, objs, is_truncated, filter);
+   } catch (const std::system_error& e) {
+     rc = -e.code().value();
+     ldout(cct, 10) << "NObjectIterator threw exception " << e.what()
+                    << ", returning " << rc << dendl;
+   } catch (const std::exception& e) {
+     ldout(cct, 10) << "NObjectIterator threw exception " << e.what()
+                    << ", returning -5" << dendl;
+     rc = -EIO;
+   }
+   return rc;
+ }
+
+ /// Initialize a raw listing context at marker; a context that is already
+ /// initialized is left as is. Returns 0 or the pool_iterate_begin() error.
+ int RGWRados::list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx)
+ {
+   if (ctx->initialized) {
+     return 0;
+   }
+
+   const int rc = pool_iterate_begin(pool, marker, ctx->iter_ctx);
+   if (rc < 0) {
+     ldout(cct, 10) << "failed to list objects pool_iterate_begin() returned r=" << rc << dendl;
+     return rc;
+   }
+
+   ctx->initialized = true;
+   return 0;
+ }
+
+ /**
+  * Fetch the next batch of raw object names matching prefix_filter and
+  * append them to oids.
+  *
+  * @return -EINVAL if ctx is not initialized, a negative pool_iterate()
+  *         error (-ENOENT is not logged), otherwise oids.size().
+  */
+ int RGWRados::list_raw_objects_next(const string& prefix_filter, int max,
+                                     RGWListRawObjsCtx& ctx, list<string>& oids,
+                                     bool *is_truncated)
+ {
+   if (!ctx.initialized) {
+     return -EINVAL;
+   }
+
+   RGWAccessListFilterPrefix prefix_match(prefix_filter);
+   vector<rgw_bucket_dir_entry> batch;
+   const int rc = pool_iterate(ctx.iter_ctx, max, batch, is_truncated, &prefix_match);
+   if (rc < 0) {
+     if (rc != -ENOENT) {
+       ldout(cct, 10) << "failed to list objects pool_iterate returned r=" << rc << dendl;
+     }
+     return rc;
+   }
+
+   for (const auto& entry : batch) {
+     oids.push_back(entry.key.name);
+   }
+   return oids.size();
+ }
+
+ /// List raw objects in pool matching prefix_filter, initializing ctx with
+ /// an empty marker on first use and then delegating to
+ /// list_raw_objects_next().
+ int RGWRados::list_raw_objects(const rgw_pool& pool, const string& prefix_filter,
+                                int max, RGWListRawObjsCtx& ctx, list<string>& oids,
+                                bool *is_truncated)
+ {
+   if (!ctx.initialized) {
+     const int init_rc = list_raw_objects_init(pool, string(), &ctx);
+     if (init_rc < 0) {
+       return init_rc;
+     }
+   }
+
+   const int rc = list_raw_objects_next(prefix_filter, max, ctx, oids, is_truncated);
+   return rc;
+ }
+
+ /// Return the cursor string of the listing context's pool iterator.
+ string RGWRados::list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx)
+ {
+   string cursor = pool_iterate_get_cursor(ctx.iter_ctx);
+   return cursor;
+ }
+
+ /**
+  * List bucket index log entries, interleaving the per-shard lists.
+  *
+  * Entries are taken one per shard per pass until max entries have been
+  * collected or every shard list is exhausted.
+  *
+  * @param bucket_info bucket whose index log is listed
+  * @param shard_id    specific shard, or negative for all shards
+  * @param marker      in: position to start from; out: position to resume
+  *                    from (per-shard composite when sharded)
+  * @param max         maximum number of entries to return
+  * @param result      cleared, then filled with the entries
+  * @param truncated   optional; set when any shard reported truncation or
+  *                    still has fetched entries that were not returned
+  * @return 0 on success, negative error code otherwise.
+  */
+ int RGWRados::list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max,
+                                   std::list<rgw_bi_log_entry>& result, bool *truncated)
+ {
+   ldout(cct, 20) << __func__ << ": " << bucket_info.bucket << " marker " << marker << " shard_id=" << shard_id << " max " << max << dendl;
+   result.clear();
+
+   librados::IoCtx index_ctx;
+   map<int, string> oids;
+   map<int, cls_rgw_bi_log_list_ret> bi_log_lists;
+   int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
+   if (r < 0)
+     return r;
+
+   BucketIndexShardsManager marker_mgr;
+   bool has_shards = (oids.size() > 1 || shard_id >= 0);
+   // If there are multiple shards for the bucket index object, the marker
+   // should have the pattern '{shard_id_1}#{shard_marker_1},{shard_id_2}#
+   // {shard_marker_2}...', if there is no sharding, the bi_log_list should
+   // only contain one record, and the key is the bucket instance id.
+   r = marker_mgr.from_string(marker, shard_id);
+   if (r < 0)
+     return r;
+
+   r = CLSRGWIssueBILogList(index_ctx, marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)();
+   if (r < 0)
+     return r;
+
+   // per-shard read position and end of the fetched list
+   map<int, list<rgw_bi_log_entry>::iterator> vcurrents;
+   map<int, list<rgw_bi_log_entry>::iterator> vends;
+   if (truncated) {
+     *truncated = false;
+   }
+   map<int, cls_rgw_bi_log_list_ret>::iterator miter = bi_log_lists.begin();
+   for (; miter != bi_log_lists.end(); ++miter) {
+     // named differently from the shard_id parameter to avoid shadowing it
+     const int cur_shard = miter->first;
+     vcurrents[cur_shard] = miter->second.entries.begin();
+     vends[cur_shard] = miter->second.entries.end();
+     if (truncated) {
+       *truncated = (*truncated || miter->second.truncated);
+     }
+   }
+
+   size_t total = 0;
+   bool has_more = true;
+   map<int, list<rgw_bi_log_entry>::iterator>::iterator viter;
+   map<int, list<rgw_bi_log_entry>::iterator>::iterator eiter;
+   while (total < max && has_more) {
+     has_more = false;
+
+     viter = vcurrents.begin();
+     eiter = vends.begin();
+
+     for (; total < max && viter != vcurrents.end(); ++viter, ++eiter) {
+       // ceph_assert, like the rest of this file, so the check is not
+       // compiled out when NDEBUG is defined
+       ceph_assert(eiter != vends.end());
+
+       const int cur_shard = viter->first;
+       list<rgw_bi_log_entry>::iterator& liter = viter->second;
+
+       if (liter == eiter->second) {
+         continue;  // this shard's list is exhausted
+       }
+       rgw_bi_log_entry& entry = *(liter);
+       if (has_shards) {
+         // rewrite the entry id into its shard-qualified form
+         char buf[16];
+         snprintf(buf, sizeof(buf), "%d", cur_shard);
+         string tmp_id;
+         build_bucket_index_marker(buf, entry.id, &tmp_id);
+         entry.id.swap(tmp_id);
+       }
+       marker_mgr.add(cur_shard, entry.id);
+       result.push_back(entry);
+       total++;
+       has_more = true;
+       ++liter;
+     }
+   }
+
+   if (truncated) {
+     // entries fetched but not returned also mean the listing is truncated
+     for (viter = vcurrents.begin(), eiter = vends.begin(); viter != vcurrents.end(); ++viter, ++eiter) {
+       ceph_assert(eiter != vends.end());
+       *truncated = (*truncated || (viter->second != eiter->second));
+     }
+   }
+
+   // Refresh marker, if there are multiple shards, the output will look like
+   // '{shard_oid_1}#{shard_marker_1},{shard_oid_2}#{shard_marker_2}...',
+   // if there is no sharding, the simple marker (without oid) is returned
+   if (has_shards) {
+     marker_mgr.to_string(&marker);
+   } else {
+     if (!result.empty()) {
+       marker = result.rbegin()->id;
+     }
+   }
+
+   return 0;
+ }
+
+ /**
+  * Trim the bucket index log between start_marker and end_marker on the
+  * selected shard(s).
+  *
+  * @return negative error if the index cannot be opened or a marker cannot
+  *         be parsed, otherwise the result of CLSRGWIssueBILogTrim.
+  */
+ int RGWRados::trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& start_marker, string& end_marker)
+ {
+   librados::IoCtx index_ctx;
+   map<int, string> shard_oids;
+   int rc = open_bucket_index(bucket_info, index_ctx, shard_oids, shard_id);
+   if (rc < 0) {
+     return rc;
+   }
+
+   BucketIndexShardsManager from_mgr;
+   rc = from_mgr.from_string(start_marker, shard_id);
+   if (rc < 0) {
+     return rc;
+   }
+
+   BucketIndexShardsManager to_mgr;
+   rc = to_mgr.from_string(end_marker, shard_id);
+   if (rc < 0) {
+     return rc;
+   }
+
+   return CLSRGWIssueBILogTrim(index_ctx, from_mgr, to_mgr, shard_oids,
+                               cct->_conf->rgw_bucket_index_max_aio)();
+ }
+
+ /// Open the bucket index and run CLSRGWIssueResyncBucketBILog on the
+ /// selected shard object(s); returns the open error or that call's result.
+ int RGWRados::resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id)
+ {
+   librados::IoCtx index_ctx;
+   map<int, string> shard_oids;
+   const int rc = open_bucket_index(bucket_info, index_ctx, shard_oids, shard_id);
+   if (rc < 0) {
+     return rc;
+   }
+
+   return CLSRGWIssueResyncBucketBILog(index_ctx, shard_oids, cct->_conf->rgw_bucket_index_max_aio)();
+ }
+
+ /// Open the bucket index and run CLSRGWIssueBucketBILogStop on the
+ /// selected shard object(s); returns the open error or that call's result.
+ int RGWRados::stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id)
+ {
+   librados::IoCtx index_ctx;
+   map<int, string> shard_oids;
+   const int rc = open_bucket_index(bucket_info, index_ctx, shard_oids, shard_id);
+   if (rc < 0) {
+     return rc;
+   }
+
+   return CLSRGWIssueBucketBILogStop(index_ctx, shard_oids, cct->_conf->rgw_bucket_index_max_aio)();
+ }
+
+ /**
+  * Fetch the Instance-type bucket index entry of obj and decode it into
+  * *dirent.
+  *
+  * @return 0 on success, the bi_get() error (logged unless -ENOENT), or
+  *         -EIO when decoding fails.
+  */
+ int RGWRados::bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                               rgw_bucket_dir_entry *dirent)
+ {
+   rgw_cls_bi_entry raw_entry;
+   const int rc = bi_get(bucket_info, obj, BIIndexType::Instance, &raw_entry);
+   if (rc < 0) {
+     if (rc != -ENOENT) {
+       ldout(cct, 0) << "ERROR: bi_get() returned r=" << rc << dendl;
+     }
+     return rc;
+   }
+
+   try {
+     auto pos = raw_entry.data.cbegin();
+     decode(*dirent, pos);
+   } catch (buffer::error&) {
+     ldout(cct, 0) << "ERROR: failed to decode bi_entry()" << dendl;
+     return -EIO;
+   }
+   return 0;
+ }
+
+ /**
+  * Fetch the OLH-type bucket index entry of obj and decode it into *olh.
+  *
+  * @return 0 on success, the bi_get() error (logged unless -ENOENT), or
+  *         -EIO when decoding fails.
+  */
+ int RGWRados::bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                          rgw_bucket_olh_entry *olh)
+ {
+   rgw_cls_bi_entry raw_entry;
+   const int rc = bi_get(bucket_info, obj, BIIndexType::OLH, &raw_entry);
+   if (rc < 0) {
+     if (rc != -ENOENT) {
+       ldout(cct, 0) << "ERROR: bi_get() returned r=" << rc << dendl;
+     }
+     return rc;
+   }
+
+   try {
+     auto pos = raw_entry.data.cbegin();
+     decode(*olh, pos);
+   } catch (buffer::error&) {
+     ldout(cct, 0) << "ERROR: failed to decode bi_entry()" << dendl;
+     return -EIO;
+   }
+   return 0;
+ }
+
+ /// Read the raw bucket index entry of the given type for obj from the
+ /// shard that BucketShard::init() selects for it.
+ int RGWRados::bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+                      BIIndexType index_type, rgw_cls_bi_entry *entry)
+ {
+   BucketShard shard(this);
+   const int rc = shard.init(bucket_info, obj);
+   if (rc < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << rc << dendl;
+     return rc;
+   }
+
+   cls_rgw_obj_key index_key(obj.key.get_index_key_name(), obj.key.instance);
+   return cls_rgw_bi_get(shard.index_ctx, shard.bucket_obj, index_type, index_key, entry);
+ }
+
+ /// Add a bucket index put of entry, targeting the shard's index object,
+ /// to the write operation op.
+ void RGWRados::bi_put(ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry)
+ {
+   const auto& shard_oid = bs.bucket_obj;
+   cls_rgw_bi_put(op, shard_oid, entry);
+ }
+
+ /// Write entry into the shard's index object; returns 0 on success or the
+ /// negative error from cls_rgw_bi_put().
+ int RGWRados::bi_put(BucketShard& bs, rgw_cls_bi_entry& entry)
+ {
+   const int rc = cls_rgw_bi_put(bs.index_ctx, bs.bucket_obj, entry);
+   return (rc < 0) ? rc : 0;
+ }
+
+ /// Resolve the index shard for obj in bucket (no RGWBucketInfo supplied)
+ /// and write entry into it.
+ int RGWRados::bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry)
+ {
+   BucketShard shard(this);
+   const int rc = shard.init(bucket, obj, nullptr /* no RGWBucketInfo */);
+   if (rc < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << rc << dendl;
+     return rc;
+   }
+
+   return bi_put(shard, entry);
+ }
+
+ /**
+  * List raw bucket index entries for obj_name from the shard that holds it.
+  *
+  * @return 0 on success, negative error otherwise; on -ENOENT
+  *         *is_truncated is additionally set to false.
+  */
+ int RGWRados::bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated)
+ {
+   rgw_obj obj(bucket, obj_name);
+   BucketShard shard(this);
+   int rc = shard.init(bucket, obj, nullptr /* no RGWBucketInfo */);
+   if (rc < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << rc << dendl;
+     return rc;
+   }
+
+   rc = cls_rgw_bi_list(shard.index_ctx, shard.bucket_obj, obj_name, marker, max, entries, is_truncated);
+   if (rc == -ENOENT) {
+     *is_truncated = false;
+   }
+   return (rc < 0) ? rc : 0;
+ }
+
+ /// List raw bucket index entries from an already initialized shard;
+ /// returns 0 on success or the negative error from cls_rgw_bi_list().
+ int RGWRados::bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated)
+ {
+   const int rc = cls_rgw_bi_list(bs.index_ctx, bs.bucket_obj, filter_obj, marker, max, entries, is_truncated);
+   return (rc < 0) ? rc : 0;
+ }
+
+ /// Remove the shard's index object. A missing object (-ENOENT) counts as
+ /// success; any other failure is logged and returned.
+ int RGWRados::bi_remove(BucketShard& bs)
+ {
+   const int rc = bs.index_ctx.remove(bs.bucket_obj);
+   if (rc < 0 && rc != -ENOENT) {
+     ldout(cct, 5) << "bs.index_ctx.remove(" << bs.bucket_obj << ") returned ret=" << rc << dendl;
+     return rc;
+   }
+   return 0;
+ }
+
+ /// List raw bucket index entries from the shard identified by shard_id
+ /// (no RGWBucketInfo supplied).
+ int RGWRados::bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated)
+ {
+   BucketShard shard(this);
+   const int rc = shard.init(bucket, shard_id, nullptr /* no RGWBucketInfo */);
+   if (rc < 0) {
+     ldout(cct, 5) << "bs.init() returned ret=" << rc << dendl;
+     return rc;
+   }
+
+   return bi_list(shard, filter_obj, marker, max, entries, is_truncated);
+ }
+
+ /// Run a write operation on oid through gc_pool_ctx.operate().
+ int RGWRados::gc_operate(string& oid, librados::ObjectWriteOperation *op)
+ {
+   const int rc = gc_pool_ctx.operate(oid, op);
+   return rc;
+ }
+
+ /// Submit a write operation on oid through gc_pool_ctx.aio_operate().
+ /// The completion is handed to the caller via *pc when pc is non-null
+ /// (regardless of the submit result) and released here otherwise.
+ int RGWRados::gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, AioCompletion **pc)
+ {
+   AioCompletion *completion = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+   const int rc = gc_pool_ctx.aio_operate(oid, completion, op);
+
+   if (pc) {
+     *pc = completion;
+   } else {
+     completion->release();
+   }
+   return rc;
+ }
+
+ /// Run a read operation on oid through gc_pool_ctx.operate(), passing pbl
+ /// as the output buffer.
+ int RGWRados::gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl)
+ {
+   const int rc = gc_pool_ctx.operate(oid, op, pbl);
+   return rc;
+ }
+
+ /// Forward a GC listing request to gc->list().
+ int RGWRados::list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated)
+ {
+   const int rc = gc->list(index, marker, max, expired_only, result, truncated);
+   return rc;
+ }
+
+ /// Forward to gc->process() with the given expired_only flag.
+ int RGWRados::process_gc(bool expired_only)
+ {
+   const int rc = gc->process(expired_only);
+   return rc;
+ }
+
+ /// Forward a lifecycle progress listing request to lc->list_lc_progress().
+ int RGWRados::list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map)
+ {
+   const int rc = lc->list_lc_progress(marker, max_entries, progress_map);
+   return rc;
+ }
+
+ /// Forward to lc->process().
+ int RGWRados::process_lc()
+ {
+   const int rc = lc->process();
+   return rc;
+ }
+
+ /// Ask the object expirer to inspect all shards for the range from a
+ /// default-constructed utime_t up to the current clock time.
+ bool RGWRados::process_expire_objects()
+ {
+   const bool done = obj_expirer->inspect_all_shards(utime_t(), ceph_clock_now());
+   return done;
+ }
+
+ /**
+  * Send the "prepare" step of a bucket index modification for obj to the
+  * shard's index object. The operation is guarded so it fails with
+  * -ERR_BUSY_RESHARDING while the bucket is resharding, and the local zone
+  * id is added to a copy of the caller's zones trace.
+  *
+  * @return result of the index operate() call.
+  */
+ int RGWRados::cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag,
+                                  rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *_zones_trace)
+ {
+   rgw_zone_set trace;
+   if (_zones_trace) {
+     trace = *_zones_trace;
+   }
+   trace.insert(svc.zone->get_zone().id);
+
+   cls_rgw_obj_key index_key(obj.key.get_index_key_name(), obj.key.instance);
+
+   ObjectWriteOperation write_op;
+   cls_rgw_guard_bucket_resharding(write_op, -ERR_BUSY_RESHARDING);
+   cls_rgw_bucket_prepare_op(write_op, op, tag, index_key, obj.key.get_loc(),
+                             svc.zone->get_zone().log_data, bilog_flags, trace);
+
+   return bs.index_ctx.operate(bs.bucket_obj, &write_op);
+ }
+
+ /**
+  * Send the "complete" step of a bucket index modification asynchronously.
+  *
+  * The entry's meta (with category overridden) and the pool/epoch version
+  * are attached to the operation, which is guarded against resharding. A
+  * completion is obtained from index_completion_manager and released right
+  * after submission.
+  *
+  * @return result of the index aio_operate() call.
+  */
+ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag,
+                                   int64_t pool, uint64_t epoch,
+                                   rgw_bucket_dir_entry& ent, RGWObjCategory category,
+                                   list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *_zones_trace)
+ {
+   rgw_bucket_dir_entry_meta entry_meta;
+   entry_meta = ent.meta;
+   entry_meta.category = category;
+
+   rgw_zone_set trace;
+   if (_zones_trace) {
+     trace = *_zones_trace;
+   }
+   trace.insert(svc.zone->get_zone().id);
+
+   rgw_bucket_entry_ver entry_ver;
+   entry_ver.pool = pool;
+   entry_ver.epoch = epoch;
+
+   cls_rgw_obj_key index_key(ent.key.name, ent.key.instance);
+
+   ObjectWriteOperation write_op;
+   cls_rgw_guard_bucket_resharding(write_op, -ERR_BUSY_RESHARDING);
+   cls_rgw_bucket_complete_op(write_op, op, tag, entry_ver, index_key, entry_meta, remove_objs,
+                              svc.zone->get_zone().log_data, bilog_flags, &trace);
+
+   complete_op_data *op_data;
+   index_completion_manager->create_completion(obj, op, tag, entry_ver, index_key, entry_meta, remove_objs,
+                                               svc.zone->get_zone().log_data, bilog_flags, &trace, &op_data);
+
+   // keep our own pointer: op_data might already be released once the
+   // operation has been submitted
+   librados::AioCompletion *aio = op_data->rados_completion;
+   const int rc = bs.index_ctx.aio_operate(bs.bucket_obj, op_data->rados_completion, &write_op);
+   aio->release();
+   return rc;
+ }
+
+ /// Complete an index "add": cls_obj_complete_op() with CLS_RGW_OP_ADD.
+ int RGWRados::cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag,
+                                    int64_t pool, uint64_t epoch,
+                                    rgw_bucket_dir_entry& ent, RGWObjCategory category,
+                                    list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace)
+ {
+   const int rc = cls_obj_complete_op(bs, obj, CLS_RGW_OP_ADD, tag, pool, epoch,
+                                      ent, category, remove_objs, bilog_flags,
+                                      zones_trace);
+   return rc;
+ }
+
+ /// Complete an index "delete": builds a dir entry carrying the removal
+ /// mtime and obj's index key, then calls cls_obj_complete_op() with
+ /// CLS_RGW_OP_DEL and RGWObjCategory::None.
+ int RGWRados::cls_obj_complete_del(BucketShard& bs, string& tag,
+                                    int64_t pool, uint64_t epoch,
+                                    rgw_obj& obj,
+                                    real_time& removed_mtime,
+                                    list<rgw_obj_index_key> *remove_objs,
+                                    uint16_t bilog_flags,
+                                    rgw_zone_set *zones_trace)
+ {
+   rgw_bucket_dir_entry removed;
+   removed.meta.mtime = removed_mtime;
+   obj.key.get_index_key(&removed.key);
+
+   const int rc = cls_obj_complete_op(bs, obj, CLS_RGW_OP_DEL, tag, pool, epoch,
+                                      removed, RGWObjCategory::None, remove_objs,
+                                      bilog_flags, zones_trace);
+   return rc;
+ }
+
+/**
+ * Cancel a pending bucket index operation for an object; forwards to
+ * cls_obj_complete_op() with CLS_RGW_OP_CANCEL, pool id -1, epoch 0,
+ * category None and no objects to remove.
+ */
+int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace)
+{
+  rgw_bucket_dir_entry cancelled;
+  obj.key.get_index_key(&cancelled.key);
+
+  const int64_t no_pool = -1; /* pool id */
+  const uint64_t no_epoch = 0;
+  list<rgw_obj_index_key> *no_remove_objs = NULL;
+
+  return cls_obj_complete_op(bs, obj, CLS_RGW_OP_CANCEL, tag,
+                             no_pool, no_epoch, cancelled,
+                             RGWObjCategory::None, no_remove_objs, bilog_flags,
+                             zones_trace);
+}
+
+/**
+ * Issue a "set tag timeout" request to every index object of a bucket.
+ *
+ * @return negative error from opening the bucket index, otherwise the
+ *         result of the CLSRGWIssueSetTagTimeout call
+ */
+int RGWRados::cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout)
+{
+  librados::IoCtx index_ctx;
+  map<int, string> index_oids;
+
+  const int r = open_bucket_index(bucket_info, index_ctx, index_oids);
+  if (r < 0) {
+    return r;
+  }
+
+  return CLSRGWIssueSetTagTimeout(index_ctx, index_oids,
+                                  cct->_conf->rgw_bucket_index_max_aio,
+                                  timeout)();
+}
+
+
+/**
+ * Estimate how many entries to request from each bucket index shard so
+ * that an ordered listing is likely to yield num_entries results overall.
+ *
+ * @param num_entries total number of entries the caller wants
+ * @param num_shards  number of index shards being read; 0 is treated as a
+ *                    single shard so the calculation never divides by zero
+ * @return the per-shard read size, never less than a small minimum
+ */
+uint32_t RGWRados::calc_ordered_bucket_list_per_shard(uint32_t num_entries,
+                                                      uint32_t num_shards)
+{
+  // We want to minimize the chances that when num_shards >>
+  // num_entries that we return much fewer than num_entries to the
+  // client. Given all the overhead of making a cls call to the osd,
+  // returning a few entries is not much more work than returning one
+  // entry. This minimum might be better tuned based on future
+  // experiments where num_shards >> num_entries. (Note: ">>" should
+  // be interpreted as "much greater than".)
+  constexpr uint32_t min_read = 8;
+
+  // An integer division by zero would crash the process; fall back to the
+  // single-shard case, mirroring how other code in this file maps a shard
+  // count of 0 to 1.
+  const uint32_t shards = std::max<uint32_t>(num_shards, 1);
+
+  // The following is based on _"Balls into Bins" -- A Simple and
+  // Tight Analysis_ by Raab and Steger. We add 1 as a way to handle
+  // cases when num_shards >> num_entries (it almost serves as a
+  // ceiling calculation). We also assume alpha is 1.0 and extract it
+  // from the calculation. Future work could involve memoizing some of
+  // the transcendental functions to minimize repeatedly re-calling
+  // them with the same parameters, which we expect to be the case the
+  // majority of the time.
+  //
+  // The product is formed in floating point (2.0 * ...) so that doubling
+  // a very large num_entries cannot wrap around in 32-bit arithmetic.
+  uint32_t calc_read =
+    1 +
+    static_cast<uint32_t>((num_entries / shards) +
+                          sqrt((2.0 * num_entries) *
+                               log(shards) / shards));
+
+  return std::max(min_read, calc_read);
+}
+
+
+/**
+ * List bucket index entries in sorted key order by merging the listings
+ * returned by the selected bucket index shard(s).
+ *
+ * @param bucket_info        bucket whose index is listed
+ * @param shard_id           passed to open_bucket_index() to select shard(s)
+ * @param start_after        listing starts after this index key
+ * @param prefix             key prefix handed to the per-shard listing
+ * @param num_entries        maximum number of entries to return
+ * @param list_versions      handed to the per-shard listing
+ * @param expansion_factor   0 selects the default per-shard read size;
+ *                           1..11 multiply it by 2^(factor-1), capped at
+ *                           num_entries; larger values read num_entries
+ *                           from every shard
+ * @param m                  [out] cleared, then filled with the entries
+ * @param is_truncated       [out] set to true if any shard still has
+ *                           unconsumed or unread entries
+ * @param last_entry         [out] key of the last entry examined; left
+ *                           untouched when no entry was examined
+ * @param force_check_filter optional predicate on the entry name; a match
+ *                           forces a check_disk_state() call for the entry
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+                                      const int shard_id,
+                                      const rgw_obj_index_key& start_after,
+                                      const string& prefix,
+                                      const uint32_t num_entries,
+                                      const bool list_versions,
+                                      const uint16_t expansion_factor,
+                                      map<string, rgw_bucket_dir_entry>& m,
+                                      bool *is_truncated,
+                                      rgw_obj_index_key *last_entry,
+                                      bool (*force_check_filter)(const string& name))
+{
+  /* expansion_factor allows the number of entries to read to grow
+   * exponentially; this is used when earlier reads are producing too
+   * few results, perhaps due to filtering or to a series of
+   * namespaced entries */
+
+  ldout(cct, 10) << "RGWRados::" << __func__ << ": " << bucket_info.bucket <<
+    " start_after=\"" << start_after.name <<
+    "[" << start_after.instance <<
+    "]\", prefix=\"" << prefix <<
+    "\" num_entries=" << num_entries <<
+    ", list_versions=" << list_versions <<
+    ", expansion_factor=" << expansion_factor << dendl;
+
+  m.clear();
+
+  librados::IoCtx index_ctx;
+  // key - shard id, value - oid of that shard's index object
+  map<int, string> oids;
+  int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
+  if (r < 0) {
+    return r;
+  }
+
+  const uint32_t shard_count = oids.size();
+  uint32_t num_entries_per_shard;
+  if (expansion_factor == 0) {
+    num_entries_per_shard =
+      calc_ordered_bucket_list_per_shard(num_entries, shard_count);
+  } else if (expansion_factor <= 11) {
+    // we'll max out the exponential multiplication factor at 1024 (1 << 10)
+    num_entries_per_shard =
+      std::min(num_entries,
+               (uint32_t(1 << (expansion_factor - 1)) *
+                calc_ordered_bucket_list_per_shard(num_entries, shard_count)));
+  } else {
+    num_entries_per_shard = num_entries;
+  }
+
+  ldout(cct, 10) << "RGWRados::" << __func__ <<
+    " request from each of " << shard_count <<
+    " shard(s) for " << num_entries_per_shard << " entries to get " <<
+    num_entries << " total entries" << dendl;
+
+  // key - shard id, value - list result for the corresponding shard
+  map<int, struct rgw_cls_list_ret> list_results;
+  cls_rgw_obj_key start_key(start_after.name, start_after.instance);
+  r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries_per_shard,
+                            list_versions, oids, list_results,
+                            cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    return r;
+  }
+
+  // create a list of iterators that are used to iterate each shard.
+  // All of these vectors are indexed by *position* in list_results, which
+  // is not the same as the shard id when a single shard was requested, so
+  // the per-shard truncation flag is captured here rather than looked up
+  // in list_results by position later on.
+  vector<map<string, struct rgw_bucket_dir_entry>::iterator> vcurrents;
+  vector<map<string, struct rgw_bucket_dir_entry>::iterator> vends;
+  vector<string> vnames;
+  vector<bool> vtruncated;
+  vcurrents.reserve(list_results.size());
+  vends.reserve(list_results.size());
+  vnames.reserve(list_results.size());
+  vtruncated.reserve(list_results.size());
+  for (auto& iter : list_results) {
+    vcurrents.push_back(iter.second.dir.m.begin());
+    vends.push_back(iter.second.dir.m.end());
+    vnames.push_back(oids[iter.first]);
+    vtruncated.push_back(iter.second.is_truncated);
+  }
+
+  // create a map to track the next candidate entry from each shard,
+  // if the entry from a specified shard is selected/erased, the next
+  // entry from that shard will be inserted for next round selection
+  map<string, size_t> candidates;
+  for (size_t i = 0; i < vcurrents.size(); ++i) {
+    if (vcurrents[i] != vends[i]) {
+      candidates[vcurrents[i]->first] = i;
+    }
+  }
+
+  // key - shard oid, value - encoded index corrections for that shard
+  map<string, bufferlist> updates;
+  uint32_t count = 0;
+  int pos = -1;
+  while (count < num_entries && !candidates.empty()) {
+    r = 0;
+    // Select the next one
+    pos = candidates.begin()->second;
+    const string& name = vcurrents[pos]->first;
+    struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
+
+    ldout(cct, 20) << "RGWRados::" << __func__ << " currently processing " <<
+      dirent.key << " from shard " << pos << dendl;
+
+    bool force_check =
+      force_check_filter && force_check_filter(dirent.key.name);
+
+    if ((!dirent.exists && !dirent.is_delete_marker()) ||
+        !dirent.pending_map.empty() ||
+        force_check) {
+      /* there are uncommitted ops. We need to check the current
+       * state, and if the tags are old we need to do clean-up as
+       * well. */
+      librados::IoCtx sub_ctx;
+      sub_ctx.dup(index_ctx);
+      r = check_disk_state(sub_ctx, bucket_info, dirent, dirent,
+                           updates[vnames[pos]]);
+      if (r < 0 && r != -ENOENT) {
+        return r;
+      }
+    } else {
+      r = 0;
+    }
+
+    if (r >= 0) {
+      ldout(cct, 10) << "RGWRados::" << __func__ << ": got " <<
+        dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+      m[name] = std::move(dirent);
+      ++count;
+    } else {
+      // r == -ENOENT: the entry is skipped
+      ldout(cct, 10) << "RGWRados::" << __func__ << ": skipping " <<
+        dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+    }
+
+    // refresh the candidates map
+    candidates.erase(candidates.begin());
+    if (++vcurrents[pos] != vends[pos]) { // note: pre-increment
+      candidates[vcurrents[pos]->first] = pos;
+    } else if (vtruncated[pos]) {
+      // once we exhaust one shard that is truncated, we need to stop,
+      // as we cannot be certain that one of the next entries needs to
+      // come from that shard; S3 and swift protocols allow returning
+      // fewer than what was requested
+      break;
+    }
+  } // while we haven't provided requested # of result entries
+
+  // suggest updates if there are any
+  for (auto& miter : updates) {
+    if (miter.second.length()) {
+      ObjectWriteOperation o;
+      cls_rgw_suggest_changes(o, miter.second);
+      // we don't care if we lose suggested updates, send them off blindly
+      AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+      index_ctx.aio_operate(miter.first, c, &o);
+      c->release();
+    }
+  } // updates loop
+
+  *is_truncated = false;
+  // check if all the returned entries are consumed or not
+  for (size_t i = 0; i < vcurrents.size(); ++i) {
+    if (vcurrents[i] != vends[i] || vtruncated[i]) {
+      *is_truncated = true;
+      break;
+    }
+  }
+
+  ldout(cct, 20) << "RGWRados::" << __func__ <<
+    ": returning, count=" << count << ", is_truncated=" << *is_truncated <<
+    dendl;
+
+  if (*is_truncated && count < num_entries) {
+    ldout(cct, 10) << "RGWRados::" << __func__ <<
+      ": INFO requested " << num_entries << " entries but returning " <<
+      count << ", which is truncated" << dendl;
+  }
+
+  if (pos >= 0) {
+    // vcurrents[pos] was advanced past the last entry examined; step back
+    *last_entry = std::move((--vcurrents[pos])->first);
+    ldout(cct, 20) << "RGWRados::" << __func__ <<
+      ": returning, last_entry=" << *last_entry << dendl;
+  } else {
+    ldout(cct, 20) << "RGWRados::" << __func__ <<
+      ": returning, last_entry NOT SET" << dendl;
+  }
+
+  return 0;
+}
+
+
+/**
+ * List bucket index entries without merging across shards: shards are read
+ * one after another, starting at the shard selected by shard_id or derived
+ * from the start marker, and entries are appended in the order each shard
+ * returns them.
+ *
+ * @param bucket_info        bucket whose index is listed
+ * @param shard_id           if >= 0, only this shard is read
+ * @param start              marker to resume from; empty to start at shard 0
+ * @param prefix             key prefix handed to the shard listing op
+ * @param num_entries        maximum number of entries to return
+ * @param list_versions      handed to the shard listing op
+ * @param ent_list           [out] cleared, then filled with the entries
+ * @param is_truncated       [out] true if an entry beyond num_entries was seen
+ * @param last_entry         [out, optional] key of the last entry added;
+ *                           only written when ent_list is not empty
+ * @param force_check_filter optional predicate on the entry name; a match
+ *                           forces a check_disk_state() call for the entry
+ * @return 0 on success, -EINVAL for an unparsable start marker, or another
+ *         negative error code
+ */
+int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
+ int shard_id,
+ const rgw_obj_index_key& start,
+ const string& prefix,
+ uint32_t num_entries,
+ bool list_versions,
+ std::vector<rgw_bucket_dir_entry>& ent_list,
+ bool *is_truncated,
+ rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name)) {
+ ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket <<
+ " start " << start.name << "[" << start.instance <<
+ "] num_entries " << num_entries << dendl;
+
+ ent_list.clear();
+ static MultipartMetaFilter multipart_meta_filter;
+
+ *is_truncated = false;
+ librados::IoCtx index_ctx;
+
+ // key - shard id, value - oid of that shard's index object
+ map<int, string> oids;
+ int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
+ if (r < 0)
+ return r;
+ const uint32_t num_shards = oids.size();
+
+ // pick the shard to start reading from
+ rgw_obj_index_key marker = start;
+ uint32_t current_shard;
+ if (shard_id >= 0) {
+ current_shard = shard_id;
+ } else if (start.empty()) {
+ current_shard = 0u;
+ } else {
+ // at this point we have a marker (start) that has something in
+ // it, so we need to get to the bucket shard index, so we can
+ // start reading from there
+
+ std::string key;
+ // test whether object name is a multipart meta name
+ if(! multipart_meta_filter.filter(start.name, key)) {
+ // if multipart_meta_filter fails, must be "regular" (i.e.,
+ // unadorned) and the name is the key
+ key = start.name;
+ }
+
+ // now convert the key (oid) to an rgw_obj_key since that will
+ // separate out the namespace, name, and instance
+ rgw_obj_key obj_key;
+ bool parsed = rgw_obj_key::parse_raw_oid(key, &obj_key);
+ if (!parsed) {
+ ldout(cct, 0) <<
+ "ERROR: RGWRados::cls_bucket_list_unordered received an invalid "
+ "start marker: '" << start << "'" << dendl;
+ return -EINVAL;
+ } else if (obj_key.name.empty()) {
+ // if the name is empty that means the object name came in with
+ // a namespace only, and therefore we need to start our scan at
+ // the first bucket index shard
+ current_shard = 0u;
+ } else {
+ // so now we have the key used to compute the bucket index shard
+ // and can extract the specific shard from it
+ current_shard = rgw_bucket_shard_index(obj_key.name, num_shards);
+ }
+ }
+
+ uint32_t count = 0u;
+ // key - shard oid, value - encoded index corrections for that shard
+ map<string, bufferlist> updates;
+ rgw_obj_index_key last_added_entry;
+ // "<=" rather than "<": once num_entries have been collected, one more
+ // read is made so that the presence of a further entry can set
+ // *is_truncated below
+ while (count <= num_entries &&
+ ((shard_id >= 0 && current_shard == uint32_t(shard_id)) ||
+ current_shard < num_shards)) {
+ const std::string& oid = oids[current_shard];
+ rgw_cls_list_ret result;
+
+ librados::ObjectReadOperation op;
+ cls_rgw_bucket_list_op(op, marker, prefix, num_entries,
+ list_versions, &result);
+ r = index_ctx.operate(oid, &op, nullptr);
+ if (r < 0)
+ return r;
+
+ for (auto& entry : result.dir.m) {
+ rgw_bucket_dir_entry& dirent = entry.second;
+
+ bool force_check = force_check_filter &&
+ force_check_filter(dirent.key.name);
+ if ((!dirent.exists && !dirent.is_delete_marker()) ||
+ !dirent.pending_map.empty() ||
+ force_check) {
+ /* there are uncommitted ops. We need to check the current state,
+ * and if the tags are old we need to do cleanup as well. */
+ librados::IoCtx sub_ctx;
+ sub_ctx.dup(index_ctx);
+ r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[oid]);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+ } else {
+ r = 0;
+ }
+
+ // at this point either r >=0 or r == -ENOENT
+ if (r >= 0) { // i.e., if r != -ENOENT
+ ldout(cct, 10) << "RGWRados::cls_bucket_list_unordered: got " <<
+ dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+
+ if (count < num_entries) {
+ marker = last_added_entry = dirent.key; // double assign
+ ent_list.emplace_back(std::move(dirent));
+ ++count;
+ } else {
+ // one entry more than requested exists: report truncation and stop
+ *is_truncated = true;
+ goto check_updates;
+ }
+ } else { // r == -ENOENT
+ // in the case of -ENOENT, make sure we're advancing marker
+ // for the next listing op issued against this shard
+ marker = dirent.key;
+ }
+ } // entry for loop
+
+ if (!result.is_truncated) {
+ // if we reached the end of the shard read next shard
+ ++current_shard;
+ marker = rgw_obj_index_key();
+ }
+ } // shard loop
+
+check_updates:
+
+ // suggest updates if there is any
+ map<string, bufferlist>::iterator miter = updates.begin();
+ for (; miter != updates.end(); ++miter) {
+ if (miter->second.length()) {
+ ObjectWriteOperation o;
+ cls_rgw_suggest_changes(o, miter->second);
+ // we don't care if we lose suggested updates, send them off blindly
+ AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+ index_ctx.aio_operate(miter->first, c, &o);
+ c->release();
+ }
+ }
+
+ if (last_entry && !ent_list.empty()) {
+ *last_entry = last_added_entry;
+ }
+
+ return 0;
+} // RGWRados::cls_bucket_list_unordered
+
+
+/**
+ * Add usage log records to the object `oid` in the zone's usage log pool.
+ *
+ * @return negative error if the object reference cannot be obtained,
+ *         otherwise the result of the write operation
+ */
+int RGWRados::cls_obj_usage_log_add(const string& oid,
+                                    rgw_usage_log_info& info)
+{
+  rgw_raw_obj usage_obj(svc.zone->get_zone_params().usage_log_pool, oid);
+
+  rgw_rados_ref ref;
+  const int ref_ret = get_raw_obj_ref(usage_obj, &ref);
+  if (ref_ret < 0) {
+    return ref_ret;
+  }
+
+  ObjectWriteOperation write_op;
+  cls_rgw_usage_log_add(write_op, info);
+
+  return ref.ioctx.operate(ref.obj.oid, &write_op);
+}
+
+/**
+ * Read usage log records from the object `oid` in the zone's usage log pool.
+ *
+ * *is_truncated is reset to false before the read is issued (but only after
+ * the object reference has been obtained successfully).
+ *
+ * @return negative error if the object reference cannot be obtained,
+ *         otherwise the result of cls_rgw_usage_log_read()
+ */
+int RGWRados::cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket,
+                                     uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
+                                     string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage,
+                                     bool *is_truncated)
+{
+  rgw_raw_obj usage_obj(svc.zone->get_zone_params().usage_log_pool, oid);
+
+  rgw_rados_ref ref;
+  const int ref_ret = get_raw_obj_ref(usage_obj, &ref);
+  if (ref_ret < 0) {
+    return ref_ret;
+  }
+
+  *is_truncated = false;
+
+  return cls_rgw_usage_log_read(ref.ioctx, ref.obj.oid, user, bucket,
+                                start_epoch, end_epoch, max_entries,
+                                read_iter, usage, is_truncated);
+}
+
+/**
+ * Trim usage log records in the object `oid` in the zone's usage log pool.
+ *
+ * @return negative error if the object reference cannot be obtained,
+ *         otherwise the result of cls_rgw_usage_log_trim()
+ */
+int RGWRados::cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket,
+                                     uint64_t start_epoch, uint64_t end_epoch)
+{
+  rgw_raw_obj usage_obj(svc.zone->get_zone_params().usage_log_pool, oid);
+
+  rgw_rados_ref ref;
+  const int ref_ret = get_raw_obj_ref(usage_obj, &ref);
+  if (ref_ret < 0) {
+    return ref_ret;
+  }
+
+  return cls_rgw_usage_log_trim(ref.ioctx, ref.obj.oid, user, bucket,
+                                start_epoch, end_epoch);
+}
+
+/**
+ * Clear the usage log stored in the object `oid` in the zone's usage log
+ * pool.
+ *
+ * @return negative error if the object reference cannot be obtained,
+ *         otherwise the result of the write operation
+ */
+int RGWRados::cls_obj_usage_log_clear(string& oid)
+{
+  rgw_raw_obj usage_obj(svc.zone->get_zone_params().usage_log_pool, oid);
+
+  rgw_rados_ref ref;
+  const int ref_ret = get_raw_obj_ref(usage_obj, &ref);
+  if (ref_ret < 0) {
+    return ref_ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  cls_rgw_usage_log_clear(write_op);
+  return ref.ioctx.operate(ref.obj.oid, &write_op);
+}
+
+
+/**
+ * Ask the bucket index to drop the given keys by sending a single
+ * "suggest changes" call containing one removal per key.
+ *
+ * @return negative error from opening the bucket index, otherwise the
+ *         result of the exec call
+ */
+int RGWRados::remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list)
+{
+  librados::IoCtx index_ctx;
+  string dir_oid;
+
+  // removals carry the "log op" flag when the zone logs data changes
+  uint8_t suggest_flag = (svc.zone->get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0);
+
+  int r = open_bucket_index(bucket_info, index_ctx, dir_oid);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist suggestions;
+  for (const auto& index_key : oid_list) {
+    rgw_bucket_dir_entry entry;
+    entry.key = index_key;
+    dout(2) << "RGWRados::remove_objs_from_index bucket=" << bucket_info.bucket << " obj=" << entry.key.name << ":" << entry.key.instance << dendl;
+    // ULLONG_MAX epoch, needed so that the objclass doesn't skip our request
+    entry.ver.epoch = (uint64_t)-1;
+    suggestions.append(CEPH_RGW_REMOVE | suggest_flag);
+    encode(entry, suggestions);
+  }
+
+  bufferlist out;
+  r = index_ctx.exec(dir_oid, RGW_CLASS, RGW_DIR_SUGGEST_CHANGES, suggestions, out);
+  return r;
+}
+
+/**
+ * Compare a bucket index entry against the current state of the object it
+ * names and encode a correction for the index into suggested_updates.
+ *
+ * If the object does not exist, a removal suggestion is encoded and -ENOENT
+ * is returned.  Otherwise `object` and `list_state` are refreshed from the
+ * object's state and attributes, an update suggestion is encoded, and 0 is
+ * returned.  The listing functions in this file pass the same entry for
+ * both list_state and object.
+ *
+ * @param io_ctx            IoCtx (taken by value) used for the locator key,
+ *                          pool id and last version
+ * @param bucket_info       bucket the entry belongs to
+ * @param list_state        [in,out] index entry as listed; rewritten
+ * @param object            [out] entry whose meta fields are refreshed
+ * @param suggested_updates [out] buffer the suggestion is appended to
+ * @return 0, -ENOENT, or a negative error from get_obj_state()
+ */
+int RGWRados::check_disk_state(librados::IoCtx io_ctx,
+ const RGWBucketInfo& bucket_info,
+ rgw_bucket_dir_entry& list_state,
+ rgw_bucket_dir_entry& object,
+ bufferlist& suggested_updates)
+{
+ const rgw_bucket& bucket = bucket_info.bucket;
+ uint8_t suggest_flag = (svc.zone->get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0);
+
+ std::string loc;
+
+ rgw_obj obj(bucket, list_state.key);
+
+ string oid;
+ get_obj_bucket_and_oid_loc(obj, oid, loc);
+
+ // a mismatch is only logged; the listed locator is the one used below
+ if (loc != list_state.locator) {
+ ldout(cct, 0) << "WARNING: generated locator (" << loc << ") is different from listed locator (" << list_state.locator << ")" << dendl;
+ }
+
+ io_ctx.locator_set_key(list_state.locator);
+
+ RGWObjState *astate = NULL;
+ RGWObjectCtx rctx(this);
+ int r = get_obj_state(&rctx, bucket_info, obj, &astate, false);
+ if (r < 0)
+ return r;
+
+ list_state.pending_map.clear(); // we don't need this and it inflates size
+ if (!astate->exists) {
+ /* object doesn't exist right now -- hopefully because it's
+ * marked as !exists and got deleted */
+ if (list_state.exists) {
+ /* FIXME: what should happen now? Work out if there are any
+ * non-bad ways this could happen (there probably are, but annoying
+ * to handle!) */
+ }
+ // encode a suggested removal of that key
+ list_state.ver.epoch = io_ctx.get_last_version();
+ list_state.ver.pool = io_ctx.get_id();
+ cls_rgw_encode_suggestion(CEPH_RGW_REMOVE, list_state, suggested_updates);
+ return -ENOENT;
+ }
+
+ string etag;
+ string content_type;
+ ACLOwner owner;
+
+ object.meta.size = astate->size;
+ object.meta.accounted_size = astate->accounted_size;
+ object.meta.mtime = astate->mtime;
+
+ // pull etag, content type and owner out of the object's attributes;
+ // each stays default-constructed when its attribute is absent
+ map<string, bufferlist>::iterator iter = astate->attrset.find(RGW_ATTR_ETAG);
+ if (iter != astate->attrset.end()) {
+ etag = rgw_bl_str(iter->second);
+ }
+ iter = astate->attrset.find(RGW_ATTR_CONTENT_TYPE);
+ if (iter != astate->attrset.end()) {
+ content_type = rgw_bl_str(iter->second);
+ }
+ iter = astate->attrset.find(RGW_ATTR_ACL);
+ if (iter != astate->attrset.end()) {
+ r = decode_policy(iter->second, &owner);
+ if (r < 0) {
+ // a policy decode failure is logged but does not fail the check
+ dout(0) << "WARNING: could not decode policy for object: " << obj << dendl;
+ }
+ }
+
+ if (astate->has_manifest) {
+ RGWObjManifest::obj_iterator miter;
+ RGWObjManifest& manifest = astate->manifest;
+ for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) {
+ const rgw_raw_obj& raw_loc = miter.get_location().get_raw_obj(this);
+ // note: this `loc` shadows the std::string `loc` declared above
+ rgw_obj loc;
+ rgw_raw_obj_to_obj(manifest.get_obj().bucket, raw_loc, &loc);
+
+ if (loc.key.ns == RGW_OBJ_NS_MULTIPART) {
+ dout(10) << "check_disk_state(): removing manifest part from index: " << loc << dendl;
+ r = delete_obj_index(loc, astate->mtime);
+ if (r < 0) {
+ // failure to remove a part's index entry is logged and ignored
+ dout(0) << "WARNING: delete_obj_index() returned r=" << r << dendl;
+ }
+ }
+ }
+ }
+
+ object.meta.etag = etag;
+ object.meta.content_type = content_type;
+ object.meta.owner = owner.get_id().to_str();
+ object.meta.owner_display_name = owner.get_display_name();
+
+ // encode suggested updates
+ list_state.ver.pool = io_ctx.get_id();
+ list_state.ver.epoch = astate->epoch;
+ list_state.meta.size = object.meta.size;
+ list_state.meta.accounted_size = object.meta.accounted_size;
+ list_state.meta.mtime = object.meta.mtime;
+ list_state.meta.category = main_category;
+ list_state.meta.etag = etag;
+ list_state.meta.content_type = content_type;
+ if (astate->obj_tag.length() > 0)
+ list_state.tag = astate->obj_tag.c_str();
+ list_state.meta.owner = owner.get_id().to_str();
+ list_state.meta.owner_display_name = owner.get_display_name();
+
+ list_state.exists = true;
+ cls_rgw_encode_suggestion(CEPH_RGW_UPDATE | suggest_flag, list_state, suggested_updates);
+ return 0;
+}
+
+/**
+ * Fetch the directory header of each selected bucket index shard and append
+ * them to `headers`.
+ *
+ * @return 0 on success, otherwise the negative error from opening the index
+ *         or from the header request
+ */
+int RGWRados::cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids)
+{
+  librados::IoCtx index_ctx;
+  map<int, string> index_oids;
+  map<int, struct rgw_cls_list_ret> shard_results;
+
+  int r = open_bucket_index(bucket_info, index_ctx, index_oids, shard_results, shard_id, bucket_instance_ids);
+  if (r < 0) {
+    return r;
+  }
+
+  r = CLSRGWIssueGetDirHeader(index_ctx, index_oids, shard_results, cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    return r;
+  }
+
+  // hand each shard's header over to the caller
+  for (auto& shard_result : shard_results) {
+    headers.push_back(std::move(shard_result.second.dir.header));
+  }
+  return 0;
+}
+
+/**
+ * Start an asynchronous directory-header request for each selected bucket
+ * index shard.
+ *
+ * A reference on `ctx` is taken for every request; if a request cannot be
+ * started that reference is dropped again and no further requests are
+ * issued.  *num_aio is incremented once per request started.
+ *
+ * @return the result of the last request attempted, or of opening the index
+ */
+int RGWRados::cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio)
+{
+  librados::IoCtx index_ctx;
+  map<int, string> index_oids;
+  int r = open_bucket_index(bucket_info, index_ctx, index_oids, shard_id);
+  if (r < 0) {
+    return r;
+  }
+
+  for (auto& shard_oid : index_oids) {
+    RGWGetDirHeader_CB *cb = static_cast<RGWGetDirHeader_CB*>(ctx->get());
+    r = cls_rgw_get_dir_header_async(index_ctx, shard_oid.second, cb);
+    if (r < 0) {
+      ctx->put();
+      break;
+    }
+    (*num_aio)++;
+  }
+  return r;
+}
+
+/**
+ * Read the user header from the user's buckets object in the user uid pool.
+ *
+ * @return 0 on success; otherwise the negative error from obtaining the
+ *         object reference, from the read, or reported by the class call
+ */
+int RGWRados::cls_user_get_header(const string& user_id, cls_user_header *header)
+{
+  string buckets_oid;
+  rgw_get_buckets_obj(user_id, buckets_oid);
+  rgw_raw_obj user_obj(svc.zone->get_zone_params().user_uid_pool, buckets_oid);
+
+  rgw_rados_ref ref;
+  int r = get_raw_obj_ref(user_obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectReadOperation read_op;
+  int rc;  // handed to the class call as its result slot
+  ::cls_user_get_header(read_op, header, &rc);
+
+  bufferlist ibl;
+  r = ref.ioctx.operate(ref.obj.oid, &read_op, &ibl);
+  if (r < 0) {
+    return r;
+  }
+  return (rc < 0) ? rc : 0;
+}
+
+/**
+ * Issue a "reset stats" call against the user's buckets object in the user
+ * uid pool.
+ *
+ * @return negative error if the object reference cannot be obtained,
+ *         otherwise the result of the write operation
+ */
+int RGWRados::cls_user_reset_stats(const string& user_id)
+{
+  string buckets_oid;
+  rgw_get_buckets_obj(user_id, buckets_oid);
+  rgw_raw_obj user_obj(svc.zone->get_zone_params().user_uid_pool, buckets_oid);
+
+  rgw_rados_ref ref;
+  const int ref_ret = get_raw_obj_ref(user_obj, &ref);
+  if (ref_ret < 0) {
+    return ref_ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  ::cls_user_reset_stats(write_op);
+  return ref.ioctx.operate(ref.obj.oid, &write_op);
+}
+
+/**
+ * Start an asynchronous read of the user header from the user's buckets
+ * object; `ctx` is passed through to the asynchronous class call.
+ *
+ * @return 0 if the request was started, otherwise a negative error
+ */
+int RGWRados::cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx)
+{
+  string buckets_oid;
+  rgw_get_buckets_obj(user_id, buckets_oid);
+  rgw_raw_obj user_obj(svc.zone->get_zone_params().user_uid_pool, buckets_oid);
+
+  rgw_rados_ref ref;
+  int r = get_raw_obj_ref(user_obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  r = ::cls_user_get_header_async(ref.ioctx, ref.obj.oid, ctx);
+  return (r < 0) ? r : 0;
+}
+
+/**
+ * Recompute a bucket's totals from its index headers and store them in the
+ * user object.
+ *
+ * Only the Main and MultiMeta categories are summed.  The resulting entry
+ * is written with cls_user_update_buckets(..., add = false).
+ *
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_user_sync_bucket_stats(rgw_raw_obj& user_obj,
+                                         const RGWBucketInfo& bucket_info)
+{
+  vector<rgw_bucket_dir_header> headers;
+  int r = cls_bucket_head(bucket_info, RGW_NO_SHARD, headers);
+  if (r < 0) {
+    ldout(cct, 20) << "cls_bucket_header() returned " << r << dendl;
+    return r;
+  }
+
+  cls_user_bucket_entry entry;
+  bucket_info.bucket.convert(&entry.bucket);
+
+  for (const auto& header : headers) {
+    for (const auto& category_stats : header.stats) {
+      const bool counted =
+        RGWObjCategory::Main == category_stats.first ||
+        RGWObjCategory::MultiMeta == category_stats.first;
+      if (!counted) {
+        continue;
+      }
+      const struct rgw_bucket_category_stats& totals = category_stats.second;
+      entry.size += totals.total_size;
+      entry.size_rounded += totals.total_size_rounded;
+      entry.count += totals.num_entries;
+    }
+  }
+
+  list<cls_user_bucket_entry> entries(1, entry);
+
+  r = cls_user_update_buckets(user_obj, entries, false);
+  if (r < 0) {
+    ldout(cct, 20) << "cls_user_update_buckets() returned " << r << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+/**
+ * Accumulate a bucket's totals from its index headers into `entry`.
+ *
+ * Unlike cls_user_sync_bucket_stats(), every category found in the headers
+ * is summed.  The totals are added to whatever `entry` already holds.
+ *
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry)
+{
+  RGWBucketInfo bucket_info;
+  auto obj_ctx = svc.sysobj->init_obj_ctx();
+  int ret = get_bucket_instance_info(obj_ctx, bucket, bucket_info, NULL, NULL);
+  if (ret < 0) {
+    return ret;
+  }
+
+  vector<rgw_bucket_dir_header> headers;
+  ret = cls_bucket_head(bucket_info, RGW_NO_SHARD, headers);
+  if (ret < 0) {
+    ldout(cct, 20) << "cls_bucket_header() returned " << ret << dendl;
+    return ret;
+  }
+
+  bucket.convert(&entry.bucket);
+
+  for (const auto& header : headers) {
+    for (const auto& category_stats : header.stats) {
+      const struct rgw_bucket_category_stats& totals = category_stats.second;
+      entry.size += totals.total_size;
+      entry.size_rounded += totals.total_size_rounded;
+      entry.count += totals.num_entries;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * List bucket entries recorded in a user object.
+ *
+ * The marker, limit and output arguments are passed straight through to
+ * the cls_user_bucket_list() read operation.
+ *
+ * @return 0 on success; otherwise the negative error from obtaining the
+ *         object reference, from the read, or reported by the class call
+ */
+int RGWRados::cls_user_list_buckets(rgw_raw_obj& obj,
+                                    const string& in_marker,
+                                    const string& end_marker,
+                                    const int max_entries,
+                                    list<cls_user_bucket_entry>& entries,
+                                    string * const out_marker,
+                                    bool * const truncated)
+{
+  rgw_rados_ref ref;
+  int r = get_raw_obj_ref(obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectReadOperation read_op;
+  int rc;  // handed to the class call as its result slot
+  cls_user_bucket_list(read_op, in_marker, end_marker, max_entries, entries, out_marker, truncated, &rc);
+
+  bufferlist ibl;
+  r = ref.ioctx.operate(ref.obj.oid, &read_op, &ibl);
+  if (r < 0) {
+    return r;
+  }
+  return (rc < 0) ? rc : 0;
+}
+
+/**
+ * Write a set of bucket entries into a user object via
+ * cls_user_set_buckets(); `add` is passed through unchanged.
+ *
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add)
+{
+  rgw_rados_ref ref;
+  int r = get_raw_obj_ref(obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  cls_user_set_buckets(write_op, entries, add);
+
+  r = ref.ioctx.operate(ref.obj.oid, &write_op);
+  return (r < 0) ? r : 0;
+}
+
+/**
+ * Resolve the user's buckets object in the user uid pool and run
+ * cls_user_complete_stats_sync() on it.
+ */
+int RGWRados::complete_sync_user_stats(const rgw_user& user_id)
+{
+  string buckets_oid;
+  rgw_get_buckets_obj(user_id, buckets_oid);
+
+  rgw_raw_obj user_obj(svc.zone->get_zone_params().user_uid_pool, buckets_oid);
+  return cls_user_complete_stats_sync(user_obj);
+}
+
+/**
+ * Issue the "complete stats sync" class call against a user object.
+ *
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_user_complete_stats_sync(rgw_raw_obj& obj)
+{
+  rgw_rados_ref ref;
+  int r = get_raw_obj_ref(obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  ::cls_user_complete_stats_sync(write_op);
+
+  r = ref.ioctx.operate(ref.obj.oid, &write_op);
+  return (r < 0) ? r : 0;
+}
+
+/**
+ * Add a single bucket entry to a user object; wraps the entry in a
+ * one-element list and calls cls_user_update_buckets(..., add = true).
+ */
+int RGWRados::cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry)
+{
+  list<cls_user_bucket_entry> single_entry(1, entry);
+  return cls_user_update_buckets(obj, single_entry, true);
+}
+
+/**
+ * Remove a bucket from a user object.
+ *
+ * Note that the object reference is obtained with get_system_obj_ref(),
+ * whereas the neighbouring cls_user_* helpers use get_raw_obj_ref().
+ *
+ * @return 0 on success, otherwise a negative error code
+ */
+int RGWRados::cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket)
+{
+  rgw_rados_ref ref;
+  int r = get_system_obj_ref(obj, &ref);
+  if (r < 0) {
+    return r;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  ::cls_user_remove_bucket(write_op, bucket);
+
+  r = ref.ioctx.operate(ref.obj.oid, &write_op);
+  return (r < 0) ? r : 0;
+}
+
+/**
+ * When dynamic resharding is enabled, ask the quota handler whether the
+ * bucket needs more index shards and, if so, queue it for resharding.
+ *
+ * @return 0 when dynamic resharding is disabled; a negative error from the
+ *         quota handler; the result of add_bucket_to_reshard() when
+ *         resharding is needed; otherwise the quota handler's result
+ */
+int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
+                                  RGWQuotaInfo& bucket_quota)
+{
+  const bool dynamic_resharding = cct->_conf.get_val<bool>("rgw_dynamic_resharding");
+  if (!dynamic_resharding) {
+    return 0;
+  }
+
+  // a shard count of 0 is treated as a single shard
+  int num_source_shards = 1;
+  if (bucket_info.num_shards > 0) {
+    num_source_shards = bucket_info.num_shards;
+  }
+
+  const uint64_t max_objs_per_shard =
+    cct->_conf.get_val<uint64_t>("rgw_max_objs_per_shard");
+
+  bool need_resharding = false;
+  uint32_t suggested_num_shards;
+  int ret =
+    quota_handler->check_bucket_shards(max_objs_per_shard, num_source_shards,
+                                       bucket_info.owner, bucket, bucket_quota,
+                                       1, need_resharding, &suggested_num_shards);
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (!need_resharding) {
+    return ret;
+  }
+
+  ldout(cct, 1) << __func__ << " bucket " << bucket.name << " need resharding " <<
+    " old num shards " << bucket_info.num_shards << " new num shards " << suggested_num_shards <<
+    dendl;
+  return add_bucket_to_reshard(bucket_info, suggested_num_shards);
+}
+
+/**
+ * Queue a bucket for resharding to new_num_shards index shards.
+ *
+ * The requested count is capped at get_max_bucket_shards(); nothing is
+ * queued (and 0 is returned) unless the capped count is larger than the
+ * current shard count, where a current count of 0 is treated as 1.
+ *
+ * @return 0 if no reshard is needed, otherwise the result of
+ *         RGWReshard::add()
+ */
+int RGWRados::add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards)
+{
+  RGWReshard reshard(this);
+
+  uint32_t num_source_shards = 1;
+  if (bucket_info.num_shards > 0) {
+    num_source_shards = bucket_info.num_shards;
+  }
+
+  new_num_shards = std::min(new_num_shards, get_max_bucket_shards());
+  const bool grows = new_num_shards > num_source_shards;
+  if (!grows) {
+    ldout(cct, 20) << "not resharding bucket name=" << bucket_info.bucket.name << ", orig_num=" << num_source_shards << ", new_num_shards=" << new_num_shards << dendl;
+    return 0;
+  }
+
+  cls_rgw_reshard_entry reshard_entry;
+  reshard_entry.time = real_clock::now();
+  reshard_entry.tenant = bucket_info.owner.tenant;
+  reshard_entry.bucket_name = bucket_info.bucket.name;
+  reshard_entry.bucket_id = bucket_info.bucket.bucket_id;
+  reshard_entry.old_num_shards = num_source_shards;
+  reshard_entry.new_num_shards = new_num_shards;
+
+  return reshard.add(reshard_entry);
+}
+
+/**
+ * Check user and bucket quota for writing obj_size bytes.
+ *
+ * @param check_size_only when true the check is made for 0 additional
+ *                        objects, otherwise for 1
+ * @return the quota handler's result
+ */
+int RGWRados::check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
+                          RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only)
+{
+  // if we only check size, then num_objs is 0
+  const int num_objs = check_size_only ? 0 : 1;
+  return quota_handler->check_quota(bucket_owner, bucket, user_quota, bucket_quota, num_objs, obj_size);
+}
+
+/**
+ * Fill `bucket_objects` with the oid(s) of a bucket's index object(s).
+ *
+ * @param bucket_oid_base oid of the unsharded index object; shard oids are
+ *                        "<bucket_oid_base>.<shard number>"
+ * @param num_shards      number of index shards; 0 means unsharded, in
+ *                        which case entry 0 is set to bucket_oid_base
+ * @param bucket_objects  [out] shard id -> oid; existing entries are kept
+ * @param shard_id        negative to produce every shard, otherwise the
+ *                        single shard to produce
+ */
+void RGWRados::get_bucket_index_objects(const string& bucket_oid_base,
+                                        uint32_t num_shards,
+                                        map<int, string>& bucket_objects,
+                                        int shard_id) {
+  if (!num_shards) {
+    bucket_objects[0] = bucket_oid_base;
+    return;
+  }
+
+  // Names are built with std::string rather than snprintf() into a
+  // variable-length array: VLAs are not standard C++, and "%d" did not
+  // match the unsigned loop counter.
+  if (shard_id < 0) {
+    for (uint32_t i = 0; i < num_shards; ++i) {
+      bucket_objects[i] = bucket_oid_base + "." + std::to_string(i);
+    }
+    return;
+  }
+
+  // NOTE(review): this accepts shard_id == num_shards, although the loop
+  // above only ever names shards 0..num_shards-1 -- confirm whether ">="
+  // was intended before tightening it.
+  if ((uint32_t)shard_id > num_shards) {
+    return;
+  }
+  bucket_objects[shard_id] = bucket_oid_base + "." + std::to_string(shard_id);
+}
+
+/**
+ * Fill `result` with the bucket instance id string(s) for a bucket.
+ *
+ * The unsharded id is "<name>:<bucket_id>"; for a sharded bucket each id
+ * additionally carries ":<shard number>".
+ *
+ * @param bucket_info bucket to describe
+ * @param shard_id    negative to produce every shard, otherwise the single
+ *                    shard to produce (ignored for an unsharded bucket)
+ * @param result      [out] shard id -> instance id; existing entries are kept
+ */
+void RGWRados::get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result)
+{
+  const rgw_bucket& bucket = bucket_info.bucket;
+  string plain_id = bucket.name + ":" + bucket.bucket_id;
+  if (!bucket_info.num_shards) {
+    (*result)[0] = plain_id;
+    return;
+  }
+
+  // The suffix is built with std::to_string(): the previous snprintf()
+  // used "%d" with an unsigned loop counter.
+  if (shard_id < 0) {
+    for (uint32_t i = 0; i < bucket_info.num_shards; ++i) {
+      (*result)[i] = plain_id + ":" + std::to_string(i);
+    }
+    return;
+  }
+
+  // NOTE(review): this accepts shard_id == num_shards, although the loop
+  // above only ever names shards 0..num_shards-1 -- confirm whether ">="
+  // was intended before tightening it.
+  if ((uint32_t)shard_id > bucket_info.num_shards) {
+    return;
+  }
+  (*result)[shard_id] = plain_id + ":" + std::to_string(shard_id);
+}
+
+/**
+ * Work out which bucket index shard an object key belongs to.
+ *
+ * @param bucket_info bucket whose hash type and shard count are used
+ * @param obj_key     key to hash
+ * @param shard_id    [out, optional] -1 for an unsharded bucket, otherwise
+ *                    the shard computed by rgw_bucket_shard_index()
+ * @return 0 on success, -ENOTSUP for a hash type other than MOD
+ */
+int RGWRados::get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key,
+                                  int *shard_id)
+{
+  if (bucket_info.bucket_index_shard_hash_type != RGWBucketInfo::MOD) {
+    return -ENOTSUP;
+  }
+
+  int target = -1;
+  if (bucket_info.num_shards) {
+    const uint32_t sid = rgw_bucket_shard_index(obj_key, bucket_info.num_shards);
+    target = (int)sid;
+  }
+
+  if (shard_id) {
+    *shard_id = target;
+  }
+  return 0;
+}
+
+/**
+ * Produce the oid of one bucket index object.
+ *
+ * @param bucket_oid_base oid of the unsharded index object
+ * @param num_shards      number of index shards; 0 means unsharded
+ * @param shard_id        shard to name (unused when num_shards is 0)
+ * @param bucket_obj      [out] bucket_oid_base when unsharded, otherwise
+ *                        "<bucket_oid_base>.<shard_id>"
+ */
+void RGWRados::get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
+                                       int shard_id, string *bucket_obj)
+{
+  if (!num_shards) {
+    // By default with no sharding, we use the bucket oid as itself
+    (*bucket_obj) = bucket_oid_base;
+  } else {
+    // built with std::string instead of snprintf() into a variable-length
+    // array, which is not standard C++
+    (*bucket_obj) = bucket_oid_base + "." + std::to_string(shard_id);
+  }
+}
+
+/**
+ * Produce the oid of the bucket index object that holds a given object key.
+ *
+ * @param bucket_oid_base oid of the unsharded index object
+ * @param obj_key         key to hash
+ * @param num_shards      number of index shards; 0 means unsharded
+ * @param hash_type       shard hash type; only MOD is supported
+ * @param bucket_obj      [out] bucket_oid_base when unsharded, otherwise
+ *                        "<bucket_oid_base>.<shard>"
+ * @param shard_id        [out, optional] -1 when unsharded, otherwise the
+ *                        shard computed by rgw_bucket_shard_index()
+ * @return 0 on success, -ENOTSUP for an unsupported hash type
+ */
+int RGWRados::get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+                                      uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard_id)
+{
+  int r = 0;
+  switch (hash_type) {
+    case RGWBucketInfo::MOD:
+      if (!num_shards) {
+        // By default with no sharding, we use the bucket oid as itself
+        (*bucket_obj) = bucket_oid_base;
+        if (shard_id) {
+          *shard_id = -1;
+        }
+      } else {
+        uint32_t sid = rgw_bucket_shard_index(obj_key, num_shards);
+        // built with std::string instead of snprintf() into a
+        // variable-length array (not standard C++); this also avoids
+        // printing the unsigned shard number with "%d"
+        (*bucket_obj) = bucket_oid_base + "." + std::to_string(sid);
+        if (shard_id) {
+          *shard_id = (int)sid;
+        }
+      }
+      break;
+    default:
+      r = -ENOTSUP;
+  }
+  return r;
+}
+
+/** Return the instance id reported by the rados handle. */
+uint64_t RGWRados::instance_id()
+{
+  librados::Rados *handle = get_rados_handle();
+  return handle->get_instance_id();
+}
+
+/** Increment the bucket id counter under bucket_id_lock and return the new value. */
+uint64_t RGWRados::next_bucket_id()
+{
+  Mutex::Locker guard(bucket_id_lock);
+  const uint64_t next_id = ++max_bucket_id;
+  return next_id;
+}
+
+/**
+ * Create an RGWRados store, apply the requested cache/thread settings and
+ * initialize it.
+ *
+ * @return the initialized store, or NULL if initialization failed (the
+ *         store is deleted in that case)
+ */
+RGWRados *RGWStoreManager::init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread,
+                                                 bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_cache)
+{
+  RGWRados *store = new RGWRados;
+
+  const int init_ret = (*store).set_use_cache(use_cache)
+                               .set_run_gc_thread(use_gc_thread)
+                               .set_run_lc_thread(use_lc_thread)
+                               .set_run_quota_threads(quota_threads)
+                               .set_run_sync_thread(run_sync_thread)
+                               .set_run_reshard_thread(run_reshard_thread)
+                               .initialize(cct);
+  if (init_ret >= 0) {
+    return store;
+  }
+
+  delete store;
+  return NULL;
+}
+
+ /**
+  * Allocate an RGWRados store and bring up only its services and the rados
+  * connection (init_svc(true) followed by init_rados()).
+  *
+  * @return the store on success; nullptr on failure. The store is deleted on
+  *         every failure path so that it is never leaked.
+  */
+ RGWRados *RGWStoreManager::init_raw_storage_provider(CephContext *cct)
+ {
+   RGWRados *store = new RGWRados;
+
+   store->set_context(cct);
+
+   int ret = store->init_svc(true);
+   if (ret < 0) {
+     ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
+     // release the store here as well; previously this path leaked it
+     delete store;
+     return nullptr;
+   }
+
+   if (store->init_rados() < 0) {
+     delete store;
+     return nullptr;
+   }
+
+   return store;
+ }
+
+ /* Finalize and delete a store; a null store is ignored. */
+ void RGWStoreManager::close_storage(RGWRados *store)
+ {
+   if (store) {
+     store->finalize();
+     delete store;
+   }
+ }
+
+ /* Return a pointer to the rados member. */
+ librados::Rados* RGWRados::get_rados_handle()
+ {
+   librados::Rados *handle = &rados;
+   return handle;
+ }
+
+ /**
+  * Submit an asynchronous removal of a raw object.
+  *
+  * On successful submission the completion is appended to `handles`.
+  *
+  * @return 0 when the operation was submitted, otherwise the negative error
+  *         from resolving the object reference or from aio_operate()
+  */
+ int RGWRados::delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles)
+ {
+   rgw_rados_ref obj_ref;
+   const int ref_ret = get_raw_obj_ref(obj, &obj_ref);
+   if (ref_ret < 0) {
+     lderr(cct) << "ERROR: failed to get obj ref with ret=" << ref_ret << dendl;
+     return ref_ret;
+   }
+
+   ObjectWriteOperation remove_op;
+   list<string> no_prefixes;
+   cls_rgw_remove_obj(remove_op, no_prefixes);
+
+   AioCompletion *completion = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+   const int op_ret = obj_ref.ioctx.aio_operate(obj_ref.obj.oid, completion, &remove_op);
+   if (op_ret < 0) {
+     lderr(cct) << "ERROR: AioOperate failed with ret=" << op_ret << dendl;
+     // submission failed, so nobody else will release the completion
+     completion->release();
+     return op_ret;
+   }
+
+   handles.push_back(completion);
+   return 0;
+ }
+
+ /**
+  * Submit an asynchronous removal of an object's head.
+  *
+  * When keep_index_consistent is set, a CLS_RGW_OP_DEL index operation is
+  * prepared before the removal is submitted and delete_obj_index() is called
+  * afterwards; `astate` is dereferenced on that path. On successful
+  * submission the completion is appended to `handles`.
+  *
+  * @return the last step's result: a negative error code on failure
+  */
+ int RGWRados::delete_obj_aio(const rgw_obj& obj,
+                              RGWBucketInfo& bucket_info, RGWObjState *astate,
+                              list<librados::AioCompletion *>& handles, bool keep_index_consistent)
+ {
+   rgw_rados_ref head_ref;
+   int rc = get_obj_head_ref(bucket_info, obj, &head_ref);
+   if (rc < 0) {
+     lderr(cct) << "ERROR: failed to get obj ref with ret=" << rc << dendl;
+     return rc;
+   }
+
+   if (keep_index_consistent) {
+     RGWRados::Bucket bucket_op(this, bucket_info);
+     RGWRados::Bucket::UpdateIndex index_update(&bucket_op, obj);
+
+     rc = index_update.prepare(CLS_RGW_OP_DEL, &astate->write_tag);
+     if (rc < 0) {
+       lderr(cct) << "ERROR: failed to prepare index op with ret=" << rc << dendl;
+       return rc;
+     }
+   }
+
+   ObjectWriteOperation remove_op;
+   list<string> no_prefixes;
+   cls_rgw_remove_obj(remove_op, no_prefixes);
+
+   AioCompletion *completion = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+   rc = head_ref.ioctx.aio_operate(head_ref.obj.oid, completion, &remove_op);
+   if (rc < 0) {
+     lderr(cct) << "ERROR: AioOperate failed with ret=" << rc << dendl;
+     completion->release();
+     return rc;
+   }
+
+   handles.push_back(completion);
+
+   if (!keep_index_consistent) {
+     return rc;
+   }
+
+   rc = delete_obj_index(obj, astate->mtime);
+   if (rc < 0) {
+     lderr(cct) << "ERROR: failed to delete obj index with ret=" << rc << dendl;
+   }
+   return rc;
+ }
+
+ /**
+  * Decode the RGW_ATTR_COMPRESSION attribute, if present, into cs_info.
+  *
+  * @param attrs            attribute set to search
+  * @param need_decompress  [out] true only when the attribute exists and its
+  *                         compression_type is not "none"
+  * @param cs_info          [out] decoded compression info
+  * @return 0 on success (including when the attribute is absent); -EIO when
+  *         decoding fails or the decoded info has no blocks
+  */
+ int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info) {
+   map<string, bufferlist>::iterator attr = attrs.find(RGW_ATTR_COMPRESSION);
+   if (attr == attrs.end()) {
+     need_decompress = false;
+     return 0;
+   }
+
+   auto bliter = attr->second.cbegin();
+   try {
+     decode(cs_info, bliter);
+   } catch (buffer::error& err) {
+     return -EIO;
+   }
+
+   if (cs_info.blocks.empty()) {
+     return -EIO;
+   }
+
+   need_decompress = (cs_info.compression_type != "none");
+   return 0;
+ }
+
+ /**
+  * Dispatch a cache command: "cache list", "cache inspect", "cache erase"
+  * or "cache zap".
+  *
+  * @param command  command name
+  * @param cmdmap   command arguments ("filter" is optional for list; "target"
+  *                 is read with at() for inspect and erase)
+  * @param format   output format passed to Formatter::create()
+  * @param out      receives formatted output or an error message
+  * @return true when the command succeeded, false otherwise (including for
+  *         an unrecognized command)
+  */
+ bool RGWRados::call(std::string_view command, const cmdmap_t& cmdmap,
+                     std::string_view format, bufferlist& out)
+ {
+   if (command == "cache list"sv) {
+     std::optional<std::string> filter;
+     const auto filter_it = cmdmap.find("filter");
+     if (filter_it != cmdmap.cend()) {
+       filter = boost::get<std::string>(filter_it->second);
+     }
+     std::unique_ptr<Formatter> fmt(ceph::Formatter::create(format, "table"));
+     if (!fmt) {
+       out.append("Unable to create Formatter.\n");
+       return false;
+     }
+     fmt->open_array_section("cache_entries");
+     call_list(filter, fmt.get());
+     fmt->close_section();
+     fmt->flush(out);
+     return true;
+   }
+
+   if (command == "cache inspect"sv) {
+     std::unique_ptr<Formatter> fmt(ceph::Formatter::create(format, "json-pretty"));
+     if (!fmt) {
+       out.append("Unable to create Formatter.\n");
+       return false;
+     }
+     // the target is only looked up once a formatter exists
+     const auto& target = boost::get<std::string>(cmdmap.at("target"));
+     if (!call_inspect(target, fmt.get())) {
+       out.append("Unable to find entry "s + target + ".\n");
+       return false;
+     }
+     fmt->flush(out);
+     return true;
+   }
+
+   if (command == "cache erase"sv) {
+     const auto& target = boost::get<std::string>(cmdmap.at("target"));
+     if (!call_erase(target)) {
+       out.append("Unable to find entry "s + target + ".\n");
+       return false;
+     }
+     return true;
+   }
+
+   if (command == "cache zap"sv) {
+     call_zap();
+     return true;
+   }
+
+   return false;
+ }
+
+ /* Forward a cache listing request to the cache service; no-op without one. */
+ void RGWRados::call_list(const std::optional<std::string>& s,
+                          ceph::Formatter *f)
+ {
+   if (svc.cache) {
+     svc.cache->call_list(s, f);
+   }
+ }
+
+ /* Forward a cache inspect request; returns false when there is no cache service. */
+ bool RGWRados::call_inspect(const std::string& s, Formatter *f)
+ {
+   if (svc.cache) {
+     return svc.cache->call_inspect(s, f);
+   }
+   return false;
+ }
+
+ /* Forward a cache erase request; returns false when there is no cache service. */
+ bool RGWRados::call_erase(const std::string& s) {
+   if (svc.cache) {
+     return svc.cache->call_erase(s);
+   }
+   return false;
+ }
+
+ /*
+  * Forward a cache zap request to the cache service; no-op without one.
+  *
+  * The guard used to be inverted: it returned when the cache service existed
+  * and dereferenced it when it was null. It now matches call_list(),
+  * call_inspect() and call_erase().
+  */
+ void RGWRados::call_zap() {
+   if (!svc.cache) {
+     return;
+   }
+   svc.cache->call_zap();
+ }
+
+ /* Name of the object holding a user's MFA data: "user:" followed by the user string. */
+ string RGWRados::get_mfa_oid(const rgw_user& user)
+ {
+   string oid("user:");
+   oid += user.to_str();
+   return oid;
+ }
+
+ /* Resolve a reference to the user's MFA object in the zone's otp_pool. */
+ int RGWRados::get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref)
+ {
+   rgw_raw_obj mfa_obj(svc.zone->get_zone_params().otp_pool, get_mfa_oid(user));
+   return get_system_obj_ref(mfa_obj, ref);
+ }
+
+ /**
+  * Check a pin against one of the user's OTP entries.
+  *
+  * @return 0 when the check result is OTP_CHECK_SUCCESS, -EACCES for any
+  *         other result, or the negative error from resolving the MFA object
+  *         or from the OTP check call
+  */
+ int RGWRados::check_mfa(const rgw_user& user, const string& otp_id, const string& pin)
+ {
+   rgw_rados_ref mfa_ref;
+   int rc = get_mfa_ref(user, &mfa_ref);
+   if (rc < 0) {
+     return rc;
+   }
+
+   rados::cls::otp::otp_check_t check;
+   rc = rados::cls::otp::OTP::check(cct, mfa_ref.ioctx, mfa_ref.obj.oid, otp_id, pin, &check);
+   if (rc < 0) {
+     return rc;
+   }
+
+   ldout(cct, 20) << "OTP check, otp_id=" << otp_id << " result=" << (int)check.result << dendl;
+
+   if (check.result == rados::cls::otp::OTP_CHECK_SUCCESS) {
+     return 0;
+   }
+   return -EACCES;
+ }
+
+ /**
+  * Add version and mtime handling to an MFA write operation.
+  *
+  * Works on a local copy of *objv_tracker (or a default tracker when it is
+  * null). If the copy has no write version tag, one is either generated (no
+  * read version tag) or derived from the read version with ver incremented.
+  */
+ void RGWRados::prepare_mfa_write(librados::ObjectWriteOperation *op,
+                                  RGWObjVersionTracker *objv_tracker,
+                                  const ceph::real_time& mtime)
+ {
+   RGWObjVersionTracker tracker;
+   if (objv_tracker) {
+     tracker = *objv_tracker;
+   }
+
+   const bool need_write_version = tracker.write_version.tag.empty();
+   if (need_write_version && tracker.read_version.tag.empty()) {
+     tracker.generate_new_write_ver(cct);
+   } else if (need_write_version) {
+     tracker.write_version = tracker.read_version;
+     tracker.write_version.ver++;
+   }
+
+   tracker.prepare_op_for_write(op);
+
+   struct timespec mtime_ts = real_clock::to_timespec(mtime);
+   op->mtime2(&mtime_ts);
+ }
+
+ /**
+  * Create an OTP entry in the user's MFA object.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the MFA
+  *         object or from the write operation
+  */
+ int RGWRados::create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
+                          RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime)
+ {
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_mfa_ref(user, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   librados::ObjectWriteOperation write_op;
+   prepare_mfa_write(&write_op, objv_tracker, mtime);
+   rados::cls::otp::OTP::create(&write_op, config);
+
+   const int op_ret = mfa_ref.ioctx.operate(mfa_ref.obj.oid, &write_op);
+   if (op_ret >= 0) {
+     return 0;
+   }
+
+   ldout(cct, 20) << "OTP create, otp_id=" << config.id << " result=" << (int)op_ret << dendl;
+   return op_ret;
+ }
+
+ /**
+  * Remove the OTP entry `id` from the user's MFA object.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the MFA
+  *         object or from the write operation
+  */
+ int RGWRados::remove_mfa(const rgw_user& user, const string& id,
+                          RGWObjVersionTracker *objv_tracker,
+                          const ceph::real_time& mtime)
+ {
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_mfa_ref(user, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   librados::ObjectWriteOperation write_op;
+   prepare_mfa_write(&write_op, objv_tracker, mtime);
+   rados::cls::otp::OTP::remove(&write_op, id);
+
+   const int op_ret = mfa_ref.ioctx.operate(mfa_ref.obj.oid, &write_op);
+   if (op_ret >= 0) {
+     return 0;
+   }
+
+   ldout(cct, 20) << "OTP remove, otp_id=" << id << " result=" << (int)op_ret << dendl;
+   return op_ret;
+ }
+
+ /**
+  * Fetch the OTP entry `id` from the user's MFA object into *result.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the MFA
+  *         object or from the OTP get call
+  */
+ int RGWRados::get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result)
+ {
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_mfa_ref(user, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   const int get_ret = rados::cls::otp::OTP::get(nullptr, mfa_ref.ioctx, mfa_ref.obj.oid, id, result);
+   // non-negative results are normalized to 0
+   return (get_ret < 0) ? get_ret : 0;
+ }
+
+ /**
+  * Fetch all OTP entries of the user's MFA object into *result.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the MFA
+  *         object or from the OTP get_all call
+  */
+ int RGWRados::list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result)
+ {
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_mfa_ref(user, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   const int get_ret = rados::cls::otp::OTP::get_all(nullptr, mfa_ref.ioctx, mfa_ref.obj.oid, result);
+   // non-negative results are normalized to 0
+   return (get_ret < 0) ? get_ret : 0;
+ }
+
+ /**
+  * Query the OTP class for the current time via the user's MFA object.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the MFA
+  *         object or from the get_current_time call
+  */
+ int RGWRados::otp_get_current_time(const rgw_user& user, ceph::real_time *result)
+ {
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_mfa_ref(user, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   const int time_ret = rados::cls::otp::OTP::get_current_time(mfa_ref.ioctx, mfa_ref.obj.oid, result);
+   // non-negative results are normalized to 0
+   return (time_ret < 0) ? time_ret : 0;
+ }
+
+ /**
+  * Write a list of OTP entries to the MFA object `oid` in the zone's otp_pool.
+  *
+  * With reset_obj set, the operation first removes the object (with the
+  * FAILOK flag on that step) and re-creates it before setting the entries.
+  *
+  * @return 0 on success, otherwise the negative error from resolving the
+  *         object or from the write operation
+  */
+ int RGWRados::set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries,
+                       bool reset_obj, RGWObjVersionTracker *objv_tracker,
+                       const real_time& mtime)
+ {
+   rgw_raw_obj mfa_obj(svc.zone->get_zone_params().otp_pool, oid);
+   rgw_rados_ref mfa_ref;
+   const int ref_ret = get_system_obj_ref(mfa_obj, &mfa_ref);
+   if (ref_ret < 0) {
+     return ref_ret;
+   }
+
+   librados::ObjectWriteOperation write_op;
+   if (reset_obj) {
+     write_op.remove();
+     write_op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
+     write_op.create(false);
+   }
+   prepare_mfa_write(&write_op, objv_tracker, mtime);
+   rados::cls::otp::OTP::set(&write_op, entries);
+
+   const int op_ret = mfa_ref.ioctx.operate(mfa_ref.obj.oid, &write_op);
+   if (op_ret >= 0) {
+     return 0;
+   }
+
+   ldout(cct, 20) << "OTP set entries.size()=" << entries.size() << " result=" << (int)op_ret << dendl;
+   return op_ret;
+ }
+
+ /**
+  * Read all OTP entries from the MFA object `oid` in the zone's otp_pool.
+  *
+  * @param oid           name of the MFA object
+  * @param result        [out] receives the entries
+  * @param objv_tracker  optional; when non-null it adds its version handling
+  *                      to the read operation
+  * @param pmtime        optional; when non-null receives the object's mtime
+  * @return 0 on success, otherwise the negative error from resolving the
+  *         object or from the OTP get_all call
+  */
+ int RGWRados::list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
+                        RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime)
+ {
+   rgw_raw_obj obj(svc.zone->get_zone_params().otp_pool, oid);
+   rgw_rados_ref ref;
+   int r = get_system_obj_ref(obj, &ref);
+   if (r < 0) {
+     return r;
+   }
+   librados::ObjectReadOperation op;
+   struct timespec mtime_ts = {};
+   if (pmtime) {
+     op.stat2(nullptr, &mtime_ts, nullptr);
+   }
+   // tolerate a null tracker, as the MFA write path (prepare_mfa_write) does
+   if (objv_tracker) {
+     objv_tracker->prepare_op_for_read(&op);
+   }
+   r = rados::cls::otp::OTP::get_all(&op, ref.ioctx, ref.obj.oid, result);
+   if (r < 0) {
+     return r;
+   }
+   if (pmtime) {
+     *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+   }
+
+   return 0;
+ }
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
new file mode 100644
index 00000000..395c574f
--- /dev/null
+++ b/src/rgw/rgw_rados.h
@@ -0,0 +1,2633 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGWRADOS_H
+#define CEPH_RGWRADOS_H
+
+#include <functional>
+#include <iomanip>
+
+#include "include/rados/librados.hpp"
+#include "include/Context.h"
+#include "common/admin_socket.h"
+#include "common/RefCountedObj.h"
+#include "common/RWLock.h"
+#include "common/ceph_time.h"
+#include "common/lru_map.h"
+#include "common/ceph_json.h"
+#include "rgw_common.h"
+#include "cls/rgw/cls_rgw_types.h"
+#include "cls/version/cls_version_types.h"
+#include "cls/log/cls_log_types.h"
+#include "cls/timeindex/cls_timeindex_types.h"
+#include "cls/otp/cls_otp_types.h"
+#include "rgw_log.h"
+#include "rgw_metadata.h"
+#include "rgw_meta_sync_status.h"
+#include "rgw_period_puller.h"
+#include "rgw_sync_module.h"
+#include "rgw_sync_log_trim.h"
+#include "rgw_service.h"
+
+#include "services/svc_rados.h"
+#include "services/svc_zone.h"
+
+class RGWWatcher;
+class SafeTimer;
+class ACLOwner;
+class RGWGC;
+class RGWMetaNotifier;
+class RGWDataNotifier;
+class RGWLC;
+class RGWObjectExpirer;
+class RGWMetaSyncProcessorThread;
+class RGWDataSyncProcessorThread;
+class RGWSyncLogTrimThread;
+class RGWSyncTraceManager;
+struct RGWZoneGroup;
+struct RGWZoneParams;
+class RGWReshard;
+class RGWReshardWait;
+
+class RGWSysObjectCtx;
+
+/* flags for put_obj_meta() */
+#define PUT_OBJ_CREATE 0x01
+#define PUT_OBJ_EXCL 0x02
+#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
+
+#define RGW_OBJ_NS_MULTIPART "multipart"
+#define RGW_OBJ_NS_SHADOW "shadow"
+
+#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
+
+#define RGW_NO_SHARD -1
+
+#define RGW_SHARDS_PRIME_0 7877
+#define RGW_SHARDS_PRIME_1 65521
+
+extern const std::string MP_META_SUFFIX;
+
+ // only called by rgw_shard_id and rgw_bucket_shard_index
+ // Reduces a hash value first modulo a prime (RGW_SHARDS_PRIME_0 when
+ // max_shards does not exceed it, RGW_SHARDS_PRIME_1 otherwise) and then
+ // modulo max_shards.
+ static inline int rgw_shards_mod(unsigned hval, int max_shards)
+ {
+   const unsigned prime =
+     (max_shards <= RGW_SHARDS_PRIME_0) ? RGW_SHARDS_PRIME_0 : RGW_SHARDS_PRIME_1;
+   return hval % prime % max_shards;
+ }
+
+ // used for logging and tagging
+ // Maps a key to a shard: hash the key, then reduce with rgw_shards_mod().
+ static inline int rgw_shard_id(const string& key, int max_shards)
+ {
+   const unsigned key_hash = ceph_str_hash_linux(key.c_str(), key.size());
+   return rgw_shards_mod(key_hash, max_shards);
+ }
+
+ // used for bucket indices
+ // Hashes the key, XORs the hash's low byte into its top byte, then reduces
+ // the result with rgw_shards_mod().
+ static inline uint32_t rgw_bucket_shard_index(const std::string& key,
+                                               int num_shards) {
+   const uint32_t key_hash = ceph_str_hash_linux(key.c_str(), key.size());
+   const uint32_t low_byte = key_hash & 0xFF;
+   const uint32_t mixed = key_hash ^ (low_byte << 24);
+   return rgw_shards_mod(mixed, num_shards);
+ }
+
+ /* Returns RGW_SHARDS_PRIME_1. */
+ static inline int rgw_shards_max()
+ {
+   const int max_shards = RGW_SHARDS_PRIME_1;
+   return max_shards;
+ }
+
+ /*
+  * Sets oid to "<bucket.marker>_<orig_oid>". When either the marker or
+  * orig_oid is empty, oid is set to orig_oid unchanged.
+  */
+ static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
+ {
+   const bool nothing_to_prepend = bucket.marker.empty() || orig_oid.empty();
+   if (nothing_to_prepend) {
+     oid = orig_oid;
+     return;
+   }
+
+   oid = bucket.marker;
+   oid += "_";
+   oid += orig_oid;
+ }
+
+ /*
+  * Computes the marker-prefixed oid of obj and, when the object's key has a
+  * non-empty locator, the marker-prefixed locator; otherwise locator is cleared.
+  */
+ static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
+ {
+   const rgw_bucket& owner = obj.bucket;
+   prepend_bucket_marker(owner, obj.get_oid(), oid);
+
+   const string& key_loc = obj.key.get_loc();
+   if (key_loc.empty()) {
+     locator.clear();
+     return;
+   }
+   prepend_bucket_marker(owner, key_loc, locator);
+ }
+
+int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
+
+ /*
+  * Converts a raw object into an rgw_obj of the given bucket.
+  *
+  * Everything after the first '_' in the raw oid is parsed with
+  * rgw_obj_key::parse_raw_oid() into obj->key.
+  *
+  * Returns false when the raw oid contains no '_' or parsing fails; obj->bucket
+  * is only assigned on success.
+  */
+ static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
+ {
+   // compare against npos directly rather than narrowing the result to a
+   // signed type and testing for a negative value
+   const auto pos = raw_obj.oid.find('_');
+   if (pos == std::string::npos) {
+     return false;
+   }
+
+   if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
+     return false;
+   }
+   obj->bucket = bucket;
+
+   return true;
+ }
+
+
+ /* Pairs a bucket with a placement rule. */
+ struct rgw_bucket_placement {
+ rgw_placement_rule placement_rule;
+ rgw_bucket bucket;
+
+ // defined out of line
+ void dump(Formatter *f) const;
+ };
+
+ /*
+  * Holds either an rgw_obj or an rgw_raw_obj, together with a placement rule.
+  * is_raw records which of the two members is the active one.
+  */
+ class rgw_obj_select {
+ rgw_placement_rule placement_rule;
+ rgw_obj obj;
+ rgw_raw_obj raw_obj;
+ bool is_raw;
+
+ public:
+ rgw_obj_select() : is_raw(false) {}
+ explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
+ explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
+ // copies the placement rule and only the member selected by rhs.is_raw
+ rgw_obj_select(const rgw_obj_select& rhs) {
+ placement_rule = rhs.placement_rule;
+ is_raw = rhs.is_raw;
+ if (is_raw) {
+ raw_obj = rhs.raw_obj;
+ } else {
+ obj = rhs.obj;
+ }
+ }
+
+ // both overloads are defined out of line
+ rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
+ rgw_raw_obj get_raw_obj(RGWRados *store) const;
+
+ // assigning an rgw_obj switches to the non-raw member; placement_rule is untouched
+ rgw_obj_select& operator=(const rgw_obj& rhs) {
+ obj = rhs;
+ is_raw = false;
+ return *this;
+ }
+
+ // assigning an rgw_raw_obj switches to the raw member; placement_rule is untouched
+ rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
+ raw_obj = rhs;
+ is_raw = true;
+ return *this;
+ }
+
+ void set_placement_rule(const rgw_placement_rule& rule) {
+ placement_rule = rule;
+ }
+ void dump(Formatter *f) const;
+ };
+
+ /*
+  * One entry of RGWCompressionInfo::blocks: two offsets and a length.
+  * NOTE(review): presumably old_ofs/new_ofs are the offsets before/after
+  * compression -- confirm against the compression code.
+  */
+ struct compression_block {
+ uint64_t old_ofs;
+ uint64_t new_ofs;
+ uint64_t len;
+
+ // fields are encoded in the order old_ofs, new_ofs, len
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(old_ofs, bl);
+ encode(new_ofs, bl);
+ encode(len, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // must read the fields in the same order encode() writes them
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(old_ofs, bl);
+ decode(new_ofs, bl);
+ decode(len, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ };
+WRITE_CLASS_ENCODER(compression_block)
+
+ /*
+  * Compression metadata of an object: the compression type name, an original
+  * size and a list of compression blocks. Default-constructed with type "none"
+  * and orig_size 0.
+  */
+ struct RGWCompressionInfo {
+ string compression_type;
+ uint64_t orig_size;
+ vector<compression_block> blocks;
+
+ RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
+ RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
+ orig_size(cs_info.orig_size),
+ blocks(cs_info.blocks) {}
+
+ // fields are encoded in the order compression_type, orig_size, blocks
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(compression_type, bl);
+ encode(orig_size, bl);
+ encode(blocks, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // must read the fields in the same order encode() writes them
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(compression_type, bl);
+ decode(orig_size, bl);
+ decode(blocks, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ };
+WRITE_CLASS_ENCODER(RGWCompressionInfo)
+
+int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
+
+ /* OLH info: a target object and a `removed` flag (false by default). */
+ struct RGWOLHInfo {
+ rgw_obj target;
+ bool removed;
+
+ RGWOLHInfo() : removed(false) {}
+
+ // fields are encoded in the order target, removed
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(target, bl);
+ encode(removed, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // must read the fields in the same order encode() writes them
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(target, bl);
+ decode(removed, bl);
+ DECODE_FINISH(bl);
+ }
+ static void generate_test_instances(list<RGWOLHInfo*>& o);
+ void dump(Formatter *f) const;
+ };
+WRITE_CLASS_ENCODER(RGWOLHInfo)
+
+ /* Pending OLH info; carries a single timestamp. */
+ struct RGWOLHPendingInfo {
+ ceph::real_time time;
+
+ RGWOLHPendingInfo() {}
+
+ // only `time` is encoded
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(time, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(time, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ };
+WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
+
+ /* Usage log entries keyed by timestamp. */
+ struct RGWUsageBatch {
+   map<ceph::real_time, rgw_usage_log_entry> m;
+
+   /*
+    * Aggregates `entry` into the slot for timestamp `t`, creating the slot if
+    * needed. *account is set to true when the slot did not exist before this
+    * call and to false otherwise.
+    */
+   void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
+     // one lookup: try_emplace default-constructs the slot only when it is missing
+     auto [slot, inserted] = m.try_emplace(t);
+     *account = inserted;
+     slot->second.aggregate(entry);
+   }
+ };
+
+ /* Usage iteration state: a read iterator string and an index that starts at 0. */
+ struct RGWUsageIter {
+ string read_iter;
+ uint32_t index;
+
+ RGWUsageIter() : index(0) {}
+ };
+
+ /* Abstract callback interface for receiving object data. */
+ class RGWGetDataCB {
+ public:
+ // implementations receive a buffer plus an offset and length
+ // NOTE(review): presumably bl_ofs/bl_len select the range within bl -- confirm against callers
+ virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
+ RGWGetDataCB() {}
+ virtual ~RGWGetDataCB() {}
+ };
+
+ /* Describes a range to clone: a source object, source/destination offsets and a length. */
+ struct RGWCloneRangeInfo {
+ rgw_obj src;
+ off_t src_ofs;
+ off_t dst_ofs;
+ uint64_t len;
+ };
+
+ /* One explicitly listed part of an object manifest. */
+ struct RGWObjManifestPart {
+ rgw_obj loc; /* the object where the data is located */
+ uint64_t loc_ofs; /* the offset at that object where the data is located */
+ uint64_t size; /* the part size */
+
+ RGWObjManifestPart() : loc_ofs(0), size(0) {}
+
+ // fields are encoded in the order loc, loc_ofs, size
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 2, bl);
+ encode(loc, bl);
+ encode(loc_ofs, bl);
+ encode(size, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // must read the fields in the same order encode() writes them
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
+ decode(loc, bl);
+ decode(loc_ofs, bl);
+ decode(size, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWObjManifestPart*>& o);
+ };
+WRITE_CLASS_ENCODER(RGWObjManifestPart)
+
+/*
+ The manifest defines a set of rules for structuring the object parts.
+ There are a few terms to note:
+ - head: the head part of the object, which is the part that contains
+ the first chunk of data. An object might not have a head (as in the
+ case of multipart-part objects).
+ - stripe: data portion of a single rgw object that resides on a single
+ rados object.
+ - part: a collection of stripes that make a contiguous part of an
+ object. A regular object will only have one part (although might have
+ many stripes), a multipart object might have many parts. Each part
+ has a fixed stripe size, although the last stripe of a part might
+ be smaller than that. Consecutive parts may be merged if their stripe
+ value is the same.
+*/
+
+ /* A manifest rule; RGWObjManifest keeps these in a map keyed by a uint64_t. */
+ struct RGWObjManifestRule {
+ uint32_t start_part_num;
+ uint64_t start_ofs;
+ uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
+ uint64_t stripe_max_size; /* underlying obj max size */
+ string override_prefix;
+
+ RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
+ RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
+ start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
+
+ // override_prefix is always written, as the last field
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(start_part_num, bl);
+ encode(start_ofs, bl);
+ encode(part_size, bl);
+ encode(stripe_max_size, bl);
+ encode(override_prefix, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(start_part_num, bl);
+ decode(start_ofs, bl);
+ decode(part_size, bl);
+ decode(stripe_max_size, bl);
+ // override_prefix is only read from encodings with struct_v >= 2
+ if (struct_v >= 2)
+ decode(override_prefix, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ };
+WRITE_CLASS_ENCODER(RGWObjManifestRule)
+
+class RGWObjManifest {
+protected:
+ bool explicit_objs; /* old manifest? */
+ map<uint64_t, RGWObjManifestPart> objs;
+
+ uint64_t obj_size;
+
+ rgw_obj obj;
+ uint64_t head_size;
+ rgw_placement_rule head_placement_rule;
+
+ uint64_t max_head_size;
+ string prefix;
+ rgw_bucket_placement tail_placement; /* might be different than the original bucket,
+ as object might have been copied across pools */
+ map<uint64_t, RGWObjManifestRule> rules;
+
+ string tail_instance; /* tail object's instance */
+
+ void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
+ int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
+ void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
+
+ // re-seek the cached iterators: begin_iter to offset 0, end_iter to obj_size
+ void update_iterators() {
+ begin_iter.seek(0);
+ end_iter.seek(obj_size);
+ }
+public:
+
+ RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
+ begin_iter(this), end_iter(this) {}
+ RGWObjManifest(const RGWObjManifest& rhs) {
+ *this = rhs;
+ }
+ /*
+  * Copy assignment. Copies every data member, including head_placement_rule
+  * (which used to be left out and was therefore lost on copy), then re-points
+  * the cached iterators at this manifest and seeks them to rhs's offsets.
+  */
+ RGWObjManifest& operator=(const RGWObjManifest& rhs) {
+   explicit_objs = rhs.explicit_objs;
+   objs = rhs.objs;
+   obj_size = rhs.obj_size;
+   obj = rhs.obj;
+   head_size = rhs.head_size;
+   head_placement_rule = rhs.head_placement_rule;
+   max_head_size = rhs.max_head_size;
+   prefix = rhs.prefix;
+   tail_placement = rhs.tail_placement;
+   rules = rhs.rules;
+   tail_instance = rhs.tail_instance;
+
+   // the iterators must refer to this manifest, not to rhs
+   begin_iter.set_manifest(this);
+   end_iter.set_manifest(this);
+
+   begin_iter.seek(rhs.begin_iter.get_ofs());
+   end_iter.seek(rhs.end_iter.get_ofs());
+
+   return *this;
+ }
+
+ map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
+ return objs;
+ }
+
+
+ // Switch to an explicit object list of total size _size.
+ // Note: _objs is swapped with the current list, so the caller's map receives
+ // the previous contents.
+ void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
+ explicit_objs = true;
+ obj_size = _size;
+ objs.swap(_objs);
+ }
+
+ void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
+
+ // Install rule 0 starting at tail_ofs with no part size and the given
+ // stripe_max_size, and set max_head_size to tail_ofs.
+ void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
+   rules[0] = RGWObjManifestRule(0, tail_ofs, 0, stripe_max_size);
+   max_head_size = tail_ofs;
+ }
+
+ // Install rule 0 for a multipart part: starts at offset 0 with start part
+ // number part_num and the given stripe_max_size; max_head_size becomes 0.
+ void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
+   RGWObjManifestRule part_rule(part_num, 0, 0, stripe_max_size);
+   rules[0] = part_rule;
+   max_head_size = 0;
+ }
+
+ // Serialize the manifest. The field order here is mirrored by decode().
+ void encode(bufferlist& bl) const {
+ ENCODE_START(7, 6, bl);
+ encode(obj_size, bl);
+ encode(objs, bl);
+ encode(explicit_objs, bl);
+ encode(obj, bl);
+ encode(head_size, bl);
+ encode(max_head_size, bl);
+ encode(prefix, bl);
+ encode(rules, bl);
+ // the tail bucket is only written when it differs from the head object's bucket
+ bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
+ encode(encode_tail_bucket, bl);
+ if (encode_tail_bucket) {
+ encode(tail_placement.bucket, bl);
+ }
+ // likewise, the tail instance is only written when it differs from the head's instance
+ bool encode_tail_instance = (tail_instance != obj.key.instance);
+ encode(encode_tail_instance, bl);
+ if (encode_tail_instance) {
+ encode(tail_instance, bl);
+ }
+ encode(head_placement_rule, bl);
+ encode(tail_placement.placement_rule, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ // Deserialize a manifest, handling older encodings by struct_v.
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
+ decode(obj_size, bl);
+ decode(objs, bl);
+ if (struct_v >= 3) {
+ decode(explicit_objs, bl);
+ decode(obj, bl);
+ decode(head_size, bl);
+ decode(max_head_size, bl);
+ decode(prefix, bl);
+ decode(rules, bl);
+ } else {
+ // encodings older than v3 are always explicit; the head is taken from the first listed part
+ explicit_objs = true;
+ if (!objs.empty()) {
+ map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
+ obj = iter->second.loc;
+ head_size = iter->second.size;
+ max_head_size = head_size;
+ }
+ }
+
+ if (explicit_objs && head_size > 0 && !objs.empty()) {
+ /* patch up manifest due to issue 16435:
+ * the first object in the explicit objs list might not be the one we need to access, use the
+ * head object instead if set. This would happen if we had an old object that was created
+ * when the explicit objs manifest was around, and it got copied.
+ */
+ rgw_obj& obj_0 = objs[0].loc;
+ if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
+ objs[0].loc = obj;
+ objs[0].size = head_size;
+ }
+ }
+
+ if (struct_v >= 4) {
+ if (struct_v < 6) {
+ // v4 and v5 always carry the tail bucket
+ decode(tail_placement.bucket, bl);
+ } else {
+ // from v6 on a flag says whether the tail bucket was written; if not, it equals the head's bucket
+ bool need_to_decode;
+ decode(need_to_decode, bl);
+ if (need_to_decode) {
+ decode(tail_placement.bucket, bl);
+ } else {
+ tail_placement.bucket = obj.bucket;
+ }
+ }
+ }
+
+ if (struct_v >= 5) {
+ if (struct_v < 6) {
+ // v5 always carries the tail instance
+ decode(tail_instance, bl);
+ } else {
+ // from v6 on a flag says whether the tail instance was written; if not, it equals the head's instance
+ bool need_to_decode;
+ decode(need_to_decode, bl);
+ if (need_to_decode) {
+ decode(tail_instance, bl);
+ } else {
+ tail_instance = obj.key.instance;
+ }
+ }
+ } else { // old object created before 'tail_instance' field added to manifest
+ tail_instance = obj.key.instance;
+ }
+
+ // placement rules are only present from v7 on
+ if (struct_v >= 7) {
+ decode(head_placement_rule, bl);
+ decode(tail_placement.placement_rule, bl);
+ }
+
+ // begin_iter/end_iter depend on the fields decoded above
+ update_iterators();
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWObjManifest*>& o);
+
+ int append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
+ const RGWZoneParams& zone_params);
+ int append(RGWObjManifest& m, RGWSI_Zone *zone_svc);
+
+ bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
+
+ // An explicit manifest is empty when it lists no objects; otherwise the
+ // manifest is empty when it has no rules.
+ bool empty() {
+   return explicit_objs ? objs.empty() : rules.empty();
+ }
+
+ bool has_explicit_objs() {
+ return explicit_objs;
+ }
+
+ // Whether the object has data beyond its head.
+ // Rule-based manifest: the object is larger than its head.
+ // Explicit manifest: two or more objects are listed, or the single listed
+ // object is not the head object.
+ bool has_tail() {
+   if (!explicit_objs) {
+     return (obj_size > head_size);
+   }
+   if (objs.size() != 1) {
+     return (objs.size() >= 2);
+   }
+   rgw_obj& only_obj = objs.begin()->second.loc;
+   return !(obj == only_obj);
+ }
+
+ // Record the head object, its size and its placement rule. For an explicit
+ // manifest with a non-empty head, entry 0 of the explicit list is updated to
+ // match.
+ void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) {
+   head_placement_rule = placement_rule;
+   obj = _o;
+   head_size = _s;
+
+   const bool sync_first_part = explicit_objs && head_size > 0;
+   if (sync_first_part) {
+     RGWObjManifestPart& first_part = objs[0];
+     first_part.loc = obj;
+     first_part.size = head_size;
+   }
+ }
+
+ const rgw_obj& get_obj() {
+ return obj;
+ }
+
+ void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) {
+ tail_placement.placement_rule = placement_rule;
+ tail_placement.bucket = _b;
+ }
+
+ const rgw_bucket_placement& get_tail_placement() {
+ return tail_placement;
+ }
+
+ const rgw_placement_rule& get_head_placement_rule() {
+ return head_placement_rule;
+ }
+
+ void set_prefix(const string& _p) {
+ prefix = _p;
+ }
+
+ const string& get_prefix() {
+ return prefix;
+ }
+
+ void set_tail_instance(const string& _ti) {
+ tail_instance = _ti;
+ }
+
+ const string& get_tail_instance() {
+ return tail_instance;
+ }
+
+ void set_head_size(uint64_t _s) {
+ head_size = _s;
+ }
+
+ void set_obj_size(uint64_t s) {
+ obj_size = s;
+
+ update_iterators();
+ }
+
+ uint64_t get_obj_size() {
+ return obj_size;
+ }
+
+ uint64_t get_head_size() {
+ return head_size;
+ }
+
+ uint64_t get_max_head_size() {
+ return max_head_size;
+ }
+
+ /*
+  * Iterator over the stripes of a manifest. Two iterators compare equal when
+  * their object offsets (ofs) are equal.
+  */
+ class obj_iterator {
+ RGWObjManifest *manifest;
+ uint64_t part_ofs; /* where current part starts */
+ uint64_t stripe_ofs; /* where current stripe starts */
+ uint64_t ofs; /* current position within the object */
+ uint64_t stripe_size; /* current stripe size (returned by get_stripe_size() for non-explicit manifests) */
+
+ int cur_part_id;
+ int cur_stripe;
+ string cur_override_prefix;
+
+ rgw_obj_select location;
+
+ map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
+ map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
+
+ map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
+
+ // reset all position counters to zero
+ void init() {
+ part_ofs = 0;
+ stripe_ofs = 0;
+ ofs = 0;
+ stripe_size = 0;
+ cur_part_id = 0;
+ cur_stripe = 0;
+ }
+
+ void update_explicit_pos();
+
+
+ protected:
+
+ void set_manifest(RGWObjManifest *m) {
+ manifest = m;
+ }
+
+ public:
+ // iterator without a manifest; accessors that read `manifest` must not be used on it
+ obj_iterator() : manifest(NULL) {
+ init();
+ }
+ // positions at offset 0 unless the manifest is empty
+ explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
+ init();
+ if (!manifest->empty()) {
+ seek(0);
+ }
+ }
+ // positions at _ofs unless the manifest is empty
+ obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
+ init();
+ if (!manifest->empty()) {
+ seek(_ofs);
+ }
+ }
+ void seek(uint64_t ofs);
+
+ void operator++();
+ // comparison looks at the object offset only
+ bool operator==(const obj_iterator& rhs) const {
+ return (ofs == rhs.ofs);
+ }
+ bool operator!=(const obj_iterator& rhs) const {
+ return (ofs != rhs.ofs);
+ }
+ const rgw_obj_select& get_location() const {
+ return location;
+ }
+
+ /* where current part starts */
+ uint64_t get_part_ofs() const {
+ return part_ofs;
+ }
+
+ /* start of current stripe */
+ uint64_t get_stripe_ofs() const {
+ // for explicit manifests the map key of the current entry is the stripe offset
+ if (manifest->explicit_objs) {
+ return explicit_iter->first;
+ }
+ return stripe_ofs;
+ }
+
+ /* current ofs relative to start of rgw object */
+ uint64_t get_ofs() const {
+ return ofs;
+ }
+
+ /* stripe number */
+ int get_cur_stripe() const {
+ return cur_stripe;
+ }
+
+ /* current stripe size */
+ uint64_t get_stripe_size() const {
+ if (manifest->explicit_objs) {
+ return explicit_iter->second.size;
+ }
+ return stripe_size;
+ }
+
+ /* offset where data starts within current stripe */
+ uint64_t location_ofs() const {
+ if (manifest->explicit_objs) {
+ return explicit_iter->second.loc_ofs;
+ }
+ return 0; /* all stripes start at zero offset */
+ }
+
+ void update_location();
+
+ friend class RGWObjManifest;
+ void dump(Formatter *f) const;
+ };
+
+ const obj_iterator& obj_begin();
+ const obj_iterator& obj_end();
+ obj_iterator obj_find(uint64_t ofs);
+
+ obj_iterator begin_iter;
+ obj_iterator end_iter;
+
+ /*
+ * simple object generator. Using a simple single rule manifest.
+ */
+ // Builds a manifest step by step via create_begin() and create_next();
+ // both are defined out of line.
+ class generator {
+ RGWObjManifest *manifest;
+ uint64_t last_ofs;
+ uint64_t cur_part_ofs;
+ int cur_part_id;
+ int cur_stripe;
+ uint64_t cur_stripe_size;
+ string cur_oid;
+
+ string oid_prefix;
+
+ rgw_obj_select cur_obj;
+
+ RGWObjManifestRule rule;
+
+ public:
+ generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
+ cur_stripe(0), cur_stripe_size(0) {}
+ int create_begin(CephContext *cct, RGWObjManifest *manifest,
+ const rgw_placement_rule& head_placement_rule,
+ const rgw_placement_rule *tail_placement_rule,
+ const rgw_bucket& bucket,
+ const rgw_obj& obj);
+
+ int create_next(uint64_t ofs);
+
+ // raw object for the current position, resolved either from explicit zone info or via the store
+ rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
+ rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); }
+
+ /* total max size of current stripe (including head obj) */
+ uint64_t cur_stripe_max_size() const {
+ return cur_stripe_size;
+ }
+ };
+};
+WRITE_CLASS_ENCODER(RGWObjManifest)
+
+ /* Metadata of one uploaded part: number, sizes, etag, mtime, manifest and compression info. */
+ struct RGWUploadPartInfo {
+ uint32_t num;
+ uint64_t size;
+ uint64_t accounted_size{0};
+ string etag;
+ ceph::real_time modified;
+ RGWObjManifest manifest;
+ RGWCompressionInfo cs_info;
+
+ RGWUploadPartInfo() : num(0), size(0) {}
+
+ // note: accounted_size is written last, after cs_info
+ void encode(bufferlist& bl) const {
+ ENCODE_START(4, 2, bl);
+ encode(num, bl);
+ encode(size, bl);
+ encode(etag, bl);
+ encode(modified, bl);
+ encode(manifest, bl);
+ encode(cs_info, bl);
+ encode(accounted_size, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
+ decode(num, bl);
+ decode(size, bl);
+ decode(etag, bl);
+ decode(modified, bl);
+ // the manifest is only present from v3 on
+ if (struct_v >= 3)
+ decode(manifest, bl);
+ // cs_info and accounted_size are only present from v4 on;
+ // older encodings fall back to accounted_size = size
+ if (struct_v >= 4) {
+ decode(cs_info, bl);
+ decode(accounted_size, bl);
+ } else {
+ accounted_size = size;
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<RGWUploadPartInfo*>& o);
+ };
+WRITE_CLASS_ENCODER(RGWUploadPartInfo)
+
+/**
+ * In-memory state kept for a single rgw_obj: existence/atomicity flags,
+ * sizes, tags, manifest and, optionally, data and attributes.
+ */
+struct RGWObjState {
+  rgw_obj obj;
+  bool is_atomic;
+  bool has_attrs;
+  bool exists;
+  uint64_t size; //< size of raw object
+  uint64_t accounted_size{0}; //< size before compression, encryption
+  ceph::real_time mtime;
+  uint64_t epoch;
+  bufferlist obj_tag;
+  bufferlist tail_tag;
+  string write_tag;
+  bool fake_tag;
+  RGWObjManifest manifest;
+  bool has_manifest;
+  string shadow_obj;
+  bool has_data;
+  bufferlist data;
+  bool prefetch_data;
+  bool keep_tail;
+  bool is_olh;
+  bufferlist olh_tag;
+  uint64_t pg_ver;
+  uint32_t zone_short_id;
+
+  /* important! don't forget to update copy constructor */
+
+  RGWObjVersionTracker objv_tracker;
+
+  map<string, bufferlist> attrset;
+
+  /// All flags start false and all counters start at zero.
+  RGWObjState() : is_atomic(false), has_attrs(false), exists(false),
+                  size(0), epoch(0), fake_tag(false), has_manifest(false),
+                  has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
+                  pg_ver(0), zone_short_id(0) {}
+
+  /**
+   * Copy every field of rhs.
+   *
+   * olh_tag, zone_short_id and attrset used to be skipped here, which left
+   * zone_short_id indeterminate in the copy and produced copies whose
+   * has_attrs flag was set while attrset was empty.
+   */
+  RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
+    is_atomic = rhs.is_atomic;
+    has_attrs = rhs.has_attrs;
+    exists = rhs.exists;
+    size = rhs.size;
+    accounted_size = rhs.accounted_size;
+    mtime = rhs.mtime;
+    epoch = rhs.epoch;
+    if (rhs.obj_tag.length()) {
+      obj_tag = rhs.obj_tag;
+    }
+    if (rhs.tail_tag.length()) {
+      tail_tag = rhs.tail_tag;
+    }
+    write_tag = rhs.write_tag;
+    fake_tag = rhs.fake_tag;
+    if (rhs.has_manifest) {
+      manifest = rhs.manifest;
+    }
+    has_manifest = rhs.has_manifest;
+    shadow_obj = rhs.shadow_obj;
+    has_data = rhs.has_data;
+    if (rhs.data.length()) {
+      data = rhs.data;
+    }
+    prefetch_data = rhs.prefetch_data;
+    keep_tail = rhs.keep_tail;
+    is_olh = rhs.is_olh;
+    if (rhs.olh_tag.length()) {
+      olh_tag = rhs.olh_tag;
+    }
+    pg_ver = rhs.pg_ver;
+    zone_short_id = rhs.zone_short_id;
+    objv_tracker = rhs.objv_tracker;
+    attrset = rhs.attrset;
+  }
+
+  /**
+   * Look up an attribute by name.
+   * Returns true and copies the value into dest when present; returns false
+   * and leaves dest untouched otherwise.
+   */
+  bool get_attr(const string& name, bufferlist& dest) {
+    auto iter = attrset.find(name);
+    if (iter == attrset.end()) {
+      return false;
+    }
+    dest = iter->second;
+    return true;
+  }
+};
+
+/**
+ * In-memory state kept for a single rgw_raw_obj: existence flag, size,
+ * mtime, tag and, optionally, data and attributes.
+ */
+struct RGWRawObjState {
+  rgw_raw_obj obj;
+  bool has_attrs{false};
+  bool exists{false};
+  uint64_t size{0};
+  ceph::real_time mtime;
+  uint64_t epoch{0};
+  bufferlist obj_tag;
+  bool has_data{false};
+  bufferlist data;
+  bool prefetch_data{false};
+  uint64_t pg_ver{0};
+
+  /* important! don't forget to update copy constructor */
+
+  RGWObjVersionTracker objv_tracker;
+
+  map<string, bufferlist> attrset;
+
+  RGWRawObjState() {}
+
+  /**
+   * Copy every field of rhs, in declaration order.
+   *
+   * attrset used to be skipped, so a copy could report has_attrs == true
+   * while holding no attributes at all.
+   */
+  RGWRawObjState(const RGWRawObjState& rhs)
+    : obj(rhs.obj),
+      has_attrs(rhs.has_attrs),
+      exists(rhs.exists),
+      size(rhs.size),
+      mtime(rhs.mtime),
+      epoch(rhs.epoch),
+      obj_tag(rhs.obj_tag),
+      has_data(rhs.has_data),
+      data(rhs.data),
+      prefetch_data(rhs.prefetch_data),
+      pg_ver(rhs.pg_ver),
+      objv_tracker(rhs.objv_tracker),
+      attrset(rhs.attrset) {}
+};
+
+struct RGWPoolIterCtx { // bundles a librados IoCtx with an NObjectIterator
+ librados::IoCtx io_ctx;
+ librados::NObjectIterator iter; // NOTE(review): presumably iterates objects reachable through io_ctx - confirm
+};
+
+/**
+ * Listing context passed to the list_raw_objects*() calls; `initialized`
+ * starts out false.
+ */
+struct RGWListRawObjsCtx {
+  bool initialized{false};
+  RGWPoolIterCtx iter_ctx;
+
+  RGWListRawObjsCtx() {}
+};
+
+/**
+ * Encodable entry naming an object (tenant, bucket name/id, key) together
+ * with an expiration time.
+ *
+ * Wire format: v2, compatible with v1. The tenant was appended in v2, so it
+ * is encoded last and is absent from v1 encodings.
+ */
+struct objexp_hint_entry {
+  string tenant;
+  string bucket_name;
+  string bucket_id;
+  rgw_obj_key obj_key;
+  ceph::real_time exp_time;
+
+  /// Serialize all fields; tenant goes last for backward compatibility.
+  void encode(bufferlist& bl) const {
+    ENCODE_START(2, 1, bl);
+    encode(bucket_name, bl);
+    encode(bucket_id, bl);
+    encode(obj_key, bl);
+    encode(exp_time, bl);
+    encode(tenant, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  /// Deserialize; entries older than v2 end up with an empty tenant.
+  void decode(bufferlist::const_iterator& bl) {
+    // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
+    DECODE_START(2, bl);
+    decode(bucket_name, bl);
+    decode(bucket_id, bl);
+    decode(obj_key, bl);
+    decode(exp_time, bl);
+    if (struct_v < 2) {
+      tenant.clear();
+    } else {
+      decode(tenant, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(objexp_hint_entry)
+
+class RGWDataChangesLog;
+class RGWMetaSyncStatusManager;
+class RGWDataSyncStatusManager;
+class RGWCoroutinesManagerRegistry;
+
+/**
+ * Ref-counted callback interface for a bucket-stats request.
+ * set_response() records the stats pointer (null until then); subclasses
+ * implement handle_response(), which receives an integer result code.
+ */
+class RGWGetBucketStats_CB : public RefCountedObject {
+protected:
+  rgw_bucket bucket;
+  map<RGWObjCategory, RGWStorageStats> *stats{nullptr};
+public:
+  explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket) {}
+  ~RGWGetBucketStats_CB() override = default;
+
+  virtual void handle_response(int r) = 0;
+
+  /// Store the pointer only; the pointed-to map is not copied.
+  virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
+    stats = _stats;
+  }
+};
+
+/**
+ * Ref-counted callback interface for a user-stats request.
+ * set_response() copies the stats into this object; subclasses implement
+ * handle_response(), which receives an integer result code.
+ */
+class RGWGetUserStats_CB : public RefCountedObject {
+protected:
+  rgw_user user;
+  RGWStorageStats stats;
+public:
+  explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
+  ~RGWGetUserStats_CB() override = default;
+
+  virtual void handle_response(int r) = 0;
+
+  /// Unlike the bucket variant, this keeps its own copy of the stats.
+  virtual void set_response(RGWStorageStats& _stats) {
+    stats = _stats;
+  }
+};
+
+class RGWGetDirHeader_CB;
+class RGWGetUserHeader_CB;
+
+/**
+ * Map of RGWObjState entries keyed by rgw_obj, guarded by a RWLock, plus an
+ * opaque private pointer supplied by the creator.
+ */
+class RGWObjectCtx {
+  RGWRados *store;
+  RWLock lock{"RGWObjectCtx"};
+  void *s{nullptr};
+
+  std::map<rgw_obj, RGWObjState> objs_state;
+public:
+  explicit RGWObjectCtx(RGWRados *_store) : store(_store) {}
+  explicit RGWObjectCtx(RGWRados *_store, void *_s) : store(_store), s(_s) {}
+
+  /// Opaque pointer handed to the two-argument constructor (nullptr otherwise).
+  void *get_private() {
+    return s;
+  }
+
+  RGWRados *get_store() {
+    return store;
+  }
+
+  /**
+   * Return the state entry for obj, creating a default one if none exists.
+   *
+   * The lookup runs under the read lock. On a miss the read lock is dropped
+   * and the write lock taken; the two steps are not atomic, which is fine
+   * because operator[] finds the entry if another thread inserted it
+   * meanwhile.
+   *
+   * The returned pointer is used after the lock is released; invalidate()
+   * erases entries, so callers must not race the two on the same object.
+   */
+  RGWObjState *get_state(const rgw_obj& obj) {
+    assert (!obj.empty());
+    lock.get_read();
+    auto iter = objs_state.find(obj);
+    if (iter != objs_state.end()) {
+      RGWObjState *found = &iter->second;
+      lock.unlock();
+      return found;
+    }
+    lock.unlock();
+
+    RWLock::WLocker wl(lock);
+    return &objs_state[obj];
+  }
+
+  /// Flag obj as atomic, creating its state entry if needed.
+  void set_atomic(const rgw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    assert (!obj.empty());
+    objs_state[obj].is_atomic = true;
+  }
+
+  /// Flag obj for data prefetch, creating its state entry if needed.
+  void set_prefetch_data(const rgw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    assert (!obj.empty());
+    objs_state[obj].prefetch_data = true;
+  }
+
+  /**
+   * Drop the cached state of obj while preserving its is_atomic and
+   * prefetch_data flags: if either was set, a fresh entry carrying only
+   * those two flags is re-created.
+   */
+  void invalidate(const rgw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    auto iter = objs_state.find(obj);
+    if (iter == objs_state.end()) {
+      return;
+    }
+    const bool is_atomic = iter->second.is_atomic;
+    const bool prefetch_data = iter->second.prefetch_data;
+
+    objs_state.erase(iter);
+
+    if (is_atomic || prefetch_data) {
+      auto& state = objs_state[obj];
+      state.is_atomic = is_atomic;
+      state.prefetch_data = prefetch_data;
+    }
+  }
+};
+
+class RGWAsyncRadosProcessor;
+
+template <class T>
+class RGWChainedCacheImpl;
+
+struct bucket_info_entry { // value type of RGWChainedCacheImpl_bucket_info_entry (see binfo_cache)
+ RGWBucketInfo info;
+ real_time mtime;
+ map<string, bufferlist> attrs; // attribute name -> raw value
+};
+
+/**
+ * Snapshot of an object's mtime, zone_short_id and pg_ver, taken from its
+ * RGWObjState; this is the value type of the object tombstone cache.
+ */
+struct tombstone_entry {
+  ceph::real_time mtime;
+  // Zero-initialized so that a default-constructed entry never exposes
+  // indeterminate values.
+  uint32_t zone_short_id{0};
+  uint64_t pg_ver{0};
+
+  tombstone_entry() = default;
+  explicit tombstone_entry(const RGWObjState& state)
+    : mtime(state.mtime), zone_short_id(state.zone_short_id),
+      pg_ver(state.pg_ver) {}
+};
+
+class RGWIndexCompletionManager;
+
+class RGWRados : public AdminSocketHook
+{
+ friend class RGWGC;
+ friend class RGWMetaNotifier;
+ friend class RGWDataNotifier;
+ friend class RGWLC;
+ friend class RGWObjectExpirer;
+ friend class RGWMetaSyncProcessorThread;
+ friend class RGWDataSyncProcessorThread;
+ friend class RGWReshard;
+ friend class RGWBucketReshard;
+ friend class RGWBucketReshardLock;
+ friend class BucketIndexLockGuard;
+ friend class RGWCompleteMultipart;
+
+ static constexpr const char* admin_commands[4][3] = { // four rows of three strings; NOTE(review): presumably {command, argument descriptor, help text} - confirm against the registration code
+ { "cache list",
+ "cache list name=filter,type=CephString,req=false",
+ "cache list [filter_str]: list object cache, possibly matching substrings" },
+ { "cache inspect",
+ "cache inspect name=target,type=CephString,req=true",
+ "cache inspect target: print cache element" },
+ { "cache erase",
+ "cache erase name=target,type=CephString,req=true",
+ "cache erase target: erase element from cache" },
+ { "cache zap",
+ "cache zap",
+ "cache zap: erase all elements from cache" }
+ };
+
+ /** Open the pool used as root for this gateway */
+ int open_root_pool_ctx();
+ int open_gc_pool_ctx();
+ int open_lc_pool_ctx();
+ int open_objexp_pool_ctx();
+ int open_reshard_pool_ctx();
+
+ int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
+ bool mostly_omap);
+ int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
+ int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
+ int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+ string& bucket_oid_base);
+ int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+ const string& obj_key, string *bucket_obj, int *shard_id);
+ int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+ int shard_id, string *bucket_obj);
+ int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+ map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
+ template<typename T>
+ int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
+ map<int, string>& oids, map<int, T>& bucket_objs,
+ int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
+ void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
+ string *marker);
+
+ void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
+
+ std::atomic<int64_t> max_req_id = { 0 };
+ Mutex lock;
+ SafeTimer *timer;
+
+ RGWGC *gc;
+ RGWLC *lc;
+ RGWObjectExpirer *obj_expirer;
+ bool use_gc_thread;
+ bool use_lc_thread;
+ bool quota_threads;
+ bool run_sync_thread;
+ bool run_reshard_thread;
+
+ RGWAsyncRadosProcessor* async_rados;
+
+ RGWMetaNotifier *meta_notifier;
+ RGWDataNotifier *data_notifier;
+ RGWMetaSyncProcessorThread *meta_sync_processor_thread;
+ RGWSyncTraceManager *sync_tracer = nullptr;
+ map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
+
+ boost::optional<rgw::BucketTrimManager> bucket_trim;
+ RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
+
+ Mutex meta_sync_thread_lock;
+ Mutex data_sync_thread_lock;
+
+ librados::IoCtx root_pool_ctx; // .rgw
+
+ double inject_notify_timeout_probability = 0;
+ unsigned max_notify_retries = 0;
+
+ friend class RGWWatcher;
+
+ Mutex bucket_id_lock;
+
+ // This field represents the number of bucket index object shards
+ uint32_t bucket_index_max_shards;
+
+ int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
+ int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
+ int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
+ uint64_t max_bucket_id;
+
+ int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ RGWObjState *olh_state, RGWObjState **target_state);
+ int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
+ bool follow_olh, bool assume_noent = false);
+ int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ librados::ObjectOperation& op, RGWObjState **state);
+ int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
+
+ int update_placement_map();
+ int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
+
+ void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
+ void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
+ void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
+protected:
+ CephContext *cct;
+
+ librados::Rados rados;
+
+ using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
+ RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
+
+ using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
+ tombstone_cache_t *obj_tombstone_cache;
+
+ librados::IoCtx gc_pool_ctx; // .rgw.gc
+ librados::IoCtx lc_pool_ctx; // .rgw.lc
+ librados::IoCtx objexp_pool_ctx;
+ librados::IoCtx reshard_pool_ctx;
+
+ bool pools_initialized;
+
+ RGWQuotaHandler *quota_handler;
+
+ RGWCoroutinesManagerRegistry *cr_registry;
+
+ RGWSyncModuleInstanceRef sync_module;
+ bool writeable_zone{false};
+
+ RGWIndexCompletionManager *index_completion_manager{nullptr};
+
+ bool use_cache{false};
+public:
+  /// Default construction: every raw pointer listed here starts null, every
+  /// flag starts false, every counter starts at zero and each Mutex gets its
+  /// name string. Initializers keep their original order.
+  RGWRados()
+    : lock("rados_timer_lock"),
+      timer(nullptr),
+      gc(nullptr),
+      lc(nullptr),
+      obj_expirer(nullptr),
+      use_gc_thread(false),
+      use_lc_thread(false),
+      quota_threads(false),
+      run_sync_thread(false),
+      run_reshard_thread(false),
+      async_rados(nullptr),
+      meta_notifier(nullptr),
+      data_notifier(nullptr),
+      meta_sync_processor_thread(nullptr),
+      meta_sync_thread_lock("meta_sync_thread_lock"),
+      data_sync_thread_lock("data_sync_thread_lock"),
+      bucket_id_lock("rados_bucket_id"),
+      bucket_index_max_shards(0),
+      max_bucket_id(0),
+      cct(nullptr),
+      binfo_cache(nullptr),
+      obj_tombstone_cache(nullptr),
+      pools_initialized(false),
+      quota_handler(nullptr),
+      cr_registry(nullptr),
+      meta_mgr(nullptr),
+      data_log(nullptr),
+      reshard(nullptr) {}
+
+ RGWRados& set_use_cache(bool status) { // fluent setter: stores the flag and returns *this for chaining
+ use_cache = status;
+ return *this;
+ }
+
+ RGWLC *get_lc() { // returns the stored pointer as-is; it is NULL after default construction
+ return lc;
+ }
+
+ RGWRados& set_run_gc_thread(bool _use_gc_thread) { // fluent setter for use_gc_thread; returns *this
+ use_gc_thread = _use_gc_thread;
+ return *this;
+ }
+
+ RGWRados& set_run_lc_thread(bool _use_lc_thread) { // fluent setter for use_lc_thread; returns *this
+ use_lc_thread = _use_lc_thread;
+ return *this;
+ }
+
+ RGWRados& set_run_quota_threads(bool _run_quota_threads) { // fluent setter for quota_threads; returns *this
+ quota_threads = _run_quota_threads;
+ return *this;
+ }
+
+ RGWRados& set_run_sync_thread(bool _run_sync_thread) { // fluent setter for run_sync_thread; returns *this
+ run_sync_thread = _run_sync_thread;
+ return *this;
+ }
+
+ RGWRados& set_run_reshard_thread(bool _run_reshard_thread) { // fluent setter for run_reshard_thread; returns *this
+ run_reshard_thread = _run_reshard_thread;
+ return *this;
+ }
+
+ uint64_t get_new_req_id() { // atomically pre-increments max_req_id (std::atomic<int64_t>) and returns the new value
+ return ++max_req_id;
+ }
+
+ librados::IoCtx* get_lc_pool_ctx() { // address of the lc_pool_ctx member; never null
+ return &lc_pool_ctx;
+ }
+ void set_context(CephContext *_cct) { // stores the pointer only; also called by initialize(CephContext *)
+ cct = _cct;
+ }
+
+ RGWServices svc;
+
+ /**
+ * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
+ * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
+ */
+ string host_id;
+
+ // pulls missing periods for period_history
+ std::unique_ptr<RGWPeriodPuller> period_puller;
+ // maintains a connected history of periods
+ std::unique_ptr<RGWPeriodHistory> period_history;
+
+ RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; }; // plain accessor; nullptr after default construction
+
+ RGWMetadataManager *meta_mgr;
+
+ RGWDataChangesLog *data_log;
+
+ RGWReshard *reshard;
+ std::shared_ptr<RGWReshardWait> reshard_wait;
+
+ virtual ~RGWRados() = default;
+
+ tombstone_cache_t *get_tombstone_cache() { // plain accessor; nullptr after default construction
+ return obj_tombstone_cache;
+ }
+ const RGWSyncModuleInstanceRef& get_sync_module() { // returns a reference to the member, not a copy
+ return sync_module;
+ }
+ RGWSyncTraceManager *get_sync_tracer() { // plain accessor; the member is initialized to nullptr
+ return sync_tracer;
+ }
+
+ int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
+ void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
+ int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
+ int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
+
+ uint32_t get_max_bucket_shards() { // forwards to rgw_shards_max(); reads no member state
+ return rgw_shards_max();
+ }
+
+
+ int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
+
+ int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
+ int list_raw_objects_next(const string& prefix_filter, int max,
+ RGWListRawObjsCtx& ctx, list<string>& oids,
+ bool *is_truncated);
+ int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
+ RGWListRawObjsCtx& ctx, list<string>& oids,
+ bool *is_truncated);
+ string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
+
+ CephContext *ctx() { return cct; } // plain accessor; NULL until set_context()/initialize(CephContext *) is called
+ /** do all necessary setup of the storage device */
+ int initialize(CephContext *_cct) { // stores _cct via set_context(), then returns the result of the no-argument initialize()
+ set_context(_cct);
+ return initialize();
+ }
+ /** Initialize the RADOS instance and prepare to do other ops */
+ int init_svc(bool raw);
+ int init_rados();
+ int init_complete();
+ int initialize();
+ void finalize();
+
+ int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
+ int update_service_map(std::map<std::string, std::string>&& status);
+
+ /// list logs
+ int log_list_init(const string& prefix, RGWAccessHandle *handle);
+ int log_list_next(RGWAccessHandle handle, string *name);
+
+ /// remove log
+ int log_remove(const string& name);
+
+ /// show log
+ int log_show_init(const string& name, RGWAccessHandle *handle);
+ int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
+
+ // log bandwidth info
+ int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
+ int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
+ uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
+ rgw_usage_log_entry>& usage);
+ int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
+ int clear_usage();
+
+ int create_pool(const rgw_pool& pool);
+
+ int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
+ int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
+ void create_bucket_id(string *bucket_id);
+
+ bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
+ bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
+
+ int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
+ const string& zonegroup_id,
+ const rgw_placement_rule& placement_rule,
+ const string& swift_ver_location,
+ const RGWQuotaInfo * pquota_info,
+ map<std::string,bufferlist>& attrs,
+ RGWBucketInfo& bucket_info,
+ obj_version *pobjv,
+ obj_version *pep_objv,
+ ceph::real_time creation_time,
+ rgw_bucket *master_bucket,
+ uint32_t *master_num_shards,
+ bool exclusive = true);
+
+ RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; } // plain accessor; NULL after default construction
+
+  /**
+   * Handle on one bucket index shard: the bucket, the shard id (-1 until an
+   * init() overload sets it), the index IoCtx and the shard object name.
+   * Each init() overload returns an int status.
+   */
+  struct BucketShard {
+    RGWRados *store;
+    rgw_bucket bucket;
+    int shard_id{-1};
+    librados::IoCtx index_ctx;
+    string bucket_obj;
+
+    explicit BucketShard(RGWRados *_store) : store(_store) {}
+
+    int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
+    int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
+    int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
+    int init(const RGWBucketInfo& bucket_info, int sid);
+  };
+
+ class Object { // per-object operation context; nests the Read, Write, Delete and Stat operation structs
+ RGWRados *store;
+ RGWBucketInfo bucket_info; // stored by value (copied from the constructor argument)
+ RGWObjectCtx& ctx; // held by reference; NOTE(review): the referenced context presumably has to outlive this Object
+ rgw_obj obj;
+
+ BucketShard bs; // initialized lazily by get_bucket_shard()
+
+ RGWObjState *state;
+
+ bool versioning_disabled;
+
+ bool bs_initialized; // true once bs.init() has succeeded
+
+ protected:
+ int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
+ void invalidate_state();
+
+ int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
+ const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
+ int complete_atomic_modification();
+
+ public:
+ Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
+ ctx(_ctx), obj(_obj), bs(store),
+ state(NULL), versioning_disabled(false),
+ bs_initialized(false) {}
+
+ RGWRados *get_store() { return store; }
+ rgw_obj& get_obj() { return obj; }
+ RGWObjectCtx& get_ctx() { return ctx; }
+ RGWBucketInfo& get_bucket_info() { return bucket_info; }
+ int get_manifest(RGWObjManifest **pmanifest);
+
+ int get_bucket_shard(BucketShard **pbs) { // initializes bs on first use; returns bs.init()'s negative result on failure, otherwise 0 with *pbs = &bs
+ if (!bs_initialized) {
+ int r =
+ bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
+ if (r < 0) {
+ return r;
+ }
+ bs_initialized = true;
+ }
+ *pbs = &bs;
+ return 0;
+ }
+
+ void set_versioning_disabled(bool status) {
+ versioning_disabled = status;
+ }
+
+ bool versioning_enabled() { // true only if versioning is not disabled on this Object and the bucket info reports it enabled
+ return (!versioning_disabled && bucket_info.versioning_enabled());
+ }
+
+ struct Read { // read-side operation bound to a source Object
+ RGWRados::Object *source;
+
+ struct GetObjState {
+ map<rgw_pool, librados::IoCtx> io_ctxs;
+ rgw_pool cur_pool;
+ librados::IoCtx *cur_ioctx{nullptr};
+ rgw_obj obj;
+ rgw_raw_obj head_obj;
+ } state;
+
+ struct ConditionParams { // optional preconditions; pointers default to NULL, flags to false, ids to 0
+ const ceph::real_time *mod_ptr;
+ const ceph::real_time *unmod_ptr;
+ bool high_precision_time;
+ uint32_t mod_zone_id;
+ uint64_t mod_pg_ver;
+ const char *if_match;
+ const char *if_nomatch;
+
+ ConditionParams() :
+ mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
+ if_match(NULL), if_nomatch(NULL) {}
+ } conds;
+
+ struct Params { // all pointers default to nullptr; NOTE(review): presumably optional outputs - confirm in the implementation
+ ceph::real_time *lastmod;
+ uint64_t *obj_size;
+ map<string, bufferlist> *attrs;
+ rgw_obj *target_obj;
+
+ Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
+ target_obj(nullptr) {}
+ } params;
+
+ explicit Read(RGWRados::Object *_source) : source(_source) {}
+
+ int prepare();
+ static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
+ int read(int64_t ofs, int64_t end, bufferlist& bl);
+ int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
+ int get_attr(const char *name, bufferlist& dest);
+ };
+
+ struct Write { // write-side operation bound to a target Object
+ RGWRados::Object *target;
+
+ struct MetaParams { // pointers default to null, flags to false/0, category to RGWObjCategory::Main
+ ceph::real_time *mtime;
+ map<std::string, bufferlist>* rmattrs;
+ const bufferlist *data;
+ RGWObjManifest *manifest;
+ const string *ptag;
+ list<rgw_obj_index_key> *remove_objs;
+ ceph::real_time set_mtime;
+ rgw_user owner;
+ RGWObjCategory category;
+ int flags;
+ const char *if_match;
+ const char *if_nomatch;
+ std::optional<uint64_t> olh_epoch;
+ ceph::real_time delete_at;
+ bool canceled;
+ const string *user_data;
+ rgw_zone_set *zones_trace;
+ bool modify_tail;
+ bool completeMultipart;
+ bool appendable;
+
+ MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
+ remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
+ if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
+ modify_tail(false), completeMultipart(false), appendable(false) {}
+ } meta;
+
+ explicit Write(RGWRados::Object *_target) : target(_target) {}
+
+ int _do_write_meta(uint64_t size, uint64_t accounted_size,
+ map<std::string, bufferlist>& attrs,
+ bool modify_tail, bool assume_noent,
+ void *index_op);
+ int write_meta(uint64_t size, uint64_t accounted_size,
+ map<std::string, bufferlist>& attrs);
+ int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
+ const req_state* get_req_state() { // casts the RGWObjectCtx private pointer (void *) to req_state *; null if none was supplied
+ return (req_state *)target->get_ctx().get_private();
+ }
+ };
+
+ struct Delete { // delete operation bound to a target Object; inputs in params, outcome in result
+ RGWRados::Object *target;
+
+ struct DeleteParams {
+ rgw_user bucket_owner;
+ int versioning_status;
+ ACLOwner obj_owner; /* needed for creation of deletion marker */
+ uint64_t olh_epoch;
+ string marker_version_id;
+ uint32_t bilog_flags;
+ list<rgw_obj_index_key> *remove_objs;
+ ceph::real_time expiration_time;
+ ceph::real_time unmod_since;
+ ceph::real_time mtime; /* for setting delete marker mtime */
+ bool high_precision_time;
+ rgw_zone_set *zones_trace;
+ bool abortmp;
+ uint64_t parts_accounted_size;
+
+ DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
+ } params;
+
+ struct DeleteResult {
+ bool delete_marker;
+ string version_id;
+
+ DeleteResult() : delete_marker(false) {}
+ } result;
+
+ explicit Delete(RGWRados::Object *_target) : target(_target) {}
+
+ int delete_obj();
+ };
+
+ struct Stat { // stat operation bound to a source Object; exposes stat_async()/wait() and stat()
+ RGWRados::Object *source;
+
+ struct Result {
+ rgw_obj obj;
+ RGWObjManifest manifest;
+ bool has_manifest;
+ uint64_t size;
+ struct timespec mtime; // NOTE(review): not initialized by Result(); only has_manifest and size are
+ map<string, bufferlist> attrs;
+
+ Result() : has_manifest(false), size(0) {}
+ } result;
+
+ struct State {
+ librados::IoCtx io_ctx;
+ librados::AioCompletion *completion;
+ int ret;
+
+ State() : completion(NULL), ret(0) {}
+ } state;
+
+
+ explicit Stat(RGWRados::Object *_source) : source(_source) {}
+
+ int stat_async();
+ int wait();
+ int stat();
+ private:
+ int finish();
+ };
+ };
+
+ class Bucket { // per-bucket operation context; nests the UpdateIndex and List helpers
+ RGWRados *store;
+ RGWBucketInfo bucket_info; // stored by value (copied from the constructor argument)
+ rgw_bucket& bucket; // bound to bucket_info.bucket in the constructor; NOTE(review): an implicitly copied Bucket would keep referring to the source object's bucket_info
+ int shard_id;
+
+ public:
+ Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
+ shard_id(RGW_NO_SHARD) {}
+ RGWRados *get_store() { return store; }
+ rgw_bucket& get_bucket() { return bucket; }
+ RGWBucketInfo& get_bucket_info() { return bucket_info; }
+
+ int update_bucket_id(const string& new_bucket_id);
+
+ int get_shard_id() { return shard_id; }
+ void set_shard_id(int id) {
+ shard_id = id;
+ }
+
+ class UpdateIndex { // bucket index update for one object: prepare(), then complete()/complete_del() or cancel()
+ RGWRados::Bucket *target;
+ string optag;
+ rgw_obj obj;
+ uint16_t bilog_flags{0};
+ BucketShard bs; // initialized lazily through init_bs()
+ bool bs_initialized{false};
+ bool blind; // set in the constructor: true when the bucket's index_type is RGWBIType_Indexless
+ bool prepared{false};
+ rgw_zone_set *zones_trace{nullptr};
+
+ int init_bs() { // runs bs.init() for this object; marks bs initialized only on success, returns the negative result otherwise
+ int r =
+ bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
+ if (r < 0) {
+ return r;
+ }
+ bs_initialized = true;
+ return 0;
+ }
+
+ void invalidate_bs() { // forces the next get_bucket_shard() to call init_bs() again
+ bs_initialized = false;
+ }
+
+ int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
+ public:
+
+ UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
+ bs(target->get_store()) {
+ blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
+ }
+
+ int get_bucket_shard(BucketShard **pbs) { // lazily initializes bs; returns a negative result on failure, otherwise 0 with *pbs = &bs
+ if (!bs_initialized) {
+ int r = init_bs();
+ if (r < 0) {
+ return r;
+ }
+ }
+ *pbs = &bs;
+ return 0;
+ }
+
+ void set_bilog_flags(uint16_t flags) {
+ bilog_flags = flags;
+ }
+
+ void set_zones_trace(rgw_zone_set *_zones_trace) {
+ zones_trace = _zones_trace;
+ }
+
+ int prepare(RGWModifyOp, const string *write_tag);
+ int complete(int64_t poolid, uint64_t epoch, uint64_t size,
+ uint64_t accounted_size, ceph::real_time& ut,
+ const string& etag, const string& content_type,
+ const string& storage_class,
+ bufferlist *acl_bl, RGWObjCategory category,
+ list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
+ int complete_del(int64_t poolid, uint64_t epoch,
+ ceph::real_time& removed_mtime, /* mtime of removed object */
+ list<rgw_obj_index_key> *remove_objs);
+ int cancel();
+
+ const string *get_optag() { return &optag; }
+
+ bool is_prepared() { return prepared; }
+ }; // class UpdateIndex
+
+ class List { // bucket listing; list_objects() picks the ordered or unordered implementation from params
+ protected:
+ // absolute maximum number of objects that
+ // list_objects_(un)ordered can return
+ static constexpr int64_t bucket_list_objects_absolute_max = 25000;
+
+ RGWRados::Bucket *target;
+ rgw_obj_key next_marker;
+
+ int list_objects_ordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated);
+ int list_objects_unordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated);
+
+ public:
+
+ struct Params { // listing options; defaults: enforce_ns = true, filter = NULL, list_versions = false, allow_unordered = false
+ string prefix;
+ string delim;
+ rgw_obj_key marker;
+ rgw_obj_key end_marker;
+ string ns;
+ bool enforce_ns;
+ RGWAccessListFilter *filter;
+ bool list_versions;
+ bool allow_unordered;
+
+ Params() :
+ enforce_ns(true),
+ filter(NULL),
+ list_versions(false),
+ allow_unordered(false)
+ {}
+ } params;
+
+ explicit List(RGWRados::Bucket *_target) : target(_target) {}
+
+ int list_objects(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated) { // dispatches on params.allow_unordered and forwards all arguments unchanged
+ if (params.allow_unordered) {
+ return list_objects_unordered(max, result, common_prefixes,
+ is_truncated);
+ } else {
+ return list_objects_ordered(max, result, common_prefixes,
+ is_truncated);
+ }
+ }
+ rgw_obj_key& get_next_marker() {
+ return next_marker;
+ }
+ }; // class List
+ }; // class Bucket
+
+ int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
+ const std::string& obj_prefix,
+ const std::string& obj_delim,
+ std::function<int(const rgw_bucket_dir_entry&)> handler);
+
+  /// True when the bucket info reports Swift versioning and its
+  /// swift_ver_location is non-empty.
+  bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
+    if (!bucket_info.has_swift_versioning()) {
+      return false;
+    }
+    return !bucket_info.swift_ver_location.empty();
+  }
+
+ int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
+ const rgw_user& user, /* in */
+ RGWBucketInfo& bucket_info, /* in */
+ rgw_obj& obj); /* in */
+ int swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
+ RGWObjectCtx& obj_ctx, /* in/out */
+ const rgw_user& user, /* in */
+ RGWBucketInfo& bucket_info, /* in */
+ rgw_obj& obj, /* in */
+ bool& restored); /* out */
+ int copy_obj_to_remote_dest(RGWObjState *astate,
+ map<string, bufferlist>& src_attrs,
+ RGWRados::Object::Read& read_op,
+ const rgw_user& user_id,
+ rgw_obj& dest_obj,
+ ceph::real_time *mtime);
+
+ enum AttrsMod { // how copy_obj()/fetch_remote_obj() combine source attributes with the attrs argument
+ ATTRSMOD_NONE = 0, // source attributes copied unmodified; attrs argument ignored
+ ATTRSMOD_REPLACE = 1, // new object gets exactly the attrs argument; source attributes not copied
+ ATTRSMOD_MERGE = 2 // source attributes copied, conflicting keys overwritten by the attrs argument
+ };
+
+ int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj);
+
+ int stat_remote_obj(RGWObjectCtx& obj_ctx,
+ const rgw_user& user_id,
+ req_info *info,
+ const string& source_zone,
+ rgw_obj& src_obj,
+ RGWBucketInfo& src_bucket_info,
+ real_time *src_mtime,
+ uint64_t *psize,
+ const real_time *mod_ptr,
+ const real_time *unmod_ptr,
+ bool high_precision_time,
+ const char *if_match,
+ const char *if_nomatch,
+ map<string, bufferlist> *pattrs,
+ map<string, string> *pheaders,
+ string *version_id,
+ string *ptag,
+ string *petag);
+
+ int fetch_remote_obj(RGWObjectCtx& obj_ctx,
+ const rgw_user& user_id,
+ req_info *info,
+ const string& source_zone,
+ const rgw_obj& dest_obj,
+ const rgw_obj& src_obj,
+ RGWBucketInfo& dest_bucket_info,
+ RGWBucketInfo& src_bucket_info,
+ std::optional<rgw_placement_rule> dest_placement,
+ ceph::real_time *src_mtime,
+ ceph::real_time *mtime,
+ const ceph::real_time *mod_ptr,
+ const ceph::real_time *unmod_ptr,
+ bool high_precision_time,
+ const char *if_match,
+ const char *if_nomatch,
+ AttrsMod attrs_mod,
+ bool copy_if_newer,
+ map<string, bufferlist>& attrs,
+ RGWObjCategory category,
+ std::optional<uint64_t> olh_epoch,
+ ceph::real_time delete_at,
+ string *ptag,
+ string *petag,
+ void (*progress_cb)(off_t, void *),
+ void *progress_data,
+ rgw_zone_set *zones_trace= nullptr,
+ std::optional<uint64_t>* bytes_transferred = 0);
+ /**
+ * Copy an object.
+ * dest_obj: the object to copy into
+ * src_obj: the object to copy from
+ * attrs: usage depends on attrs_mod parameter
+ * attrs_mod: the modification mode of the attrs, may have the following values:
+ * ATTRSMOD_NONE - the attributes of the source object will be
+ * copied without modifications, attrs parameter is ignored;
+ * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
+ * parameter, source object attributes are not copied;
+ * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
+ * are overwritten by values contained in attrs parameter.
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int copy_obj(RGWObjectCtx& obj_ctx,
+ const rgw_user& user_id,
+ req_info *info,
+ const string& source_zone,
+ rgw_obj& dest_obj,
+ rgw_obj& src_obj,
+ RGWBucketInfo& dest_bucket_info,
+ RGWBucketInfo& src_bucket_info,
+ const rgw_placement_rule& dest_placement,
+ ceph::real_time *src_mtime,
+ ceph::real_time *mtime,
+ const ceph::real_time *mod_ptr,
+ const ceph::real_time *unmod_ptr,
+ bool high_precision_time,
+ const char *if_match,
+ const char *if_nomatch,
+ AttrsMod attrs_mod,
+ bool copy_if_newer,
+ map<std::string, bufferlist>& attrs,
+ RGWObjCategory category,
+ uint64_t olh_epoch,
+ ceph::real_time delete_at,
+ string *version_id,
+ string *ptag,
+ string *petag,
+ void (*progress_cb)(off_t, void *),
+ void *progress_data);
+
+ int copy_obj_data(RGWObjectCtx& obj_ctx,
+ RGWBucketInfo& dest_bucket_info,
+ const rgw_placement_rule& dest_placement,
+ RGWRados::Object::Read& read_op, off_t end,
+ const rgw_obj& dest_obj,
+ ceph::real_time *mtime,
+ ceph::real_time set_mtime,
+ map<string, bufferlist>& attrs,
+ uint64_t olh_epoch,
+ ceph::real_time delete_at,
+ string *petag);
+
+ int transition_obj(RGWObjectCtx& obj_ctx,
+ RGWBucketInfo& bucket_info,
+ rgw_obj& obj,
+ const rgw_placement_rule& placement_rule,
+ const real_time& mtime,
+ uint64_t olh_epoch);
+
+ int check_bucket_empty(RGWBucketInfo& bucket_info);
+
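+ /**
+ * Delete a bucket.
+ * bucket_info: the bucket to delete, identified by its RGWBucketInfo
+ * objv_tracker: object version tracker supplied by the caller
+ * check_empty: defaults to true; presumably requires the bucket to be empty
+ * (see check_bucket_empty) - confirm against the definition
+ * Returns 0 on success, -ERR# otherwise.
+ */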
+ int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
+
+ void wakeup_meta_sync_shards(set<int>& shard_ids);
+ void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
+
+ RGWMetaSyncStatusManager* get_meta_sync_manager();
+ RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
+
+ int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
+ int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
+ int bucket_suspended(rgw_bucket& bucket, bool *suspended);
+
+ /** Delete an object.*/
+ int delete_obj(RGWObjectCtx& obj_ctx,
+ const RGWBucketInfo& bucket_owner,
+ const rgw_obj& src_obj,
+ int versioning_status,
+ uint16_t bilog_flags = 0,
+ const ceph::real_time& expiration_time = ceph::real_time(),
+ rgw_zone_set *zones_trace = nullptr);
+
+ int delete_raw_obj(const rgw_raw_obj& obj);
+
+ /** Remove an object from the bucket index */
+ int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime);
+
+ /**
+ * Set an attr on an object.
+ * ctx: opaque context pointer (other members of this class cast such a
+ * pointer to RGWObjectCtx*)
+ * bucket_info: info of the bucket holding the object
+ * obj: the object to set the attr on
+ * name: the name of the attr to set
+ * bl: the contents of the attr
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
+
+ int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
+ map<string, bufferlist>& attrs,
+ map<string, bufferlist>* rmattrs);
+
+ int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
+ bool follow_olh, bool assume_noent = false);
+  /// Convenience overload: forwards to the longer get_obj_state() with
+  /// follow_olh = true, leaving assume_noent at its declared default (false).
+  int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info,
+                    const rgw_obj& obj, RGWObjState **state) {
+    constexpr bool follow_olh = true;
+    return get_obj_state(rctx, bucket_info, obj, state, follow_olh);
+  }
+
+ using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
+ off_t, bool, RGWObjState*, void*);
+
+ int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj, off_t ofs, off_t end,
+ uint64_t max_chunk_size, iterate_obj_cb cb, void *arg);
+
+ int flush_read_list(struct get_obj_data *d);
+
+ int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
+ off_t read_ofs, off_t len, bool is_head_obj,
+ RGWObjState *astate, void *arg);
+
+ void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
+
+ /**
+ * a simple object read without keeping state
+ */
+
+ int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
+ map<string, bufferlist> *attrs, bufferlist *first_chunk,
+ RGWObjVersionTracker *objv_tracker);
+
+ int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
+ int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
+
+ int guard_reshard(BucketShard *bs,
+ const rgw_obj& obj_instance,
+ const RGWBucketInfo& bucket_info,
+ std::function<int(BucketShard *)> call);
+ int block_while_resharding(RGWRados::BucketShard *bs,
+ string *new_bucket_id,
+ const RGWBucketInfo& bucket_info,
+ optional_yield y);
+
+ void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
+ int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
+ int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
+ int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
+ const rgw_obj& obj_instance, bool delete_marker,
+ const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
+ uint64_t olh_epoch,
+ ceph::real_time unmod_since, bool high_precision_time,
+ rgw_zone_set *zones_trace = nullptr,
+ bool log_data_change = false);
+ int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
+ int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
+ map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
+ int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
+ int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
+ int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
+ uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
+ int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
+ int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
+ uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
+ rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
+ int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj);
+ int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
+ uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
+
+ void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
+ int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
+ int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
+ int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
+
+ void gen_rand_obj_instance_name(rgw_obj_key *target_key);
+ void gen_rand_obj_instance_name(rgw_obj *target);
+
+ int update_containers_stats(map<string, RGWBucketEnt>& m);
+ int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
+
+public:
+  /// Forwards to RGWObjectCtx::set_atomic(obj). ctx is cast to
+  /// RGWObjectCtx* without any check, so it must really point to one.
+  void set_atomic(void *ctx, rgw_obj& obj) {
+    static_cast<RGWObjectCtx *>(ctx)->set_atomic(obj);
+  }
+  /// Forwards to RGWObjectCtx::set_prefetch_data(obj). ctx is cast to
+  /// RGWObjectCtx* without any check, so it must really point to one.
+  void set_prefetch_data(void *ctx, const rgw_obj& obj) {
+    static_cast<RGWObjectCtx *>(ctx)->set_prefetch_data(obj);
+  }
+ int decode_policy(bufferlist& bl, ACLOwner *owner);
+ int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
+ map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
+ int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
+ int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
+ int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
+ void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
+ void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
+
+ int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
+ bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
+ map<string, bufferlist> *pattrs);
+ int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
+ int get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
+ RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
+ ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
+ boost::optional<obj_version> refresh_version = boost::none);
+ int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
+ int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
+ int get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
+ rgw_cache_entry_info *cache_info = NULL,
+ boost::optional<obj_version> refresh_version = boost::none);
+
+ int convert_old_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
+ static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
+
+
+private:
+ int _get_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant,
+ const string& bucket_name, RGWBucketInfo& info,
+ real_time *pmtime,
+ map<string, bufferlist> *pattrs,
+ boost::optional<obj_version> refresh_version);
+public:
+
+ bool call(std::string_view command, const cmdmap_t& cmdmap,
+ std::string_view format,
+ bufferlist& out) override final;
+
+protected:
+ // `call_list` must iterate over all cache entries and call
+ // `cache_list_dump_helper` with the supplied Formatter on any that
+ // include `filter` as a substring.
+ //
+ void call_list(const std::optional<std::string>& filter,
+ Formatter* format);
+ // `call_inspect` must look up the requested target and, if found,
+ // dump it to the supplied Formatter and return true. If not found,
+ // it must return false.
+ //
+ bool call_inspect(const std::string& target, Formatter* format);
+
+ // `call_erase` must erase the requested target and return true. If
+ // the requested target does not exist, it should return false.
+ bool call_erase(const std::string& target);
+
+ // `call_zap` must erase the cache.
+ void call_zap();
+public:
+
+ int get_bucket_info(RGWSysObjectCtx& obj_ctx,
+ const string& tenant_name, const string& bucket_name,
+ RGWBucketInfo& info,
+ ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
+
+ // Returns 0 on successful refresh. Returns error code if there was
+ // an error or the version stored on the OSD is the same as that
+ // presented in the BucketInfo structure.
+ //
+ int try_refresh_bucket_info(RGWBucketInfo& info,
+ ceph::real_time *pmtime,
+ map<string, bufferlist> *pattrs = nullptr);
+
+ int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
+ map<string, bufferlist> *pattrs, bool create_entry_point);
+
+ int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
+ int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
+ rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
+ int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
+ RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
+ int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
+ ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
+ int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
+ int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
+ int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+ const int shard_id,
+ const rgw_obj_index_key& start_after,
+ const string& prefix,
+ const uint32_t num_entries,
+ const bool list_versions,
+ const uint16_t exp_factor, // 0 means ignore
+ map<string, rgw_bucket_dir_entry>& m,
+ bool *is_truncated,
+ rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name) = nullptr);
+ int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
+ const rgw_obj_index_key& start,
+ const string& prefix,
+ uint32_t num_entries, bool list_versions,
+ vector<rgw_bucket_dir_entry>& ent_list,
+ bool *is_truncated, rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name) = nullptr);
+ int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
+ int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
+ int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
+ int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
+ int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
+ int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
+ int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
+
+ int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
+ int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
+ int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
+ void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
+ int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
+ int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
+ int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
+ int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
+ int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
+ list<rgw_cls_bi_entry> *entries, bool *is_truncated);
+ int bi_remove(BucketShard& bs);
+
+ int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
+ int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
+ uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
+ rgw_usage_log_entry>& usage, bool *is_truncated);
+ int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
+ uint64_t end_epoch);
+ int cls_obj_usage_log_clear(string& oid);
+
+ int key_to_shard_id(const string& key, int max_shards);
+ void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
+ void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
+ void shard_name(const string& prefix, unsigned shard_id, string& name);
+ int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
+ void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
+ int time_log_add_init(librados::IoCtx& io_ctx);
+ int time_log_add(const string& oid, list<cls_log_entry>& entries,
+ librados::AioCompletion *completion, bool monotonic_inc = true);
+ int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
+ int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
+ int max_entries, list<cls_log_entry>& entries,
+ const string& marker, string *out_marker, bool *truncated);
+ int time_log_info(const string& oid, cls_log_header *header);
+ int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
+ int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
+ const string& from_marker, const string& to_marker,
+ librados::AioCompletion *completion = nullptr);
+
+ string objexp_hint_get_shardname(int shard_num);
+ int objexp_key_shard(const rgw_obj_index_key& key);
+ void objexp_get_shard(int shard_num,
+ string& shard); /* out */
+ int objexp_hint_add(const ceph::real_time& delete_at,
+ const string& tenant_name,
+ const string& bucket_name,
+ const string& bucket_id,
+ const rgw_obj_index_key& obj_key);
+ int objexp_hint_list(const string& oid,
+ const ceph::real_time& start_time,
+ const ceph::real_time& end_time,
+ const int max_entries,
+ const string& marker,
+ list<cls_timeindex_entry>& entries, /* out */
+ string *out_marker, /* out */
+ bool *truncated); /* out */
+ int objexp_hint_parse(cls_timeindex_entry &ti_entry,
+ objexp_hint_entry& hint_entry); /* out */
+ int objexp_hint_trim(const string& oid,
+ const ceph::real_time& start_time,
+ const ceph::real_time& end_time,
+ const string& from_marker = std::string(),
+ const string& to_marker = std::string());
+
+ int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
+ int unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
+
+ void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
+ int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
+ int gc_operate(string& oid, librados::ObjectWriteOperation *op);
+ int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, librados::AioCompletion **pc = nullptr);
+ int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
+
+ int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
+ int process_gc(bool expired_only);
+ bool process_expire_objects();
+ int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
+
+ int process_lc();
+ int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
+
+ int bucket_check_index(RGWBucketInfo& bucket_info,
+ map<RGWObjCategory, RGWStorageStats> *existing_stats,
+ map<RGWObjCategory, RGWStorageStats> *calculated_stats);
+ int bucket_rebuild_index(RGWBucketInfo& bucket_info);
+ int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
+ int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
+ int move_rados_obj(librados::IoCtx& src_ioctx,
+ const string& src_oid, const string& src_locator,
+ librados::IoCtx& dst_ioctx,
+ const string& dst_oid, const string& dst_locator);
+ int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
+ int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
+
+ int cls_user_get_header(const string& user_id, cls_user_header *header);
+ int cls_user_reset_stats(const string& user_id);
+ int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
+ int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
+ int cls_user_list_buckets(rgw_raw_obj& obj,
+ const string& in_marker,
+ const string& end_marker,
+ int max_entries,
+ list<cls_user_bucket_entry>& entries,
+ string *out_marker,
+ bool *truncated);
+ int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
+ int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
+ int cls_user_complete_stats_sync(rgw_raw_obj& obj);
+ int complete_sync_user_stats(const rgw_user& user_id);
+ int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
+ int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
+
+ int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
+ RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
+
+ int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
+ RGWQuotaInfo& bucket_quota);
+
+ int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
+
+ uint64_t instance_id();
+
+ librados::Rados* get_rados_handle();
+
+ int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
+ int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
+ list<librados::AioCompletion *>& handles, bool keep_index_consistent);
+
+ /* mfa/totp stuff */
+ private:
+ void prepare_mfa_write(librados::ObjectWriteOperation *op,
+ RGWObjVersionTracker *objv_tracker,
+ const ceph::real_time& mtime);
+ public:
+ string get_mfa_oid(const rgw_user& user);
+ int get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref);
+ int check_mfa(const rgw_user& user, const string& otp_id, const string& pin);
+ int create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
+ RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
+ int remove_mfa(const rgw_user& user, const string& id,
+ RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
+ int get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result);
+ int list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result);
+ int otp_get_current_time(const rgw_user& user, ceph::real_time *result);
+
+ /* mfa interfaces used by metadata engine */
+ int set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries, bool reset_obj,
+ RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
+ int list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
+ RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime);
+ private:
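+ /**
+ * Helper method that generates the set of bucket index objects for the given
+ * bucket base oid and number of shards.
+ *
+ * bucket_oid_base [in] - base name of the bucket index object.
+ * num_shards [in] - number of bucket index object shards.
+ * bucket_objs [out] - filled by this method; a map from int (presumably the
+ * shard id) to bucket index object name.
+ * shard_id [in] - defaults to -1; presumably restricts the result to a
+ * single shard when non-negative (confirm against the definition).
+ */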
+ void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
+ map<int, string>& bucket_objs, int shard_id = -1);
+
+ /**
+ * Get the bucket index object with the given base bucket index object and object key,
+ * and the number of bucket index shards.
+ *
+ * bucket_oid_base [in] - bucket object base name.
+ * obj_key [in] - object key.
+ * num_shards [in] - number of bucket index shards.
+ * hash_type [in] - type of hash to find the shard ID.
+ * bucket_obj [out] - the bucket index object for the given object.
+ *
+ * Return 0 on success, a failure code otherwise.
+ */
+ int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
+ uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
+
+ void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
+ int shard_id, string *bucket_obj);
+
+ /**
+ * Check the actual on-disk state of the object specified
+ * by list_state, and fill in the time and size of object.
+ * Then append any changes to suggested_updates for
+ * the rgw class' dir_suggest_changes function.
+ *
+ * Note that this can maul list_state; don't use it afterwards. Also
+ * it expects object to already be filled in from list_state; it only
+ * sets the size and mtime.
+ *
+ * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
+ * and -errno on other failures. (-ENOENT is not a failure, and it
+ * will encode that info as a suggested update.)
+ */
+ int check_disk_state(librados::IoCtx io_ctx,
+ const RGWBucketInfo& bucket_info,
+ rgw_bucket_dir_entry& list_state,
+ rgw_bucket_dir_entry& object,
+ bufferlist& suggested_updates);
+
+ /**
+ * Init pool iteration
+ * pool: pool to use for the ctx initialization
+ * ctx: context object to use for the iteration
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
+
+ /**
+ * Init pool iteration
+ * pool: pool to use
+ * cursor: position to start iteration
+ * ctx: context object to use for the iteration
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
+
+ /**
+ * Get pool iteration position
+ * ctx: context object to use for the iteration
+ * Returns: string representation of position
+ */
+ string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
+
+ /**
+ * Iterate over a pool and return object names, using an optional filter
+ * ctx: iteration context, initialized with pool_iterate_begin()
+ * num: max number of objects to return
+ * objs: a vector that the results will be appended to
+ * is_truncated: if not NULL, will hold true iff the listing was truncated,
+ * i.e. more objects remain and iteration is not yet complete
+ * filter: if not NULL, will be used to filter returned objects
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+ int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
+ bool *is_truncated, RGWAccessListFilter *filter);
+
+ uint64_t next_bucket_id();
+
+ /**
+ * This is broken out to facilitate unit testing.
+ */
+ static uint32_t calc_ordered_bucket_list_per_shard(uint32_t num_entries,
+ uint32_t num_shards);
+};
+
+/**
+ * Static entry points for obtaining and releasing an RGWRados store.
+ * get_storage() and get_raw_storage() are thin inline wrappers around the
+ * init_*_provider() functions, which are only declared here.
+ */
+class RGWStoreManager {
+public:
+  RGWStoreManager() {}
+
+  /// Passes every argument through unchanged to init_storage_provider().
+  static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
+                               bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
+    return init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads,
+                                 run_sync_thread, run_reshard_thread, use_cache);
+  }
+
+  /// Passes cct through to init_raw_storage_provider().
+  static RGWRados *get_raw_storage(CephContext *cct) {
+    return init_raw_storage_provider(cct);
+  }
+
+  static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
+  static RGWRados *init_raw_storage_provider(CephContext *cct);
+  static void close_storage(RGWRados *store);
+
+};
+
+/**
+ * Name builder for the objects that make up a multipart upload.
+ *
+ * From an object key (oid) and an upload id it derives:
+ *   meta   = <oid> "." <upload_id> MP_META_SUFFIX
+ *   prefix = <oid> "." <part_unique_str>
+ *   part   = <prefix> "." <part number or part string>
+ *
+ * An empty oid is treated as "no upload": init() clears every field.
+ */
+class RGWMPObj {
+  string oid;
+  string prefix;
+  string meta;
+  string upload_id;
+public:
+  RGWMPObj() {}
+  RGWMPObj(const string& _oid, const string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
+  }
+  /// Two-argument form: the upload id doubles as the part-unique string.
+  void init(const string& _oid, const string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
+  }
+  /// (Re)computes all names; clears the object instead when _oid is empty.
+  void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
+    if (_oid.empty()) {
+      clear();
+      return;
+    }
+    oid = _oid;
+    upload_id = _upload_id;
+    prefix = oid + ".";
+    // meta is built from the bare "<oid>." prefix, before the part-unique
+    // string is appended to it.
+    meta = prefix + upload_id + MP_META_SUFFIX;
+    prefix.append(part_unique_str);
+  }
+  const string& get_meta() const { return meta; }
+  /// Returns "<prefix>.<num>".
+  string get_part(int num) const {
+    char buf[16];
+    snprintf(buf, sizeof(buf), ".%d", num);
+    string s = prefix;
+    s.append(buf);
+    return s;
+  }
+  /// Returns "<prefix>.<part>".
+  string get_part(const string& part) const {
+    string s = prefix;
+    s.append(".");
+    s.append(part);
+    return s;
+  }
+  const string& get_upload_id() const {
+    return upload_id;
+  }
+  const string& get_key() const {
+    return oid;
+  }
+  /**
+   * Parses a meta object name of the form "<key>.<upload_id>.<suffix>" and
+   * re-initializes this object from it. The text after the last dot is not
+   * compared against MP_META_SUFFIX.
+   *
+   * Returns false when the name does not contain two separating dots. A name
+   * whose key part is empty (e.g. ".id.meta") still returns true but leaves
+   * the object cleared, because init() treats an empty oid as "clear".
+   */
+  bool from_meta(const string& meta) {
+    const string::size_type end_pos = meta.rfind('.'); // search for ".meta"
+    // end_pos == 0 must be rejected explicitly: "end_pos - 1" would wrap to
+    // npos and the second rfind() would find the very same dot again.
+    if (end_pos == string::npos || end_pos == 0)
+      return false;
+    const string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
+    if (mid_pos == string::npos)
+      return false;
+    oid = meta.substr(0, mid_pos);
+    upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
+    init(oid, upload_id, upload_id);
+    return true;
+  }
+  void clear() {
+    oid = "";
+    prefix = "";
+    meta = "";
+    upload_id = "";
+  }
+  friend std::ostream& operator<<(std::ostream& out, const RGWMPObj& obj) {
+    return out << "RGWMPObj:{ prefix=" << std::quoted(obj.prefix) <<
+      ", meta=" << std::quoted(obj.meta) << " }";
+  }
+}; // class RGWMPObj
+
+
+/**
+ * Base class for work driven by a private Worker thread. Subclasses supply
+ * process() and interval_msec(); Worker::entry(), start() and stop() are
+ * only declared in this class and defined elsewhere.
+ */
+class RGWRadosThread {
+  class Worker : public Thread {
+    CephContext *cct;
+    RGWRadosThread *processor;
+    Mutex lock;
+    Cond cond;
+
+    // Takes the lock and waits on cond.
+    void wait() {
+      Mutex::Locker guard(lock);
+      cond.Wait(lock);
+    }
+
+    // Takes the lock and waits on cond, passing wait_time to WaitInterval().
+    void wait_interval(const utime_t& wait_time) {
+      Mutex::Locker guard(lock);
+      cond.WaitInterval(lock, wait_time);
+    }
+
+  public:
+    Worker(CephContext *_cct, RGWRadosThread *_p)
+      : cct(_cct), processor(_p), lock("RGWRadosThread::Worker") {}
+    void *entry() override;
+
+    // Takes the lock and signals cond.
+    void signal() {
+      Mutex::Locker guard(lock);
+      cond.Signal();
+    }
+  };
+
+  Worker *worker;
+
+protected:
+  CephContext *cct;
+  RGWRados *store;
+
+  std::atomic<bool> down_flag = { false };
+
+  string thread_name;
+
+  virtual uint64_t interval_msec() = 0;
+  virtual void stop_process() {}
+public:
+  RGWRadosThread(RGWRados *_store, const string& thread_name = "radosgw")
+    : worker(nullptr), cct(_store->ctx()), store(_store), thread_name(thread_name) {}
+
+  // Destruction always calls stop().
+  virtual ~RGWRadosThread() {
+    stop();
+  }
+
+  virtual int init() { return 0; }
+  virtual int process() = 0;
+
+  bool going_down() { return down_flag.load(); }
+
+  void start();
+  void stop();
+
+  // Signals the worker; a no-op while no worker exists.
+  void signal() {
+    if (!worker) {
+      return;
+    }
+    worker->signal();
+  }
+};
+
+#endif
diff --git a/src/rgw/rgw_realm_reloader.cc b/src/rgw/rgw_realm_reloader.cc
new file mode 100644
index 00000000..1fd48db0
--- /dev/null
+++ b/src/rgw/rgw_realm_reloader.cc
@@ -0,0 +1,176 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_realm_reloader.h"
+#include "rgw_rados.h"
+
+#include "rgw_bucket.h"
+#include "rgw_log.h"
+#include "rgw_rest.h"
+#include "rgw_user.h"
+
+#include "services/svc_zone.h"
+
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "rgw realm reloader: ")
+
+
+// safe callbacks from SafeTimer are unnecessary. reload() can take a long
+// time, so we don't want to hold the mutex and block handle_notify() for the
+// duration
+static constexpr bool USE_SAFE_TIMER_CALLBACKS = false;
+
+
+RGWRealmReloader::RGWRealmReloader(RGWRados*& store, std::map<std::string, std::string>& service_map_meta, // store is bound by reference, so reload()'s assignments are visible to the caller
+ Pauser* frontends)
+ : store(store),
+ service_map_meta(service_map_meta),
+ frontends(frontends),
+ timer(store->ctx(), mutex, USE_SAFE_TIMER_CALLBACKS), // the timer is handed the reloader's mutex and the safe-callback flag
+ mutex("RGWRealmReloader"),
+ reload_scheduled(nullptr) // no reload pending yet
+{
+ timer.init(); // presumably starts the timer thread — confirm in common/Timer.h
+}
+
+RGWRealmReloader::~RGWRealmReloader() // shuts the timer down while holding the reloader mutex
+{
+ Mutex::Locker lock(mutex);
+ timer.shutdown();
+}
+
+class RGWRealmReloader::C_Reload : public Context { // Context whose finish() runs RGWRealmReloader::reload()
+ RGWRealmReloader* reloader;
+ public:
+ explicit C_Reload(RGWRealmReloader* owner) : reloader(owner) {}
+ void finish(int /* r: ignored */) override { reloader->reload(); }
+};
+
+void RGWRealmReloader::handle_notify(RGWRealmNotify type, // schedules reload() on the timer; neither type nor p is read in this body
+ bufferlist::const_iterator& p)
+{
+ if (!store) { // tested before the mutex is taken
+ /* we're in the middle of reload */
+ return;
+ }
+
+ CephContext *const cct = store->ctx();
+
+ Mutex::Locker lock(mutex);
+ if (reload_scheduled) { // at most one pending reload event at a time
+ ldout(cct, 4) << "Notification on realm, reconfiguration "
+ "already scheduled" << dendl;
+ return;
+ }
+
+ reload_scheduled = new C_Reload(this); // the raw pointer is handed to the timer below
+ cond.SignalOne(); // wake reload() if it blocked on a bad configuration
+
+ // schedule reload() without delay
+ timer.add_event_after(0, reload_scheduled);
+
+ ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl;
+}
+
+void RGWRealmReloader::reload() // pause frontends, replace the RGWRados instance, re-init subsystems, resume frontends
+{
+ CephContext *const cct = store->ctx(); // captured first: store is set to nullptr below
+ ldout(cct, 1) << "Pausing frontends for realm update..." << dendl;
+
+ frontends->pause();
+
+ ldout(cct, 1) << "Frontends paused" << dendl;
+
+ // TODO: make RGWRados responsible for rgw_log_usage lifetime
+ rgw_log_usage_finalize();
+
+ // destroy the existing store
+ RGWStoreManager::close_storage(store);
+ store = nullptr; // handle_notify() returns early while store is null
+
+ ldout(cct, 1) << "Store closed" << dendl;
+ {
+ // allow a new notify to reschedule us. it's important that we do this
+ // before we start loading the new realm, or we could miss some updates
+ Mutex::Locker lock(mutex);
+ reload_scheduled = nullptr;
+ }
+
+ while (!store) { // loop until a store exists and no newer notification is pending
+ // recreate and initialize a new store
+ store =
+ RGWStoreManager::get_storage(cct,
+ cct->_conf->rgw_enable_gc_threads,
+ cct->_conf->rgw_enable_lc_threads,
+ cct->_conf->rgw_enable_quota_threads,
+ cct->_conf->rgw_run_sync_thread,
+ cct->_conf.get_val<bool>("rgw_dynamic_resharding"),
+ cct->_conf->rgw_cache_enabled);
+
+ ldout(cct, 1) << "Creating new store" << dendl;
+
+ RGWRados* store_cleanup = nullptr; // receives a freshly created store that must be discarded
+ {
+ Mutex::Locker lock(mutex);
+
+ // failure to recreate RGWRados is not a recoverable error, but we
+ // don't want to assert or abort the entire cluster. instead, just
+ // sleep until we get another notification, and retry until we get
+ // a working configuration
+ if (store == nullptr) {
+ lderr(cct) << "Failed to reinitialize RGWRados after a realm "
+ "configuration update. Waiting for a new update." << dendl;
+
+ // sleep until another event is scheduled
+ while (!reload_scheduled)
+ cond.Wait(mutex);
+
+ ldout(cct, 1) << "Woke up with a new configuration, retrying "
+ "RGWRados initialization." << dendl;
+ }
+
+ if (reload_scheduled) {
+ // cancel the event; we'll handle it now
+ timer.cancel_event(reload_scheduled);
+ reload_scheduled = nullptr;
+
+ // if we successfully created a store, clean it up outside of the lock,
+ // then continue to loop and recreate another
+ std::swap(store, store_cleanup); // store becomes nullptr, so the while loop runs again
+ }
+ }
+
+ if (store_cleanup) {
+ ldout(cct, 4) << "Got another notification, restarting RGWRados "
+ "initialization." << dendl;
+
+ RGWStoreManager::close_storage(store_cleanup);
+ }
+ }
+
+ int r = store->register_to_service_map("rgw", service_map_meta);
+ if (r < 0) {
+ lderr(cct) << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl;
+
+ /* ignore error */
+ }
+
+ ldout(cct, 1) << "Finishing initialization of new store" << dendl;
+ // finish initializing the new store
+ ldout(cct, 1) << " - REST subsystem init" << dendl;
+ rgw_rest_init(cct, store, store->svc.zone->get_zonegroup());
+ ldout(cct, 1) << " - user subsystem init" << dendl;
+ rgw_user_init(store);
+ ldout(cct, 1) << " - bucket subsystem init" << dendl; // this line used to repeat the "user subsystem" message
+ rgw_bucket_init(store->meta_mgr);
+ ldout(cct, 1) << " - usage subsystem init" << dendl;
+ rgw_log_usage_init(cct, store);
+
+ ldout(cct, 1) << "Resuming frontends with new realm configuration." << dendl;
+
+ frontends->resume(store);
+}
diff --git a/src/rgw/rgw_realm_reloader.h b/src/rgw/rgw_realm_reloader.h
new file mode 100644
index 00000000..1277429e
--- /dev/null
+++ b/src/rgw/rgw_realm_reloader.h
@@ -0,0 +1,63 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_REALM_RELOADER_H
+#define RGW_REALM_RELOADER_H
+
+#include "rgw_realm_watcher.h"
+#include "common/Cond.h"
+
+class RGWRados;
+
+/**
+ * RGWRealmReloader responds to new period notifications by recreating RGWRados
+ * with the updated realm configuration.
+ */
+class RGWRealmReloader : public RGWRealmWatcher::Watcher {
+ public:
+ /**
+ * Pauser is an interface to pause/resume frontends. Frontend cooperation
+ * is required to ensure that they stop issuing requests on the old
+ * RGWRados instance, and restart with the updated configuration.
+ *
+ * This abstraction avoids a dependency on class RGWFrontend.
+ */
+ class Pauser {
+ public:
+ virtual ~Pauser() = default;
+
+ /// pause all frontends while realm reconfiguration is in progress
+ virtual void pause() = 0;
+ /// resume all frontends with the given RGWRados instance
+ virtual void resume(RGWRados* store) = 0;
+ };
+
+ RGWRealmReloader(RGWRados*& store, std::map<std::string, std::string>& service_map_meta, // all three arguments are kept as reference/pointer members, not copied
+ Pauser* frontends);
+ ~RGWRealmReloader() override;
+
+ /// respond to realm notifications by scheduling a reload()
+ void handle_notify(RGWRealmNotify type, bufferlist::const_iterator& p) override;
+
+ private:
+ /// pause frontends and replace the RGWRados instance
+ void reload();
+
+ class C_Reload; //< Context that calls reload()
+
+ /// main()'s RGWRados pointer as a reference, modified by reload()
+ RGWRados*& store;
+ std::map<std::string, std::string>& service_map_meta; // reference member; this class does not own the map
+ Pauser *const frontends;
+
+ /// reload() takes a significant amount of time, so we don't want to run
+ /// it in the handle_notify() thread. we choose a timer thread instead of a
+ /// Finisher because it allows us to cancel events that were scheduled while
+ /// reload() is still running
+ SafeTimer timer; // NOTE(review): declared before mutex, so it is constructed first — confirm SafeTimer only stores the Mutex reference it is given
+ Mutex mutex; //< protects access to timer and reload_scheduled
+ Cond cond; //< to signal reload() after an invalid realm config
+ C_Reload* reload_scheduled; //< reload() context if scheduled
+};
+
+#endif // RGW_REALM_RELOADER_H
diff --git a/src/rgw/rgw_realm_watcher.cc b/src/rgw/rgw_realm_watcher.cc
new file mode 100644
index 00000000..ee154f0f
--- /dev/null
+++ b/src/rgw/rgw_realm_watcher.cc
@@ -0,0 +1,148 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+
+#include "rgw_realm_watcher.h"
+#include "rgw_tools.h"
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "rgw realm watcher: ")
+
+
+RGWRealmWatcher::RGWRealmWatcher(CephContext* cct, const RGWRealm& realm) // tries to start a watch on the realm; failures are logged, never thrown
+ : cct(cct)
+{
+ // no default realm, nothing to watch
+ if (realm.get_id().empty()) {
+ ldout(cct, 4) << "No realm, disabling dynamic reconfiguration." << dendl;
+ return;
+ }
+
+ // establish the watch on RGWRealm
+ int r = watch_start(realm);
+ if (r < 0) { // watch_start() has already logged the specific failure
+ lderr(cct) << "Failed to establish a watch on RGWRealm, "
+ "disabling dynamic reconfiguration." << dendl;
+ return;
+ }
+}
+
+RGWRealmWatcher::~RGWRealmWatcher() // tears down the watch if one is active
+{
+ watch_stop();
+}
+
+void RGWRealmWatcher::add_watcher(RGWRealmNotify type, Watcher& watcher) // the map stores a reference, so watcher must outlive this object
+{
+ watchers.emplace(type, watcher); // std::map::emplace leaves an existing entry for the same type untouched
+}
+
+void RGWRealmWatcher::handle_notify(uint64_t notify_id, uint64_t cookie, // ack the notify, then dispatch each encoded RGWRealmNotify to its registered watcher
+ uint64_t notifier_id, bufferlist& bl)
+{
+ if (cookie != watch_handle) // not for the watch currently held: ignore without acking
+ return;
+
+ // send an empty notify ack
+ bufferlist reply;
+ pool_ctx.notify_ack(watch_oid, notify_id, cookie, reply);
+
+ try {
+ auto p = bl.cbegin();
+ while (!p.end()) { // the payload may hold several notifications back to back
+ RGWRealmNotify notify;
+ decode(notify, p);
+ auto watcher = watchers.find(notify);
+ if (watcher == watchers.end()) {
+ lderr(cct) << "Failed to find a watcher for notify type "
+ << static_cast<int>(notify) << dendl;
+ break; // stop processing the rest of the payload
+ }
+ watcher->second.handle_notify(notify, p); // the watcher receives the same iterator
+ }
+ } catch (const buffer::error &e) {
+ lderr(cct) << "Failed to decode realm notifications." << dendl;
+ }
+}
+
+void RGWRealmWatcher::handle_error(uint64_t cookie, int err) // log the error, then re-establish the watch if it concerns our handle
+{
+ lderr(cct) << "RGWRealmWatcher::handle_error oid=" << watch_oid << " err=" << err << dendl; // logged even when the cookie does not match
+ if (cookie != watch_handle)
+ return;
+
+ watch_restart(); // return value is not checked here
+}
+
+int RGWRealmWatcher::watch_start(const RGWRealm& realm) // connect a Rados client, open the realm pool, watch the control object; returns 0 or a negative error
+{
+ // initialize a Rados client
+ int r = rados.init_with_context(cct);
+ if (r < 0) {
+ lderr(cct) << "Rados client initialization failed with "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ r = rados.connect();
+ if (r < 0) {
+ lderr(cct) << "Rados client connection failed with "
+ << cpp_strerror(-r) << dendl;
+ return r; // NOTE(review): no rados.shutdown() on this path, unlike the later failures — confirm that is intended
+ }
+
+ // open an IoCtx for the realm's pool
+ rgw_pool pool(realm.get_pool(cct));
+ r = rgw_init_ioctx(&rados, pool, pool_ctx);
+ if (r < 0) {
+ lderr(cct) << "Failed to open pool " << pool
+ << " with " << cpp_strerror(-r) << dendl;
+ rados.shutdown();
+ return r;
+ }
+
+ // register a watch on the realm's control object
+ auto oid = realm.get_control_oid();
+ r = pool_ctx.watch2(oid, &watch_handle, this);
+ if (r < 0) {
+ lderr(cct) << "Failed to watch " << oid
+ << " with " << cpp_strerror(-r) << dendl;
+ pool_ctx.close();
+ rados.shutdown();
+ return r;
+ }
+
+ ldout(cct, 10) << "Watching " << oid << dendl;
+ std::swap(watch_oid, oid); // watch_oid is set only on success; the other methods test it for emptiness
+ return 0;
+}
+
+int RGWRealmWatcher::watch_restart() // drop the current watch and register a new one on the same oid; returns the watch2() result
+{
+ ceph_assert(!watch_oid.empty()); // requires that a watch was previously established
+ int r = pool_ctx.unwatch2(watch_handle);
+ if (r < 0) { // logged only; the re-watch below is attempted regardless
+ lderr(cct) << "Failed to unwatch on " << watch_oid
+ << " with " << cpp_strerror(-r) << dendl;
+ }
+ r = pool_ctx.watch2(watch_oid, &watch_handle, this);
+ if (r < 0) {
+ lderr(cct) << "Failed to restart watch on " << watch_oid
+ << " with " << cpp_strerror(-r) << dendl;
+ pool_ctx.close();
+ watch_oid.clear(); // makes watch_stop() a no-op; NOTE(review): a later watch_restart() would trip the assert above
+ }
+ return r;
+}
+
+void RGWRealmWatcher::watch_stop() // remove the watch and close the pool context, if a watch is active
+{
+ const bool watching = !watch_oid.empty(); // watch_oid is non-empty only while a watch is registered
+ if (watching) {
+ pool_ctx.unwatch2(watch_handle);
+ pool_ctx.close();
+ watch_oid.clear();
+ }
+}
diff --git a/src/rgw/rgw_realm_watcher.h b/src/rgw/rgw_realm_watcher.h
new file mode 100644
index 00000000..03d7e939
--- /dev/null
+++ b/src/rgw/rgw_realm_watcher.h
@@ -0,0 +1,69 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_REALM_WATCHER_H
+#define RGW_REALM_WATCHER_H
+
+#include "include/rados/librados.hpp"
+#include "include/ceph_assert.h"
+#include "common/Timer.h"
+#include "common/Cond.h"
+
+class RGWRados;
+class RGWRealm;
+
+enum class RGWRealmNotify { // notification types decoded from realm notify payloads and used as the key for watcher lookup
+ Reload,
+ ZonesNeedPeriod,
+};
+WRITE_RAW_ENCODER(RGWRealmNotify); // presumably generates raw encode/decode overloads for the enum — confirm in the encoding header
+
+/**
+ * RGWRealmWatcher establishes a watch on the current RGWRealm's control object,
+ * and forwards notifications to registered observers.
+ */
+class RGWRealmWatcher : public librados::WatchCtx2 {
+ public:
+ /**
+ * Watcher is an interface that allows the RGWRealmWatcher to pass
+ * notifications on to other interested objects.
+ */
+ class Watcher {
+ public:
+ virtual ~Watcher() = default;
+
+ virtual void handle_notify(RGWRealmNotify type, // p is the payload iterator, positioned after the decoded type
+ bufferlist::const_iterator& p) = 0;
+ };
+
+ RGWRealmWatcher(CephContext* cct, const RGWRealm& realm);
+ ~RGWRealmWatcher() override;
+
+ /// register a watcher for the given notification type
+ void add_watcher(RGWRealmNotify type, Watcher& watcher);
+
+ /// respond to realm notifications by calling the appropriate watcher
+ void handle_notify(uint64_t notify_id, uint64_t cookie,
+ uint64_t notifier_id, bufferlist& bl) override;
+
+ /// reestablish the watch if it gets disconnected
+ void handle_error(uint64_t cookie, int err) override;
+
+ private:
+ CephContext *const cct;
+
+ /// keep a separate Rados client whose lifetime is independent of RGWRados
+ /// so that we don't miss notifications during realm reconfiguration
+ librados::Rados rados;
+ librados::IoCtx pool_ctx;
+ uint64_t watch_handle = 0; // written by watch2(); compared against the cookie in the callbacks
+ std::string watch_oid; // non-empty only while a watch is registered
+
+ int watch_start(const RGWRealm& realm);
+ int watch_restart();
+ void watch_stop();
+
+ std::map<RGWRealmNotify, Watcher&> watchers; // holds references: registered watchers must outlive this object
+};
+
+#endif // RGW_REALM_WATCHER_H
diff --git a/src/rgw/rgw_request.h b/src/rgw/rgw_request.h
new file mode 100644
index 00000000..23483208
--- /dev/null
+++ b/src/rgw/rgw_request.h
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_REQUEST_H
+#define RGW_REQUEST_H
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_acl.h"
+#include "rgw_user.h"
+#include "rgw_op.h"
+#if defined(WITH_RADOSGW_FCGI_FRONTEND)
+#include "rgw_fcgi.h"
+#endif
+
+#include "common/QueueRing.h"
+
+#include <atomic>
+
+struct RGWRequest // base request record: an id plus pointers to request state and operation (both start null)
+{
+ uint64_t id;
+ struct req_state *s;
+ RGWOp *op;
+
+ explicit RGWRequest(uint64_t id) : id(id), s(nullptr), op(nullptr) {}
+
+ virtual ~RGWRequest() = default;
+
+ void init_state(req_state *_s) { // attach the request state; no ownership handling happens here
+ s = _s;
+ }
+}; /* RGWRequest */
+
+#if defined(WITH_RADOSGW_FCGI_FRONTEND)
+struct RGWFCGXRequest : public RGWRequest { // takes an FCGX_Request from the ring on construction and returns it on destruction
+ FCGX_Request *fcgx; // filled in by qr->dequeue() in the constructor
+ QueueRing<FCGX_Request *> *qr;
+
+ RGWFCGXRequest(uint64_t req_id, QueueRing<FCGX_Request *> *_qr)
+ : RGWRequest(req_id), qr(_qr) {
+ qr->dequeue(&fcgx);
+ }
+
+ ~RGWFCGXRequest() override {
+ FCGX_Finish_r(fcgx); // finish the FastCGI request before handing the slot back
+ qr->enqueue(fcgx);
+ }
+};
+#endif
+
+struct RGWLoadGenRequest : public RGWRequest { // request record that stores method, resource, content length and a failure flag
+ string method;
+ string resource;
+ int content_length;
+ std::atomic<bool>* fail_flag = nullptr; // not owned; points at a flag supplied by the caller
+
+RGWLoadGenRequest(uint64_t req_id, const string& _m, const string& _r, int _cl,
+ std::atomic<bool> *ff)
+ : RGWRequest(req_id), method(_m), resource(_r), content_length(_cl),
+ fail_flag(ff) {}
+};
+
+#endif /* RGW_REQUEST_H */
diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc
new file mode 100644
index 00000000..eb86b220
--- /dev/null
+++ b/src/rgw/rgw_reshard.cc
@@ -0,0 +1,1177 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <limits>
+#include <sstream>
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_bucket.h"
+#include "rgw_reshard.h"
+#include "cls/rgw/cls_rgw_client.h"
+#include "cls/lock/cls_lock_client.h"
+#include "common/errno.h"
+#include "common/ceph_json.h"
+
+#include "common/dout.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+const string reshard_oid_prefix = "reshard.";
+const string reshard_lock_name = "reshard_process";
+const string bucket_instance_lock_name = "bucket_instance_lock";
+
+
+class BucketReshardShard { // buffers index entries for one target shard and writes them in batches with async rados ops
+ RGWRados *store;
+ const RGWBucketInfo& bucket_info;
+ int num_shard;
+ RGWRados::BucketShard bs;
+ vector<rgw_cls_bi_entry> entries; // pending entries, written by flush()
+ map<RGWObjCategory, rgw_bucket_category_stats> stats; // per-category stats accumulated for the pending entries
+ deque<librados::AioCompletion *>& aio_completions; // supplied by the creator by reference
+ uint64_t max_aio_completions;
+ uint64_t reshard_shard_batch_size;
+
+ int wait_next_completion() { // wait for the oldest queued completion; returns 0 or its negative error
+ librados::AioCompletion *c = aio_completions.front();
+ aio_completions.pop_front();
+
+ c->wait_for_safe();
+
+ int ret = c->get_return_value();
+ c->release();
+
+ if (ret < 0) {
+ derr << "ERROR: reshard rados operation failed: " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ return 0;
+ }
+
+ int get_completion(librados::AioCompletion **c) { // create and queue a completion, first waiting for one if the queue is full
+ if (aio_completions.size() >= max_aio_completions) {
+ int ret = wait_next_completion();
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ *c = librados::Rados::aio_create_completion(nullptr, nullptr, nullptr);
+ aio_completions.push_back(*c);
+
+ return 0;
+ }
+
+public:
+ BucketReshardShard(RGWRados *_store, const RGWBucketInfo& _bucket_info,
+ int _num_shard,
+ deque<librados::AioCompletion *>& _completions) :
+ store(_store), bucket_info(_bucket_info), bs(store),
+ aio_completions(_completions)
+ {
+ num_shard = (bucket_info.num_shards > 0 ? _num_shard : -1); // -1 when the bucket reports no shards
+ bs.init(bucket_info.bucket, num_shard, nullptr /* no RGWBucketInfo */); // NOTE(review): return value is not checked
+
+ max_aio_completions =
+ store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_max_aio");
+ reshard_shard_batch_size =
+ store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_batch_size");
+ }
+
+ int get_num_shard() {
+ return num_shard;
+ }
+
+ int add_entry(rgw_cls_bi_entry& entry, bool account, RGWObjCategory category, // buffer one entry; flushes once the batch size is reached
+ const rgw_bucket_category_stats& entry_stats) {
+ entries.push_back(entry);
+ if (account) { // only accounted entries contribute to the category stats
+ rgw_bucket_category_stats& target = stats[category];
+ target.num_entries += entry_stats.num_entries;
+ target.total_size += entry_stats.total_size;
+ target.total_size_rounded += entry_stats.total_size_rounded;
+ target.actual_size += entry_stats.actual_size;
+ }
+ if (entries.size() >= reshard_shard_batch_size) {
+ int ret = flush();
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+ }
+
+ int flush() { // submit all buffered entries and stats as one async write; returns 0 or a negative error
+ if (entries.empty())
+ return 0;
+
+ librados::ObjectWriteOperation op;
+ for (auto& entry : entries) {
+ store->bi_put(op, bs, entry);
+ }
+ cls_rgw_bucket_update_stats(op, false, stats);
+ librados::AioCompletion *c;
+ int ret = get_completion(&c);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = bs.index_ctx.aio_operate(bs.bucket_obj, c, &op);
+ if (ret < 0) {
+ derr << "ERROR: failed to store entries in target bucket shard (bs=" << bs.bucket << "/" << bs.shard_id << ") error=" << cpp_strerror(-ret) << dendl;
+ aio_completions.pop_back(); // c was queued by get_completion() but the op was not submitted; leaving it queued would make a later wait block on it
+ c->release();
+ return ret;
+ }
+ entries.clear();
+ stats.clear();
+ return 0;
+ }
+
+ int wait_all_aio() { // drain the completion queue; returns the last error seen, or 0
+ int ret = 0;
+ while (!aio_completions.empty()) {
+ int r = wait_next_completion();
+ if (r < 0) {
+ ret = r;
+ }
+ }
+ return ret;
+ }
+}; // class BucketReshardShard
+
+
+class BucketReshardManager { // owns one BucketReshardShard per target shard and routes entries to them
+ RGWRados *store;
+ const RGWBucketInfo& target_bucket_info;
+ deque<librados::AioCompletion *> completions; // one queue shared by every shard object
+ int num_target_shards;
+ vector<BucketReshardShard *> target_shards; // owned: freed by finish() or by the destructor
+
+public:
+ BucketReshardManager(RGWRados *_store,
+ const RGWBucketInfo& _target_bucket_info,
+ int _num_target_shards) :
+ store(_store), target_bucket_info(_target_bucket_info),
+ num_target_shards(_num_target_shards)
+ {
+ target_shards.resize(num_target_shards);
+ for (int i = 0; i < num_target_shards; ++i) {
+ target_shards[i] = new BucketReshardShard(store, target_bucket_info, i, completions);
+ }
+ }
+
+ ~BucketReshardManager() { // wait for outstanding aio, then free any shard that finish() did not already release
+ for (auto& shard : target_shards) {
+ int ret = shard->wait_all_aio();
+ if (ret < 0) {
+ ldout(store->ctx(), 20) << __func__ << ": shard->wait_all_aio() returned ret=" << ret << dendl;
+ }
+ delete shard; // finish() empties target_shards, so this runs only when finish() was never reached
+ }
+ }
+
+ int add_entry(int shard_index, // forward one entry to the given target shard; returns 0 or a negative error
+ rgw_cls_bi_entry& entry, bool account, RGWObjCategory category,
+ const rgw_bucket_category_stats& entry_stats) {
+ int ret = target_shards[shard_index]->add_entry(entry, account, category,
+ entry_stats);
+ if (ret < 0) {
+ derr << "ERROR: target_shards.add_entry(" << entry.idx <<
+ ") returned error: " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ return 0;
+ }
+
+ int finish() { // flush every shard, wait for all aio, free the shards; returns the last error seen, or 0
+ int ret = 0;
+ for (auto& shard : target_shards) {
+ int r = shard->flush();
+ if (r < 0) {
+ derr << "ERROR: target_shards[" << shard->get_num_shard() << "].flush() returned error: " << cpp_strerror(-r) << dendl;
+ ret = r;
+ }
+ }
+ for (auto& shard : target_shards) {
+ int r = shard->wait_all_aio();
+ if (r < 0) {
+ derr << "ERROR: target_shards[" << shard->get_num_shard() << "].wait_all_aio() returned error: " << cpp_strerror(-r) << dendl;
+ ret = r;
+ }
+ delete shard;
+ }
+ target_shards.clear(); // leaves nothing for the destructor to wait on or delete
+ return ret;
+ }
+}; // class BucketReshardManager
+
+RGWBucketReshard::RGWBucketReshard(RGWRados *_store, // stores the arguments and builds the per-bucket reshard lock
+ const RGWBucketInfo& _bucket_info,
+ const map<string, bufferlist>& _bucket_attrs,
+ RGWBucketReshardLock* _outer_reshard_lock) :
+ store(_store), bucket_info(_bucket_info), bucket_attrs(_bucket_attrs),
+ reshard_lock(store, bucket_info, true), // true: presumably the ephemeral flag — confirm against the RGWBucketReshardLock overload in the header
+ outer_reshard_lock(_outer_reshard_lock) // may be null; do_reshard() tests it before use
+{ }
+
+int RGWBucketReshard::set_resharding_status(RGWRados* store, // write a reshard status entry via bucket_set_reshard(); returns 0, -EINVAL, or the store error
+ const RGWBucketInfo& bucket_info,
+ const string& new_instance_id,
+ int32_t num_shards,
+ cls_rgw_reshard_status status)
+{
+ if (new_instance_id.empty()) { // an instance id is mandatory
+ ldout(store->ctx(), 0) << __func__ << " missing new bucket instance id" << dendl;
+ return -EINVAL;
+ }
+
+ cls_rgw_bucket_instance_entry instance_entry;
+ instance_entry.set_status(new_instance_id, num_shards, status);
+
+ int ret = store->bucket_set_reshard(bucket_info, instance_entry);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "RGWReshard::" << __func__ << " ERROR: error setting bucket resharding flag on bucket index: "
+ << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ return 0;
+}
+
+// assumes the reshard lock is held by the caller
+int RGWBucketReshard::clear_resharding(RGWRados* store, // reset the index shard status, then write a default reshard entry; returns 0 or the first error
+ const RGWBucketInfo& bucket_info)
+{
+ int ret = clear_index_shard_reshard_status(store, bucket_info);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ <<
+ " ERROR: error clearing reshard status from index shard " <<
+ cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ cls_rgw_bucket_instance_entry instance_entry; // default-constructed entry
+ ret = store->bucket_set_reshard(bucket_info, instance_entry);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "RGWReshard::" << __func__ <<
+ " ERROR: error setting bucket resharding flag on bucket index: " <<
+ cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ return 0;
+}
+
+int RGWBucketReshard::clear_index_shard_reshard_status(RGWRados* store, // set the bucket's own instance back to NOT_RESHARDING; returns 0 or the error
+ const RGWBucketInfo& bucket_info)
+{
+ uint32_t num_shards = bucket_info.num_shards;
+
+ if (num_shards < std::numeric_limits<uint32_t>::max()) { // nothing is written when num_shards equals the uint32_t maximum
+ int ret = set_resharding_status(store, bucket_info,
+ bucket_info.bucket.bucket_id,
+ (num_shards < 1 ? 1 : num_shards), // a shard count of 0 is reported as 1
+ CLS_RGW_RESHARD_NOT_RESHARDING);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ <<
+ " ERROR: error clearing reshard status from index shard " <<
+ cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int create_new_bucket_instance(RGWRados *store, // copy bucket_info into new_bucket_info with a fresh id and shard count, then create its index and instance info
+ int new_num_shards,
+ const RGWBucketInfo& bucket_info,
+ map<string, bufferlist>& attrs,
+ RGWBucketInfo& new_bucket_info)
+{
+ new_bucket_info = bucket_info; // start from a full copy, then reset the per-instance fields below
+
+ store->create_bucket_id(&new_bucket_info.bucket.bucket_id);
+ new_bucket_info.bucket.oid.clear();
+
+ new_bucket_info.num_shards = new_num_shards;
+ new_bucket_info.objv_tracker.clear();
+
+ new_bucket_info.new_bucket_instance_id.clear();
+ new_bucket_info.reshard_status = 0;
+
+ int ret = store->init_bucket_index(new_bucket_info, new_bucket_info.num_shards);
+ if (ret < 0) {
+ cerr << "ERROR: failed to init new bucket indexes: " << cpp_strerror(-ret) << std::endl; // NOTE(review): goes to cerr, unlike the ldout/derr logging used elsewhere in this file
+ return ret;
+ }
+
+ ret = store->put_bucket_instance_info(new_bucket_info, true, real_time(), &attrs);
+ if (ret < 0) {
+ cerr << "ERROR: failed to store new bucket instance info: " << cpp_strerror(-ret) << std::endl;
+ return ret;
+ }
+
+ return 0;
+}
+
+int RGWBucketReshard::create_new_bucket_instance(int new_num_shards, // member wrapper: forwards to the file-local function with this object's store, info and attrs
+ RGWBucketInfo& new_bucket_info)
+{
+ return ::create_new_bucket_instance(store, new_num_shards, // the leading :: selects the free function, not this member
+ bucket_info, bucket_attrs, new_bucket_info);
+}
+
+int RGWBucketReshard::cancel() // take the reshard lock, clear the resharding state, release the lock
+{
+ const int lock_ret = reshard_lock.lock();
+ if (lock_ret < 0) {
+ return lock_ret;
+ }
+
+ const int clear_ret = clear_resharding();
+
+ reshard_lock.unlock(); // released whether or not the clear succeeded
+ return clear_ret;
+}
+
+class BucketInfoReshardUpdate // scope guard for the reshard status in the bucket instance info: start() marks in-progress, the destructor rolls back unless complete() succeeded
+{
+ RGWRados *store;
+ RGWBucketInfo& bucket_info; // modified in place
+ std::map<string, bufferlist> bucket_attrs; // a copy of the attrs passed to the constructor
+
+ bool in_progress{false}; // true between a successful start() and a successful complete()
+
+ int set_status(cls_rgw_reshard_status s) { // update reshard_status and persist the bucket instance info
+ bucket_info.reshard_status = s;
+ int ret = store->put_bucket_instance_info(bucket_info, false, real_time(), &bucket_attrs);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: failed to write bucket info, ret=" << ret << dendl;
+ return ret;
+ }
+ return 0;
+ }
+
+public:
+ BucketInfoReshardUpdate(RGWRados *_store,
+ RGWBucketInfo& _bucket_info,
+ map<string, bufferlist>& _bucket_attrs,
+ const string& new_bucket_id) :
+ store(_store),
+ bucket_info(_bucket_info),
+ bucket_attrs(_bucket_attrs)
+ {
+ bucket_info.new_bucket_instance_id = new_bucket_id; // set in memory only; persisted by the next set_status()
+ }
+
+ ~BucketInfoReshardUpdate() {
+ if (in_progress) {
+ // resharding must not have ended correctly, clean up
+ int ret =
+ RGWBucketReshard::clear_index_shard_reshard_status(store, bucket_info);
+ if (ret < 0) {
+ lderr(store->ctx()) << "Error: " << __func__ <<
+ " clear_index_shard_status returned " << ret << dendl;
+ }
+ bucket_info.new_bucket_instance_id.clear();
+ set_status(CLS_RGW_RESHARD_NOT_RESHARDING); // clears new_bucket_instance as well
+ }
+ }
+
+ int start() { // persist IN_PROGRESS and arm the destructor rollback
+ int ret = set_status(CLS_RGW_RESHARD_IN_PROGRESS);
+ if (ret < 0) {
+ return ret;
+ }
+ in_progress = true;
+ return 0;
+ }
+
+ int complete() { // persist DONE; on failure the rollback stays armed
+ int ret = set_status(CLS_RGW_RESHARD_DONE);
+ if (ret < 0) {
+ return ret;
+ }
+ in_progress = false;
+ return 0;
+ }
+};
+
+
+RGWBucketReshardLock::RGWBucketReshardLock(RGWRados* _store, // configure the internal lock with a random cookie and the configured duration
+ const std::string& reshard_lock_oid,
+ bool _ephemeral) :
+ store(_store),
+ lock_oid(reshard_lock_oid),
+ ephemeral(_ephemeral),
+ internal_lock(reshard_lock_name)
+{
+ const int lock_dur_secs = store->ctx()->_conf.get_val<uint64_t>( // NOTE(review): uint64_t config value narrowed to int
+ "rgw_reshard_bucket_lock_duration");
+ duration = std::chrono::seconds(lock_dur_secs);
+
+#define COOKIE_LEN 16
+ char cookie_buf[COOKIE_LEN + 1]; // one extra byte for the terminator written below
+ gen_rand_alphanumeric(store->ctx(), cookie_buf, sizeof(cookie_buf) - 1);
+ cookie_buf[COOKIE_LEN] = '\0';
+
+ internal_lock.set_cookie(cookie_buf);
+ internal_lock.set_duration(duration);
+}
+
+int RGWBucketReshardLock::lock() { // take the exclusive lock on lock_oid in the reshard pool; returns 0 or a negative error
+ internal_lock.set_must_renew(false); // fresh acquisition, not a renewal
+ int ret;
+ if (ephemeral) {
+ ret = internal_lock.lock_exclusive_ephemeral(&store->reshard_pool_ctx,
+ lock_oid);
+ } else {
+ ret = internal_lock.lock_exclusive(&store->reshard_pool_ctx, lock_oid);
+ }
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "RGWReshardLock::" << __func__ <<
+ " failed to acquire lock on " << lock_oid << " ret=" << ret << dendl;
+ return ret;
+ }
+ reset_time(Clock::now()); // record the acquisition time (reset_time is defined outside this view)
+
+ return 0;
+}
+
+void RGWBucketReshardLock::unlock() { // best-effort release: a failure is logged and otherwise ignored
+ const int rc = internal_lock.unlock(&store->reshard_pool_ctx, lock_oid);
+ if (rc < 0) {
+ ldout(store->ctx(), 0) << "WARNING: RGWBucketReshardLock::" << __func__ <<
+ " failed to drop lock on " << lock_oid << " ret=" << rc << dendl;
+ }
+}
+
+int RGWBucketReshardLock::renew(const Clock::time_point& now) { // re-take the lock in must-renew mode; returns 0 or a negative error
+ internal_lock.set_must_renew(true);
+ int ret;
+ if (ephemeral) {
+ ret = internal_lock.lock_exclusive_ephemeral(&store->reshard_pool_ctx,
+ lock_oid);
+ } else {
+ ret = internal_lock.lock_exclusive(&store->reshard_pool_ctx, lock_oid);
+ }
+ if (ret < 0) { /* expired or already locked by another processor */
+ std::stringstream error_s;
+ if (-ENOENT == ret) {
+ error_s << "ENOENT (lock expired or never initially locked)";
+ } else {
+ error_s << ret << " (" << cpp_strerror(-ret) << ")";
+ }
+ ldout(store->ctx(), 5) << __func__ << "(): failed to renew lock on " <<
+ lock_oid << " with error " << error_s.str() << dendl;
+ return ret; // must_renew stays true on this path
+ }
+ internal_lock.set_must_renew(false);
+
+ reset_time(now); // uses the caller's timestamp, not a fresh Clock::now()
+ ldout(store->ctx(), 20) << __func__ << "(): successfully renewed lock on " <<
+ lock_oid << dendl;
+
+ return 0;
+}
+
+
+int RGWBucketReshard::do_reshard(int num_shards, // copy every index entry of the source bucket into new_bucket_info's shards, then link the new instance; num_shards is not read in this body
+ RGWBucketInfo& new_bucket_info,
+ int max_entries,
+ bool verbose,
+ ostream *out,
+ Formatter *formatter)
+{
+ rgw_bucket& bucket = bucket_info.bucket;
+
+ int ret = 0;
+
+ if (out) {
+ (*out) << "tenant: " << bucket_info.bucket.tenant << std::endl;
+ (*out) << "bucket name: " << bucket_info.bucket.name << std::endl;
+ (*out) << "old bucket instance id: " << bucket_info.bucket.bucket_id <<
+ std::endl;
+ (*out) << "new bucket instance id: " << new_bucket_info.bucket.bucket_id <<
+ std::endl;
+ }
+
+ /* update bucket info -- in progress*/
+ list<rgw_cls_bi_entry> entries;
+
+ if (max_entries < 0) {
+ ldout(store->ctx(), 0) << __func__ <<
+ ": can't reshard, negative max_entries" << dendl;
+ return -EINVAL;
+ }
+
+ // NB: destructor cleans up sharding state if reshard does not
+ // complete successfully
+ BucketInfoReshardUpdate bucket_info_updater(store, bucket_info, bucket_attrs, new_bucket_info.bucket.bucket_id);
+
+ ret = bucket_info_updater.start();
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << __func__ << ": failed to update bucket info ret=" << ret << dendl;
+ return ret;
+ }
+
+ int num_target_shards = (new_bucket_info.num_shards > 0 ? new_bucket_info.num_shards : 1); // a shard count of 0 is handled as one shard
+
+ BucketReshardManager target_shards_mgr(store, new_bucket_info, num_target_shards);
+
+ bool verbose_json_out = verbose && (formatter != nullptr) && (out != nullptr); // JSON output needs both a formatter and a stream
+
+ if (verbose_json_out) {
+ formatter->open_array_section("entries");
+ }
+
+ uint64_t total_entries = 0;
+
+ if (!verbose_json_out && out) {
+ (*out) << "total entries:";
+ }
+
+ const int num_source_shards =
+ (bucket_info.num_shards > 0 ? bucket_info.num_shards : 1);
+ string marker;
+ for (int i = 0; i < num_source_shards; ++i) { // walk every source shard
+ bool is_truncated = true;
+ marker.clear(); // listing restarts from the beginning for each shard
+ while (is_truncated) { // page through the shard, max_entries at a time
+ entries.clear();
+ ret = store->bi_list(bucket, i, string(), marker, max_entries, &entries, &is_truncated);
+ if (ret < 0 && ret != -ENOENT) { // -ENOENT is tolerated; NOTE(review): relies on bi_list() clearing is_truncated in that case — confirm
+ derr << "ERROR: bi_list(): " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ for (auto iter = entries.begin(); iter != entries.end(); ++iter) {
+ rgw_cls_bi_entry& entry = *iter;
+ if (verbose_json_out) {
+ formatter->open_object_section("entry");
+
+ encode_json("shard_id", i, formatter);
+ encode_json("num_entry", total_entries, formatter);
+ encode_json("entry", entry, formatter);
+ }
+ total_entries++;
+
+ marker = entry.idx; // the next bi_list() call resumes after this entry
+
+ int target_shard_id;
+ cls_rgw_obj_key cls_key;
+ RGWObjCategory category;
+ rgw_bucket_category_stats stats;
+ bool account = entry.get_info(&cls_key, &category, &stats);
+ rgw_obj_key key(cls_key);
+ rgw_obj obj(new_bucket_info.bucket, key);
+ RGWMPObj mp;
+ if (key.ns == RGW_OBJ_NS_MULTIPART && mp.from_meta(key.name)) {
+ // place the multipart .meta object on the same shard as its head object
+ obj.index_hash_source = mp.get_key();
+ }
+ int ret = store->get_target_shard_id(new_bucket_info, obj.get_hash_object(), &target_shard_id); // note: shadows the function-level ret for the rest of this loop body
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: get_target_shard_id() returned ret=" << ret << dendl;
+ return ret;
+ }
+
+ int shard_index = (target_shard_id > 0 ? target_shard_id : 0); // negative ids are mapped to shard 0
+
+ ret = target_shards_mgr.add_entry(shard_index, entry, account,
+ category, stats);
+ if (ret < 0) {
+ return ret;
+ }
+
+ Clock::time_point now = Clock::now();
+ if (reshard_lock.should_renew(now)) {
+ // assume outer locks have timespans at least the size of ours, so
+ // can call inside conditional
+ if (outer_reshard_lock) {
+ ret = outer_reshard_lock->renew(now);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ ret = reshard_lock.renew(now);
+ if (ret < 0) {
+ lderr(store->ctx()) << "Error renewing bucket lock: " << ret << dendl;
+ return ret;
+ }
+ }
+ if (verbose_json_out) {
+ formatter->close_section();
+ formatter->flush(*out);
+ } else if (out && !(total_entries % 1000)) { // progress marker every 1000 entries
+ (*out) << " " << total_entries;
+ }
+ } // entries loop
+ }
+ }
+
+ if (verbose_json_out) {
+ formatter->close_section();
+ formatter->flush(*out);
+ } else if (out) {
+ (*out) << " " << total_entries << std::endl;
+ }
+
+ ret = target_shards_mgr.finish();
+ if (ret < 0) {
+ lderr(store->ctx()) << "ERROR: failed to reshard" << dendl;
+ return -EIO; // the specific error from finish() is replaced by -EIO
+ }
+
+ ret = rgw_link_bucket(store, new_bucket_info.owner, new_bucket_info.bucket, bucket_info.creation_time);
+ if (ret < 0) {
+ lderr(store->ctx()) << "failed to link new bucket instance (bucket_id=" << new_bucket_info.bucket.bucket_id << ": " << cpp_strerror(-ret) << ")" << dendl;
+ return ret;
+ }
+
+ ret = bucket_info_updater.complete();
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << __func__ << ": failed to update bucket info ret=" << ret << dendl;
+ /* don't error out, reshard process succeeded */
+ }
+
+ return 0;
+ // NB: some error clean-up is done by ~BucketInfoReshardUpdate
+} // RGWBucketReshard::do_reshard
+
+/**
+ * Collect the resharding status entry of every index shard of this bucket.
+ *
+ * Opens the bucket index, queries each shard object with
+ * cls_rgw_get_bucket_resharding() and appends one entry per shard to
+ * *status. A shard that answers -ENOENT still contributes its
+ * default-constructed entry.
+ *
+ * @param status  output list; entries are appended, never cleared here
+ * @return 0 on success, otherwise the first negative error encountered
+ */
+int RGWBucketReshard::get_status(list<cls_rgw_bucket_instance_entry> *status)
+{
+  librados::IoCtx index_ctx;
+  map<int, string> bucket_objs;
+
+  int r = store->open_bucket_index(bucket_info, index_ctx, bucket_objs);
+  if (r < 0) {
+    return r;
+  }
+
+  // bind by const reference: iterating by value copied every
+  // (shard id, oid) pair, including its string, for no benefit
+  for (const auto& i : bucket_objs) {
+    cls_rgw_bucket_instance_entry entry;
+
+    int ret = cls_rgw_get_bucket_resharding(index_ctx, i.second, &entry);
+    if (ret < 0 && ret != -ENOENT) {
+      lderr(store->ctx()) << "ERROR: " << __func__ << ": cls_rgw_get_bucket_resharding() returned ret=" << ret << dendl;
+      return ret;
+    }
+
+    status->push_back(entry);
+  }
+
+  return 0;
+}
+
+
+/**
+ * Reshard this bucket's index into a new bucket instance.
+ *
+ * Sequence: take the reshard lock, create the new bucket instance,
+ * optionally record it in the reshard log, mark the current instance as
+ * resharding, then move the index entries with do_reshard(). After a
+ * successful reshard the old index shards and the old bucket instance
+ * entry are removed on a best-effort basis. On any failure after the lock
+ * was taken, the new index shards and new bucket instance entry are
+ * removed instead (also best effort) and the original error is returned.
+ *
+ * @param num_shards      shard count for the new bucket instance
+ * @param max_op_entries  passed through to do_reshard()
+ * @param verbose         passed through to do_reshard()
+ * @param out             passed through to do_reshard()
+ * @param formatter       passed through to do_reshard()
+ * @param reshard_log     when non-null, updated with the new instance
+ * @return 0 on success, otherwise the negative error of the failing step
+ */
+int RGWBucketReshard::execute(int num_shards, int max_op_entries,
+                              bool verbose, ostream *out, Formatter *formatter,
+                              RGWReshard* reshard_log)
+{
+  int ret = reshard_lock.lock();
+  if (ret < 0) {
+    return ret;
+  }
+
+  RGWBucketInfo new_bucket_info;
+  ret = create_new_bucket_instance(num_shards, new_bucket_info);
+  if (ret < 0) {
+    // shard state is uncertain, but this will attempt to remove them anyway
+    goto error_out;
+  }
+
+  if (reshard_log) {
+    ret = reshard_log->update(bucket_info, new_bucket_info);
+    if (ret < 0) {
+      goto error_out;
+    }
+  }
+
+  // set resharding status of current bucket_info & shards with
+  // information about planned resharding
+  ret = set_resharding_status(new_bucket_info.bucket.bucket_id,
+                              num_shards, CLS_RGW_RESHARD_IN_PROGRESS);
+  if (ret < 0) {
+    // the new bucket instance and its index shards already exist at this
+    // point, so take the common clean-up path instead of only unlocking;
+    // returning directly would leave them behind
+    goto error_out;
+  }
+
+  ret = do_reshard(num_shards,
+                   new_bucket_info,
+                   max_op_entries,
+                   verbose, out, formatter);
+  if (ret < 0) {
+    goto error_out;
+  }
+
+  // at this point we've done the main work; we'll make a best-effort
+  // to clean-up but will not indicate any errors encountered
+
+  reshard_lock.unlock();
+
+  // resharding successful, so remove old bucket index shards; use
+  // best effort and don't report out an error; the lock isn't needed
+  // at this point since we're only making a best effort to remove the
+  // old shard objects
+  ret = store->clean_bucket_index(bucket_info, bucket_info.num_shards);
+  if (ret < 0) {
+    lderr(store->ctx()) << "Error: " << __func__ <<
+      " failed to clean up old shards; " <<
+      "RGWRados::clean_bucket_index returned " << ret << dendl;
+  }
+
+  ret = rgw_bucket_instance_remove_entry(store,
+                                         bucket_info.bucket.get_key(),
+                                         nullptr);
+  if (ret < 0) {
+    lderr(store->ctx()) << "Error: " << __func__ <<
+      " failed to clean old bucket info object \"" <<
+      bucket_info.bucket.get_key() <<
+      "\"created after successful resharding with error " << ret << dendl;
+  }
+
+  ldout(store->ctx(), 1) << __func__ <<
+    " INFO: reshard of bucket \"" << bucket_info.bucket.name << "\" from \"" <<
+    bucket_info.bucket.get_key() << "\" to \"" <<
+    new_bucket_info.bucket.get_key() << "\" completed successfully" << dendl;
+
+  return 0;
+
+error_out:
+
+  reshard_lock.unlock();
+
+  // since the real problem is the issue that led to this error code
+  // path, we won't touch ret and instead use another variable to
+  // temporarily hold error codes
+  int ret2 = store->clean_bucket_index(new_bucket_info,
+                                       new_bucket_info.num_shards);
+  if (ret2 < 0) {
+    lderr(store->ctx()) << "Error: " << __func__ <<
+      " failed to clean up shards from failed incomplete resharding; " <<
+      "RGWRados::clean_bucket_index returned " << ret2 << dendl;
+  }
+
+  ret2 = rgw_bucket_instance_remove_entry(store,
+                                          new_bucket_info.bucket.get_key(),
+                                          nullptr);
+  if (ret2 < 0) {
+    lderr(store->ctx()) << "Error: " << __func__ <<
+      " failed to clean bucket info object \"" <<
+      new_bucket_info.bucket.get_key() <<
+      "\"created during incomplete resharding with error " << ret2 << dendl;
+  }
+
+  return ret;
+} // execute
+
+
+// Stores the given store/verbosity/output arguments and initialises the
+// instance lock with bucket_instance_lock_name.
+RGWReshard::RGWReshard(RGWRados* _store, bool _verbose, ostream *_out,
+ Formatter *_formatter) :
+ store(_store), instance_lock(bucket_instance_lock_name),
+ verbose(_verbose), out(_out), formatter(_formatter)
+{
+ // the number of reshard log shards comes from "rgw_reshard_num_logs";
+ // NOTE(review): the option is read as uint64_t but assigned to a
+ // narrower member -- confirm the configured range fits
+ num_logshards = store->ctx()->_conf.get_val<uint64_t>("rgw_reshard_num_logs");
+}
+
+// Key used to pick a reshard log shard: "<tenant>:<bucket_name>".
+string RGWReshard::get_logshard_key(const string& tenant,
+                                    const string& bucket_name)
+{
+  string key(tenant);
+  key += ":";
+  key += bucket_name;
+  return key;
+}
+
+#define MAX_RESHARD_LOGSHARDS_PRIME 7877
+
+// Map (tenant, bucket_name) to the oid of the reshard log shard that holds
+// its entry: hash the logshard key, mix the low byte into the top byte,
+// then reduce modulo the prime and modulo num_logshards.
+void RGWReshard::get_bucket_logshard_oid(const string& tenant, const string& bucket_name, string *oid)
+{
+  const string shard_key = get_logshard_key(tenant, bucket_name);
+
+  const uint32_t hash = ceph_str_hash_linux(shard_key.c_str(), shard_key.size());
+  const uint32_t mixed = hash ^ ((hash & 0xFF) << 24);
+  const uint32_t shard = mixed % MAX_RESHARD_LOGSHARDS_PRIME % num_logshards;
+
+  get_logshard_oid(int(shard), oid);
+}
+
+// Add `entry` to the reshard log shard selected by its tenant and bucket
+// name. Returns 0 without doing anything when the zone cannot reshard;
+// otherwise 0 on success or the negative error from the write operation.
+int RGWReshard::add(cls_rgw_reshard_entry& entry)
+{
+  if (!store->svc.zone->can_reshard()) {
+    ldout(store->ctx(), 20) << __func__ << " Resharding is disabled" << dendl;
+    return 0;
+  }
+
+  string oid;
+  get_bucket_logshard_oid(entry.tenant, entry.bucket_name, &oid);
+
+  librados::ObjectWriteOperation write_op;
+  cls_rgw_reshard_add(write_op, entry);
+
+  const int r = store->reshard_pool_ctx.operate(oid, &write_op);
+  if (r >= 0) {
+    return 0;
+  }
+
+  lderr(store->ctx()) << "ERROR: failed to add entry to reshard log, oid=" << oid << " tenant=" << entry.tenant << " bucket=" << entry.bucket_name << dendl;
+  return r;
+}
+
+// Fetch the reshard log entry for `bucket_info`, set its new_instance_id to
+// "<new bucket name>:<new bucket id>" and write it back with add().
+// Returns the negative error from get() or add(), otherwise add()'s result.
+int RGWReshard::update(const RGWBucketInfo& bucket_info, const RGWBucketInfo& new_bucket_info)
+{
+  cls_rgw_reshard_entry entry;
+  entry.bucket_name = bucket_info.bucket.name;
+  entry.bucket_id = bucket_info.bucket.bucket_id;
+  entry.tenant = bucket_info.owner.tenant;
+
+  const int get_r = get(entry);
+  if (get_r < 0) {
+    return get_r;
+  }
+
+  const rgw_bucket& new_bucket = new_bucket_info.bucket;
+  entry.new_instance_id = new_bucket.name + ":" + new_bucket.bucket_id;
+
+  const int add_r = add(entry);
+  if (add_r < 0) {
+    ldout(store->ctx(), 0) << __func__ << ":Error in updating entry bucket " << entry.bucket_name << ": " <<
+      cpp_strerror(-add_r) << dendl;
+  }
+
+  return add_r;
+}
+
+
+/**
+ * List entries of one reshard log shard.
+ *
+ * @param logshard_num  index of the log shard to list
+ * @param marker        listing position, passed to cls_rgw_reshard_list()
+ * @param max           maximum number of entries requested
+ * @param entries       output list of entries
+ * @param is_truncated  output flag; forced to false when the shard object
+ *                      does not exist
+ * @return 0 on success or when the shard object does not exist (-ENOENT),
+ *         otherwise the negative error from cls_rgw_reshard_list()
+ */
+int RGWReshard::list(int logshard_num, string& marker, uint32_t max, std::list<cls_rgw_reshard_entry>& entries, bool *is_truncated)
+{
+  string logshard_oid;
+
+  get_logshard_oid(logshard_num, &logshard_oid);
+
+  int ret = cls_rgw_reshard_list(store->reshard_pool_ctx, logshard_oid, marker, max, entries, is_truncated);
+
+  if (ret == -ENOENT) {
+    // a missing log shard object is an empty, complete listing -- not an
+    // error, so do not log it as one
+    *is_truncated = false;
+    return 0;
+  }
+
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: failed to list reshard log entries, oid=" << logshard_oid << dendl;
+    if (ret == -EACCES) {
+      lderr(store->ctx()) << "access denied to pool " << store->svc.zone->get_zone_params().reshard_pool
+                          << ". Fix the pool access permissions of your client" << dendl;
+    }
+  }
+
+  return ret;
+}
+
+// Read the reshard log entry identified by entry.tenant/entry.bucket_name
+// into `entry`. Returns 0 on success or the negative error from
+// cls_rgw_reshard_get(); -ENOENT is returned without being logged.
+int RGWReshard::get(cls_rgw_reshard_entry& entry)
+{
+  string oid;
+  get_bucket_logshard_oid(entry.tenant, entry.bucket_name, &oid);
+
+  const int r = cls_rgw_reshard_get(store->reshard_pool_ctx, oid, entry);
+  if (r >= 0) {
+    return 0;
+  }
+
+  if (r != -ENOENT) {
+    lderr(store->ctx()) << "ERROR: failed to get entry from reshard log, oid=" << oid << " tenant=" << entry.tenant <<
+      " bucket=" << entry.bucket_name << dendl;
+  }
+  return r;
+}
+
+// Remove `entry` from the reshard log shard selected by its tenant and
+// bucket name. Returns the result of the write operation; failures are
+// logged before being returned.
+int RGWReshard::remove(cls_rgw_reshard_entry& entry)
+{
+  string oid;
+  get_bucket_logshard_oid(entry.tenant, entry.bucket_name, &oid);
+
+  librados::ObjectWriteOperation write_op;
+  cls_rgw_reshard_remove(write_op, entry);
+
+  const int r = store->reshard_pool_ctx.operate(oid, &write_op);
+  if (r < 0) {
+    lderr(store->ctx()) << "ERROR: failed to remove entry from reshard log, oid=" << oid << " tenant=" << entry.tenant << " bucket=" << entry.bucket_name << dendl;
+  }
+
+  return r;
+}
+
+// Call cls_rgw_clear_bucket_resharding() on `bucket_instance_oid` through
+// the reshard pool context. `entry` is not used by this implementation.
+// Returns 0 on success, otherwise the negative error (after logging it).
+int RGWReshard::clear_bucket_resharding(const string& bucket_instance_oid, cls_rgw_reshard_entry& entry)
+{
+  const int r = cls_rgw_clear_bucket_resharding(store->reshard_pool_ctx, bucket_instance_oid);
+  if (r >= 0) {
+    return 0;
+  }
+
+  lderr(store->ctx()) << "ERROR: failed to clear bucket resharding, bucket_instance_oid=" << bucket_instance_oid << dendl;
+  return r;
+}
+
+// Wait for `duration`, or until stop() is called.
+//
+// With a yield context (and boost.context support) the wait is an
+// asynchronous timer wait; otherwise the calling thread blocks on the
+// condition variable. Returns -ECANCELED when the object is going down
+// (checked before waiting, and after the blocking wait); the asynchronous
+// path returns the negated timer error code, and the blocking path returns
+// 0 otherwise.
+int RGWReshardWait::wait(optional_yield y)
+{
+ std::unique_lock lock(mutex);
+
+ if (going_down) {
+ return -ECANCELED;
+ }
+
+#ifdef HAVE_BOOST_CONTEXT
+ if (y) {
+ auto& context = y.get_io_context();
+ auto& yield = y.get_yield_context();
+
+ // register a local Waiter in the intrusive list so stop() can cancel
+ // its timer; the mutex is released while the timer is pending
+ Waiter waiter(context);
+ waiters.push_back(waiter);
+ lock.unlock();
+
+ waiter.timer.expires_after(duration);
+
+ boost::system::error_code ec;
+ waiter.timer.async_wait(yield[ec]);
+
+ // re-take the mutex before unlinking the local Waiter from the list
+ lock.lock();
+ waiters.erase(waiters.iterator_to(waiter));
+ return -ec.value();
+ }
+#endif
+
+ // blocking path: the result of wait_for() is ignored, only going_down
+ // decides the return value
+ cond.wait_for(lock, duration);
+
+ if (going_down) {
+ return -ECANCELED;
+ }
+
+ return 0;
+}
+
+// Mark the object as going down and wake everything waiting in wait():
+// blocked threads through the condition variable, asynchronous waiters by
+// cancelling their timers.
+void RGWReshardWait::stop()
+{
+  std::scoped_lock guard(mutex);
+  going_down = true;
+  cond.notify_all();
+  // unblock any waiters with ECANCELED
+  for (auto iter = waiters.begin(); iter != waiters.end(); ++iter) {
+    iter->timer.cancel();
+  }
+}
+
+/**
+ * Process every entry queued in one reshard log shard.
+ *
+ * Takes the log shard lock, then lists the shard in batches. Entries
+ * without a new_instance_id are resharded with RGWBucketReshard::execute()
+ * and removed from the queue; entries whose bucket no longer exists, or
+ * whose bucket id no longer matches, are removed without resharding.
+ * The lock is renewed between entries when it is due.
+ *
+ * @param logshard_num  index of the log shard to process
+ * @return 0 when the whole shard was processed, otherwise the negative
+ *         error of the step that failed
+ */
+int RGWReshard::process_single_logshard(int logshard_num)
+{
+  string marker;
+  bool truncated = true;
+
+  CephContext *cct = store->ctx();
+  constexpr uint32_t max_entries = 1000;
+
+  string logshard_oid;
+  get_logshard_oid(logshard_num, &logshard_oid);
+
+  RGWBucketReshardLock logshard_lock(store, logshard_oid, false);
+
+  int ret = logshard_lock.lock();
+  if (ret < 0) {
+    ldout(store->ctx(), 5) << __func__ << "(): failed to acquire lock on " <<
+      logshard_oid << ", ret = " << ret <<dendl;
+    return ret;
+  }
+
+  // NOTE(review): the error returns inside the entry loop below leave
+  // logshard_lock held; confirm whether that is intended
+  do {
+    std::list<cls_rgw_reshard_entry> entries;
+    ret = list(logshard_num, marker, max_entries, entries, &truncated);
+    if (ret < 0) {
+      ldout(cct, 10) << "cannot list all reshards in logshard oid=" <<
+        logshard_oid << dendl;
+      // give up on this shard: a `continue` here would re-test `truncated`,
+      // which is still true from the previous state, and retry the same
+      // failing listing in a tight loop for as long as the error persists
+      logshard_lock.unlock();
+      return ret;
+    }
+
+    for(auto& entry: entries) { // logshard entries
+      if(entry.new_instance_id.empty()) {
+
+        ldout(store->ctx(), 20) << __func__ << " resharding " <<
+          entry.bucket_name << dendl;
+
+        auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+        rgw_bucket bucket;
+        RGWBucketInfo bucket_info;
+        map<string, bufferlist> attrs;
+
+        ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name,
+                                     bucket_info, nullptr, &attrs);
+        if (ret < 0 || bucket_info.bucket.bucket_id != entry.bucket_id) {
+          if (ret < 0) {
+            ldout(cct, 0) << __func__ <<
+              ": Error in get_bucket_info for bucket " << entry.bucket_name <<
+              ": " << cpp_strerror(-ret) << dendl;
+            if (ret != -ENOENT) {
+              // any error other than ENOENT will abort
+              return ret;
+            }
+          } else {
+            ldout(cct,0) << __func__ <<
+              ": Bucket: " << entry.bucket_name <<
+              " already resharded by someone, skipping " << dendl;
+          }
+
+          // we've encountered a reshard queue entry for an apparently
+          // non-existent bucket; let's try to recover by cleaning up
+          ldout(cct, 0) << __func__ <<
+            ": removing reshard queue entry for a resharded or non-existent bucket" <<
+            entry.bucket_name << dendl;
+
+          ret = remove(entry);
+          if (ret < 0) {
+            ldout(cct, 0) << __func__ <<
+              ": Error removing non-existent bucket " <<
+              entry.bucket_name << " from resharding queue: " <<
+              cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+
+          // we cleaned up, move on to the next entry
+          goto finished_entry;
+        }
+
+        RGWBucketReshard br(store, bucket_info, attrs, nullptr);
+        ret = br.execute(entry.new_num_shards, max_entries, false, nullptr,
+                         nullptr, this);
+        if (ret < 0) {
+          ldout(store->ctx(), 0) << __func__ <<
+            ": Error during resharding bucket " << entry.bucket_name << ":" <<
+            cpp_strerror(-ret)<< dendl;
+          return ret;
+        }
+
+        ldout(store->ctx(), 20) << __func__ <<
+          " removing reshard queue entry for bucket " << entry.bucket_name <<
+          dendl;
+
+        ret = remove(entry);
+        if (ret < 0) {
+          ldout(cct, 0) << __func__ << ": Error removing bucket " <<
+            entry.bucket_name << " from resharding queue: " <<
+            cpp_strerror(-ret) << dendl;
+          return ret;
+        }
+      } // if new instance id is empty
+
+    finished_entry:
+
+      // keep the log shard lock alive while working through the batch
+      Clock::time_point now = Clock::now();
+      if (logshard_lock.should_renew(now)) {
+        ret = logshard_lock.renew(now);
+        if (ret < 0) {
+          return ret;
+        }
+      }
+
+      // the next listing resumes after the entry just handled
+      entry.get_key(&marker);
+    } // entry for loop
+  } while (truncated);
+
+  logshard_lock.unlock();
+  return 0;
+}
+
+
+// Build the oid of reshard log shard `shard_num`: reshard_oid_prefix
+// followed by the shard number zero-padded to ten digits.
+void RGWReshard::get_logshard_oid(int shard_num, string *logshard)
+{
+  char suffix[32];
+  snprintf(suffix, sizeof(suffix), "%010u", (unsigned)shard_num);
+
+  string oid(reshard_oid_prefix);
+  oid += suffix;
+  *logshard = oid;
+}
+
+// Process every reshard log shard in order, stopping at the first one that
+// fails. Returns 0 when all shards were processed or when the zone cannot
+// reshard, otherwise the negative error of the failing shard.
+int RGWReshard::process_all_logshards()
+{
+  if (!store->svc.zone->can_reshard()) {
+    ldout(store->ctx(), 20) << __func__ << " Resharding is disabled" << dendl;
+    return 0;
+  }
+
+  for (int shard = 0; shard < num_logshards; ++shard) {
+    string oid;
+    get_logshard_oid(shard, &oid);
+
+    ldout(store->ctx(), 20) << "processing logshard = " << oid << dendl;
+
+    const int r = process_single_logshard(shard);
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  return 0;
+}
+
+// True once stop_processor() has raised the shutdown flag.
+bool RGWReshard::going_down()
+{
+  const bool stopping = down_flag;
+  return stopping;
+}
+
+// Create the background reshard worker and start its thread under the
+// name "rgw_reshard".
+void RGWReshard::start_processor()
+{
+  ReshardWorker *new_worker = new ReshardWorker(store->ctx(), this);
+  worker = new_worker;
+  worker->create("rgw_reshard");
+}
+
+// Raise the shutdown flag, then wake, join and destroy the worker thread
+// if one was started.
+void RGWReshard::stop_processor()
+{
+  down_flag = true;
+  if (worker != nullptr) {
+    worker->stop();
+    worker->join();
+    delete worker;
+    worker = nullptr;
+  }
+}
+
+/**
+ * Thread body of the reshard worker.
+ *
+ * Repeatedly processes all reshard log shards, then sleeps for whatever
+ * remains of the "rgw_reshard_thread_interval" period (skipping the sleep
+ * when a pass took at least that long). The loop ends once the owning
+ * RGWReshard reports going_down(); stop() wakes the sleep early.
+ *
+ * @return always a null pointer
+ */
+void *RGWReshard::ReshardWorker::entry() {
+  utime_t last_run;
+  do {
+    utime_t start = ceph_clock_now();
+    // process_all_logshards() returns 0 on success and a negative error
+    // otherwise, so success must be tested with == 0
+    if (reshard->process_all_logshards() == 0) {
+      /* All shards have been processed properly. Next time we can start
+       * from this moment. */
+      last_run = start;
+    }
+
+    if (reshard->going_down())
+      break;
+
+    utime_t end = ceph_clock_now();
+    end -= start;
+    int secs = cct->_conf.get_val<uint64_t>("rgw_reshard_thread_interval");
+
+    if (secs <= end.sec())
+      continue; // next round
+
+    // sleep only for the part of the interval the pass did not consume
+    secs -= end.sec();
+
+    lock.Lock();
+    cond.WaitInterval(lock, utime_t(secs, 0));
+    lock.Unlock();
+  } while (!reshard->going_down());
+
+  return nullptr;
+}
+
+// Wake the worker thread if it is sleeping between passes in entry().
+void RGWReshard::ReshardWorker::stop()
+{
+  Mutex::Locker guard(lock);
+  cond.Signal();
+}
diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h
new file mode 100644
index 00000000..213fc238
--- /dev/null
+++ b/src/rgw/rgw_reshard.h
@@ -0,0 +1,211 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_RESHARD_H
+#define RGW_RESHARD_H
+
+#include <vector>
+#include <functional>
+
+#include <boost/intrusive/list.hpp>
+
+#include "include/rados/librados.hpp"
+#include "common/ceph_time.h"
+#include "cls/rgw/cls_rgw_types.h"
+#include "cls/lock/cls_lock_client.h"
+#include "rgw_bucket.h"
+
+
+class CephContext;
+class RGWRados;
+
+// Wrapper around a rados cls lock on `lock_oid` that tracks when the lock
+// was taken and when it should be renewed.
+class RGWBucketReshardLock {
+ using Clock = ceph::coarse_mono_clock;
+
+ RGWRados* store;
+ const std::string lock_oid;
+ const bool ephemeral;
+ rados::cls::lock::Lock internal_lock;
+ std::chrono::seconds duration;
+
+ Clock::time_point start_time;
+ Clock::time_point renew_thresh;
+
+ // record `now` as the start of the lock period; renewal becomes due once
+ // half of `duration` has elapsed
+ void reset_time(const Clock::time_point& now) {
+ start_time = now;
+ renew_thresh = start_time + duration / 2;
+ }
+
+public:
+ RGWBucketReshardLock(RGWRados* _store,
+ const std::string& reshard_lock_oid,
+ bool _ephemeral);
+ // convenience form: the lock oid is the bucket key built with ':' as the
+ // separator
+ RGWBucketReshardLock(RGWRados* _store,
+ const RGWBucketInfo& bucket_info,
+ bool _ephemeral) :
+ RGWBucketReshardLock(_store, bucket_info.bucket.get_key(':'), _ephemeral)
+ {}
+
+ int lock();
+ void unlock();
+ int renew(const Clock::time_point&);
+
+ // true once `now` has reached the renewal threshold set by reset_time()
+ bool should_renew(const Clock::time_point& now) const {
+ return now >= renew_thresh;
+ }
+}; // class RGWBucketReshardLock
+
+// Drives the resharding of a single bucket's index: execute() creates a new
+// bucket instance and moves the index entries into it via do_reshard().
+class RGWBucketReshard {
+public:
+
+ friend class RGWReshard;
+
+ using Clock = ceph::coarse_mono_clock;
+
+private:
+
+ RGWRados *store;
+ RGWBucketInfo bucket_info;
+ std::map<string, bufferlist> bucket_attrs;
+
+ // lock held by this object for the duration of a reshard
+ RGWBucketReshardLock reshard_lock;
+ // optional lock owned by the caller; may be nullptr (see constructor)
+ RGWBucketReshardLock* outer_reshard_lock;
+
+ int create_new_bucket_instance(int new_num_shards,
+ RGWBucketInfo& new_bucket_info);
+ int do_reshard(int num_shards,
+ RGWBucketInfo& new_bucket_info,
+ int max_entries,
+ bool verbose,
+ ostream *os,
+ Formatter *formatter);
+public:
+
+ // pass nullptr for the final parameter if no outer reshard lock to
+ // manage
+ RGWBucketReshard(RGWRados *_store, const RGWBucketInfo& _bucket_info,
+ const std::map<string, bufferlist>& _bucket_attrs,
+ RGWBucketReshardLock* _outer_reshard_lock);
+ // run the reshard; when reshard_log is non-null it is updated with the
+ // new bucket instance
+ int execute(int num_shards, int max_op_entries,
+ bool verbose = false, ostream *out = nullptr,
+ Formatter *formatter = nullptr,
+ RGWReshard *reshard_log = nullptr);
+ // append the resharding status entry of each index shard to *status
+ int get_status(std::list<cls_rgw_bucket_instance_entry> *status);
+ int cancel();
+ // each static helper below has a member overload that forwards this
+ // object's store and bucket_info
+ static int clear_resharding(RGWRados* store,
+ const RGWBucketInfo& bucket_info);
+ int clear_resharding() {
+ return clear_resharding(store, bucket_info);
+ }
+ static int clear_index_shard_reshard_status(RGWRados* store,
+ const RGWBucketInfo& bucket_info);
+ int clear_index_shard_reshard_status() {
+ return clear_index_shard_reshard_status(store, bucket_info);
+ }
+ static int set_resharding_status(RGWRados* store,
+ const RGWBucketInfo& bucket_info,
+ const string& new_instance_id,
+ int32_t num_shards,
+ cls_rgw_reshard_status status);
+ int set_resharding_status(const string& new_instance_id,
+ int32_t num_shards,
+ cls_rgw_reshard_status status) {
+ return set_resharding_status(store, bucket_info,
+ new_instance_id, num_shards, status);
+ }
+}; // RGWBucketReshard
+
+// Access to the sharded reshard log (queue of buckets to reshard) plus the
+// background worker thread that processes it.
+class RGWReshard {
+public:
+ using Clock = ceph::coarse_mono_clock;
+
+private:
+ RGWRados *store;
+ // NOTE(review): lock_name is not initialised by the constructor in
+ // rgw_reshard.cc -- confirm whether it is still used
+ string lock_name;
+ rados::cls::lock::Lock instance_lock;
+ // number of reshard log shards, read from "rgw_reshard_num_logs"
+ int num_logshards;
+
+ bool verbose;
+ ostream *out;
+ Formatter *formatter;
+
+ // oid of log shard `shard_num`
+ void get_logshard_oid(int shard_num, string *shard);
+protected:
+ // background thread that calls process_all_logshards() periodically
+ class ReshardWorker : public Thread {
+ CephContext *cct;
+ RGWReshard *reshard;
+ Mutex lock;
+ Cond cond;
+
+ public:
+ ReshardWorker(CephContext * const _cct,
+ RGWReshard * const _reshard)
+ : cct(_cct),
+ reshard(_reshard),
+ lock("ReshardWorker") {
+ }
+
+ void *entry() override;
+ // wake the thread if it is sleeping between passes
+ void stop();
+ };
+
+ ReshardWorker *worker = nullptr;
+ // set by stop_processor(), read through going_down()
+ std::atomic<bool> down_flag = { false };
+
+ // "<tenant>:<bucket_name>"
+ string get_logshard_key(const string& tenant, const string& bucket_name);
+ // oid of the log shard that holds the entry for (tenant, bucket_name)
+ void get_bucket_logshard_oid(const string& tenant, const string& bucket_name, string *oid);
+
+public:
+ RGWReshard(RGWRados* _store, bool _verbose = false, ostream *_out = nullptr, Formatter *_formatter = nullptr);
+ // reshard log entry operations; each returns 0 or a negative error
+ int add(cls_rgw_reshard_entry& entry);
+ int update(const RGWBucketInfo& bucket_info, const RGWBucketInfo& new_bucket_info);
+ int get(cls_rgw_reshard_entry& entry);
+ int remove(cls_rgw_reshard_entry& entry);
+ int list(int logshard_num, string& marker, uint32_t max, std::list<cls_rgw_reshard_entry>& entries, bool *is_truncated);
+ int clear_bucket_resharding(const string& bucket_instance_oid, cls_rgw_reshard_entry& entry);
+
+ /* reshard thread */
+ int process_single_logshard(int logshard_num);
+ int process_all_logshards();
+ bool going_down();
+ void start_processor();
+ void stop_processor();
+};
+
+// Lets callers pause for a fixed duration, either by blocking the thread or
+// by an asynchronous timer wait, with stop() able to cut every pending
+// wait short.
+class RGWReshardWait {
+ public:
+ // the blocking wait uses std::condition_variable::wait_for(), which uses the
+ // std::chrono::steady_clock. use that for the async waits as well
+ using Clock = std::chrono::steady_clock;
+ private:
+ // how long each wait() lasts unless stopped
+ const ceph::timespan duration;
+ ceph::mutex mutex = ceph::make_mutex("RGWReshardWait::lock");
+ ceph::condition_variable cond;
+
+ // one pending asynchronous wait; linked into `waiters` while its timer
+ // is outstanding so stop() can cancel it
+ struct Waiter : boost::intrusive::list_base_hook<> {
+#if BOOST_VERSION < 107000
+ using Timer = boost::asio::basic_waitable_timer<Clock>;
+#else
+ using Executor = boost::asio::io_context::executor_type;
+ using Timer = boost::asio::basic_waitable_timer<Clock,
+ boost::asio::wait_traits<Clock>, Executor>;
+#endif
+ Timer timer;
+ explicit Waiter(boost::asio::io_context& ioc) : timer(ioc) {}
+ };
+ boost::intrusive::list<Waiter> waiters;
+
+ // set by stop(); once set, wait() returns -ECANCELED
+ bool going_down{false};
+
+public:
+ RGWReshardWait(ceph::timespan duration = std::chrono::seconds(5))
+ : duration(duration) {}
+ // stop() must have been called before destruction
+ ~RGWReshardWait() {
+ ceph_assert(going_down);
+ }
+ int wait(optional_yield y);
+ // unblock any threads waiting on reshard
+ void stop();
+};
+
+#endif
diff --git a/src/rgw/rgw_resolve.cc b/src/rgw/rgw_resolve.cc
new file mode 100644
index 00000000..0e515962
--- /dev/null
+++ b/src/rgw/rgw_resolve.cc
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv.h>
+
+#include "acconfig.h"
+
+#ifdef HAVE_ARPA_NAMESER_COMPAT_H
+#include <arpa/nameser_compat.h>
+#endif
+
+#include "rgw_common.h"
+#include "rgw_resolve.h"
+#include "common/dns_resolve.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+// Nothing to release: `resolver` is obtained from
+// DNSResolver::get_instance() and is not deleted here.
+RGWResolver::~RGWResolver() {
+}
+
+// Bind to the DNSResolver instance returned by get_instance().
+RGWResolver::RGWResolver() {
+ resolver = DNSResolver::get_instance();
+}
+
+// Forward a CNAME lookup for `hostname` to the DNS resolver using the
+// global ceph context; `cname` and `found` are filled in by the resolver.
+int RGWResolver::resolve_cname(const string& hostname, string& cname, bool *found) {
+ return resolver->resolve_cname(g_ceph_context, hostname, &cname, found);
+}
+
+// Process-wide resolver; created by rgw_init_resolver() and destroyed by
+// rgw_shutdown_resolver().
+RGWResolver *rgw_resolver;
+
+
+// Allocate the global resolver.
+// NOTE(review): a second call would overwrite the pointer without freeing
+// the previous instance -- presumably called once at startup; confirm.
+void rgw_init_resolver()
+{
+ rgw_resolver = new RGWResolver();
+}
+
+// Destroy the global resolver created by rgw_init_resolver().
+void rgw_shutdown_resolver()
+{
+  delete rgw_resolver;
+  // do not leave a dangling global behind: a later use fails on a null
+  // pointer instead of freed memory, and a repeated shutdown is harmless
+  rgw_resolver = nullptr;
+}
diff --git a/src/rgw/rgw_resolve.h b/src/rgw/rgw_resolve.h
new file mode 100644
index 00000000..6e00aaa6
--- /dev/null
+++ b/src/rgw/rgw_resolve.h
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_RESOLVE_H
+#define CEPH_RGW_RESOLVE_H
+
+#include "rgw_common.h"
+
+namespace ceph {
+ class DNSResolver;
+}
+
+// Thin RGW-side wrapper around the DNS resolver, used for CNAME lookups.
+class RGWResolver {
+ // not owned: assigned from DNSResolver::get_instance() in rgw_resolve.cc
+ DNSResolver *resolver;
+
+public:
+ ~RGWResolver();
+ RGWResolver();
+ // look up the CNAME of `hostname`; result goes to `cname` and `*found`
+ int resolve_cname(const string& hostname, string& cname, bool *found);
+};
+
+
+extern void rgw_init_resolver(void);
+extern void rgw_shutdown_resolver(void);
+extern RGWResolver *rgw_resolver;
+
+#endif
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
new file mode 100644
index 00000000..9a52af5d
--- /dev/null
+++ b/src/rgw/rgw_rest.cc
@@ -0,0 +1,2302 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#include <errno.h>
+#include <limits.h>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/tokenizer.hpp>
+#include "common/Formatter.h"
+#include "common/HTMLFormatter.h"
+#include "common/utf8.h"
+#include "include/str_list.h"
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_auth_s3.h"
+#include "rgw_formats.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_swift.h"
+#include "rgw_rest_s3.h"
+#include "rgw_swift_auth.h"
+#include "rgw_cors_s3.h"
+#include "rgw_perf_counters.h"
+
+#include "rgw_client_io.h"
+#include "rgw_resolve.h"
+
+#include <numeric>
+
+#define dout_subsys ceph_subsys_rgw
+
+/* one HTTP status code and the reason phrase sent with it */
+struct rgw_http_status_code {
+  int code;
+  const char *name;
+};
+
+/*
+ * table of known HTTP status codes; terminated by an entry with code 0.
+ * rgw_rest_init() copies it into http_status_names.
+ */
+const static struct rgw_http_status_code http_codes[] = {
+  { 100, "Continue" },
+  { 200, "OK" },
+  { 201, "Created" },
+  { 202, "Accepted" },
+  { 204, "No Content" },
+  { 205, "Reset Content" },
+  { 206, "Partial Content" },
+  { 207, "Multi Status" },
+  { 208, "Already Reported" },
+  { 300, "Multiple Choices" },
+  { 301, "Moved Permanently" },
+  { 302, "Found" },
+  { 303, "See Other" },
+  { 304, "Not Modified" },
+  /* the standard reason phrase is "Use Proxy", not "User Proxy" */
+  { 305, "Use Proxy" },
+  { 306, "Switch Proxy" },
+  { 307, "Temporary Redirect" },
+  { 308, "Permanent Redirect" },
+  { 400, "Bad Request" },
+  { 401, "Unauthorized" },
+  { 402, "Payment Required" },
+  { 403, "Forbidden" },
+  { 404, "Not Found" },
+  { 405, "Method Not Allowed" },
+  { 406, "Not Acceptable" },
+  { 407, "Proxy Authentication Required" },
+  { 408, "Request Timeout" },
+  { 409, "Conflict" },
+  { 410, "Gone" },
+  { 411, "Length Required" },
+  { 412, "Precondition Failed" },
+  { 413, "Request Entity Too Large" },
+  { 414, "Request-URI Too Long" },
+  { 415, "Unsupported Media Type" },
+  { 416, "Requested Range Not Satisfiable" },
+  { 417, "Expectation Failed" },
+  { 422, "Unprocessable Entity" },
+  { 498, "Rate Limited"},
+  { 500, "Internal Server Error" },
+  { 501, "Not Implemented" },
+  { 503, "Slow Down"},
+  { 0, NULL },
+};
+
+/* pairs an rgw object attribute name with the HTTP header it is emitted as */
+struct rgw_http_attr {
+ const char *rgw_attr;
+ const char *http_attr;
+};
+
+/*
+ * mapping between rgw object attrs and output http fields;
+ * copied into rgw_to_http_attrs by rgw_rest_init()
+ */
+static const struct rgw_http_attr base_rgw_to_http_attrs[] = {
+ { RGW_ATTR_CONTENT_LANG, "Content-Language" },
+ { RGW_ATTR_EXPIRES, "Expires" },
+ { RGW_ATTR_CACHE_CONTROL, "Cache-Control" },
+ { RGW_ATTR_CONTENT_DISP, "Content-Disposition" },
+ { RGW_ATTR_CONTENT_ENC, "Content-Encoding" },
+ { RGW_ATTR_USER_MANIFEST, "X-Object-Manifest" },
+ { RGW_ATTR_X_ROBOTS_TAG , "X-Robots-Tag" },
+ { RGW_ATTR_STORAGE_CLASS , "X-Amz-Storage-Class" },
+ /* RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode:
+ * S3 endpoint: x-amz-website-redirect-location
+ * S3Website endpoint: Location
+ */
+ { RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" },
+};
+
+
+/* pairs an HTTP environment field name with the rgw object attribute it
+ * is stored as */
+struct generic_attr {
+ const char *http_header;
+ const char *rgw_attr;
+};
+
+/*
+ * mapping between http env fields and rgw object attrs;
+ * copied into generic_attrs_map by rgw_rest_init()
+ */
+static const struct generic_attr generic_attrs[] = {
+ { "CONTENT_TYPE", RGW_ATTR_CONTENT_TYPE },
+ { "HTTP_CONTENT_LANGUAGE", RGW_ATTR_CONTENT_LANG },
+ { "HTTP_EXPIRES", RGW_ATTR_EXPIRES },
+ { "HTTP_CACHE_CONTROL", RGW_ATTR_CACHE_CONTROL },
+ { "HTTP_CONTENT_DISPOSITION", RGW_ATTR_CONTENT_DISP },
+ { "HTTP_CONTENT_ENCODING", RGW_ATTR_CONTENT_ENC },
+ { "HTTP_X_ROBOTS_TAG", RGW_ATTR_X_ROBOTS_TAG },
+};
+
+/* lookup tables filled by rgw_rest_init(): rgw attr -> http header,
+ * http env field -> rgw attr, and status code -> reason phrase */
+map<string, string> rgw_to_http_attrs;
+static map<string, string> generic_attrs_map;
+map<int, const char *> http_status_names;
+
+/*
+ * make attrs look_like_this
+ * converts dashes to underscores
+ *
+ * Returns a copy of orig with every '-' replaced by '_' and every other
+ * character passed through tolower(). The result has the same length as
+ * orig.
+ */
+string lowercase_underscore_http_attr(const string& orig)
+{
+  // work on a std::string copy instead of a variable-length stack array:
+  // VLAs are not standard C++ and their size was unbounded here
+  string result(orig);
+
+  for (char& c : result) {
+    if (c == '-') {
+      c = '_';
+    } else {
+      // tolower() requires a value representable as unsigned char
+      c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
+    }
+  }
+  return result;
+}
+
+/*
+ * make attrs LOOK_LIKE_THIS
+ * converts dashes to underscores
+ *
+ * Returns a copy of orig with every '-' replaced by '_' and every other
+ * character passed through toupper(). The result has the same length as
+ * orig.
+ */
+string uppercase_underscore_http_attr(const string& orig)
+{
+  // work on a std::string copy instead of a variable-length stack array:
+  // VLAs are not standard C++ and their size was unbounded here
+  string result(orig);
+
+  for (char& c : result) {
+    if (c == '-') {
+      c = '_';
+    } else {
+      // toupper() requires a value representable as unsigned char
+      c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
+    }
+  }
+  return result;
+}
+
+/* avoid duplicate hostnames in hostnames lists */
+static set<string> hostnames_set;
+static set<string> hostnames_s3website_set;
+
+/*
+ * fill the REST lookup tables: attribute mappings (built-in ones plus those
+ * listed in rgw_extended_http_attrs), status code names, and the sets of
+ * hostnames served for the regular and the S3 website endpoints
+ */
+void rgw_rest_init(CephContext *cct, RGWRados *store, const RGWZoneGroup& zone_group)
+{
+  for (const auto& mapping : base_rgw_to_http_attrs) {
+    rgw_to_http_attrs[mapping.rgw_attr] = mapping.http_attr;
+  }
+
+  for (const auto& mapping : generic_attrs) {
+    generic_attrs_map[mapping.http_header] = mapping.rgw_attr;
+  }
+
+  list<string> extended_http_attrs;
+  get_str_list(cct->_conf->rgw_extended_http_attrs, extended_http_attrs);
+
+  /* each configured extra attribute gets both directions of the mapping */
+  for (const string& attr_name : extended_http_attrs) {
+    string rgw_attr = RGW_ATTR_PREFIX;
+    rgw_attr.append(lowercase_underscore_http_attr(attr_name));
+
+    rgw_to_http_attrs[rgw_attr] = camelcase_dash_http_attr(attr_name);
+
+    string http_header = "HTTP_";
+    http_header.append(uppercase_underscore_http_attr(attr_name));
+
+    generic_attrs_map[http_header] = rgw_attr;
+  }
+
+  /* the table ends with an entry whose code is 0 */
+  for (size_t i = 0; http_codes[i].code; ++i) {
+    http_status_names[http_codes[i].code] = http_codes[i].name;
+  }
+
+  hostnames_set.insert(cct->_conf->rgw_dns_name);
+  hostnames_set.insert(zone_group.hostnames.begin(), zone_group.hostnames.end());
+  hostnames_set.erase(""); // filter out empty hostnames
+  ldout(cct, 20) << "RGW hostnames: " << hostnames_set << dendl;
+  /* TODO: We should have a sanity check that no hostname matches the end of
+   * any other hostname, otherwise we will get ambiguous results from
+   * rgw_find_host_in_domains.
+   * Eg:
+   * Hostnames: [A, B.A]
+   * Inputs: [Z.A, X.B.A]
+   * Z.A clearly splits to subdomain=Z, domain=A
+   * X.B.A ambiguously splits to both {X, B.A} and {X.B, A}
+   */
+
+  hostnames_s3website_set.insert(cct->_conf->rgw_dns_s3website_name);
+  hostnames_s3website_set.insert(zone_group.hostnames_s3website.begin(), zone_group.hostnames_s3website.end());
+  hostnames_s3website_set.erase(""); // filter out empty hostnames
+  ldout(cct, 20) << "RGW S3website hostnames: " << hostnames_s3website_set << dendl;
+  /* TODO: we should repeat the hostnames_set sanity check here
+   * and ALSO decide about overlap, if any
+   */
+}
+
+/*
+ * case-insensitive test whether s ends with suffix. When suffix is not
+ * longer than s and pos is non-null, *pos receives the offset in s at
+ * which a suffix of that length would start (set whether or not it matches).
+ */
+static bool str_ends_with_nocase(const string& s, const string& suffix, size_t *pos)
+{
+  const size_t suffix_len = suffix.size();
+  const size_t total_len = s.size();
+  if (suffix_len > total_len) {
+    return false;
+  }
+
+  if (pos != nullptr) {
+    *pos = total_len - suffix_len;
+  }
+
+  return boost::algorithm::iends_with(s, suffix);
+}
+
+/*
+ * find the first hostname in valid_hostnames_set that host either equals or
+ * ends with after a '.', ignoring case. On a match *domain gets the matched
+ * tail of host and *subdomain the part before the separating dot (empty
+ * when host is the domain itself). Returns false when nothing matches.
+ */
+static bool rgw_find_host_in_domains(const string& host, string *domain, string *subdomain,
+                                     const set<string>& valid_hostnames_set)
+{
+  /** TODO, Future optimization
+   * store hostnames_set elements _reversed_, and look for a prefix match,
+   * which is much faster than a suffix match.
+   */
+  for (const string& candidate : valid_hostnames_set) {
+    size_t pos;
+    if (!str_ends_with_nocase(host, candidate, &pos)) {
+      continue;
+    }
+
+    if (pos == 0) {
+      /* host is exactly the candidate */
+      *domain = host;
+      subdomain->clear();
+      return true;
+    }
+
+    /* the suffix must start right after a label separator */
+    if (host[pos - 1] != '.') {
+      continue;
+    }
+
+    *domain = host.substr(pos);
+    *subdomain = host.substr(0, pos - 1);
+    return true;
+  }
+  return false;
+}
+
+/*
+ * record the status on the request's formatter and send the status line to
+ * the client; an I/O exception is logged and swallowed
+ */
+static void dump_status(struct req_state *s, int status,
+                        const char *status_name)
+{
+  s->formatter->set_status(status, status_name);
+
+  try {
+    RESTFUL_IO(s)->send_status(status, status_name);
+  } catch (rgw::io::Exception& ex) {
+    ldout(s->cct, 0) << "ERROR: s->cio->send_status() returned err="
+                     << ex.what() << dendl;
+  }
+}
+
+/*
+ * emit the formatter's footer, flush what it has buffered to the response
+ * body (nothing is sent for HEAD requests), then reset the request's
+ * formatter
+ */
+void rgw_flush_formatter_and_reset(struct req_state *s, Formatter *formatter)
+{
+  std::ostringstream buffered;
+  formatter->output_footer();
+  formatter->flush(buffered);
+
+  std::string body(buffered.str());
+  const bool send_body = !body.empty() && s->op != OP_HEAD;
+  if (send_body) {
+    dump_body(s, body);
+  }
+
+  // note: this resets s->formatter, not the `formatter` argument
+  s->formatter->reset();
+}
+
+/*
+ * flush what the formatter has buffered to the response body; nothing is
+ * sent for HEAD requests or when the buffer is empty
+ */
+void rgw_flush_formatter(struct req_state *s, Formatter *formatter)
+{
+  std::ostringstream buffered;
+  formatter->flush(buffered);
+
+  std::string body(buffered.str());
+  const bool send_body = !body.empty() && s->op != OP_HEAD;
+  if (send_body) {
+    dump_body(s, body);
+  }
+}
+
+/*
+ * reason phrase registered for an HTTP status code, or nullptr when the
+ * code is unknown. Uses find() so that a lookup never inserts a new entry
+ * into the shared http_status_names map.
+ */
+static const char *find_http_status_name(int http_ret)
+{
+  const auto iter = http_status_names.find(http_ret);
+  if (iter == http_status_names.end()) {
+    return nullptr;
+  }
+  return iter->second;
+}
+
+/* format "<code> <reason phrase>" into out; the phrase is empty for an
+ * unknown code */
+void dump_errno(int http_ret, string& out) {
+  stringstream ss;
+  const char *name = find_http_status_name(http_ret);
+
+  ss << http_ret << " ";
+  if (name) {
+    // never stream a null C string
+    ss << name;
+  }
+  out = ss.str();
+}
+
+/* same as above, taking the code from err.http_ret */
+void dump_errno(const struct rgw_err &err, string& out) {
+  dump_errno(err.http_ret, out);
+}
+
+/* send the status line for the error already stored in the request */
+void dump_errno(struct req_state *s)
+{
+  dump_status(s, s->err.http_ret, find_http_status_name(s->err.http_ret));
+}
+
+/* send the status line for an explicit HTTP status code */
+void dump_errno(struct req_state *s, int http_ret)
+{
+  dump_status(s, http_ret, find_http_status_name(http_ret));
+}
+
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ const boost::string_ref& val)
+{
+ try {
+ RESTFUL_IO(s)->send_header(name, val);
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: s->cio->send_header() returned err="
+ << e.what() << dendl;
+ }
+}
+
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ ceph::buffer::list& bl)
+{
+ return dump_header(s, name, rgw_sanitized_hdrval(bl));
+}
+
+/* Send a header whose value is a signed integer, rendered in decimal. */
+void dump_header(struct req_state* const s,
+                 const boost::string_ref& name,
+                 const long long val)
+{
+  char digits[32];
+  const auto written = snprintf(digits, sizeof(digits), "%lld", val);
+
+  dump_header(s, name, boost::string_ref(digits, written));
+}
+
+/*
+ * Send a header whose value is a timestamp: the seconds, a dot, then
+ * usec()/10 zero-padded to five digits.
+ */
+void dump_header(struct req_state* const s,
+                 const boost::string_ref& name,
+                 const utime_t& ut)
+{
+  const long long whole_secs = static_cast<long long>(ut.sec());
+  const int fraction = static_cast<int>(ut.usec() / 10);
+
+  char text[32];
+  const auto written = snprintf(text, sizeof(text), "%lld.%05d",
+                                whole_secs, fraction);
+
+  dump_header(s, name, boost::string_ref(text, written));
+}
+
+/*
+ * Announce the body length to the client (I/O exceptions are logged and
+ * ignored) and then emit "Accept-Ranges: bytes".
+ */
+void dump_content_length(struct req_state* const s, const uint64_t len)
+{
+  try {
+    RESTFUL_IO(s)->send_content_length(len);
+  } catch (rgw::io::Exception& io_err) {
+    ldout(s->cct, 0) << "ERROR: s->cio->send_content_length() returned err="
+                     << io_err.what() << dendl;
+  }
+
+  dump_header(s, "Accept-Ranges", "bytes");
+}
+
+/*
+ * Ask the client I/O layer to use chunked transfer encoding; a failure is
+ * logged at level 0 and otherwise ignored.
+ */
+static void dump_chunked_encoding(struct req_state* const s)
+{
+  try {
+    RESTFUL_IO(s)->send_chunked_transfer_encoding();
+  } catch (rgw::io::Exception& io_err) {
+    ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_chunked_transfer_encoding()"
+                     << " returned err=" << io_err.what() << dendl;
+  }
+}
+
+/*
+ * Emit the entity tag header. An empty tag emits nothing. Swift requests
+ * that did not ask for quoting get a plain "etag" header; every other case
+ * gets a quoted "ETag" header.
+ */
+void dump_etag(struct req_state* const s,
+               const boost::string_ref& etag,
+               const bool quoted)
+{
+  if (etag.empty()) {
+    return;
+  }
+
+  const bool plain_swift_tag = (s->prot_flags & RGW_REST_SWIFT) && !quoted;
+  if (plain_swift_tag) {
+    dump_header(s, "etag", etag);
+    return;
+  }
+
+  dump_header_quoted(s, "ETag", etag);
+}
+
+/*
+ * When rgw_expose_bucket is enabled and the request names a bucket, emit a
+ * "Bucket" header holding the URL-encoded bucket name, prefixed with
+ * "<tenant>/" if a tenant is set.
+ */
+void dump_bucket_from_state(struct req_state *s)
+{
+  if (!g_conf()->rgw_expose_bucket || s->bucket_name.empty()) {
+    return;
+  }
+
+  if (s->bucket_tenant.empty()) {
+    dump_header(s, "Bucket", url_encode(s->bucket_name));
+  } else {
+    dump_header(s, "Bucket",
+                url_encode(s->bucket_tenant + "/" + s->bucket_name));
+  }
+}
+
+/* Emit a "Location" header for the redirect target, unless it is empty. */
+void dump_redirect(struct req_state * const s, const std::string& redirect)
+{
+  dump_header_if_nonempty(s, "Location", redirect);
+}
+
+/*
+ * Format `t` (UTC) as "%a, %d %b %Y %H:%M:%S %Z" into `timestr`.
+ * Returns the value of strftime(), or 0 when gmtime_r() fails.
+ */
+static size_t dump_time_header_impl(char (&timestr)[TIME_BUF_SIZE],
+                                    const real_time t)
+{
+  const utime_t stamp(t);
+  time_t epoch_secs = static_cast<time_t>(stamp.sec());
+
+  struct tm broken_down;
+  const struct tm * const utc = gmtime_r(&epoch_secs, &broken_down);
+  if (!utc) {
+    return 0;
+  }
+
+  return strftime(timestr, sizeof(timestr),
+                  "%a, %d %b %Y %H:%M:%S %Z", utc);
+}
+
+/*
+ * Emit header `name` carrying `t` formatted by dump_time_header_impl().
+ * Nothing is emitted when formatting yields zero characters.
+ */
+void dump_time_header(struct req_state *s, const char *name, real_time t)
+{
+  char formatted[TIME_BUF_SIZE];
+
+  const size_t used = dump_time_header_impl(formatted, t);
+  if (used != 0) {
+    dump_header(s, name, boost::string_ref(formatted, used));
+  }
+}
+
+/*
+ * Return `t` formatted by dump_time_header_impl() as a string.
+ *
+ * The helper returns 0 when gmtime_r() fails or strftime() produces nothing;
+ * in that case the buffer must not be read, so the result is built from the
+ * reported length and is an empty string.
+ */
+std::string dump_time_to_str(const real_time& t)
+{
+  char timestr[TIME_BUF_SIZE];
+  const size_t len = dump_time_header_impl(timestr, t);
+
+  return std::string(timestr, len);
+}
+
+
+/* Emit the "Last-Modified" header for the given modification time. */
+void dump_last_modified(struct req_state *s, real_time t)
+{
+  static const char * const header_name = "Last-Modified";
+  dump_time_header(s, header_name, t);
+}
+
+/*
+ * Emit header `name` with `t` rendered as "<sec>.<nsec>", the nanosecond
+ * part zero-padded to nine digits.
+ */
+void dump_epoch_header(struct req_state *s, const char *name, real_time t)
+{
+  const utime_t stamp(t);
+  const long long whole_secs = (long long)stamp.sec();
+  const long long nanos = (long long)stamp.nsec();
+
+  char text[65];
+  const auto written = snprintf(text, sizeof(text), "%lld.%09lld",
+                                whole_secs, nanos);
+
+  dump_header(s, name, boost::string_ref(text, written));
+}
+
+/*
+ * Convert *t with rgw_to_iso8601() and dump the resulting text through the
+ * request's formatter under the key `name`.
+ */
+void dump_time(struct req_state *s, const char *name, real_time *t)
+{
+  char iso_text[TIME_BUF_SIZE];
+  rgw_to_iso8601(*t, iso_text, sizeof(iso_text));
+
+  s->formatter->dump_string(name, iso_text);
+}
+
+/*
+ * Dump an owner record (ID and DisplayName) as an object section. The
+ * section is called "Owner" when `section` is null.
+ */
+void dump_owner(struct req_state *s, const rgw_user& id, string& name,
+                const char *section)
+{
+  const char * const section_name = section ? section : "Owner";
+  auto * const out = s->formatter;
+
+  out->open_object_section(section_name);
+  out->dump_string("ID", id.to_str());
+  out->dump_string("DisplayName", name);
+  out->close_section();
+}
+
+/*
+ * Emit the CORS response headers. Nothing is sent unless `origin` is a
+ * non-empty string; each remaining header is sent only when its value is
+ * non-empty (or, for max_age, differs from CORS_MAX_AGE_INVALID).
+ */
+void dump_access_control(struct req_state *s, const char *origin,
+                         const char *meth,
+                         const char *hdr, const char *exp_hdr,
+                         uint32_t max_age) {
+  const auto has_text = [](const char *v) {
+    return v && (v[0] != '\0');
+  };
+
+  if (!has_text(origin)) {
+    return;
+  }
+
+  dump_header(s, "Access-Control-Allow-Origin", origin);
+  /* A concrete origin (anything but "*") means the response varies with the
+   * request's Origin header, so clients and caches must be told via Vary. */
+  if (strcmp(origin, "*") != 0) {
+    dump_header(s, "Vary", "Origin");
+  }
+
+  if (has_text(meth)) {
+    dump_header(s, "Access-Control-Allow-Methods", meth);
+  }
+  if (has_text(hdr)) {
+    dump_header(s, "Access-Control-Allow-Headers", hdr);
+  }
+  if (has_text(exp_hdr)) {
+    dump_header(s, "Access-Control-Expose-Headers", exp_hdr);
+  }
+  if (max_age != CORS_MAX_AGE_INVALID) {
+    dump_header(s, "Access-Control-Max-Age", max_age);
+  }
+}
+
+/*
+ * Ask the operation for its CORS header values and, if it reports that it
+ * produced any, emit them.
+ */
+void dump_access_control(req_state *s, RGWOp *op)
+{
+  string origin;
+  string method;
+  string header;
+  string exp_header;
+  unsigned max_age = CORS_MAX_AGE_INVALID;
+
+  const bool have_cors =
+    op->generate_cors_headers(origin, method, header, exp_header, &max_age);
+  if (have_cors) {
+    dump_access_control(s, origin.c_str(), method.c_str(), header.c_str(),
+                        exp_header.c_str(), max_age);
+  }
+}
+
+/*
+ * Emit the formatter's header once per request; s->content_started records
+ * that it has been done.
+ */
+void dump_start(struct req_state *s)
+{
+  if (s->content_started) {
+    return;
+  }
+
+  s->formatter->output_header();
+  s->content_started = true;
+}
+
+/*
+ * Emit the transaction id: Swift requests always get X-Trans-Id and
+ * X-Openstack-Request-Id; other requests get x-amz-request-id, but only when
+ * the id is non-empty.
+ */
+void dump_trans_id(req_state *s)
+{
+  const bool is_swift = (s->prot_flags & RGW_REST_SWIFT);
+  if (is_swift) {
+    dump_header(s, "X-Trans-Id", s->trans_id);
+    dump_header(s, "X-Openstack-Request-Id", s->trans_id);
+    return;
+  }
+
+  if (s->trans_id.length()) {
+    dump_header(s, "x-amz-request-id", s->trans_id);
+  }
+}
+/*
+ * Emit the remaining response headers and close the header section.
+ *
+ * In order: transaction id, "x-amz-request-charged" when applicable, CORS
+ * headers (only when `op` is given), the length / transfer-encoding header,
+ * Content-Type, Server; then complete_header(), enable accounting and flush
+ * the formatter as body.
+ *
+ * content_type: value to send, or null to let this function decide.
+ * proposed_content_length: a byte count, CHUNKED_TRANSFER_ENCODING, or
+ * NO_CONTENT_LENGTH; ignored on the error path, where the formatter's own
+ * length is used.
+ * force_content_type: derive Content-Type from s->format even if the
+ * formatter is empty (replaces any content_type passed in).
+ * force_no_error: do not dump the error document even if s->is_err().
+ */
+void end_header(struct req_state* s, RGWOp* op, const char *content_type,
+ const int64_t proposed_content_length, bool force_content_type,
+ bool force_no_error)
+{
+ string ctype;
+
+ dump_trans_id(s);
+
+ if ((!s->is_err()) &&
+ (s->bucket_info.owner != s->user->user_id) &&
+ (s->bucket_info.requester_pays)) {
+ dump_header(s, "x-amz-request-charged", "requester");
+ }
+
+ if (op) {
+ dump_access_control(s, op);
+ }
+
+ if (s->prot_flags & RGW_REST_SWIFT && !content_type) {
+ force_content_type = true; // Swift without a caller-supplied type always gets one
+ }
+
+ /* do not send content type if content length is zero
+ and the content type was not set by the user */
+ if (force_content_type ||
+ (!content_type && s->formatter->get_len() != 0) || s->is_err()){
+ switch (s->format) {
+ case RGW_FORMAT_XML:
+ ctype = "application/xml";
+ break;
+ case RGW_FORMAT_JSON:
+ ctype = "application/json";
+ break;
+ case RGW_FORMAT_HTML:
+ ctype = "text/html";
+ break;
+ default:
+ ctype = "text/plain";
+ break;
+ }
+ if (s->prot_flags & RGW_REST_SWIFT)
+ ctype.append("; charset=utf-8");
+ content_type = ctype.c_str(); // points into ctype, which lives until this function returns
+ }
+ if (!force_no_error && s->is_err()) {
+ dump_start(s);
+ dump(s);
+ dump_content_length(s, s->formatter->get_len()); // length of what was just written to the formatter
+ } else {
+ if (proposed_content_length == CHUNKED_TRANSFER_ENCODING) {
+ dump_chunked_encoding(s);
+ } else if (proposed_content_length != NO_CONTENT_LENGTH) {
+ dump_content_length(s, proposed_content_length);
+ }
+ }
+
+ if (content_type) {
+ dump_header(s, "Content-Type", content_type);
+ }
+ dump_header_if_nonempty(s, "Server", g_conf()->rgw_service_provider_name);
+
+ try {
+ RESTFUL_IO(s)->complete_header();
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->complete_header() returned err="
+ << e.what() << dendl;
+ }
+
+ ACCOUNTING_IO(s)->set_account(true);
+ rgw_flush_formatter_and_reset(s, s->formatter); // sends whatever the formatter holds as body
+}
+
+/*
+ * Build "<redirect_base><request_uri>?<request_params>" into *redirect_url.
+ *
+ * A single trailing '/' on redirect_base is dropped first. The '?' is always
+ * appended, even when the request has no parameters.
+ */
+static void build_redirect_url(req_state *s, const string& redirect_base, string *redirect_url)
+{
+  string& dest_uri = *redirect_url;
+
+  dest_uri = redirect_base;
+  /*
+   * request_uri is expected to begin with a slash, so drop a trailing slash
+   * from the base. The emptiness check keeps an empty base from being
+   * indexed at size() - 1, which would be out of range.
+   */
+  if (!dest_uri.empty() && dest_uri.back() == '/') {
+    dest_uri.pop_back();
+  }
+  dest_uri += s->info.request_uri;
+  dest_uri += "?";
+  dest_uri += s->info.request_params;
+}
+/*
+ * Terminate a request with an error response.
+ *
+ * The error is first offered to op->error_handler() or, when there is no op,
+ * to handler->error_handler(); the value they return replaces err_no. If it
+ * is still non-zero, the status line, optional Bucket / Location headers and
+ * the remaining headers are sent, followed by any error_content the handler
+ * supplied. The l_rgw_failed_req counter is incremented in every case.
+ *
+ * A JSON formatter is installed when the request has none yet.
+ */
+void abort_early(struct req_state *s, RGWOp* op, int err_no,
+ RGWHandler* handler)
+{
+ string error_content("");
+ if (!s->formatter) {
+ s->formatter = new JSONFormatter;
+ s->format = RGW_FORMAT_JSON;
+ }
+
+ // op->error_handler is responsible for calling it's handler error_handler
+ if (op != NULL) {
+ int new_err_no;
+ new_err_no = op->error_handler(err_no, &error_content);
+ ldout(s->cct, 20) << "op->ERRORHANDLER: err_no=" << err_no
+ << " new_err_no=" << new_err_no << dendl;
+ err_no = new_err_no;
+ } else if (handler != NULL) {
+ int new_err_no;
+ new_err_no = handler->error_handler(err_no, &error_content);
+ ldout(s->cct, 20) << "handler->ERRORHANDLER: err_no=" << err_no
+ << " new_err_no=" << new_err_no << dendl;
+ err_no = new_err_no;
+ }
+
+ // If the error handler(s) above dealt with it completely, they should have
+ // returned 0. If non-zero, we need to continue here.
+ if (err_no) {
+ // Watch out, we might have a custom error state already set!
+ if (!s->err.http_ret || s->err.http_ret == 200) {
+ set_req_state_err(s, err_no);
+ }
+
+ if (s->err.http_ret == 404 && !s->redirect_zone_endpoint.empty()) { // turn a 404 into a 301 to the redirect zone
+ s->err.http_ret = 301;
+ err_no = -ERR_PERMANENT_REDIRECT;
+ build_redirect_url(s, s->redirect_zone_endpoint, &s->redirect);
+ }
+
+ dump_errno(s);
+ dump_bucket_from_state(s);
+ if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
+ string dest_uri;
+ if (!s->redirect.empty()) { // an explicit redirect target wins over the zonegroup endpoint
+ dest_uri = s->redirect;
+ } else if (!s->zonegroup_endpoint.empty()) {
+ build_redirect_url(s, s->zonegroup_endpoint, &dest_uri);
+ }
+
+ if (!dest_uri.empty()) {
+ dump_redirect(s, dest_uri);
+ }
+ }
+
+ if (!error_content.empty()) {
+ /*
+ * TODO we must add all error entries as headers here:
+ * when having a working errordoc, then the s3 error fields are
+ * rendered as HTTP headers, e.g.:
+ * x-amz-error-code: NoSuchKey
+ * x-amz-error-message: The specified key does not exist.
+ * x-amz-error-detail-Key: foo
+ */
+ end_header(s, op, NULL, error_content.size(), false, true); // force_no_error: the handler's content replaces the generated error body
+ RESTFUL_IO(s)->send_body(error_content.c_str(), error_content.size());
+ } else {
+ end_header(s, op);
+ }
+ }
+ perfcounter->inc(l_rgw_failed_req);
+}
+
+/*
+ * Send an interim "100 Continue" response; an I/O exception is logged at
+ * level 0 and not propagated.
+ */
+void dump_continue(struct req_state * const s)
+{
+  try {
+    RESTFUL_IO(s)->send_100_continue();
+  } catch (rgw::io::Exception& io_err) {
+    ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_100_continue() returned err="
+                     << io_err.what() << dendl;
+  }
+}
+
+void dump_range(struct req_state* const s,
+ const uint64_t ofs,
+ const uint64_t end,
+ const uint64_t total)
+{
+ /* dumping range into temp buffer first, as libfcgi will fail to digest
+ * %lld */
+ char range_buf[128];
+ size_t len;
+
+ if (! total) {
+ len = snprintf(range_buf, sizeof(range_buf), "bytes */%lld",
+ static_cast<long long>(total));
+ } else {
+ len = snprintf(range_buf, sizeof(range_buf), "bytes %lld-%lld/%lld",
+ static_cast<long long>(ofs),
+ static_cast<long long>(end),
+ static_cast<long long>(total));
+ }
+
+ return dump_header(s, "Content-Range", boost::string_ref(range_buf, len));
+}
+
+
+/*
+ * Send `len` bytes of body. Returns what send_body() returns, or the negated
+ * error code carried by an rgw::io::Exception.
+ */
+int dump_body(struct req_state* const s,
+              const char* const buf,
+              const size_t len)
+{
+  int rc;
+  try {
+    rc = RESTFUL_IO(s)->send_body(buf, len);
+  } catch (rgw::io::Exception& io_err) {
+    rc = -io_err.code().value();
+  }
+  return rc;
+}
+
+/* Send the contents of a buffer list as body (see the raw-buffer overload). */
+int dump_body(struct req_state* const s, /* const */ ceph::buffer::list& bl)
+{
+  const int rc = dump_body(s, bl.c_str(), bl.length());
+  return rc;
+}
+
+/* Send a string as body (see the raw-buffer overload). */
+int dump_body(struct req_state* const s, const std::string& str)
+{
+  const char * const bytes = str.c_str();
+  const size_t count = str.length();
+  return dump_body(s, bytes, count);
+}
+
+/*
+ * Read up to `max` bytes of request body into `buf`. Returns what
+ * recv_body() of the client I/O returns, or the negated error code carried
+ * by an rgw::io::Exception.
+ */
+int recv_body(struct req_state* const s,
+              char* const buf,
+              const size_t max)
+{
+  int rc;
+  try {
+    rc = RESTFUL_IO(s)->recv_body(buf, max);
+  } catch (rgw::io::Exception& io_err) {
+    rc = -io_err.code().value();
+  }
+  return rc;
+}
+
+/*
+ * Pick up the range and conditional request headers. System requests also
+ * read the destination zone id / pg version headers and the "stat" system
+ * parameter, which turns data fetching off. Torrent requests hand over to
+ * torrent.get_params().
+ */
+int RGWGetObj_ObjStore::get_params()
+{
+  auto * const env = s->info.env;
+
+  range_str = env->get("HTTP_RANGE");
+  if_mod = env->get("HTTP_IF_MODIFIED_SINCE");
+  if_unmod = env->get("HTTP_IF_UNMODIFIED_SINCE");
+  if_match = env->get("HTTP_IF_MATCH");
+  if_nomatch = env->get("HTTP_IF_NONE_MATCH");
+
+  if (s->system_request) {
+    mod_zone_id = env->get_int("HTTP_DEST_ZONE_SHORT_ID", 0);
+    mod_pg_ver = env->get_int("HTTP_DEST_PG_VER", 0);
+    rgwx_stat = s->info.args.exists(RGW_SYS_PARAM_PREFIX "stat");
+    get_data &= (!rgwx_stat);
+  }
+
+  if (!s->info.args.exists(GET_TORRENT)) {
+    return 0;
+  }
+  return torrent.get_params();
+}
+
+/*
+ * Fetch request argument `name` as a string; *val becomes def_val when the
+ * argument is absent. *existed (if given) reports presence. Always returns 0.
+ */
+int RESTArgs::get_string(struct req_state *s, const string& name,
+                         const string& def_val, string *val, bool *existed)
+{
+  bool found;
+  *val = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+  }
+
+  return 0;
+}
+
+/*
+ * Fetch request argument `name` as uint64_t. An absent argument yields
+ * def_val; a negative result from stringtoull() is returned as-is.
+ * *existed (if given) reports presence.
+ */
+int RESTArgs::get_uint64(struct req_state *s, const string& name,
+                         uint64_t def_val, uint64_t *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  const int rc = stringtoull(raw, val);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Fetch request argument `name` as int64_t. An absent argument yields
+ * def_val; a negative result from stringtoll() is returned as-is.
+ * *existed (if given) reports presence.
+ */
+int RESTArgs::get_int64(struct req_state *s, const string& name,
+                        int64_t def_val, int64_t *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  const int rc = stringtoll(raw, val);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Fetch request argument `name` as uint32_t. An absent argument yields
+ * def_val; a negative result from stringtoul() is returned as-is.
+ * *existed (if given) reports presence.
+ */
+int RESTArgs::get_uint32(struct req_state *s, const string& name,
+                         uint32_t def_val, uint32_t *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  const int rc = stringtoul(raw, val);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Fetch request argument `name` as int32_t. An absent argument yields
+ * def_val; a negative result from stringtol() is returned as-is.
+ * *existed (if given) reports presence.
+ */
+int RESTArgs::get_int32(struct req_state *s, const string& name,
+                        int32_t def_val, int32_t *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  const int rc = stringtol(raw, val);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Fetch request argument `name` as a utime_t, parsed by
+ * utime_t::parse_date(). An absent argument yields def_val; a negative parse
+ * result is returned and *val is left untouched. *existed (if given) reports
+ * presence.
+ */
+int RESTArgs::get_time(struct req_state *s, const string& name,
+                       const utime_t& def_val, utime_t *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  uint64_t epoch, nsec;
+  const int rc = utime_t::parse_date(raw, &epoch, &nsec);
+  if (rc < 0) {
+    return rc;
+  }
+
+  *val = utime_t(epoch, nsec);
+  return 0;
+}
+
+/*
+ * Fetch request argument `name` as a date and store its epoch part (the
+ * second output of utime_t::parse_date is not requested). An absent argument
+ * yields def_val; a negative parse result is returned as-is.
+ */
+int RESTArgs::get_epoch(struct req_state *s, const string& name, uint64_t def_val, uint64_t *epoch, bool *existed)
+{
+  bool found;
+  string raw_date = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *epoch = def_val;
+    return 0;
+  }
+
+  const int rc = utime_t::parse_date(raw_date, epoch, NULL);
+  return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Fetch request argument `name` as a boolean.
+ *
+ * Absent: *val = def_val, returns 0. Present with an empty value, "true"
+ * (any case) or "1": true. "false" (any case) or "0": false. Anything else:
+ * *val = def_val and -EINVAL is returned.
+ */
+int RESTArgs::get_bool(struct req_state *s, const string& name, bool def_val, bool *val, bool *existed)
+{
+  bool found;
+  string raw = s->info.args.get(name, &found);
+
+  if (existed) {
+    *existed = found;
+  }
+  if (!found) {
+    *val = def_val;
+    return 0;
+  }
+
+  const char * const text = raw.c_str();
+
+  const bool truthy = raw.empty() ||
+                      strcasecmp(text, "true") == 0 ||
+                      raw.compare("1") == 0;
+  if (truthy) {
+    *val = true;
+    return 0;
+  }
+
+  const bool falsy = strcasecmp(text, "false") == 0 ||
+                     raw.compare("0") == 0;
+  if (!falsy) {
+    *val = def_val;
+    return -EINVAL;
+  }
+
+  *val = false;
+  return 0;
+}
+
+
+/*
+ * Begin the response: record `ret` as the request's error state, send the
+ * status line, the formatter header and the HTTP headers, then flush and
+ * reset the formatter.
+ */
+void RGWRESTFlusher::do_start(int ret)
+{
+  /* no going back from here */
+  set_req_state_err(s, ret);
+
+  dump_errno(s);
+  dump_start(s);
+  end_header(s, op);
+
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Push whatever the request's formatter has buffered to the client. */
+void RGWRESTFlusher::do_flush()
+{
+  auto * const fmt = s->formatter;
+  rgw_flush_formatter(s, fmt);
+}
+
+/*
+ * Reject the upload with -ERR_TOO_LARGE when the declared length exceeds
+ * rgw_max_put_size. Requests without a declared length pass.
+ */
+int RGWPutObj_ObjStore::verify_params()
+{
+  if (!s->length) {
+    return 0;
+  }
+
+  const off_t declared = atoll(s->length);
+  const off_t limit = (off_t)(s->cct->_conf->rgw_max_put_size);
+  if (declared > limit) {
+    return -ERR_TOO_LARGE;
+  }
+
+  return 0;
+}
+
+/*
+ * With rgw_torrent_flag set, initialise the torrent helper (returning its
+ * error, if any) and give it the object name. Then pick up the Content-MD5
+ * header.
+ */
+int RGWPutObj_ObjStore::get_params()
+{
+  /* start gettorrent */
+  if (s->cct->_conf->rgw_torrent_flag) {
+    const int ret = torrent.get_params();
+    ldout(s->cct, 5) << "NOTICE: open produce torrent file " << dendl;
+    if (ret < 0) {
+      return ret;
+    }
+    torrent.set_info_name((s->object).name);
+  }
+  /* end gettorrent */
+
+  supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
+  return 0;
+}
+
+/*
+ * Read the next piece of the upload body and append it to `bl`.
+ *
+ * At most rgw_max_chunk_size bytes are requested, fewer when the declared
+ * length minus `ofs` is smaller. Returns the number of bytes read, a
+ * negative error from recv_body(), or -ERR_TOO_LARGE once ofs plus the bytes
+ * read exceeds rgw_max_put_size.
+ */
+int RGWPutObj_ObjStore::get_data(bufferlist& bl)
+{
+  const uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+
+  size_t cl = chunk_size;
+  if (s->length) {
+    cl = atoll(s->length) - ofs;
+    if (cl > chunk_size) {
+      cl = chunk_size;
+    }
+  }
+
+  int len = 0;
+  {
+    ACCOUNTING_IO(s)->set_account(true);
+    bufferptr chunk(cl);
+
+    const auto got = recv_body(s, chunk.c_str(), cl);
+    if (got < 0) {
+      return got;
+    }
+
+    len = got;
+    bl.append(chunk, 0, len);
+
+    ACCOUNTING_IO(s)->set_account(false);
+  }
+
+  const uint64_t received_so_far = (uint64_t)ofs + len;
+  if (received_so_far > s->cct->_conf->rgw_max_put_size) {
+    return -ERR_TOO_LARGE;
+  }
+
+  return len;
+}
+
+
+/*
+ * Split a header value of the form 'first; param1=foo; param2=bar'.
+ *
+ * `first` receives the whitespace-trimmed text before the first ';' (or the
+ * whole trimmed input when there is none). Each later ';'-separated piece is
+ * stored in `params`: "name=value" as trimmed name -> quote-trimmed value, a
+ * piece without '=' as trimmed piece -> "".
+ */
+void RGWPostObj_ObjStore::parse_boundary_params(const std::string& params_str,
+                                                std::string& first,
+                                                std::map<std::string,
+                                                std::string>& params)
+{
+  const size_t first_sep = params_str.find(';');
+  if (first_sep == std::string::npos) {
+    first = rgw_trim_whitespace(params_str);
+    return;
+  }
+
+  first = rgw_trim_whitespace(params_str.substr(0, first_sep));
+
+  for (size_t cursor = first_sep + 1; cursor < params_str.size(); ) {
+    size_t stop = params_str.find(';', cursor);
+    if (stop == std::string::npos) {
+      stop = params_str.size();
+    }
+
+    const std::string piece = params_str.substr(cursor, stop - cursor);
+    const size_t eq = piece.find('=');
+
+    if (eq == std::string::npos) {
+      params[rgw_trim_whitespace(piece)] = "";
+    } else {
+      std::string key = rgw_trim_whitespace(piece.substr(0, eq));
+      std::string value = rgw_trim_quotes(piece.substr(eq + 1));
+      params[std::move(key)] = std::move(value);
+    }
+
+    cursor = stop + 1;
+  }
+}
+
+/*
+ * Parse one "Name: value; params" line of a form part header.
+ *
+ * Returns -EINVAL when the line has no ':'. Otherwise field_name receives
+ * the text before the colon and, unless the colon is the last character,
+ * the remainder is parsed into field.val / field.params.
+ */
+int RGWPostObj_ObjStore::parse_part_field(const std::string& line,
+                                          std::string& field_name, /* out */
+                                          post_part_field& field)  /* out */
+{
+  const size_t colon = line.find(':');
+  if (colon == string::npos) {
+    return -EINVAL;
+  }
+
+  field_name = line.substr(0, colon);
+
+  const bool has_value = (colon < line.size() - 1);
+  if (has_value) {
+    parse_boundary_params(line.substr(colon + 1), field.val, field.params);
+  }
+
+  return 0;
+}
+
+/*
+ * True when the two characters starting at `s` are "\r\n". The second
+ * character is only read when the first one is '\r'.
+ */
+static bool is_crlf(const char *s)
+{
+  if (s[0] != '\r') {
+    return false;
+  }
+  return s[1] == '\n';
+}
+
+/*
+ * Scan the first `max_len` bytes of `bl` for the boundary `str` and, when
+ * check_crlf is set, for the next end of line.
+ *
+ * Returns
+ *   -EINVAL  if the boundary is shorter than two characters,
+ *   -1       if the buffer is shorter than the boundary or nothing is found,
+ *   i + 1    for a CRLF whose '\n' is at offset i (reached_boundary false),
+ *   offset   of the boundary (reached_boundary true). `skip` is then the
+ *            boundary length, and a CRLF directly before the boundary is
+ *            swallowed: the offset moves back by 2 and skip grows by 2.
+ *
+ * A CRLF is reported in preference to a boundary starting at the same offset.
+ */
+static int index_of(ceph::bufferlist& bl,
+                    uint64_t max_len,
+                    const std::string& str,
+                    const bool check_crlf,
+                    bool& reached_boundary,
+                    int& skip)
+{
+  reached_boundary = false;
+  skip = 0;
+
+  if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks)
+    return -EINVAL;
+
+  if (bl.length() < str.size())
+    return -1;
+
+  const char *buf = bl.c_str();
+  const char *s = str.c_str();
+
+  if (max_len > bl.length())
+    max_len = bl.length();
+
+  /* Number of start offsets at which a whole boundary still fits into the
+   * window. Guarded so that a window shorter than the boundary yields 0
+   * instead of wrapping the unsigned subtraction, which would let the
+   * comparison below read past the window. */
+  const uint64_t match_limit =
+    (max_len >= str.size()) ? (max_len - str.size() + 1) : 0;
+
+  for (uint64_t i = 0; i < max_len; i++, buf++) {
+    if (check_crlf &&
+        i >= 1 &&
+        is_crlf(buf - 1)) {
+      return i + 1; // skip the crlf
+    }
+    if ((i < match_limit) &&
+        (buf[0] == s[0] && buf[1] == s[1]) &&
+        (strncmp(buf, s, str.size()) == 0)) {
+      reached_boundary = true;
+      skip = str.size();
+
+      /* swallow the crlf that precedes the boundary, if there is one */
+      if ((i >= 2) &&
+          is_crlf(buf - 2)) {
+        i -= 2;
+        skip += 2;
+      }
+      return i;
+    }
+  }
+
+  return -1;
+}
+
+/*
+ * Hand out the next piece of the multipart body in `bl`, stopping at the
+ * part boundary and, when check_crlf is set, at the next end of line.
+ *
+ * Unconsumed input is kept in in_data between calls; more is read from the
+ * client when in_data holds fewer than `max` bytes. On return,
+ * reached_boundary tells whether the boundary was hit and `done` whether it
+ * was the final one (followed by "--"). The boundary and the CRLF or "--"
+ * after it are dropped from in_data.
+ *
+ * Returns 0, or a negative error from recv_body().
+ */
+int RGWPostObj_ObjStore::read_with_boundary(ceph::bufferlist& bl,
+                                            uint64_t max,
+                                            const bool check_crlf,
+                                            bool& reached_boundary,
+                                            bool& done)
+{
+  /* room for the payload, a crlf and the boundary itself */
+  uint64_t cl = max + 2 + boundary.size();
+
+  if (max > in_data.length()) {
+    uint64_t need_to_read = cl - in_data.length();
+
+    bufferptr bp(need_to_read);
+
+    const auto read_len = recv_body(s, bp.c_str(), need_to_read);
+    if (read_len < 0) {
+      return read_len;
+    }
+    in_data.append(bp, 0, read_len);
+  }
+
+  done = false;
+  int skip;
+  const int index = index_of(in_data, cl, boundary, check_crlf,
+                             reached_boundary, skip);
+  if (index >= 0) {
+    max = index;
+  }
+
+  if (max > in_data.length()) {
+    max = in_data.length();
+  }
+
+  bl.substr_of(in_data, 0, max);
+
+  ceph::bufferlist new_read_data;
+
+  /*
+   * now we need to skip boundary for next time, also skip any crlf, or
+   * check to see if it's the last final boundary (marked with "--" at the end
+   */
+  if (reached_boundary) {
+    int left = in_data.length() - max;
+    if (left < skip + 2) {
+      int need = skip + 2 - left;
+      bufferptr boundary_bp(need);
+      const int r = recv_body(s, boundary_bp.c_str(), need);
+      if (r < 0) {
+        return r;
+      }
+      /* append only the bytes actually received; after a short read the
+       * rest of the buffer was never written */
+      in_data.append(boundary_bp, 0, r);
+    }
+    max += skip; // skip boundary for next time
+    if (in_data.length() >= max + 2) {
+      const char *data = in_data.c_str();
+      if (is_crlf(data + max)) {
+        max += 2;
+      } else {
+        if (*(data + max) == '-' &&
+            *(data + max + 1) == '-') {
+          done = true;
+          max += 2;
+        }
+      }
+    }
+  }
+
+  /* keep what was not consumed for the next call */
+  new_read_data.substr_of(in_data, max, in_data.length() - max);
+  in_data = new_read_data;
+
+  return 0;
+}
+
+/* read_with_boundary() that also stops at the next end of line. */
+int RGWPostObj_ObjStore::read_line(ceph::bufferlist& bl,
+                                   const uint64_t max,
+                                   bool& reached_boundary,
+                                   bool& done)
+{
+  const bool stop_at_crlf = true;
+  return read_with_boundary(bl, max, stop_at_crlf, reached_boundary, done);
+}
+
+/* read_with_boundary() that stops only at the part boundary. */
+int RGWPostObj_ObjStore::read_data(ceph::bufferlist& bl,
+                                   const uint64_t max,
+                                   bool& reached_boundary,
+                                   bool& done)
+{
+  const bool stop_at_crlf = false;
+  return read_with_boundary(bl, max, stop_at_crlf, reached_boundary, done);
+}
+
+/*
+ * Read the header lines of the next form part into `part`.
+ *
+ * Lines are read with read_line() until an empty line or the boundary is
+ * met. Each line is parsed with parse_part_field() and stored in
+ * part->fields under its field name; the "name" parameter of a
+ * Content-Disposition field becomes part->name.
+ *
+ * Returns 0 on success (also when `done` is reported by one of the first
+ * two reads, in which case nothing is parsed) or a negative error from
+ * read_line() / parse_part_field().
+ */
+int RGWPostObj_ObjStore::read_form_part_header(struct post_form_part* const part,
+ bool& done)
+{
+ bufferlist bl;
+ bool reached_boundary;
+ uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+ int r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ }
+
+ if (done) {
+ return 0;
+ }
+
+ if (reached_boundary) { // skip the first boundary
+ r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ } else if (done) {
+ return 0;
+ }
+ }
+
+ while (true) {
+ /*
+ * iterate through fields
+ */
+ std::string line = rgw_trim_whitespace(string(bl.c_str(), bl.length()));
+
+ if (line.empty()) { // an empty line ends the header block
+ break;
+ }
+
+ struct post_part_field field;
+
+ string field_name;
+ r = parse_part_field(line, field_name, field);
+ if (r < 0) {
+ return r;
+ }
+
+ part->fields[field_name] = field;
+
+ if (stringcasecmp(field_name, "Content-Disposition") == 0) {
+ part->name = field.params["name"];
+ }
+
+ if (reached_boundary) { // the line just handled was cut short by the boundary
+ break;
+ }
+
+ r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Look up form part `name`; when present, store its whitespace-trimmed data
+ * in *val and return true. Returns false (leaving *val alone) otherwise.
+ */
+bool RGWPostObj_ObjStore::part_str(parts_collection_t& parts,
+                                   const std::string& name,
+                                   std::string* val)
+{
+  const auto found = parts.find(name);
+  if (found == std::end(parts)) {
+    return false;
+  }
+
+  ceph::bufferlist& payload = found->second.data;
+  std::string raw = string(payload.c_str(), payload.length());
+  *val = rgw_trim_whitespace(raw);
+  return true;
+}
+
+/*
+ * Return the trimmed data of form part `name`, or the whitespace-trimmed
+ * def_val when there is no such part.
+ */
+std::string RGWPostObj_ObjStore::get_part_str(parts_collection_t& parts,
+                                              const std::string& name,
+                                              const std::string& def_val)
+{
+  std::string found;
+
+  if (!part_str(parts, name, &found)) {
+    return rgw_trim_whitespace(def_val);
+  }
+  return found;
+}
+
+/*
+ * Look up form part `name`; when present, copy its data buffer list to *pbl
+ * and return true. Returns false otherwise.
+ */
+bool RGWPostObj_ObjStore::part_bl(parts_collection_t& parts,
+                                  const std::string& name,
+                                  ceph::bufferlist* pbl)
+{
+  const auto found = parts.find(name);
+  const bool present = (found != std::end(parts));
+  if (present) {
+    *pbl = found->second.data;
+  }
+  return present;
+}
+
+/*
+ * A POST upload must declare its length (-ERR_LENGTH_REQUIRED otherwise)
+ * and that length must not exceed rgw_max_put_size (-ERR_TOO_LARGE). On
+ * success the Content-MD5 header is picked up.
+ */
+int RGWPostObj_ObjStore::verify_params()
+{
+  /* Check that we have enough memory to store the object. The test is not
+   * exact and may reject large requests that would in fact have fitted. */
+  if (!s->length) {
+    return -ERR_LENGTH_REQUIRED;
+  }
+
+  const off_t declared = atoll(s->length);
+  const off_t limit = (off_t)(s->cct->_conf->rgw_max_put_size);
+  if (declared > limit) {
+    return -ERR_TOO_LARGE;
+  }
+
+  supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
+
+  return 0;
+}
+
+/*
+ * Prepare multipart/form-data parsing: answer a pending Expect: 100-continue,
+ * check that the request's Content-Type is multipart/form-data and derive
+ * the part boundary ("--" + the boundary parameter).
+ *
+ * Returns -EINVAL, with err_msg set, for a wrong content type or a missing
+ * boundary parameter.
+ */
+int RGWPostObj_ObjStore::get_params()
+{
+  if (s->expect_cont) {
+    /* With POST the parameters live in the request body, so the client has
+     * to be told to continue before they can be looked at. This departs
+     * from the usual request flow. */
+    dump_continue(s);
+    s->expect_cont = false;
+  }
+
+  std::string raw_ctype = s->info.env->get("CONTENT_TYPE", "");
+  std::string media_type;
+  std::map<std::string, std::string> ctype_params;
+  parse_boundary_params(raw_ctype, media_type, ctype_params);
+
+  if (media_type.compare("multipart/form-data") != 0) {
+    err_msg = "Request Content-Type is not multipart/form-data";
+    return -EINVAL;
+  }
+
+  if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    ldout(s->cct, 20) << "request content_type_str="
+                      << raw_ctype << dendl;
+    ldout(s->cct, 20) << "request content_type params:" << dendl;
+
+    for (const auto& kv : ctype_params) {
+      ldout(s->cct, 20) << " " << kv.first << " -> " << kv.second
+                        << dendl;
+    }
+  }
+
+  const auto boundary_it = ctype_params.find("boundary");
+  if (boundary_it == std::end(ctype_params)) {
+    err_msg = "Missing multipart boundary specification";
+    return -EINVAL;
+  }
+
+  /* Create the boundary. */
+  boundary = "--";
+  boundary.append(boundary_it->second);
+
+  return 0;
+}
+
+
+/*
+ * Read the request body (at most rgw_max_put_param_size bytes, chunked
+ * input not allowed) into `data`, log it at level 0 and return the read
+ * status.
+ */
+int RGWPutACLs_ObjStore::get_params()
+{
+  const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, body_limit, false);
+
+  ldout(s->cct, 0) << "RGWPutACLs_ObjStore::get_params read data is: " << data.c_str() << dendl;
+  return op_ret;
+}
+
+/*
+ * Read the request body (at most rgw_max_put_param_size bytes, chunked
+ * input not allowed) into `data` and return the read status.
+ */
+int RGWPutLC_ObjStore::get_params()
+{
+  const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, body_limit, false);
+
+  return op_ret;
+}
+
+/*
+ * Read the request body (at most rgw_max_put_param_size bytes, chunked
+ * input not allowed) into `data` and return the read status.
+ */
+int RGWPutBucketObjectLock_ObjStore::get_params()
+{
+  const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, body_limit, false);
+
+  return op_ret;
+}
+
+/*
+ * Read the request body (at most rgw_max_put_param_size bytes, chunked
+ * input not allowed) into `data` and return the read status.
+ */
+int RGWPutObjLegalHold_ObjStore::get_params()
+{
+  const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, body_limit, false);
+
+  return op_ret;
+}
+
+
+/*
+ * Read a request body of unknown length into a buffer list.
+ *
+ * Reads start at READ_CHUNK bytes and double after every completely filled
+ * read until they reach MAX_READ_CHUNK. A short read ends the loop. Once the
+ * bytes read so far exceed max_read, -ERANGE is returned.
+ *
+ * Returns (0, data) on success, (negative error, data read so far) otherwise.
+ */
+static std::tuple<int, bufferlist> read_all_chunked_input(req_state *s, const uint64_t max_read)
+{
+#define READ_CHUNK 4096
+#define MAX_READ_CHUNK (128 * 1024)
+  int need_to_read = READ_CHUNK;
+  /* Bytes requested so far. Kept as uint64_t so it neither overflows nor
+   * gets truncated when compared with a 64-bit limit. */
+  uint64_t total = need_to_read;
+  bufferlist bl;
+
+  while (true) {
+    /* one spare byte for the terminating NUL */
+    bufferptr bp(need_to_read + 1);
+    const int read_len = recv_body(s, bp.c_str(), need_to_read);
+    if (read_len < 0) {
+      return std::make_tuple(read_len, std::move(bl));
+    }
+
+    bp.c_str()[read_len] = '\0';
+    bp.set_length(read_len);
+    bl.append(bp);
+
+    if (read_len != need_to_read) {
+      break; // short read: no more input
+    }
+
+    if (need_to_read < MAX_READ_CHUNK)
+      need_to_read *= 2;
+
+    if (total > max_read) {
+      return std::make_tuple(-ERANGE, std::move(bl));
+    }
+    total += need_to_read;
+  }
+
+  return std::make_tuple(0, std::move(bl));
+}
+
+/*
+ * Read the whole request body.
+ *
+ * With a non-zero declared length, that many bytes are requested, after
+ * rejecting lengths above max_len with -ERANGE. Without a declared length
+ * the body is read chunk-wise, but only if allow_chunked is set and the
+ * request uses "chunked" transfer encoding; otherwise -ERR_LENGTH_REQUIRED
+ * is returned.
+ *
+ * Returns (status, data); status is 0 on success.
+ */
+std::tuple<int, bufferlist > rgw_rest_read_all_input(struct req_state *s,
+                                                     const uint64_t max_len,
+                                                     const bool allow_chunked)
+{
+  bufferlist bl;
+  size_t cl = 0;
+
+  if (s->length) {
+    cl = atoll(s->length);
+  } else if (!allow_chunked) {
+    return std::make_tuple(-ERR_LENGTH_REQUIRED, std::move(bl));
+  }
+
+  if (cl) {
+    if (cl > (size_t)max_len) {
+      return std::make_tuple(-ERANGE, std::move(bl));
+    }
+
+    /* one spare byte for the terminating NUL */
+    bufferptr bp(cl + 1);
+
+    const int len = recv_body(s, bp.c_str(), cl);
+    if (len < 0) {
+      return std::make_tuple(len, std::move(bl));
+    }
+
+    bp.c_str()[len] = '\0';
+    bp.set_length(len);
+    bl.append(bp);
+
+    return std::make_tuple(0, std::move(bl));
+  }
+
+  if (allow_chunked && !s->length) {
+    const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+    const bool is_chunked = encoding && strcmp(encoding, "chunked") == 0;
+    if (!is_chunked) {
+      return std::make_tuple(-ERR_LENGTH_REQUIRED, std::move(bl));
+    }
+
+    int ret = 0;
+    std::tie(ret, bl) = read_all_chunked_input(s, max_len);
+    if (ret < 0) {
+      return std::make_tuple(ret, std::move(bl));
+    }
+  }
+
+  return std::make_tuple(0, std::move(bl));
+}
+
+/*
+ * Pick up the "uploadId" argument (-ENOTSUP when empty) and read the request
+ * body, limited to COMPLETE_MULTIPART_MAX_LEN bytes, into `data`.
+ */
+int RGWCompleteMultipart_ObjStore::get_params()
+{
+  upload_id = s->info.args.get("uploadId");
+
+  if (upload_id.empty()) {
+    op_ret = -ENOTSUP;
+    return op_ret;
+  }
+
+#define COMPLETE_MULTIPART_MAX_LEN (1024 * 1024) /* api defines max 10,000 parts, this should be enough */
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, COMPLETE_MULTIPART_MAX_LEN);
+
+  return (op_ret < 0) ? op_ret : 0;
+}
+
+/*
+ * Collect the arguments of a "list parts" request.
+ *
+ * uploadId            required; -ENOTSUP when empty
+ * part-number-marker  optional decimal number; -EINVAL when malformed
+ * max-parts           optional; parsed and bounded by
+ *                     rgw_max_listing_results via parse_value_and_bound()
+ *
+ * Returns op_ret, which also holds the result.
+ */
+int RGWListMultipart_ObjStore::get_params()
+{
+  upload_id = s->info.args.get("uploadId");
+
+  if (upload_id.empty()) {
+    /* return right away: otherwise the assignment to op_ret further down
+     * would overwrite this error */
+    op_ret = -ENOTSUP;
+    return op_ret;
+  }
+  string marker_str = s->info.args.get("part-number-marker");
+
+  if (!marker_str.empty()) {
+    string err;
+    marker = strict_strtol(marker_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      /* log the offending text, not the number the failed parse produced */
+      ldout(s->cct, 20) << "bad marker: " << marker_str << dendl;
+      op_ret = -EINVAL;
+      return op_ret;
+    }
+  }
+
+  string str = s->info.args.get("max-parts");
+  op_ret = parse_value_and_bound(str, max_parts, 0,
+                                 g_conf().get_val<uint64_t>("rgw_max_listing_results"),
+                                 max_parts);
+
+  return op_ret;
+}
+
+/*
+ * Collect the arguments of a "list multipart uploads" request: delimiter,
+ * prefix, max-uploads (bounded by rgw_max_listing_results, defaulting to
+ * default_max) and the key / upload-id markers. The marker is only
+ * initialised when key-marker is non-empty.
+ */
+int RGWListBucketMultiparts_ObjStore::get_params()
+{
+  delimiter = s->info.args.get("delimiter");
+  prefix = s->info.args.get("prefix");
+
+  string max_uploads_str = s->info.args.get("max-uploads");
+  op_ret = parse_value_and_bound(max_uploads_str, max_uploads, 0,
+                                 g_conf().get_val<uint64_t>("rgw_max_listing_results"),
+                                 default_max);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  string key_marker = s->info.args.get("key-marker");
+  string upload_id_marker = s->info.args.get("upload-id-marker");
+  if (!key_marker.empty()) {
+    marker.init(key_marker, upload_id_marker);
+  }
+
+  return 0;
+}
+
+/*
+ * Require a bucket name (-EINVAL otherwise), remember the request's bucket
+ * and read the request body (at most rgw_max_put_param_size bytes, chunked
+ * input not allowed) into `data`.
+ */
+int RGWDeleteMultiObj_ObjStore::get_params()
+{
+  if (s->bucket_name.empty()) {
+    op_ret = -EINVAL;
+    return op_ret;
+  }
+
+  // everything is probably fine, set the bucket
+  bucket = s->bucket;
+
+  const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, body_limit, false);
+
+  return op_ret;
+}
+
+
+/*
+ * Send the response. If the flusher has not started yet, the status line
+ * and headers for http_ret are sent first; then the flusher is flushed.
+ */
+void RGWRESTOp::send_response()
+{
+  const bool headers_pending = !flusher.did_start();
+  if (headers_pending) {
+    set_req_state_err(s, http_ret);
+    dump_errno(s);
+    end_header(s, this);
+  }
+
+  flusher.flush();
+}
+
+/* Delegate the permission decision to check_caps() on the user's caps. */
+int RGWRESTOp::verify_permission()
+{
+  auto& user_caps = s->user->caps;
+  return check_caps(user_caps);
+}
+
+/*
+ * Create the operation object for the request's HTTP method through the
+ * matching op_*() factory and initialise it. Returns NULL for a method that
+ * is not handled here, or when the factory returns no operation.
+ */
+RGWOp* RGWHandler_REST::get_op(RGWRados* store)
+{
+  RGWOp *op = nullptr;
+
+  switch (s->op) {
+  case OP_GET:     op = op_get();     break;
+  case OP_PUT:     op = op_put();     break;
+  case OP_DELETE:  op = op_delete();  break;
+  case OP_HEAD:    op = op_head();    break;
+  case OP_POST:    op = op_post();    break;
+  case OP_COPY:    op = op_copy();    break;
+  case OP_OPTIONS: op = op_options(); break;
+  default:
+    return NULL;
+  }
+
+  if (op) {
+    op->init(store, s, this);
+  }
+  return op;
+} /* get_op */
+
+/* Dispose of an operation object; it is destroyed with delete. */
+void RGWHandler_REST::put_op(RGWOp* op)
+{
+  RGWOp * const finished = op;
+  delete finished;
+} /* put_op */
+
+/*
+ * Choose the response format and install a matching formatter.
+ *
+ * Starts from default_type. When `configurable`, the "format" request
+ * argument (xml / json / html) overrides it; failing that, the Accept header
+ * is consulted: its text up to the first ';' (at most 63 characters) is
+ * compared with text/xml, application/xml, application/json and text/html.
+ *
+ * Returns the result of reallocate_formatter().
+ */
+int RGWHandler_REST::allocate_formatter(struct req_state *s,
+                                        int default_type,
+                                        bool configurable)
+{
+  s->format = -1; // invalid on purpose, so that reallocation always happens
+  auto type = default_type;
+
+  if (!configurable) {
+    return RGWHandler_REST::reallocate_formatter(s, type);
+  }
+
+  string format_str = s->info.args.get("format");
+  if (format_str.compare("xml") == 0) {
+    type = RGW_FORMAT_XML;
+  } else if (format_str.compare("json") == 0) {
+    type = RGW_FORMAT_JSON;
+  } else if (format_str.compare("html") == 0) {
+    type = RGW_FORMAT_HTML;
+  } else {
+    const char *accept = s->info.env->get("HTTP_ACCEPT");
+    if (accept) {
+      char format_buf[64];
+      unsigned int i = 0;
+      while (i < sizeof(format_buf) - 1 && accept[i] && accept[i] != ';') {
+        format_buf[i] = accept[i];
+        ++i;
+      }
+      format_buf[i] = 0;
+
+      const bool wants_xml = (strcmp(format_buf, "text/xml") == 0) ||
+                             (strcmp(format_buf, "application/xml") == 0);
+      if (wants_xml) {
+        type = RGW_FORMAT_XML;
+      } else if (strcmp(format_buf, "application/json") == 0) {
+        type = RGW_FORMAT_JSON;
+      } else if (strcmp(format_buf, "text/html") == 0) {
+        type = RGW_FORMAT_HTML;
+      }
+    }
+  }
+
+  return RGWHandler_REST::reallocate_formatter(s, type);
+}
+
+/*
+ * Make s->formatter a formatter of the given type.
+ *
+ * If the request already uses that type, the existing formatter is only
+ * reset. Otherwise it is deleted and a new one is created. Plain and XML
+ * formatters receive a flag that is set for bulk-delete, multipart-manifest
+ * delete and Swift extract-archive requests.
+ *
+ * Returns 0, or -EINVAL for an unknown type (s->formatter is then null).
+ */
+int RGWHandler_REST::reallocate_formatter(struct req_state *s, int type)
+{
+  if (s->format == type) {
+    // do nothing, just reset
+    ceph_assert(s->formatter);
+    s->formatter->reset();
+    return 0;
+  }
+
+  delete s->formatter;
+  s->formatter = nullptr;
+  s->format = type;
+
+  const string& mm = s->info.args.get("multipart-manifest");
+  const bool multipart_delete = (mm.compare("delete") == 0);
+  const bool swift_bulkupload = s->prot_flags & RGW_REST_SWIFT &&
+                                s->info.args.exists("extract-archive");
+
+  /* evaluated lazily, only by the formatter types that need it */
+  const auto bulk_style = [&]() -> bool {
+    return s->info.args.exists("bulk-delete") ||
+           multipart_delete || swift_bulkupload;
+  };
+
+  switch (s->format) {
+  case RGW_FORMAT_PLAIN:
+    {
+      const bool use_kv_syntax = bulk_style();
+      s->formatter = new RGWFormatter_Plain(use_kv_syntax);
+      break;
+    }
+  case RGW_FORMAT_XML:
+    {
+      const bool lowercase_underscore = bulk_style();
+      s->formatter = new XMLFormatter(false, lowercase_underscore);
+      break;
+    }
+  case RGW_FORMAT_JSON:
+    s->formatter = new JSONFormatter(false);
+    break;
+  case RGW_FORMAT_HTML:
+    s->formatter = new HTMLFormatter(s->prot_flags & RGW_REST_WEBSITE);
+    break;
+  default:
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+// Enforce the mandatory part of Amazon's bucket naming rules (the
+// requirements, not the recommendations).
+//
+// Returns 0 for an empty name (the request does not name a bucket) and for
+// any name of 3..MAX_BUCKET_NAME_LEN bytes that contains neither '/' nor the
+// byte 0xff. Returns -ERR_INVALID_BUCKET_NAME otherwise.
+int RGWHandler_REST::validate_bucket_name(const string& bucket)
+{
+  const int len = bucket.size();
+
+  if (len == 0) {
+    // This request doesn't specify a bucket at all
+    return 0;
+  }
+  if (len < 3 || len > MAX_BUCKET_NAME_LEN) {
+    // Name too short or too long
+    return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  for (const char ch : bucket) {
+    const unsigned char byte = static_cast<unsigned char>(ch);
+    if (byte == 0xff || byte == '/') {
+      return -ERR_INVALID_BUCKET_NAME;
+    }
+  }
+
+  return 0;
+}
+
+// "The name for a key is a sequence of Unicode characters whose UTF-8 encoding
+// is at most 1024 bytes long."
+// However, we can still have control characters and other nasties in there.
+// Just as long as they're utf-8 nasties.
+// Returns 0 when the name is at most MAX_OBJ_NAME_LEN bytes long and
+// check_utf8() reports no problem; -ERR_INVALID_OBJECT_NAME otherwise.
+int RGWHandler_REST::validate_object_name(const string& object)
+{
+  const int len = object.size();
+  const bool too_long = (len > MAX_OBJ_NAME_LEN);
+
+  // Object names must be valid UTF-8; the length test short-circuits the
+  // UTF-8 scan for oversized names.
+  if (too_long || check_utf8(object.c_str(), len)) {
+    return -ERR_INVALID_OBJECT_NAME;
+  }
+  return 0;
+}
+
+// Translate an HTTP method name into the matching http_op value.
+// Matching is exact and case-sensitive; a null pointer or an unrecognized
+// name yields OP_UNKNOWN.
+static http_op op_from_method(const char *method)
+{
+  static const struct {
+    const char *name;
+    http_op op;
+  } known_methods[] = {
+    { "GET",     OP_GET },
+    { "PUT",     OP_PUT },
+    { "DELETE",  OP_DELETE },
+    { "HEAD",    OP_HEAD },
+    { "POST",    OP_POST },
+    { "COPY",    OP_COPY },
+    { "OPTIONS", OP_OPTIONS },
+  };
+
+  if (method) {
+    for (const auto& entry : known_methods) {
+      if (strcmp(method, entry.name) == 0) {
+        return entry.op;
+      }
+    }
+  }
+
+  return OP_UNKNOWN;
+}
+
+// Prepare permission state for an operation.
+//
+// Every op type except RGW_OP_CREATE_BUCKET is delegated to
+// do_init_permissions(). For bucket creation the user's IAM policies are
+// loaded from the user attributes (best effort: read failures are skipped and
+// exceptions are only logged), the IAM environment is built, and 0 is
+// returned.
+int RGWHandler_REST::init_permissions(RGWOp* op)
+{
+  if (op->get_type() != RGW_OP_CREATE_BUCKET) {
+    return do_init_permissions();
+  }
+
+  // We don't need user policies in case of STS token returned by AssumeRole,
+  // hence the check for user type
+  const bool want_user_policies =
+    ! s->user->user_id.empty() &&
+    s->auth.identity->get_identity_type() != TYPE_ROLE;
+
+  if (want_user_policies) {
+    try {
+      map<string, bufferlist> uattrs;
+      auto ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, uattrs);
+      if (! ret) {
+        if (s->iam_user_policies.empty()) {
+          s->iam_user_policies =
+            get_iam_user_policy_from_attr(s->cct, store, uattrs,
+                                          s->user->user_id.tenant);
+        } else {
+          // This scenario can happen when a STS token has a policy, then we
+          // need to append other user policies to the existing ones
+          // (e.g. token returned by GetSessionToken).
+          auto user_policies =
+            get_iam_user_policy_from_attr(s->cct, store, uattrs,
+                                          s->user->user_id.tenant);
+          s->iam_user_policies.insert(s->iam_user_policies.end(),
+                                      user_policies.begin(),
+                                      user_policies.end());
+        }
+      }
+    } catch (const std::exception& e) {
+      lderr(s->cct) << "Error reading IAM User Policy: " << e.what() << dendl;
+    }
+  }
+
+  rgw_build_iam_environment(store, s);
+  return 0;
+}
+
+// Decide whether only bucket-level permissions are needed for this request
+// and forward to do_read_permissions(op_obj, only_bucket).
+//
+// Returns 0 without reading anything for a create-bucket op that arrives via
+// PUT/POST/COPY (and is neither a multi-object delete nor an object update),
+// -EINVAL for an HTTP op not handled here, and otherwise the result of
+// do_read_permissions().
+int RGWHandler_REST::read_permissions(RGWOp* op_obj)
+{
+  bool only_bucket = false;
+
+  switch (s->op) {
+  case OP_HEAD:
+  case OP_GET:
+    only_bucket = false;
+    break;
+
+  case OP_PUT:
+  case OP_POST:
+  case OP_COPY:
+    if (s->info.args.exists("delete")) {
+      /* a 'multi-object delete' request */
+      only_bucket = true;
+    } else if (is_obj_update_op()) {
+      only_bucket = false;
+    } else if (op_obj->get_type() == RGW_OP_CREATE_BUCKET) {
+      /* a 'create bucket' request: nothing to read yet */
+      return 0;
+    } else {
+      only_bucket = true;
+    }
+    break;
+
+  case OP_DELETE:
+    /* deleting object tagging needs the object's permissions as well */
+    only_bucket = !s->info.args.exists("tagging");
+    break;
+
+  case OP_OPTIONS:
+    only_bucket = true;
+    break;
+
+  default:
+    return -EINVAL;
+  }
+
+  return do_read_permissions(op_obj, only_bucket);
+}
+
+// Register `mgr` as the manager for the entry point "/<resource>" and take
+// ownership of it (it is deleted when replaced or in ~RGWRESTMgr()).
+//
+// If a manager is already registered for that path it is deleted and
+// replaced. Registering the very same pointer again is a no-op: previously
+// the pointer was deleted and then stored again, leaving a dangling entry.
+// The path is indexed in resources_by_size only once, so repeated
+// registrations no longer pile up duplicate lookup entries.
+//
+// Default do-nothing managers are also created for every intermediate path
+// prefix that has no manager yet.
+void RGWRESTMgr::register_resource(string resource, RGWRESTMgr *mgr)
+{
+  string r = "/";
+  r.append(resource);
+
+  /* do we have a resource manager registered for this entry point? */
+  map<string, RGWRESTMgr *>::iterator iter = resource_mgrs.find(r);
+  if (iter != resource_mgrs.end()) {
+    /* the path is already indexed in resources_by_size; just swap the
+     * manager, unless the caller handed us the one we already own */
+    if (iter->second != mgr) {
+      delete iter->second;
+      iter->second = mgr;
+    }
+  } else {
+    resource_mgrs[r] = mgr;
+    resources_by_size.insert(pair<size_t, string>(r.size(), r));
+  }
+
+  /* now build default resource managers for the path (instead of nested entry points)
+   * e.g., if the entry point is /auth/v1.0/ then we'd want to create a default
+   * manager for /auth/
+   */
+
+  size_t pos = r.find('/', 1);
+
+  /* stop at the end of the string or at a trailing '/' */
+  while (pos != r.size() - 1 && pos != string::npos) {
+    string s = r.substr(0, pos);
+
+    iter = resource_mgrs.find(s);
+    if (iter == resource_mgrs.end()) { /* only register it if one does not exist */
+      resource_mgrs[s] = new RGWRESTMgr; /* a default do-nothing manager */
+      resources_by_size.insert(pair<size_t, string>(s.size(), s));
+    }
+
+    pos = r.find('/', pos + 1);
+  }
+}
+
+// Install `mgr` as the fallback manager used when no registered resource
+// matches, taking ownership of it. The previous default manager, if any, is
+// deleted. Re-registering the current default is a no-op: previously the
+// pointer was deleted and then kept, leaving default_mgr dangling.
+void RGWRESTMgr::register_default_mgr(RGWRESTMgr *mgr)
+{
+  if (mgr == default_mgr) {
+    return;
+  }
+  delete default_mgr;
+  default_mgr = mgr;
+}
+
+// Resolve the manager responsible for `uri`.
+//
+// Registered resources are tried from the longest path to the shortest. A
+// resource matches when it is a prefix of `uri` that ends exactly at the end
+// of `uri` or at a '/'. The lookup then continues in the matching manager
+// with the remaining suffix. Without a match the default manager (if any) is
+// consulted, otherwise this manager is returned.
+//
+// *out_uri is set to `uri` here and may be overwritten by nested lookups.
+RGWRESTMgr* RGWRESTMgr::get_resource_mgr(struct req_state* const s,
+                                         const std::string& uri,
+                                         std::string* const out_uri)
+{
+  *out_uri = uri;
+
+  for (auto candidate = resources_by_size.rbegin();
+       candidate != resources_by_size.rend();
+       ++candidate) {
+    const size_t prefix_len = candidate->first;
+    string& resource = candidate->second;
+
+    if (uri.compare(0, prefix_len, resource) != 0) {
+      continue;
+    }
+
+    /* the prefix must end on a path component boundary */
+    const bool at_boundary =
+      (uri.size() == prefix_len) || (uri[prefix_len] == '/');
+    if (!at_boundary) {
+      continue;
+    }
+
+    std::string suffix = uri.substr(prefix_len);
+    return resource_mgrs[resource]->get_resource_mgr(s, suffix, out_uri);
+  }
+
+  return default_mgr
+    ? default_mgr->get_resource_mgr_as_default(s, uri, out_uri)
+    : this;
+}
+
+// Split `s_headers` with get_str_vec() and add each entry, converted to
+// upper case, to the x_headers set.
+void RGWREST::register_x_headers(const string& s_headers)
+{
+  std::vector<std::string> names = get_str_vec(s_headers);
+  for (std::string& name : names) {
+    boost::algorithm::to_upper(name); // XXX
+    x_headers.insert(name);
+  }
+}
+
+// Destroys every manager registered through register_resource() as well as
+// the default manager; this object owns all of them.
+RGWRESTMgr::~RGWRESTMgr()
+{
+  for (auto& entry : resource_mgrs) {
+    delete entry.second;
+  }
+  delete default_mgr;
+}
+
+// Parse a Content-Length header value.
+//
+// An empty string counts as 0. Otherwise the value is parsed as a base-10
+// integer with strict_strtoll(); if that reports an error, -1 is returned.
+// `content_length` must not be null.
+int64_t parse_content_length(const char *content_length)
+{
+  if (*content_length == '\0') {
+    return 0;
+  }
+
+  string err;
+  const int64_t parsed = strict_strtoll(content_length, 10, &err);
+  return err.empty() ? parsed : -1;
+}
+
+// Normalize the raw request information in `s` before a handler is chosen.
+//
+// In order, this function:
+//  - saves the untouched request URI for AWS4 signature checks;
+//  - strips the port (or IPv6 brackets) from the Host header and works out
+//    whether the host addresses a bucket through a configured S3 or
+//    s3website domain, optionally via CNAME resolution; if so the bucket
+//    name is prepended to the request URI;
+//  - URL-decodes the request URI and rejects embedded NUL bytes;
+//  - determines the content length from CONTENT_LENGTH and/or
+//    HTTP_CONTENT_LENGTH;
+//  - copies the configured generic attributes from the environment, records
+//    "Expect: 100-continue", maps the HTTP method to s->op and initializes
+//    the request meta info.
+//
+// Returns 0 on success, -ERR_ZERO_IN_URL for a NUL inside the decoded URI,
+// and -EINVAL for an unparsable or negative content length.
+int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio)
+{
+ req_info& info = s->info;
+
+ /* save the request uri used to hash on the client side. request_uri may suffer
+ modifications as part of the bucket encoding in the subdomain calling format.
+ request_uri_aws4 will be used under aws4 auth */
+ s->info.request_uri_aws4 = s->info.request_uri;
+
+ s->cio = cio;
+
+ // We need to know if this RGW instance is running the s3website API with a
+ // higher priority than regular S3 API, or possibly in place of the regular
+ // S3 API.
+ // Map the listing of rgw_enable_apis in REVERSE order, so that items near
+ // the front of the list have a higher number assigned (and -1 for items not in the list).
+ list<string> apis;
+ get_str_list(g_conf()->rgw_enable_apis, apis);
+ int api_priority_s3 = -1;
+ int api_priority_s3website = -1;
+ auto api_s3website_priority_rawpos = std::find(apis.begin(), apis.end(), "s3website");
+ auto api_s3_priority_rawpos = std::find(apis.begin(), apis.end(), "s3");
+ if (api_s3_priority_rawpos != apis.end()) {
+ api_priority_s3 = apis.size() - std::distance(apis.begin(), api_s3_priority_rawpos);
+ }
+ if (api_s3website_priority_rawpos != apis.end()) {
+ api_priority_s3website = apis.size() - std::distance(apis.begin(), api_s3website_priority_rawpos);
+ }
+ ldout(s->cct, 10) << "rgw api priority: s3=" << api_priority_s3 << " s3website=" << api_priority_s3website << dendl;
+ bool s3website_enabled = api_priority_s3website >= 0;
+
+ if (info.host.size()) {
+ // pos is signed so that string::npos from find() converts to a negative
+ // value and fails the ">= 0" / ">= 1" checks below.
+ ssize_t pos;
+ if (info.host.find('[') == 0) {
+ // Bracketed IPv6 literal: keep only what is between '[' and ']'.
+ pos = info.host.find(']');
+ if (pos >=1) {
+ info.host = info.host.substr(1, pos-1);
+ }
+ } else {
+ // Hostname or IPv4 address: drop an optional ":port" suffix.
+ pos = info.host.find(':');
+ if (pos >= 0) {
+ info.host = info.host.substr(0, pos);
+ }
+ }
+ ldout(s->cct, 10) << "host=" << info.host << dendl;
+ string domain;
+ string subdomain;
+ bool in_hosted_domain_s3website = false;
+ bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain, &subdomain, hostnames_set);
+
+ string s3website_domain;
+ string s3website_subdomain;
+
+ if (s3website_enabled) {
+ in_hosted_domain_s3website = rgw_find_host_in_domains(info.host, &s3website_domain, &s3website_subdomain, hostnames_s3website_set);
+ if (in_hosted_domain_s3website) {
+ in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
+ domain = s3website_domain;
+ subdomain = s3website_subdomain;
+ }
+ }
+
+ ldout(s->cct, 20)
+ << "subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << dendl;
+
+ // The host matched no configured domain: optionally resolve it as a CNAME
+ // and repeat the domain lookup with the resolved name.
+ if (g_conf()->rgw_resolve_cname
+ && !in_hosted_domain
+ && !in_hosted_domain_s3website) {
+ string cname;
+ bool found;
+ int r = rgw_resolver->resolve_cname(info.host, cname, &found);
+ if (r < 0) {
+ ldout(s->cct, 0)
+ << "WARNING: rgw_resolver->resolve_cname() returned r=" << r
+ << dendl;
+ }
+
+ if (found) {
+ ldout(s->cct, 5) << "resolved host cname " << info.host << " -> "
+ << cname << dendl;
+ in_hosted_domain =
+ rgw_find_host_in_domains(cname, &domain, &subdomain, hostnames_set);
+
+ if (s3website_enabled
+ && !in_hosted_domain_s3website) {
+ in_hosted_domain_s3website =
+ rgw_find_host_in_domains(cname, &s3website_domain,
+ &s3website_subdomain,
+ hostnames_s3website_set);
+ if (in_hosted_domain_s3website) {
+ in_hosted_domain = true; // TODO: should hostnames be a
+ // strict superset of hostnames_s3website?
+ domain = s3website_domain;
+ subdomain = s3website_subdomain;
+ }
+ }
+
+ ldout(s->cct, 20)
+ << "subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << dendl;
+ }
+ }
+
+ // Handle A/CNAME records that point to the RGW storage, but do not match
+ // the CNAME test above, per issue http://tracker.ceph.com/issues/15975
+ // If BOTH domain & subdomain variables are empty, then none of the above
+ // cases matched anything, and we should fall back to using the Host header
+ // directly as the bucket name.
+ // As additional checks:
+ // - if the Host header is an IP, we're using path-style access without DNS
+ // - Also check that the Host header is a valid bucket name before using it.
+ // - Don't enable virtual hosting if no hostnames are configured
+ if (subdomain.empty()
+ && (domain.empty() || domain != info.host)
+ && !looks_like_ip_address(info.host.c_str())
+ && RGWHandler_REST::validate_bucket_name(info.host) == 0
+ && !(hostnames_set.empty() && hostnames_s3website_set.empty())) {
+ subdomain.append(info.host);
+ in_hosted_domain = 1;
+ }
+
+ // When s3website is listed ahead of s3 in rgw_enable_apis, treat the
+ // request as a website request regardless of the host name.
+ if (s3website_enabled && api_priority_s3website > api_priority_s3) {
+ in_hosted_domain_s3website = 1;
+ }
+
+ if (in_hosted_domain_s3website) {
+ s->prot_flags |= RGW_REST_WEBSITE;
+ }
+
+
+ // Virtual-hosted-style request: turn it into path style by prefixing the
+ // request URI with "/<bucket>".
+ if (in_hosted_domain && !subdomain.empty()) {
+ string encoded_bucket = "/";
+ encoded_bucket.append(subdomain);
+ if (s->info.request_uri[0] != '/')
+ encoded_bucket.append("/");
+ encoded_bucket.append(s->info.request_uri);
+ s->info.request_uri = encoded_bucket;
+ }
+
+ if (!domain.empty()) {
+ s->info.domain = domain;
+ }
+
+ ldout(s->cct, 20)
+ << "final domain/bucket"
+ << " subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << " s->info.domain=" << s->info.domain
+ << " s->info.request_uri=" << s->info.request_uri
+ << dendl;
+ }
+
+ // No domain derived from the Host header: fall back to the configured one.
+ if (s->info.domain.empty()) {
+ s->info.domain = s->cct->_conf->rgw_dns_name;
+ }
+
+ s->decoded_uri = url_decode(s->info.request_uri);
+ /* Validate for being free of the '\0' buried in the middle of the string. */
+ if (std::strlen(s->decoded_uri.c_str()) != s->decoded_uri.length()) {
+ return -ERR_ZERO_IN_URL;
+ }
+
+ /* FastCGI specification, section 6.3
+ * http://www.fastcgi.com/devkit/doc/fcgi-spec.html#S6.3
+ * ===
+ * The Authorizer application receives HTTP request information from the Web
+ * server on the FCGI_PARAMS stream, in the same format as a Responder. The
+ * Web server does not send CONTENT_LENGTH, PATH_INFO, PATH_TRANSLATED, and
+ * SCRIPT_NAME headers.
+ * ===
+ * Ergo if we are in Authorizer role, we MUST look at HTTP_CONTENT_LENGTH
+ * instead of CONTENT_LENGTH for the Content-Length.
+ *
+ * There is one slight wrinkle in this, and that's older versions of
+ * nginx/lighttpd/apache setting BOTH headers. As a result, we have to check
+ * both headers and can't always simply pick A or B.
+ */
+ const char* content_length = info.env->get("CONTENT_LENGTH");
+ const char* http_content_length = info.env->get("HTTP_CONTENT_LENGTH");
+ // "!a != !b" is true when exactly one of the two pointers is null.
+ if (!http_content_length != !content_length) {
+ /* Easy case: one or the other is missing */
+ s->length = (content_length ? content_length : http_content_length);
+ } else if (s->cct->_conf->rgw_content_length_compat &&
+ content_length && http_content_length) {
+ /* Hard case: Both are set, we have to disambiguate */
+ int64_t content_length_i, http_content_length_i;
+
+ content_length_i = parse_content_length(content_length);
+ http_content_length_i = parse_content_length(http_content_length);
+
+ // Now check them:
+ if (http_content_length_i < 0) {
+ // HTTP_CONTENT_LENGTH is invalid, ignore it
+ } else if (content_length_i < 0) {
+ // CONTENT_LENGTH is invalid, and HTTP_CONTENT_LENGTH is valid
+ // Swap entries
+ content_length = http_content_length;
+ } else {
+ // both CONTENT_LENGTH and HTTP_CONTENT_LENGTH are valid
+ // Let's pick the larger size
+ if (content_length_i < http_content_length_i) {
+ // prefer the larger value
+ content_length = http_content_length;
+ }
+ }
+ s->length = content_length;
+ // End of: else if (s->cct->_conf->rgw_content_length_compat &&
+ // content_length &&
+ // http_content_length)
+ } else {
+ /* no content length was defined */
+ // NOTE(review): this branch is also taken when both headers are present
+ // but rgw_content_length_compat is disabled.
+ s->length = NULL;
+ }
+
+ if (s->length) {
+ if (*s->length == '\0') {
+ s->content_length = 0;
+ } else {
+ string err;
+ s->content_length = strict_strtoll(s->length, 10, &err);
+ if (!err.empty()) {
+ ldout(s->cct, 10) << "bad content length, aborting" << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (s->content_length < 0) {
+ ldout(s->cct, 10) << "negative content length, aborting" << dendl;
+ return -EINVAL;
+ }
+
+ // Copy every environment variable listed in generic_attrs_map into the
+ // request's generic attributes under its mapped name.
+ map<string, string>::iterator giter;
+ for (giter = generic_attrs_map.begin(); giter != generic_attrs_map.end();
+ ++giter) {
+ const char *env = info.env->get(giter->first.c_str());
+ if (env) {
+ s->generic_attrs[giter->second] = env;
+ }
+ }
+
+ // s->expect_cont is only written when rgw_print_continue is enabled.
+ if (g_conf()->rgw_print_continue) {
+ const char *expect = info.env->get("HTTP_EXPECT");
+ s->expect_cont = (expect && !strcasecmp(expect, "100-continue"));
+ }
+ s->op = op_from_method(info.method);
+
+ info.init_meta_info(&s->has_bad_meta);
+
+ return 0;
+}
+
+// Preprocess the request, locate the resource manager for its URI and obtain
+// an initialized handler from that manager.
+//
+// On success the handler is returned and *init_error holds the (non-negative)
+// result of handler->init(). On failure nullptr is returned and *init_error
+// holds the negative error: the preprocess() or init() error, or
+// -ERR_METHOD_NOT_ALLOWED when no manager or handler is available. If `pmgr`
+// is non-null it receives the selected manager as soon as one is found.
+RGWHandler_REST* RGWREST::get_handler(
+  RGWRados * const store,
+  struct req_state* const s,
+  const rgw::auth::StrategyRegistry& auth_registry,
+  const std::string& frontend_prefix,
+  RGWRestfulIO* const rio,
+  RGWRESTMgr** const pmgr,
+  int* const init_error
+) {
+  if ((*init_error = preprocess(s, rio)) < 0) {
+    return nullptr;
+  }
+
+  RGWRESTMgr * const selected = mgr.get_manager(s, frontend_prefix,
+                                                s->decoded_uri,
+                                                &s->relative_uri);
+  if (selected == nullptr) {
+    *init_error = -ERR_METHOD_NOT_ALLOWED;
+    return nullptr;
+  }
+
+  if (pmgr != nullptr) {
+    *pmgr = selected;
+  }
+
+  RGWHandler_REST* const handler =
+    selected->get_handler(s, auth_registry, frontend_prefix);
+  if (handler == nullptr) {
+    *init_error = -ERR_METHOD_NOT_ALLOWED;
+    return nullptr;
+  }
+
+  if ((*init_error = handler->init(store, s, rio)) < 0) {
+    /* hand the half-initialized handler back to its manager */
+    selected->put_handler(handler);
+    return nullptr;
+  }
+
+  return handler;
+} /* get stream handler */
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
new file mode 100644
index 00000000..f755af31
--- /dev/null
+++ b/src/rgw/rgw_rest.h
@@ -0,0 +1,816 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_H
+#define CEPH_RGW_REST_H
+
+#define TIME_BUF_SIZE 128
+
+#include <boost/utility/string_ref.hpp>
+#include <boost/container/flat_set.hpp>
+#include "common/sstring.hh"
+#include "common/ceph_json.h"
+#include "include/ceph_assert.h" /* needed because of common/ceph_json.h */
+#include "rgw_op.h"
+#include "rgw_formats.h"
+#include "rgw_client_io.h"
+
+extern std::map<std::string, std::string> rgw_to_http_attrs;
+
+extern void rgw_rest_init(CephContext *cct, RGWRados *store, const RGWZoneGroup& zone_group);
+
+extern void rgw_flush_formatter_and_reset(struct req_state *s,
+ ceph::Formatter *formatter);
+
+extern void rgw_flush_formatter(struct req_state *s,
+ ceph::Formatter *formatter);
+
+std::tuple<int, bufferlist > rgw_rest_read_all_input(struct req_state *s,
+ const uint64_t max_len,
+ const bool allow_chunked=true);
+
+/* Return a view of `raw` suitable for use as a header value.
+ *
+ * std::string, and thus boost::string_ref, count embedded 0x00 bytes as part
+ * of their length. Bufferlists that store metadata values (x-amz-meta-*,
+ * X-Container-Meta-*, etags) may carry a terminating null byte at the last
+ * position; it is excluded from the returned view so that it is not sent to
+ * clients. Only a single trailing null byte is dropped.
+ *
+ * The view refers to the buffer returned by raw.c_str(). */
+static inline boost::string_ref rgw_sanitized_hdrval(ceph::buffer::list& raw)
+{
+  const char* const data = raw.c_str();
+  const size_t raw_len = raw.length();
+
+  const bool has_trailing_nul = raw_len != 0 && data[raw_len - 1] == '\0';
+  const size_t len = has_trailing_nul ? raw_len - 1 : raw_len;
+
+  return boost::string_ref(data, len);
+}
+
+// Read the whole request body (up to max_len bytes) and decode it as JSON
+// into `out`.
+//
+// Returns the negative error from rgw_rest_read_all_input() if reading
+// fails, -EINVAL if the body is empty, is not parsable JSON or cannot be
+// decoded into T, and 0 on success. When `empty` is non-null it is set to
+// true only for an empty body and to false otherwise.
+template <class T>
+int rgw_rest_get_json_input(CephContext *cct, req_state *s, T& out,
+                            uint64_t max_len, bool *empty)
+{
+  if (empty) {
+    *empty = false;
+  }
+
+  int rv = 0;
+  bufferlist data;
+  std::tie(rv, data) = rgw_rest_read_all_input(s, max_len);
+  if (rv < 0) {
+    return rv;
+  }
+
+  const bool body_is_empty = (data.length() == 0);
+  if (body_is_empty) {
+    if (empty) {
+      *empty = true;
+    }
+    return -EINVAL;
+  }
+
+  JSONParser parser;
+  const bool parsed = parser.parse(data.c_str(), data.length());
+  if (!parsed) {
+    return -EINVAL;
+  }
+
+  try {
+    decode_json_obj(out, &parser);
+  } catch (JSONDecoder::err&) {
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+// Like rgw_rest_get_json_input(), but also hands the raw request body back
+// to the caller.
+//
+// Returns a (status, body) tuple. The status is the negative error from
+// rgw_rest_read_all_input() if reading fails, -EINVAL if the body is empty,
+// is not parsable JSON or cannot be decoded into T, and 0 on success. The
+// body is whatever was read, regardless of the status.
+template <class T>
+std::tuple<int, bufferlist > rgw_rest_get_json_input_keep_data(CephContext *cct, req_state *s, T& out, uint64_t max_len)
+{
+  int rv = 0;
+  bufferlist data;
+  std::tie(rv, data) = rgw_rest_read_all_input(s, max_len);
+
+  // Work out the status first so that the body is moved out in one place.
+  const auto decode_body = [&]() -> int {
+    if (rv < 0) {
+      return rv;
+    }
+    if (data.length() == 0) {
+      return -EINVAL;
+    }
+
+    JSONParser parser;
+    if (!parser.parse(data.c_str(), data.length())) {
+      return -EINVAL;
+    }
+
+    try {
+      decode_json_obj(out, &parser);
+    } catch (JSONDecoder::err&) {
+      return -EINVAL;
+    }
+    return 0;
+  };
+
+  const int status = decode_body();
+  return std::make_tuple(status, std::move(data));
+}
+
+// Static helpers for reading typed arguments of a request.
+//
+// All getters share one shape: the request state, the argument name, a
+// default value, an output pointer and an optional `existed` flag. They are
+// only declared here; see the definitions for exact parsing and return-value
+// semantics.
+// NOTE(review): presumably `existed` reports whether the argument was present
+// and the default is used when it is absent; confirm against the definitions.
+class RESTArgs {
+public:
+  static int get_string(struct req_state *s, const string& name,
+                        const string& def_val, string *val,
+                        bool *existed = nullptr);
+  static int get_uint64(struct req_state *s, const string& name,
+                        uint64_t def_val, uint64_t *val,
+                        bool *existed = nullptr);
+  static int get_int64(struct req_state *s, const string& name,
+                       int64_t def_val, int64_t *val,
+                       bool *existed = nullptr);
+  static int get_uint32(struct req_state *s, const string& name,
+                        uint32_t def_val, uint32_t *val,
+                        bool *existed = nullptr);
+  static int get_int32(struct req_state *s, const string& name,
+                       int32_t def_val, int32_t *val,
+                       bool *existed = nullptr);
+  static int get_time(struct req_state *s, const string& name,
+                      const utime_t& def_val, utime_t *val,
+                      bool *existed = nullptr);
+  static int get_epoch(struct req_state *s, const string& name,
+                       uint64_t def_val, uint64_t *epoch,
+                       bool *existed = nullptr);
+  static int get_bool(struct req_state *s, const string& name, bool def_val,
+                      bool *val, bool *existed = nullptr);
+};
+
+// Formatter flusher bound to a request and the operation producing output.
+// It can be constructed already bound, or default-constructed and bound
+// later with init(). do_flush() and do_start() are defined elsewhere.
+class RGWRESTFlusher : public RGWFormatterFlusher {
+  struct req_state *s;
+  RGWOp *op;
+
+protected:
+  void do_flush() override;
+  void do_start(int ret) override;
+
+public:
+  RGWRESTFlusher(struct req_state *_s, RGWOp *_op)
+    : RGWFormatterFlusher(_s->formatter),
+      s(_s),
+      op(_op) {
+  }
+
+  // Unbound flusher; init() must supply the request state before use.
+  RGWRESTFlusher()
+    : RGWFormatterFlusher(nullptr),
+      s(nullptr),
+      op(nullptr) {
+  }
+
+  // Bind to a request and operation and adopt the request's formatter.
+  void init(struct req_state *_s, RGWOp *_op) {
+    s = _s;
+    op = _op;
+    set_formatter(s->formatter);
+  }
+};
+
+// RGWGetObj variant that tracks whether the response header has been sent.
+// The flag starts out false and is cleared again on every init().
+class RGWGetObj_ObjStore : public RGWGetObj
+{
+protected:
+  bool sent_header = false;
+
+public:
+  RGWGetObj_ObjStore() {}
+
+  void init(RGWRados *store, struct req_state *s, RGWHandler *h) override {
+    RGWGetObj::init(store, s, h);
+    sent_header = false;
+  }
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWGetObjTags that adds no members of its own.
+// The destructor is marked `override`, matching the other *_ObjStore classes,
+// so the compiler verifies that the base destructor is virtual. The stray
+// semicolons after the member bodies are removed.
+class RGWGetObjTags_ObjStore : public RGWGetObjTags {
+public:
+  RGWGetObjTags_ObjStore() {}
+  ~RGWGetObjTags_ObjStore() override {}
+};
+
+// Thin subclass of RGWPutObjTags that adds no members of its own.
+// The destructor is marked `override`, matching the other *_ObjStore classes,
+// so the compiler verifies that the base destructor is virtual. The stray
+// semicolons after the member bodies are removed.
+class RGWPutObjTags_ObjStore: public RGWPutObjTags {
+public:
+  RGWPutObjTags_ObjStore() {}
+  ~RGWPutObjTags_ObjStore() override {}
+};
+
+// Thin subclass of RGWListBuckets; adds no members or overrides here.
+class RGWListBuckets_ObjStore : public RGWListBuckets {
+public:
+  RGWListBuckets_ObjStore() = default;
+  ~RGWListBuckets_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetUsage; adds no members or overrides here.
+class RGWGetUsage_ObjStore : public RGWGetUsage {
+public:
+  RGWGetUsage_ObjStore() = default;
+  ~RGWGetUsage_ObjStore() override = default;
+};
+
+// Thin subclass of RGWListBucket; adds no members or overrides here.
+class RGWListBucket_ObjStore : public RGWListBucket {
+public:
+  RGWListBucket_ObjStore() = default;
+  ~RGWListBucket_ObjStore() override = default;
+};
+
+// Thin subclass of RGWStatAccount; adds no members or overrides here.
+class RGWStatAccount_ObjStore : public RGWStatAccount {
+public:
+  RGWStatAccount_ObjStore() = default;
+  ~RGWStatAccount_ObjStore() override = default;
+};
+
+// Thin subclass of RGWStatBucket; adds no members or overrides here.
+class RGWStatBucket_ObjStore : public RGWStatBucket {
+public:
+  RGWStatBucket_ObjStore() = default;
+  ~RGWStatBucket_ObjStore() override = default;
+};
+
+// Thin subclass of RGWCreateBucket; adds no members or overrides here.
+class RGWCreateBucket_ObjStore : public RGWCreateBucket {
+public:
+  RGWCreateBucket_ObjStore() = default;
+  ~RGWCreateBucket_ObjStore() override = default;
+};
+
+// Thin subclass of RGWDeleteBucket; adds no members or overrides here.
+class RGWDeleteBucket_ObjStore : public RGWDeleteBucket {
+public:
+  RGWDeleteBucket_ObjStore() = default;
+  ~RGWDeleteBucket_ObjStore() override = default;
+};
+
+// Subclass of RGWPutObj that overrides parameter verification, parameter
+// extraction and body reading; the overrides are defined elsewhere.
+class RGWPutObj_ObjStore : public RGWPutObj
+{
+public:
+  RGWPutObj_ObjStore() = default;
+  ~RGWPutObj_ObjStore() override = default;
+
+  int verify_params() override;
+  int get_params() override;
+  int get_data(bufferlist& bl) override;
+};
+
+// Subclass of RGWPostObj that declares helpers for reading a request body
+// made of boundary-separated form parts. Everything except the trivial
+// constructor and destructor is only declared here; the behavior lives in the
+// definitions.
+class RGWPostObj_ObjStore : public RGWPostObj
+{
+ // NOTE(review): presumably the multipart boundary string of the request
+ // body; it is not touched in this header, so confirm against the definitions.
+ std::string boundary;
+
+public:
+ // One header field of a form part: its value plus a map of named parameters.
+ struct post_part_field {
+ std::string val;
+ std::map<std::string, std::string> params;
+ };
+
+ // One form part: its name, its fields (keyed with the ltstr_nocase
+ // comparator) and its data.
+ struct post_form_part {
+ std::string name;
+ std::map<std::string, post_part_field, ltstr_nocase> fields;
+ ceph::bufferlist data;
+ };
+
+protected:
+ // Form parts keyed by name, ordered with the ltstr_nocase comparator.
+ using parts_collection_t = \
+ std::map<std::string, post_form_part, const ltstr_nocase>;
+
+ std::string err_msg;
+ ceph::bufferlist in_data;
+
+ // Low-level readers. Each takes an output bufferlist and a size limit and
+ // reports through `reached_boundary` and `done`.
+ // NOTE(review): exact meaning of the flags and return values is defined in
+ // the implementations, which are not visible here.
+ int read_with_boundary(ceph::bufferlist& bl,
+ uint64_t max,
+ bool check_eol,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_line(ceph::bufferlist& bl,
+ uint64_t max,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_data(ceph::bufferlist& bl,
+ uint64_t max,
+ bool& reached_boundary,
+ bool& done);
+
+ int read_form_part_header(struct post_form_part *part, bool& done);
+
+ int get_params() override;
+
+ static int parse_part_field(const std::string& line,
+ std::string& field_name, /* out */
+ post_part_field& field); /* out */
+
+ static void parse_boundary_params(const std::string& params_str,
+ std::string& first,
+ std::map<std::string, std::string>& params);
+
+ // Accessors over a parts collection, looking a part up by name.
+ static bool part_str(parts_collection_t& parts,
+ const std::string& name,
+ std::string *val);
+
+ static std::string get_part_str(parts_collection_t& parts,
+ const std::string& name,
+ const std::string& def_val = std::string());
+
+ static bool part_bl(parts_collection_t& parts,
+ const std::string& name,
+ ceph::bufferlist *pbl);
+
+public:
+ RGWPostObj_ObjStore() {}
+ ~RGWPostObj_ObjStore() override {}
+
+ int verify_params() override;
+};
+
+
+// Thin subclass of RGWPutMetadataAccount; adds no members or overrides here.
+class RGWPutMetadataAccount_ObjStore : public RGWPutMetadataAccount
+{
+public:
+  RGWPutMetadataAccount_ObjStore() = default;
+  ~RGWPutMetadataAccount_ObjStore() override = default;
+};
+
+// Thin subclass of RGWPutMetadataBucket; adds no members or overrides here.
+class RGWPutMetadataBucket_ObjStore : public RGWPutMetadataBucket
+{
+public:
+  RGWPutMetadataBucket_ObjStore() = default;
+  ~RGWPutMetadataBucket_ObjStore() override = default;
+};
+
+// Thin subclass of RGWPutMetadataObject; adds no members or overrides here.
+class RGWPutMetadataObject_ObjStore : public RGWPutMetadataObject
+{
+public:
+  RGWPutMetadataObject_ObjStore() = default;
+  ~RGWPutMetadataObject_ObjStore() override = default;
+};
+
+// Thin subclass of RGWDeleteObj; adds no members or overrides here.
+class RGWDeleteObj_ObjStore : public RGWDeleteObj {
+public:
+  RGWDeleteObj_ObjStore() = default;
+  ~RGWDeleteObj_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetCrossDomainPolicy; adds no members or overrides
+// here, only a defaulted constructor and destructor.
+class RGWGetCrossDomainPolicy_ObjStore : public RGWGetCrossDomainPolicy {
+public:
+ RGWGetCrossDomainPolicy_ObjStore() = default;
+ ~RGWGetCrossDomainPolicy_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetHealthCheck; adds no members or overrides here,
+// only a defaulted constructor and destructor.
+class RGWGetHealthCheck_ObjStore : public RGWGetHealthCheck {
+public:
+ RGWGetHealthCheck_ObjStore() = default;
+ ~RGWGetHealthCheck_ObjStore() override = default;
+};
+
+// Thin subclass of RGWCopyObj; adds no members or overrides here.
+class RGWCopyObj_ObjStore : public RGWCopyObj {
+public:
+  RGWCopyObj_ObjStore() = default;
+  ~RGWCopyObj_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetACLs; adds no members or overrides here.
+class RGWGetACLs_ObjStore : public RGWGetACLs {
+public:
+  RGWGetACLs_ObjStore() = default;
+  ~RGWGetACLs_ObjStore() override = default;
+};
+
+// Subclass of RGWPutACLs that overrides get_params() (defined elsewhere).
+class RGWPutACLs_ObjStore : public RGWPutACLs {
+public:
+  RGWPutACLs_ObjStore() = default;
+  ~RGWPutACLs_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWGetLC; adds no members or overrides here.
+class RGWGetLC_ObjStore : public RGWGetLC {
+public:
+  RGWGetLC_ObjStore() = default;
+  ~RGWGetLC_ObjStore() override = default;
+};
+
+// Subclass of RGWPutLC that overrides get_params() (defined elsewhere).
+class RGWPutLC_ObjStore : public RGWPutLC {
+public:
+  RGWPutLC_ObjStore() = default;
+  ~RGWPutLC_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWDeleteLC; adds no members or overrides here.
+class RGWDeleteLC_ObjStore : public RGWDeleteLC {
+public:
+  RGWDeleteLC_ObjStore() = default;
+  ~RGWDeleteLC_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetCORS; adds no members or overrides here.
+class RGWGetCORS_ObjStore : public RGWGetCORS {
+public:
+  RGWGetCORS_ObjStore() = default;
+  ~RGWGetCORS_ObjStore() override = default;
+};
+
+// Thin subclass of RGWPutCORS; adds no members or overrides here.
+class RGWPutCORS_ObjStore : public RGWPutCORS {
+public:
+  RGWPutCORS_ObjStore() = default;
+  ~RGWPutCORS_ObjStore() override = default;
+};
+
+// Thin subclass of RGWDeleteCORS; adds no members or overrides here.
+class RGWDeleteCORS_ObjStore : public RGWDeleteCORS {
+public:
+  RGWDeleteCORS_ObjStore() = default;
+  ~RGWDeleteCORS_ObjStore() override = default;
+};
+
+// Thin subclass of RGWOptionsCORS; adds no members or overrides here.
+class RGWOptionsCORS_ObjStore : public RGWOptionsCORS {
+public:
+  RGWOptionsCORS_ObjStore() = default;
+  ~RGWOptionsCORS_ObjStore() override = default;
+};
+
+// Thin subclass of RGWInitMultipart; adds no members or overrides here.
+class RGWInitMultipart_ObjStore : public RGWInitMultipart {
+public:
+  RGWInitMultipart_ObjStore() = default;
+  ~RGWInitMultipart_ObjStore() override = default;
+};
+
+// Subclass of RGWCompleteMultipart that overrides get_params() (defined
+// elsewhere).
+class RGWCompleteMultipart_ObjStore : public RGWCompleteMultipart {
+public:
+  RGWCompleteMultipart_ObjStore() = default;
+  ~RGWCompleteMultipart_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWAbortMultipart; adds no members or overrides here.
+class RGWAbortMultipart_ObjStore : public RGWAbortMultipart {
+public:
+  RGWAbortMultipart_ObjStore() = default;
+  ~RGWAbortMultipart_ObjStore() override = default;
+};
+
+// Subclass of RGWListMultipart that overrides get_params() (defined
+// elsewhere).
+class RGWListMultipart_ObjStore : public RGWListMultipart {
+public:
+  RGWListMultipart_ObjStore() = default;
+  ~RGWListMultipart_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Subclass of RGWListBucketMultiparts that overrides get_params() (defined
+// elsewhere).
+class RGWListBucketMultiparts_ObjStore : public RGWListBucketMultiparts {
+public:
+  RGWListBucketMultiparts_ObjStore() = default;
+  ~RGWListBucketMultiparts_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWBulkDelete; adds no members or overrides here.
+class RGWBulkDelete_ObjStore : public RGWBulkDelete {
+public:
+  RGWBulkDelete_ObjStore() = default;
+  ~RGWBulkDelete_ObjStore() override = default;
+};
+
+// Thin subclass of RGWBulkUploadOp; adds no members or overrides here.
+// The destructor is marked `override`, matching the other *_ObjStore classes,
+// so the compiler verifies that the base destructor is virtual.
+class RGWBulkUploadOp_ObjStore : public RGWBulkUploadOp {
+public:
+  RGWBulkUploadOp_ObjStore() = default;
+  ~RGWBulkUploadOp_ObjStore() override = default;
+};
+
+// Subclass of RGWDeleteMultiObj that overrides get_params() (defined
+// elsewhere).
+class RGWDeleteMultiObj_ObjStore : public RGWDeleteMultiObj {
+public:
+  RGWDeleteMultiObj_ObjStore() = default;
+  ~RGWDeleteMultiObj_ObjStore() override = default;
+
+  int get_params() override;
+};
+
+// Thin subclass of RGWInfo; adds no members or overrides here, only a
+// defaulted constructor and destructor.
+class RGWInfo_ObjStore : public RGWInfo {
+public:
+ RGWInfo_ObjStore() = default;
+ ~RGWInfo_ObjStore() override = default;
+};
+
+// Subclass of RGWPutBucketObjectLock that overrides get_params() (defined
+// elsewhere). The destructor is marked `override`, matching the other
+// *_ObjStore classes, so the compiler verifies that the base destructor is
+// virtual.
+class RGWPutBucketObjectLock_ObjStore : public RGWPutBucketObjectLock {
+public:
+  RGWPutBucketObjectLock_ObjStore() = default;
+  ~RGWPutBucketObjectLock_ObjStore() override = default;
+  int get_params() override;
+};
+
+// Thin subclass of RGWGetBucketObjectLock; adds no members or overrides
+// here, only a defaulted constructor and destructor.
+class RGWGetBucketObjectLock_ObjStore : public RGWGetBucketObjectLock {
+public:
+ RGWGetBucketObjectLock_ObjStore() = default;
+ ~RGWGetBucketObjectLock_ObjStore() override = default;
+};
+
+// Thin subclass of RGWPutObjRetention; adds no members or overrides here,
+// only a defaulted constructor and destructor.
+class RGWPutObjRetention_ObjStore : public RGWPutObjRetention {
+public:
+ RGWPutObjRetention_ObjStore() = default;
+ ~RGWPutObjRetention_ObjStore() override = default;
+};
+
+// Thin subclass of RGWGetObjRetention; adds no members or overrides here.
+// The destructor is marked `override`, matching the other *_ObjStore classes,
+// so the compiler verifies that the base destructor is virtual.
+class RGWGetObjRetention_ObjStore : public RGWGetObjRetention {
+public:
+  RGWGetObjRetention_ObjStore() = default;
+  ~RGWGetObjRetention_ObjStore() override = default;
+};
+
+// Subclass of RGWPutObjLegalHold that overrides get_params() (defined
+// elsewhere).
+class RGWPutObjLegalHold_ObjStore : public RGWPutObjLegalHold {
+public:
+ RGWPutObjLegalHold_ObjStore() = default;
+ ~RGWPutObjLegalHold_ObjStore() override = default;
+ int get_params() override;
+};
+
+// Thin subclass of RGWGetObjLegalHold; adds no members or overrides here.
+// The destructor is marked `override`, matching the other *_ObjStore classes,
+// so the compiler verifies that the base destructor is virtual.
+class RGWGetObjLegalHold_ObjStore : public RGWGetObjLegalHold {
+public:
+  RGWGetObjLegalHold_ObjStore() = default;
+  ~RGWGetObjLegalHold_ObjStore() override = default;
+};
+
+// Base class for REST operations that carry an HTTP result code and write
+// their output through an RGWRESTFlusher.
+class RGWRESTOp : public RGWOp {
+protected:
+ int http_ret;
+ RGWRESTFlusher flusher;
+public:
+ RGWRESTOp() : http_ret(0) {}
+ // Initializes the base op, then binds the flusher to this request and op.
+ void init(RGWRados *store, struct req_state *s,
+ RGWHandler *dialect_handler) override {
+ RGWOp::init(store, s, dialect_handler);
+ flusher.init(s, this);
+ }
+ void send_response() override;
+ // The default denies everything (-EPERM); subclasses are expected to
+ // override this with a real capability check.
+ virtual int check_caps(RGWUserCaps& caps)
+ { return -EPERM; } /* should be implemented by subclasses! */
+ int verify_permission() override;
+ // These ops report the admin dmclock client class.
+ dmc::client_id dmclock_client() override { return dmc::client_id::admin; }
+};
+
+// Base class of the REST request handlers.
+//
+// Subclasses override the op_*() factories for the HTTP methods they support.
+// The defaults return nullptr, i.e. "no operation for this method".
+// get_op()/put_op() create and dispose of operation objects. The static
+// helpers validate bucket and object names and manage the request's
+// formatter.
+class RGWHandler_REST : public RGWHandler {
+protected:
+  virtual bool is_obj_update_op() { return false; }
+
+  virtual RGWOp *op_get()     { return nullptr; }
+  virtual RGWOp *op_put()     { return nullptr; }
+  virtual RGWOp *op_delete()  { return nullptr; }
+  virtual RGWOp *op_head()    { return nullptr; }
+  virtual RGWOp *op_post()    { return nullptr; }
+  virtual RGWOp *op_copy()    { return nullptr; }
+  virtual RGWOp *op_options() { return nullptr; }
+
+public:
+  /* name length limits enforced by validate_bucket_name() and
+   * validate_object_name() */
+  static constexpr int MAX_BUCKET_NAME_LEN = 255;
+  static constexpr int MAX_OBJ_NAME_LEN = 1024;
+
+  RGWHandler_REST() {}
+  ~RGWHandler_REST() override {}
+
+  static int allocate_formatter(struct req_state *s, int default_formatter,
+                                bool configurable);
+  static int reallocate_formatter(struct req_state *s, int type);
+
+  static int validate_bucket_name(const string& bucket);
+  static int validate_object_name(const string& object);
+
+  int init_permissions(RGWOp* op) override;
+  int read_permissions(RGWOp* op) override;
+
+  virtual RGWOp* get_op(RGWRados* store);
+  virtual void put_op(RGWOp* op);
+};
+
+class RGWHandler_REST_SWIFT;
+class RGWHandler_SWIFT_Auth;
+class RGWHandler_REST_S3;
+
+namespace rgw {
+namespace auth {
+
+class StrategyRegistry;
+
+}
+}
+
+// A node in the tree of REST resource managers.
+//
+// Each manager owns the child managers registered under path prefixes
+// (resource_mgrs) plus an optional default manager; all of them are deleted
+// in the destructor. resources_by_size indexes the registered paths by
+// length so that lookups can try the longest prefix first.
+//
+// NOTE(review): the class owns raw pointers but does not disable copying;
+// copying an instance would lead to double deletion. Confirm that no caller
+// copies managers.
+class RGWRESTMgr {
+ bool should_log;
+
+protected:
+ std::map<std::string, RGWRESTMgr*> resource_mgrs;
+ std::multimap<size_t, std::string> resources_by_size;
+ RGWRESTMgr* default_mgr;
+
+ // Resolve the manager responsible for `uri`; *out_uri receives the URI as
+ // seen by the manager that ends the lookup.
+ virtual RGWRESTMgr* get_resource_mgr(struct req_state* s,
+ const std::string& uri,
+ std::string* out_uri);
+
+ // Called on a manager that is consulted as the default (fallback) manager.
+ // The base implementation returns this manager without touching the output
+ // URI.
+ virtual RGWRESTMgr* get_resource_mgr_as_default(struct req_state* const s,
+ const std::string& uri,
+ std::string* our_uri) {
+ return this;
+ }
+
+public:
+ RGWRESTMgr()
+ : should_log(false),
+ default_mgr(nullptr) {
+ }
+ virtual ~RGWRESTMgr();
+
+ // Both registration functions take ownership of `mgr`.
+ void register_resource(std::string resource, RGWRESTMgr* mgr);
+ void register_default_mgr(RGWRESTMgr* mgr);
+
+ // Entry point for lookups: resolves frontend_prefix + uri. Declared both
+ // virtual and final, so derived classes cannot override it.
+ virtual RGWRESTMgr* get_manager(struct req_state* const s,
+ /* Prefix to be concatenated with @uri
+ * during the lookup. */
+ const std::string& frontend_prefix,
+ const std::string& uri,
+ std::string* out_uri) final {
+ return get_resource_mgr(s, frontend_prefix + uri, out_uri);
+ }
+
+ // Create the handler for a request. The base manager has none and returns
+ // nullptr.
+ virtual RGWHandler_REST* get_handler(
+ struct req_state* const s,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string& frontend_prefix
+ ) {
+ return nullptr;
+ }
+
+ // Dispose of a handler; the base implementation deletes it.
+ virtual void put_handler(RGWHandler_REST* const handler) {
+ delete handler;
+ }
+
+ void set_logging(bool _should_log) {
+ should_log = _should_log;
+ }
+
+ bool get_logging() const {
+ return should_log;
+ }
+};
+
+class RGWLibIO;
+class RGWRestfulIO;
+
+/*
+ * Front object for the REST layer: owns the root RGWRESTMgr and a set of
+ * header names (x_headers). The register_* members forward to the root
+ * manager; get_handler(), preprocess() and register_x_headers() are
+ * defined elsewhere.
+ */
+class RGWREST {
+ using x_header = basic_sstring<char, uint16_t, 32>;
+ boost::container::flat_set<x_header> x_headers;
+ RGWRESTMgr mgr;
+
+ static int preprocess(struct req_state *s, rgw::io::BasicClient* rio);
+public:
+ RGWREST() {}
+ RGWHandler_REST *get_handler(RGWRados *store,
+ struct req_state *s,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string& frontend_prefix,
+ RGWRestfulIO *rio,
+ RGWRESTMgr **pmgr,
+ int *init_error);
+#if 0
+ RGWHandler *get_handler(RGWRados *store, struct req_state *s,
+ RGWLibIO *io, RGWRESTMgr **pmgr,
+ int *init_error);
+#endif
+
+ void put_handler(RGWHandler_REST *handler) {
+ mgr.put_handler(handler);
+ }
+
+ /* An empty resource string is ignored unless register_empty is set. */
+ void register_resource(string resource, RGWRESTMgr *m,
+ bool register_empty = false) {
+ if (!register_empty && resource.empty())
+ return;
+
+ mgr.register_resource(resource, m);
+ }
+
+ void register_default_mgr(RGWRESTMgr *m) {
+ mgr.register_default_mgr(m);
+ }
+
+ void register_x_headers(const std::string& headers);
+
+ /* True when at least one header name is present in x_headers. */
+ bool log_x_headers(void) {
+ return (x_headers.size() > 0);
+ }
+
+ /* True when the given header name is present in x_headers. */
+ bool log_x_header(const std::string& header) {
+ return (x_headers.find(header) != x_headers.end());
+ }
+};
+
+/* Negative sentinels; NO_CONTENT_LENGTH is the default value of
+ * end_header()'s proposed_content_length parameter. */
+static constexpr int64_t NO_CONTENT_LENGTH = -1;
+static constexpr int64_t CHUNKED_TRANSFER_ENCODING = -2;
+
+extern void dump_errno(int http_ret, string& out);
+extern void dump_errno(const struct rgw_err &err, string& out);
+extern void dump_errno(struct req_state *s);
+extern void dump_errno(struct req_state *s, int http_ret);
+extern void end_header(struct req_state *s,
+ RGWOp* op = nullptr,
+ const char *content_type = nullptr,
+ const int64_t proposed_content_length =
+ NO_CONTENT_LENGTH,
+ bool force_content_type = false,
+ bool force_no_error = false);
+extern void dump_start(struct req_state *s);
+extern void list_all_buckets_start(struct req_state *s);
+extern void dump_owner(struct req_state *s, const rgw_user& id, string& name,
+ const char *section = NULL);
+extern void dump_header(struct req_state* s,
+ const boost::string_ref& name,
+ const boost::string_ref& val);
+extern void dump_header(struct req_state* s,
+ const boost::string_ref& name,
+ ceph::buffer::list& bl);
+extern void dump_header(struct req_state* s,
+ const boost::string_ref& name,
+ long long val);
+extern void dump_header(struct req_state* s,
+ const boost::string_ref& name,
+ const utime_t& val);
+
+/*
+ * Emit a header whose name is name_prefix followed by name; the remaining
+ * arguments are forwarded unchanged to dump_header(). The joined name is
+ * formatted into a stack buffer sized for both pieces plus a terminating
+ * NUL.
+ * NOTE(review): the runtime-sized array is a compiler extension in C++.
+ */
+template <class... Args>
+static inline void dump_header_prefixed(struct req_state* s,
+ const boost::string_ref& name_prefix,
+ const boost::string_ref& name,
+ Args&&... args) {
+ char full_name_buf[name_prefix.size() + name.size() + 1];
+ /* "%.*s" takes an explicit length for each piece */
+ const auto len = snprintf(full_name_buf, sizeof(full_name_buf), "%.*s%.*s",
+ static_cast<int>(name_prefix.length()),
+ name_prefix.data(),
+ static_cast<int>(name.length()),
+ name.data());
+ boost::string_ref full_name(full_name_buf, len);
+ return dump_header(s, std::move(full_name), std::forward<Args>(args)...);
+}
+
+/*
+ * Emit a header whose name is prefix + infix + sufix; the remaining
+ * arguments are forwarded unchanged to dump_header(). The joined name is
+ * formatted into a stack buffer sized for the three pieces plus a
+ * terminating NUL.
+ * NOTE(review): the runtime-sized array is a compiler extension in C++.
+ */
+template <class... Args>
+static inline void dump_header_infixed(struct req_state* s,
+ const boost::string_ref& prefix,
+ const boost::string_ref& infix,
+ const boost::string_ref& sufix,
+ Args&&... args) {
+ char full_name_buf[prefix.size() + infix.size() + sufix.size() + 1];
+ const auto len = snprintf(full_name_buf, sizeof(full_name_buf), "%.*s%.*s%.*s",
+ static_cast<int>(prefix.length()),
+ prefix.data(),
+ static_cast<int>(infix.length()),
+ infix.data(),
+ static_cast<int>(sufix.length()),
+ sufix.data());
+ boost::string_ref full_name(full_name_buf, len);
+ return dump_header(s, std::move(full_name), std::forward<Args>(args)...);
+}
+
+/*
+ * Emit header `name` with `val` wrapped in double quotes. The quoted value
+ * is formatted into a stack buffer and passed to dump_header() as a
+ * string_ref.
+ * NOTE(review): the Args parameter pack is not used by this function.
+ */
+template <class... Args>
+static inline void dump_header_quoted(struct req_state* s,
+ const boost::string_ref& name,
+ const boost::string_ref& val) {
+ /* We need two extra bytes for quotes. */
+ char qvalbuf[val.size() + 2 + 1];
+ const auto len = snprintf(qvalbuf, sizeof(qvalbuf), "\"%.*s\"",
+ static_cast<int>(val.length()), val.data());
+ return dump_header(s, name, boost::string_ref(qvalbuf, len));
+}
+
+/* Forward to dump_header() only when both the name and the value have a
+ * length greater than zero; otherwise do nothing. */
+template <class ValueT>
+static inline void dump_header_if_nonempty(struct req_state* s,
+                                           const boost::string_ref& name,
+                                           const ValueT& value) {
+  if (!(name.length() > 0) || !(value.length() > 0)) {
+    return;
+  }
+  dump_header(s, name, value);
+}
+
+/*
+ * Return s->info.domain when it is set. Otherwise build a URI from the
+ * request environment: "https://" if SERVER_PORT_SECURE is present, else
+ * "http://", followed by SERVER_NAME when that key exists or HTTP_HOST
+ * otherwise.
+ */
+static inline std::string compute_domain_uri(const struct req_state *s) {
+  if (!s->info.domain.empty()) {
+    return s->info.domain;
+  }
+
+  RGWEnv const &env(*(s->info.env));
+  std::string uri;
+  if (env.get("SERVER_PORT_SECURE")) {
+    uri = "https://";
+  } else {
+    uri = "http://";
+  }
+
+  if (env.exists("SERVER_NAME")) {
+    uri.append(env.get("SERVER_NAME", "<SERVER_NAME>"));
+  } else {
+    uri.append(env.get("HTTP_HOST", "<HTTP_HOST>"));
+  }
+  return uri;
+}
+
+extern void dump_content_length(struct req_state *s, uint64_t len);
+extern int64_t parse_content_length(const char *content_length);
+extern void dump_etag(struct req_state *s,
+ const boost::string_ref& etag,
+ bool quoted = false);
+extern void dump_epoch_header(struct req_state *s, const char *name, real_time t);
+extern void dump_time_header(struct req_state *s, const char *name, real_time t);
+extern void dump_last_modified(struct req_state *s, real_time t);
+extern void abort_early(struct req_state* s, RGWOp* op, int err,
+ RGWHandler* handler);
+extern void dump_range(struct req_state* s, uint64_t ofs, uint64_t end,
+ uint64_t total_size);
+extern void dump_continue(struct req_state *s);
+extern void list_all_buckets_end(struct req_state *s);
+extern void dump_time(struct req_state *s, const char *name, real_time *t);
+extern std::string dump_time_to_str(const real_time& t);
+extern void dump_bucket_from_state(struct req_state *s);
+extern void dump_redirect(struct req_state *s, const string& redirect);
+extern bool is_valid_url(const char *url);
+extern void dump_access_control(struct req_state *s, const char *origin,
+ const char *meth,
+ const char *hdr, const char *exp_hdr,
+ uint32_t max_age);
+extern void dump_access_control(req_state *s, RGWOp *op);
+
+extern int dump_body(struct req_state* s, const char* buf, size_t len);
+extern int dump_body(struct req_state* s, /* const */ ceph::buffer::list& bl);
+extern int dump_body(struct req_state* s, const std::string& str);
+extern int recv_body(struct req_state* s, char* buf, size_t max);
+
+#endif /* CEPH_RGW_REST_H */
diff --git a/src/rgw/rgw_rest_admin.h b/src/rgw/rgw_rest_admin.h
new file mode 100644
index 00000000..d23dd9d3
--- /dev/null
+++ b/src/rgw/rgw_rest_admin.h
@@ -0,0 +1,15 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_ADMIN_H
+#define CEPH_RGW_REST_ADMIN_H
+
+
+/* RGWRESTMgr subclass that adds no members or overrides of its own. */
+class RGWRESTMgr_Admin : public RGWRESTMgr {
+public:
+ RGWRESTMgr_Admin() {}
+ ~RGWRESTMgr_Admin() override {}
+};
+
+
+#endif
diff --git a/src/rgw/rgw_rest_bucket.cc b/src/rgw/rgw_rest_bucket.cc
new file mode 100644
index 00000000..857d0c9d
--- /dev/null
+++ b/src/rgw/rgw_rest_bucket.cc
@@ -0,0 +1,350 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_op.h"
+#include "rgw_bucket.h"
+#include "rgw_rest_bucket.h"
+
+#include "include/str_list.h"
+
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/* REST op named "get_bucket_info"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_READ. */
+class RGWOp_Bucket_Info : public RGWRESTOp {
+
+public:
+ RGWOp_Bucket_Info() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_READ);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "get_bucket_info"; }
+};
+
+/*
+ * Read the "uid", "bucket" and "stats" request arguments, load them into
+ * an RGWBucketAdminOpState and store the result of
+ * RGWBucketAdminOp::info() in http_ret. "stats" defaults to false.
+ */
+void RGWOp_Bucket_Info::execute()
+{
+  std::string user_arg;
+  RESTArgs::get_string(s, "uid", user_arg, &user_arg);
+  rgw_user user(user_arg);
+
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  bool want_stats;
+  RESTArgs::get_bool(s, "stats", false, &want_stats);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_user_id(user);
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_fetch_stats(want_stats);
+
+  http_ret = RGWBucketAdminOp::info(store, op_state, flusher);
+}
+
+/* REST op named "get_policy"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_READ. */
+class RGWOp_Get_Policy : public RGWRESTOp {
+
+public:
+ RGWOp_Get_Policy() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_READ);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "get_policy"; }
+};
+
+/*
+ * Read the "bucket" and "object" request arguments and store the result
+ * of RGWBucketAdminOp::get_policy() in http_ret.
+ */
+void RGWOp_Get_Policy::execute()
+{
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  std::string object_arg;
+  RESTArgs::get_string(s, "object", object_arg, &object_arg);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_object(object_arg);
+
+  http_ret = RGWBucketAdminOp::get_policy(store, op_state, flusher);
+}
+
+/* REST op named "check_bucket_index"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Check_Bucket_Index : public RGWRESTOp {
+
+public:
+ RGWOp_Check_Bucket_Index() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "check_bucket_index"; }
+};
+
+/*
+ * Read the "bucket", "fix" and "check-objects" request arguments (both
+ * booleans default to false) and store the result of
+ * RGWBucketAdminOp::check_index() in http_ret.
+ */
+void RGWOp_Check_Bucket_Index::execute()
+{
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  bool want_fix;
+  RESTArgs::get_bool(s, "fix", false, &want_fix);
+
+  bool want_object_check;
+  RESTArgs::get_bool(s, "check-objects", false, &want_object_check);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_fix_index(want_fix);
+  op_state.set_check_objects(want_object_check);
+
+  http_ret = RGWBucketAdminOp::check_index(store, op_state, flusher);
+}
+
+/* REST op named "link_bucket"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Bucket_Link : public RGWRESTOp {
+
+public:
+ RGWOp_Bucket_Link() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "link_bucket"; }
+};
+
+/*
+ * Read the "uid", "bucket" and "bucket-id" request arguments and store
+ * the result of RGWBucketAdminOp::link() in http_ret.
+ */
+void RGWOp_Bucket_Link::execute()
+{
+  std::string user_arg;
+  RESTArgs::get_string(s, "uid", user_arg, &user_arg);
+
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  std::string bucket_id_arg;
+  RESTArgs::get_string(s, "bucket-id", bucket_id_arg, &bucket_id_arg);
+
+  rgw_user user(user_arg);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_user_id(user);
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_bucket_id(bucket_id_arg);
+
+  http_ret = RGWBucketAdminOp::link(store, op_state);
+}
+
+/* REST op named "unlink_bucket"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Bucket_Unlink : public RGWRESTOp {
+
+public:
+ RGWOp_Bucket_Unlink() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "unlink_bucket"; }
+};
+
+/*
+ * Read the "uid" and "bucket" request arguments and store the result of
+ * RGWBucketAdminOp::unlink() in http_ret.
+ */
+void RGWOp_Bucket_Unlink::execute()
+{
+  std::string user_arg;
+  RESTArgs::get_string(s, "uid", user_arg, &user_arg);
+  rgw_user user(user_arg);
+
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_user_id(user);
+  op_state.set_bucket_name(bucket_arg);
+
+  http_ret = RGWBucketAdminOp::unlink(store, op_state);
+}
+
+/* REST op named "remove_bucket"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Bucket_Remove : public RGWRESTOp {
+
+public:
+ RGWOp_Bucket_Remove() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "remove_bucket"; }
+};
+
+/*
+ * Read the "bucket" and "purge-objects" request arguments (the latter
+ * defaults to false) and store the result of
+ * RGWBucketAdminOp::remove_bucket() in http_ret.
+ */
+void RGWOp_Bucket_Remove::execute()
+{
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  bool purge_objects;
+  RESTArgs::get_bool(s, "purge-objects", false, &purge_objects);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_delete_children(purge_objects);
+
+  http_ret = RGWBucketAdminOp::remove_bucket(store, op_state);
+}
+
+/* REST op named "set_bucket_quota"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Set_Bucket_Quota : public RGWRESTOp {
+
+public:
+ RGWOp_Set_Bucket_Quota() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "set_bucket_quota"; }
+};
+
+#define QUOTA_INPUT_MAX_LEN 1024
+
+/*
+ * Set a bucket's quota. Both the "uid" and "bucket" arguments must be
+ * present, otherwise http_ret is set to -EINVAL. The quota is read from a
+ * JSON request body when one is expected (positive content length, or a
+ * "chunked" transfer encoding); otherwise, or when that body turns out to
+ * be empty, it is built from the "max-objects", "max-size-kb" and
+ * "enabled" arguments, each defaulting to the bucket's current quota.
+ */
+void RGWOp_Set_Bucket_Quota::execute()
+{
+ bool uid_arg_existed = false;
+ std::string uid_str;
+ RESTArgs::get_string(s, "uid", uid_str, &uid_str, &uid_arg_existed);
+ if (! uid_arg_existed) {
+ http_ret = -EINVAL;
+ return;
+ }
+ rgw_user uid(uid_str);
+ bool bucket_arg_existed = false;
+ std::string bucket;
+ RESTArgs::get_string(s, "bucket", bucket, &bucket, &bucket_arg_existed);
+ if (! bucket_arg_existed) {
+ http_ret = -EINVAL;
+ return;
+ }
+
+ bool use_http_params;
+
+ /* decide whether a request body is expected */
+ if (s->content_length > 0) {
+ use_http_params = false;
+ } else {
+ const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+ use_http_params = (!encoding || strcmp(encoding, "chunked") != 0);
+ }
+ RGWQuotaInfo quota;
+ if (!use_http_params) {
+ bool empty;
+ http_ret = rgw_rest_get_json_input(store->ctx(), s, quota, QUOTA_INPUT_MAX_LEN, &empty);
+ if (http_ret < 0) {
+ /* a failure on non-empty input is returned as-is */
+ if (!empty)
+ return;
+ /* was probably chunked input, but no content provided, configure via http params */
+ use_http_params = true;
+ }
+ }
+ if (use_http_params) {
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ http_ret = store->get_bucket_info(obj_ctx, uid.tenant, bucket, bucket_info, NULL, &attrs);
+ if (http_ret < 0) {
+ return;
+ }
+ /* the existing quota supplies the default for every argument */
+ RGWQuotaInfo *old_quota = &bucket_info.quota;
+ /* NOTE(review): the max-size-kb default goes through rgw_rounded_kb()
+ * and is multiplied back by 1024 below, so an omitted argument may not
+ * reproduce the old max_size exactly - confirm against that helper. */
+ int64_t old_max_size_kb = rgw_rounded_kb(old_quota->max_size);
+ int64_t max_size_kb;
+ RESTArgs::get_int64(s, "max-objects", old_quota->max_objects, &quota.max_objects);
+ RESTArgs::get_int64(s, "max-size-kb", old_max_size_kb, &max_size_kb);
+ quota.max_size = max_size_kb * 1024;
+ RESTArgs::get_bool(s, "enabled", old_quota->enabled, &quota.enabled);
+ }
+
+ RGWBucketAdminOpState op_state;
+ op_state.set_user_id(uid);
+ op_state.set_bucket_name(bucket);
+ op_state.set_quota(quota);
+
+ http_ret = RGWBucketAdminOp::set_quota(store, op_state);
+}
+
+/* REST op named "remove_object"; check_caps() tests the "buckets"
+ * capability with RGW_CAP_WRITE. */
+class RGWOp_Object_Remove: public RGWRESTOp {
+
+public:
+ RGWOp_Object_Remove() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("buckets", RGW_CAP_WRITE);
+ }
+
+ void execute() override;
+
+ const char* name() const override { return "remove_object"; }
+};
+
+/*
+ * Read the "bucket" and "object" request arguments and store the result
+ * of RGWBucketAdminOp::remove_object() in http_ret.
+ */
+void RGWOp_Object_Remove::execute()
+{
+  std::string bucket_arg;
+  RESTArgs::get_string(s, "bucket", bucket_arg, &bucket_arg);
+
+  std::string object_arg;
+  RESTArgs::get_string(s, "object", object_arg, &object_arg);
+
+  RGWBucketAdminOpState op_state;
+  op_state.set_bucket_name(bucket_arg);
+  op_state.set_object(object_arg);
+
+  http_ret = RGWBucketAdminOp::remove_object(store, op_state);
+}
+
+/*
+ * Pick the op for this handler's op_get(): the "policy" sub-resource
+ * selects RGWOp_Get_Policy, otherwise "index" selects
+ * RGWOp_Check_Bucket_Index, otherwise RGWOp_Bucket_Info.
+ */
+RGWOp *RGWHandler_Bucket::op_get()
+{
+  auto& args = s->info.args;
+
+  if (args.sub_resource_exists("policy")) {
+    return new RGWOp_Get_Policy;
+  } else if (args.sub_resource_exists("index")) {
+    return new RGWOp_Check_Bucket_Index;
+  }
+  return new RGWOp_Bucket_Info;
+}
+
+/* Pick the op for this handler's op_put(): the "quota" sub-resource
+ * selects RGWOp_Set_Bucket_Quota, anything else RGWOp_Bucket_Link. */
+RGWOp *RGWHandler_Bucket::op_put()
+{
+  const bool wants_quota = s->info.args.sub_resource_exists("quota");
+  if (!wants_quota) {
+    return new RGWOp_Bucket_Link;
+  }
+  return new RGWOp_Set_Bucket_Quota;
+}
+
+/* This handler's op_post() always yields an RGWOp_Bucket_Unlink. */
+RGWOp *RGWHandler_Bucket::op_post()
+{
+ return new RGWOp_Bucket_Unlink;
+}
+
+/* Pick the op for this handler's op_delete(): the "object" sub-resource
+ * selects RGWOp_Object_Remove, anything else RGWOp_Bucket_Remove. */
+RGWOp *RGWHandler_Bucket::op_delete()
+{
+  const bool wants_object = s->info.args.sub_resource_exists("object");
+  if (!wants_object) {
+    return new RGWOp_Bucket_Remove;
+  }
+  return new RGWOp_Object_Remove;
+}
+
+
diff --git a/src/rgw/rgw_rest_bucket.h b/src/rgw/rgw_rest_bucket.h
new file mode 100644
index 00000000..19bfd734
--- /dev/null
+++ b/src/rgw/rgw_rest_bucket.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_BUCKET_H
+#define CEPH_RGW_REST_BUCKET_H
+
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+
+
+/*
+ * Handler for the bucket operations declared below. It inherits its
+ * constructors from RGWHandler_Auth_S3 and overrides the get/put/post/delete
+ * op factories; read_permissions() does nothing and reports success.
+ */
+class RGWHandler_Bucket : public RGWHandler_Auth_S3 {
+protected:
+ RGWOp *op_get() override;
+ RGWOp *op_put() override;
+ RGWOp *op_post() override;
+ RGWOp *op_delete() override;
+public:
+ using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+ ~RGWHandler_Bucket() override = default;
+
+ int read_permissions(RGWOp*) override {
+ return 0;
+ }
+};
+
+/* REST manager whose get_handler() returns a newly allocated
+ * RGWHandler_Bucket built from the auth registry; the request state and
+ * frontend prefix arguments are unused. */
+class RGWRESTMgr_Bucket : public RGWRESTMgr {
+public:
+ RGWRESTMgr_Bucket() = default;
+ ~RGWRESTMgr_Bucket() override = default;
+
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string&) override {
+ return new RGWHandler_Bucket(auth_registry);
+ }
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc
new file mode 100644
index 00000000..dde6e29b
--- /dev/null
+++ b/src/rgw/rgw_rest_client.cc
@@ -0,0 +1,999 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_rest_client.h"
+#include "rgw_auth_s3.h"
+#include "rgw_http_errors.h"
+#include "rgw_rados.h"
+
+#include "common/ceph_crypto_cms.h"
+#include "common/armor.h"
+#include "common/strtol.h"
+#include "include/str_list.h"
+#include "rgw_crypt_sanitize.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Return the request's retcode when it is negative, otherwise the stored
+ * status. */
+int RGWHTTPSimpleRequest::get_status()
+{
+  const int req_ret = get_req_retcode();
+  if (req_ret >= 0) {
+    return status;
+  }
+  return req_ret;
+}
+
+/*
+ * React to one parsed response header. Only "CONTENT_LENGTH" is handled:
+ * its value becomes max_response. Every other header is accepted and
+ * ignored.
+ *
+ * Returns 0 on success, or -EINVAL when the content length is not a valid
+ * base-10 number or is negative. A negative value used to be stored in
+ * max_response unchecked.
+ */
+int RGWHTTPSimpleRequest::handle_header(const string& name, const string& val)
+{
+  if (name != "CONTENT_LENGTH") {
+    return 0;
+  }
+
+  string err;
+  long len = strict_strtol(val.c_str(), 10, &err);
+  if (!err.empty() || len < 0) {
+    ldout(cct, 0) << "ERROR: failed converting content length (" << val << ") to int " << dendl;
+    return -EINVAL;
+  }
+
+  max_response = len;
+  return 0;
+}
+
+/*
+ * Parse `len` bytes of raw response-header data at `ptr`, holding
+ * out_headers_lock throughout. Bytes are copied into `line` with '\r'
+ * dropped; at each '\n' the accumulated line is split at the first space,
+ * tab or colon. A line whose first token is "HTTP" or starts with "HTTP/"
+ * sets http_status and status. Any other line has its name upper-cased
+ * with '-' turned into '_', is stored in out_headers and is passed to
+ * handle_header(). Returns 0, or the first negative handle_header() result.
+ */
+int RGWHTTPSimpleRequest::receive_header(void *ptr, size_t len)
+{
+ unique_lock guard(out_headers_lock);
+
+ char line[len + 1];
+
+ char *s = (char *)ptr, *end = (char *)ptr + len;
+ char *p = line;
+ ldout(cct, 10) << "receive_http_header" << dendl;
+
+ while (s != end) {
+ if (*s == '\r') {
+ s++;
+ continue;
+ }
+ if (*s == '\n') {
+ *p = '\0';
+ ldout(cct, 10) << "received header:" << line << dendl;
+ // TODO: fill whatever data required here
+ char *l = line;
+ /* tok = text before the first delimiter, l = remainder (or NULL) */
+ char *tok = strsep(&l, " \t:");
+ if (tok && l) {
+ while (*l == ' ')
+ l++;
+
+ if (strcmp(tok, "HTTP") == 0 || strncmp(tok, "HTTP/", 5) == 0) {
+ http_status = atoi(l);
+ /* NOTE(review): this `continue` does not advance s, so the same
+ * '\n' is visited again with the line already split by strsep() -
+ * confirm that is intended. */
+ if (http_status == 100) /* 100-continue response */
+ continue;
+ status = rgw_http_error_to_errno(http_status);
+ } else {
+ /* convert header field name to upper case */
+ char *src = tok;
+ char buf[len + 1];
+ size_t i;
+ for (i = 0; i < len && *src; ++i, ++src) {
+ switch (*src) {
+ case '-':
+ buf[i] = '_';
+ break;
+ default:
+ buf[i] = toupper(*src);
+ }
+ }
+ buf[i] = '\0';
+ out_headers[buf] = l;
+ int r = handle_header(buf, l);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ /* NOTE(review): p is never rewound to the start of `line` after a line
+ * has been processed; presumably each call carries one header line -
+ * confirm against the caller. */
+ if (s != end)
+ *p++ = *s++;
+ }
+ return 0;
+}
+
+/* Store the current time (ceph_clock_now()), formatted by
+ * rgw_to_asctime(), in date_str. */
+static void get_new_date_str(string& date_str)
+{
+ date_str = rgw_to_asctime(ceph_clock_now());
+}
+
+/* Store the current time, broken down with gmtime_r() and formatted as
+ * "%a, %d %b %Y %H:%M:%S %z", in date_str. */
+static void get_gmt_date_str(string& date_str)
+{
+ auto now_time = ceph::real_clock::now();
+ time_t rawtime = ceph::real_clock::to_time_t(now_time);
+
+ char buffer[80];
+
+ struct tm timeInfo;
+ gmtime_r(&rawtime, &timeInfo);
+ strftime(buffer, sizeof(buffer), "%a, %d %b %Y %H:%M:%S %z", &timeInfo);
+
+ date_str = buffer;
+}
+
+/*
+ * Issue a signed request for `resource` using `_method`.
+ *
+ * The resource is appended to the configured url: when the url ends in
+ * '/' and the resource starts with '/', the url's trailing slash is
+ * dropped; when the resource does not start with '/', one is prepended.
+ * An HTTP_DATE header and an "AWS <id>:<signature>" AUTHORIZATION header
+ * are added before process() runs.
+ *
+ * Returns a negative value if signing or process() fails, otherwise
+ * `status`.
+ */
+int RGWRESTSimpleRequest::execute(RGWAccessKey& key, const char *_method, const char *resource)
+{
+  method = _method;
+  string new_url = url;
+  string new_resource = resource;
+
+  /* test emptiness first: indexing size() - 1 of an empty url is out of
+   * bounds */
+  const bool url_ends_with_slash = !new_url.empty() && new_url.back() == '/';
+  if (url_ends_with_slash && resource[0] == '/') {
+    new_url.pop_back();
+  } else if (resource[0] != '/') {
+    new_resource = "/";
+    new_resource.append(resource);
+  }
+  new_url.append(new_resource);
+  url = new_url;
+
+  string date_str;
+  get_new_date_str(date_str);
+  headers.push_back(pair<string, string>("HTTP_DATE", date_str));
+
+  string canonical_header;
+  meta_map_t meta_map;
+  map<string, string> sub_resources;
+
+  rgw_create_s3_canonical_header(method.c_str(), NULL, NULL, date_str.c_str(),
+                                 meta_map, meta_map, url.c_str(), sub_resources,
+                                 canonical_header);
+
+  string digest;
+  try {
+    digest = rgw::auth::s3::get_v2_signature(cct, key.key, canonical_header);
+  } catch (int ret) {
+    /* the signing helper reports failure by throwing an int */
+    return ret;
+  }
+
+  string auth_hdr = "AWS " + key.id + ":" + digest;
+
+  ldout(cct, 15) << "generated auth header: " << auth_hdr << dendl;
+
+  headers.push_back(pair<string, string>("AUTHORIZATION", auth_hdr));
+  int r = process();
+  if (r < 0)
+    return r;
+
+  return status;
+}
+
+/*
+ * Copy up to `len` bytes from send_iter into `ptr`, clamped to what the
+ * iterator has left, and return the number of bytes copied. Returns 0
+ * when no send iterator is set. `pause` is not used.
+ */
+int RGWHTTPSimpleRequest::send_data(void *ptr, size_t len, bool* pause)
+{
+  if (!send_iter) {
+    return 0;
+  }
+
+  const bool over_remaining = len > send_iter->get_remaining();
+  if (over_remaining) {
+    len = send_iter->get_remaining();
+  }
+
+  char *out = (char *)ptr;
+  send_iter->copy(len, out);
+
+  return len;
+}
+
+/*
+ * Append incoming body bytes to `response`, never letting it grow past
+ * max_response: once that limit is reached further data is dropped, and a
+ * chunk that would cross it is truncated. Always returns 0. `pause` is
+ * not used.
+ */
+int RGWHTTPSimpleRequest::receive_data(void *ptr, size_t len, bool *pause)
+{
+ size_t cp_len, left_len;
+
+ /* room left before response reaches max_response */
+ left_len = max_response > response.length() ? (max_response - response.length()) : 0;
+ if (left_len == 0)
+ return 0; /* don't read extra data */
+
+ cp_len = (len > left_len) ? left_len : len;
+ bufferptr p((char *)ptr, cp_len);
+
+ response.append(p);
+
+ return 0;
+}
+
+/*
+ * Append one URL-encoded query parameter to dest. The separator is "?"
+ * when dest is still empty and "&" otherwise. "=<value>" is only added
+ * when val is non-empty.
+ */
+static void append_param(string& dest, const string& name, const string& val)
+{
+  dest.append(dest.empty() ? "?" : "&");
+
+  string encoded_name;
+  url_encode(name, encoded_name);
+  dest.append(encoded_name);
+
+  if (val.empty()) {
+    return;
+  }
+
+  string encoded_val;
+  url_encode(val, encoded_val);
+  dest.append("=");
+  dest.append(encoded_val);
+}
+
+/* Append every entry of extra_args, then every entry of params, to dest
+ * as query parameters via append_param(). */
+static void do_get_params_str(const param_vec_t& params, map<string, string>& extra_args, string& dest)
+{
+  for (const auto& extra : extra_args) {
+    append_param(dest, extra.first, extra.second);
+  }
+  for (const auto& param : params) {
+    append_param(dest, param.first, param.second);
+  }
+}
+
+/* Append extra_args followed by this request's own params to dest as a
+ * query string (see do_get_params_str()). */
+void RGWHTTPSimpleRequest::get_params_str(map<string, string>& extra_args, string& dest)
+{
+ do_get_params_str(params, extra_args, dest);
+}
+
+/*
+ * Hand the collected response headers to the caller. Under
+ * out_headers_lock the two maps are swapped, then out_headers (which now
+ * holds whatever *pheaders contained before the call) is cleared.
+ */
+void RGWHTTPSimpleRequest::get_out_headers(map<string, string> *pheaders)
+{
+ unique_lock guard(out_headers_lock);
+ pheaders->swap(out_headers);
+ out_headers.clear();
+}
+
+/*
+ * Compute a v2 signature for `info` and store the resulting
+ * "AWS <id>:<signature>" string in env as AUTHORIZATION.
+ *
+ * Returns 0 on success, and also when key.key is empty (no signing is
+ * done). Returns -EINVAL if the canonical header cannot be built, or the
+ * int thrown by get_v2_signature().
+ */
+static int sign_request(CephContext *cct, RGWAccessKey& key, RGWEnv& env, req_info& info)
+{
+ /* don't sign if no key is provided */
+ if (key.key.empty()) {
+ return 0;
+ }
+
+ /* dump the environment only when level-20 rgw logging is gathered */
+ if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+ for (const auto& i: env.get_map()) {
+ ldout(cct, 20) << "> " << i.first << " -> " << rgw::crypt_sanitize::x_meta_map{i.first, i.second} << dendl;
+ }
+ }
+
+ string canonical_header;
+ if (!rgw_create_s3_canonical_header(info, NULL, canonical_header, false)) {
+ ldout(cct, 0) << "failed to create canonical s3 header" << dendl;
+ return -EINVAL;
+ }
+
+ ldout(cct, 10) << "generated canonical header: " << canonical_header << dendl;
+
+ string digest;
+ try {
+ digest = rgw::auth::s3::get_v2_signature(cct, key.key, canonical_header);
+ } catch (int ret) {
+ return ret;
+ }
+
+ string auth_hdr = "AWS " + key.id + ":" + digest;
+ ldout(cct, 15) << "generated auth header: " << auth_hdr << dendl;
+
+ env.set("AUTHORIZATION", auth_hdr);
+
+ return 0;
+}
+
+/*
+ * Re-sign and forward the request described by `info` to this request's
+ * url.
+ *
+ * A copy of `info` is rebuilt with the first path component of the
+ * request URI URL-encoded and a fresh HTTP_DATE, then signed with `key`.
+ * The resulting environment and x-meta entries become outgoing headers,
+ * the original query arguments are appended to the url, `inbl` (if given)
+ * is sent as the body and the NUL-terminated response is handed to
+ * `outbl` (if given).
+ *
+ * Returns a negative value on failure (-EINVAL from process() is reported
+ * as -ERR_SERVICE_UNAVAILABLE), otherwise `status`.
+ */
+int RGWRESTSimpleRequest::forward_request(RGWAccessKey& key, req_info& info, size_t max_response, bufferlist *inbl, bufferlist *outbl)
+{
+
+  string date_str;
+  get_new_date_str(date_str);
+
+  RGWEnv new_env;
+  req_info new_info(cct, &new_env);
+  new_info.rebuild_from(info);
+  string bucket_encode;
+  string request_uri_encode;
+  /* pos is relative to the uri with its first character skipped */
+  size_t pos = new_info.request_uri.substr(1, new_info.request_uri.size() - 1).find("/");
+  string bucket = new_info.request_uri.substr(1, pos);
+  url_encode(bucket, bucket_encode);
+  if (std::string::npos != pos)
+    request_uri_encode = string("/") + bucket_encode + new_info.request_uri.substr(pos + 1);
+  else
+    request_uri_encode = string("/") + bucket_encode;
+  new_info.request_uri = request_uri_encode;
+  new_env.set("HTTP_DATE", date_str.c_str());
+
+  int ret = sign_request(cct, key, new_env, new_info);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: failed to sign request" << dendl;
+    return ret;
+  }
+
+  for (const auto& kv: new_env.get_map()) {
+    headers.emplace_back(kv);
+  }
+
+  meta_map_t& meta_map = new_info.x_meta_map;
+  for (const auto& kv: meta_map) {
+    headers.emplace_back(kv);
+  }
+
+  string params_str;
+  get_params_str(info.args.get_params(), params_str);
+
+  string new_url = url;
+  string& resource = new_info.request_uri;
+  string new_resource = resource;
+  /* test emptiness first: indexing size() - 1 of an empty url is out of
+   * bounds */
+  const bool url_ends_with_slash = !new_url.empty() && new_url.back() == '/';
+  if (url_ends_with_slash && resource[0] == '/') {
+    new_url.pop_back();
+  } else if (resource[0] != '/') {
+    new_resource = "/";
+    new_resource.append(resource);
+  }
+  new_url.append(new_resource + params_str);
+
+  bufferlist::iterator bliter;
+
+  if (inbl) {
+    bliter = inbl->begin();
+    send_iter = &bliter;
+
+    set_send_length(inbl->length());
+  }
+
+  method = new_info.method;
+  url = new_url;
+
+  int r = process();
+  if (inbl) {
+    /* bliter is a local; don't leave send_iter pointing at it once this
+     * function returns */
+    send_iter = nullptr;
+  }
+  if (r < 0){
+    if (r == -EINVAL){
+      // curl_easy has errored, generally means the service is not available
+      r = -ERR_SERVICE_UNAVAILABLE;
+    }
+    return r;
+  }
+
+  response.append((char)0); /* NULL terminate response */
+
+  if (outbl) {
+    outbl->claim(response);
+  }
+
+  return status;
+}
+
+/* RGWGetDataCB that hands the data it receives to an
+ * RGWRESTStreamS3PutObj request; it stores the raw pointer it is
+ * constructed with. */
+class RGWRESTStreamOutCB : public RGWGetDataCB {
+ RGWRESTStreamS3PutObj *req;
+public:
+ explicit RGWRESTStreamOutCB(RGWRESTStreamS3PutObj *_req) : req(_req) {}
+ int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) override; /* callback for object iteration when sending data */
+};
+
+/*
+ * Queue bl_len bytes of bl, starting at bl_ofs, on the request. When the
+ * range covers the whole buffer list, bl itself is passed on; otherwise
+ * the range is wrapped in a new buffer list first. Always returns 0.
+ */
+int RGWRESTStreamOutCB::handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len)
+{
+  dout(20) << "RGWRESTStreamOutCB::handle_data bl.length()=" << bl.length() << " bl_ofs=" << bl_ofs << " bl_len=" << bl_len << dendl;
+
+  const bool whole_buffer = (!bl_ofs && bl_len == bl.length());
+  if (!whole_buffer) {
+    bufferptr slice(bl.c_str() + bl_ofs, bl_len);
+    bufferlist partial;
+    partial.push_back(slice);
+
+    req->add_send_data(partial);
+    return 0;
+  }
+
+  req->add_send_data(bl);
+  return 0;
+}
+
+/* Deletes the callback object held in out_cb. */
+RGWRESTStreamS3PutObj::~RGWRESTStreamS3PutObj()
+{
+ delete out_cb;
+}
+
+/*
+ * Append one grantee to the comma-separated list stored under `perm` in
+ * grants_by_type. The entry has the form <type>="<id>", where <type> is
+ * "uri" for group grantees, "emailAddress" for e-mail grantees and "id"
+ * for everything else.
+ */
+static void grants_by_type_add_one_grant(map<int, string>& grants_by_type, int perm, ACLGrant& grant)
+{
+ /* operator[] creates an empty list for a permission not seen before */
+ string& s = grants_by_type[perm];
+
+ if (!s.empty())
+ s.append(", ");
+
+ string id_type_str;
+ ACLGranteeType& type = grant.get_type();
+ switch (type.get_type()) {
+ case ACL_TYPE_GROUP:
+ id_type_str = "uri";
+ break;
+ case ACL_TYPE_EMAIL_USER:
+ id_type_str = "emailAddress";
+ break;
+ default:
+ id_type_str = "id";
+ }
+ rgw_user id;
+ grant.get_id(id);
+ s.append(id_type_str + "=\"" + id.to_str() + "\"");
+}
+
+/* Pairs a permission value with the x-amz-grant-* header that carries it. */
+struct grant_type_to_header {
+ int type;
+ const char *header;
+};
+
+/* Permission-to-header table, terminated by an entry with a NULL header.
+ * The loops over this table visit entries in the order listed here. */
+struct grant_type_to_header grants_headers_def[] = {
+ { RGW_PERM_FULL_CONTROL, "x-amz-grant-full-control"},
+ { RGW_PERM_READ, "x-amz-grant-read"},
+ { RGW_PERM_WRITE, "x-amz-grant-write"},
+ { RGW_PERM_READ_ACP, "x-amz-grant-read-acp"},
+ { RGW_PERM_WRITE_ACP, "x-amz-grant-write-acp"},
+ { 0, NULL}
+};
+
+/*
+ * If every bit of check_perm is set in perm, record the grant under
+ * check_perm and return true; otherwise leave grants_by_type untouched
+ * and return false.
+ */
+static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant, int check_perm)
+{
+  const bool has_all_bits = ((perm & check_perm) == check_perm);
+  if (!has_all_bits) {
+    return false;
+  }
+  grants_by_type_add_one_grant(grants_by_type, check_perm, grant);
+  return true;
+}
+
+/* Record the grant under the first entry of grants_headers_def whose
+ * permission bits are all contained in perm; stop after that first match. */
+static void grants_by_type_add_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant)
+{
+  struct grant_type_to_header *def = grants_headers_def;
+
+  while (def->header) {
+    if (grants_by_type_check_perm(grants_by_type, perm, grant, def->type)) {
+      break;
+    }
+    ++def;
+  }
+}
+
+/* For every entry of grants_headers_def that has a grant list in `grants`,
+ * set the corresponding header in both env and meta_map. */
+static void add_grants_headers(map<int, string>& grants, RGWEnv& env, meta_map_t& meta_map)
+{
+  for (struct grant_type_to_header *def = grants_headers_def; def->header; ++def) {
+    map<int, string>::iterator found = grants.find(def->type);
+    if (found == grants.end()) {
+      continue;
+    }
+    env.set(def->header,found->second);
+    meta_map[def->header] = found->second;
+  }
+}
+
+/*
+ * Prepare the generator for one request: build url as
+ * _url + resource + query string, add `params` to new_info's argument
+ * set, set HTTP_DATE in the environment to the current GMT date, and set
+ * the method, script_uri and request_uri ("/" + resource) on new_info.
+ */
+void RGWRESTGenerateHTTPHeaders::init(const string& _method, const string& _url, const string& resource, const param_vec_t& params)
+{
+ string params_str;
+ map<string, string>& args = new_info->args.get_params();
+ /* the query string is rendered before params are merged into args below */
+ do_get_params_str(params, args, params_str);
+
+ /* merge params with extra args so that we can sign correctly */
+ for (auto iter = params.begin(); iter != params.end(); ++iter) {
+ new_info->args.append(iter->first, iter->second);
+ }
+
+ url = _url + resource + params_str;
+
+ string date_str;
+ get_gmt_date_str(date_str);
+
+ new_env->set("HTTP_DATE", date_str.c_str());
+
+ method = _method;
+ new_info->method = method.c_str();
+
+ new_info->script_uri = "/";
+ new_info->script_uri.append(resource);
+ new_info->request_uri = new_info->script_uri;
+}
+
+/* True when s begins with "x-amz-" (case-sensitive match). */
+static bool is_x_amz(const string& s) {
+ return boost::algorithm::starts_with(s, "x-amz-");
+}
+
+/*
+ * Add caller-supplied headers to the outgoing request. Each name is
+ * passed through lowercase_dash_http_attr() and set in the environment;
+ * names starting with "x-amz-" are also recorded in new_info's
+ * x_meta_map.
+ */
+void RGWRESTGenerateHTTPHeaders::set_extra_headers(const map<string, string>& extra_headers)
+{
+  /* iterate by const reference: by-value iteration copied both strings of
+   * every entry */
+  for (const auto& hdr : extra_headers) {
+    const string& name = lowercase_dash_http_attr(hdr.first);
+    new_env->set(name, hdr.second.c_str());
+    if (is_x_amz(name)) {
+      new_info->x_meta_map[name] = hdr.second;
+    }
+  }
+}
+
+/*
+ * Turn stored object attributes into request headers. Attributes whose
+ * name starts with RGW_ATTR_META_PREFIX are renamed to
+ * RGW_AMZ_META_PREFIX + <rest of the name> and applied with
+ * set_http_attrs(). The access control policy decoded from the attribute
+ * set is applied with set_policy().
+ *
+ * Returns 0 on success, or the negative result of
+ * rgw_policy_from_attrset(), in which case no headers are set.
+ */
+int RGWRESTGenerateHTTPHeaders::set_obj_attrs(map<string, bufferlist>& rgw_attrs)
+{
+ map<string, string> new_attrs;
+
+ /* merge send headers */
+ for (auto& attr: rgw_attrs) {
+ bufferlist& bl = attr.second;
+ const string& name = attr.first;
+ /* NOTE(review): this reads the value as a C string, so it presumably
+ * relies on the stored attribute being NUL-terminated - confirm. */
+ string val = bl.c_str();
+ if (name.compare(0, sizeof(RGW_ATTR_META_PREFIX) - 1, RGW_ATTR_META_PREFIX) == 0) {
+ string header_name = RGW_AMZ_META_PREFIX;
+ header_name.append(name.substr(sizeof(RGW_ATTR_META_PREFIX) - 1));
+ new_attrs[header_name] = val;
+ }
+ }
+
+ RGWAccessControlPolicy policy;
+ int ret = rgw_policy_from_attrset(cct, rgw_attrs, &policy);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: couldn't get policy ret=" << ret << dendl;
+ return ret;
+ }
+
+ set_http_attrs(new_attrs);
+ set_policy(policy);
+
+ return 0;
+}
+
+/* Set of lower-case content-* header names.
+ * NOTE(review): no use of this set is visible in the surrounding code -
+ * confirm whether it is still referenced. */
+static std::set<string> keep_headers = { "content-type",
+ "content-encoding",
+ "content-disposition",
+ "content-language" };
+
+/*
+ * Add HTTP attributes to the outgoing request. A name that maps to
+ * "x-amz-*" after lowercase_dash_http_attr() is set in the environment
+ * under that converted name and recorded in x_meta_map. Any other
+ * attribute is set in the environment under its original name.
+ */
+void RGWRESTGenerateHTTPHeaders::set_http_attrs(const map<string, string>& http_attrs)
+{
+ /* merge send headers */
+ for (auto& attr: http_attrs) {
+ const string& val = attr.second;
+ const string& name = lowercase_dash_http_attr(attr.first);
+ if (is_x_amz(name)) {
+ new_env->set(name, val);
+ new_info->x_meta_map[name] = val;
+ } else {
+ new_env->set(attr.first, val); /* Ugh, using the uppercase representation,
+ as the signing function calls info.env.get("CONTENT_TYPE").
+ This needs to be cleaned up! */
+ }
+ }
+}
+
+/*
+ * Translate the policy's ACL grants into x-amz-grant-* headers: group the
+ * grantees by permission, then set one header per permission in both the
+ * environment and new_info's x_meta_map.
+ */
+void RGWRESTGenerateHTTPHeaders::set_policy(RGWAccessControlPolicy& policy)
+{
+  /* update acl headers */
+  multimap<string, ACLGrant>& grant_map = policy.get_acl().get_grant_map();
+
+  map<int, string> grants_by_type;
+  for (auto& entry : grant_map) {
+    ACLGrant& grant = entry.second;
+    ACLPermission& perm = grant.get_permission();
+    grants_by_type_add_perm(grants_by_type, perm.get_permissions(), grant);
+  }
+
+  add_grants_headers(grants_by_type, *new_env, new_info->x_meta_map);
+}
+
+/* Sign the prepared request with `key` via sign_request(). Returns 0 on
+ * success, or the negative sign_request() result after logging it. */
+int RGWRESTGenerateHTTPHeaders::sign(RGWAccessKey& key)
+{
+ int ret = sign_request(cct, key, *new_env, *new_info);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to sign request" << dendl;
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Prepare a PUT request for the given object: build the target url and the
+ * url-encoded resource according to the host style, then initialise the
+ * header generator. On return, `url` holds the url produced by the generator.
+ */
+void RGWRESTStreamS3PutObj::send_init(rgw_obj& obj)
+{
+  string resource_str;
+  string resource;
+  string new_url = url;
+
+  if (host_style == VirtualStyle) {
+    /* the bucket goes into the host name, the resource is just the object */
+    resource_str = obj.get_oid();
+    new_url = obj.bucket.name + "." + new_url;
+  } else {
+    resource_str = obj.bucket.name + "/" + obj.get_oid();
+  }
+
+  //do not encode slash in object key name
+  url_encode(resource_str, resource, false);
+
+  /* guard against an empty url: indexing size() - 1 would be out of bounds */
+  if (new_url.empty() || new_url.back() != '/')
+    new_url.append("/");
+
+  method = "PUT";
+  headers_gen.init(method, new_url, resource, params);
+
+  url = headers_gen.get_url();
+}
+
+/*
+ * Derive the request headers from an object's attribute set, then finish
+ * the request via send_ready(key, send).
+ * Note that the int result of set_obj_attrs() is not inspected here.
+ */
+int RGWRESTStreamS3PutObj::send_ready(RGWAccessKey& key, map<string, bufferlist>& rgw_attrs, bool send)
+{
+  (void)headers_gen.set_obj_attrs(rgw_attrs);
+  return send_ready(key, send);
+}
+
+/*
+ * Set the request headers from explicit http attributes and an ACL policy,
+ * then finish the request via send_ready(key, send).
+ */
+int RGWRESTStreamS3PutObj::send_ready(RGWAccessKey& key, const map<string, string>& http_attrs,
+                                      RGWAccessControlPolicy& policy, bool send)
+{
+  /* plain attributes first, then the grants derived from the policy */
+  headers_gen.set_http_attrs(http_attrs);
+  headers_gen.set_policy(policy);
+  return send_ready(key, send);
+}
+
+/*
+ * Sign the request, copy the generated environment into the outgoing
+ * header list, create the output callback and, if `send` is set, start the
+ * HTTP request.
+ *
+ * Returns 0 on success or a negative error from signing or sending.
+ */
+int RGWRESTStreamS3PutObj::send_ready(RGWAccessKey& key, bool send)
+{
+  int r = headers_gen.sign(key);
+  if (r < 0) {
+    /* sign() has already logged the failure; do not go on with an
+     * unsigned request */
+    return r;
+  }
+
+  for (const auto& kv : new_env.get_map()) {
+    headers.emplace_back(kv);
+  }
+
+  out_cb = new RGWRESTStreamOutCB(this);
+
+  if (send) {
+    r = RGWHTTP::send(this);
+    if (r < 0)
+      return r;
+  }
+
+  return 0;
+}
+
+/*
+ * Convenience wrapper: send_init() for the object followed by
+ * send_ready() with its attributes. obj_size is not used.
+ */
+int RGWRESTStreamS3PutObj::put_obj_init(RGWAccessKey& key, rgw_obj& obj, uint64_t obj_size, map<string, bufferlist>& attrs, bool send)
+{
+  send_init(obj);
+
+  const int r = send_ready(key, attrs, send);
+  return r;
+}
+
+/*
+ * Look up header_name in out_headers and copy its value into str;
+ * str is cleared when the header is not present.
+ */
+void set_str_from_headers(map<string, string>& out_headers, const string& header_name, string& str)
+{
+  const auto found = out_headers.find(header_name);
+  if (found == out_headers.end()) {
+    str.clear();
+    return;
+  }
+  str = found->second;
+}
+
+/*
+ * Parse an mtime string of the form "<secs>[.<nsecs>]" into *rt.
+ * Both parts are parsed as base-10 integers with strict_strtol().
+ * Returns 0 on success, -EINVAL if the string is empty or a part fails
+ * to parse.
+ */
+static int parse_rgwx_mtime(CephContext *cct, const string& s, ceph::real_time *rt)
+{
+  vector<string> parts;
+  get_str_vec(s, ".", parts);
+  if (parts.empty()) {
+    return -EINVAL;
+  }
+
+  string err;
+
+  /* seconds part */
+  long secs = strict_strtol(parts[0].c_str(), 10, &err);
+  if (!err.empty()) {
+    ldout(cct, 0) << "ERROR: failed converting mtime (" << s << ") to real_time " << dendl;
+    return -EINVAL;
+  }
+
+  /* optional nanoseconds part */
+  long nsecs = 0;
+  if (parts.size() > 1) {
+    nsecs = strict_strtol(parts[1].c_str(), 10, &err);
+    if (!err.empty()) {
+      ldout(cct, 0) << "ERROR: failed converting mtime (" << s << ") to real_time " << dendl;
+      return -EINVAL;
+    }
+  }
+
+  *rt = utime_t(secs, nsecs).to_real_time();
+  return 0;
+}
+
+/*
+ * Build "<url-encoded bucket key>/<url-encoded object name>" for the given
+ * object and store it in *resource.
+ */
+static void send_prepare_convert(const rgw_obj& obj, string *resource)
+{
+  string bucket_part;
+  url_encode(obj.bucket.get_key(':', 0), bucket_part);
+
+  string object_part;
+  url_encode(obj.key.name, object_part);
+
+  resource->assign(bucket_part);
+  resource->append("/");
+  resource->append(object_part);
+}
+
+/*
+ * Object flavour of send_request(): converts the object into an encoded
+ * resource string and forwards to the resource-based overload.
+ */
+int RGWRESTStreamRWRequest::send_request(RGWAccessKey& key, map<string, string>& extra_headers, const rgw_obj& obj, RGWHTTPManager *mgr)
+{
+  string encoded_resource;
+  send_prepare_convert(obj, &encoded_resource);
+  return send_request(&key, extra_headers, encoded_resource, mgr);
+}
+
+/*
+ * Object flavour of send_prepare(): the resource comes already encoded out
+ * of send_prepare_convert(), so it goes straight to do_send_prepare().
+ */
+int RGWRESTStreamRWRequest::send_prepare(RGWAccessKey& key, map<string, string>& extra_headers, const rgw_obj& obj)
+{
+  string encoded_resource;
+  send_prepare_convert(obj, &encoded_resource);
+  return do_send_prepare(&key, extra_headers, encoded_resource);
+}
+
+/*
+ * Resource flavour of send_prepare(): url-encodes the resource (leaving
+ * slashes alone) and hands over to do_send_prepare().
+ */
+int RGWRESTStreamRWRequest::send_prepare(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource,
+                                         bufferlist *send_data)
+{
+  string encoded;
+  url_encode(resource, encoded, false); // do not encode slash
+  return do_send_prepare(key, extra_headers, encoded, send_data);
+}
+
+/*
+ * Build url, headers and (optionally) the signature for this request.
+ *
+ * key          - access key to sign with; no signing is done when null
+ * extra_headers- additional headers to put into the request environment
+ * resource     - already-encoded resource, with or without a leading '/'
+ * send_data    - optional body; its contents are swapped into the request
+ *
+ * Returns 0 on success or the negative error from signing.
+ */
+int RGWRESTStreamRWRequest::do_send_prepare(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource,
+                                            bufferlist *send_data)
+{
+  string new_url = url;
+  /* the url may be empty (e.g. no endpoints configured); indexing
+   * size() - 1 on an empty string would be out of bounds */
+  if (new_url.empty() || new_url.back() != '/')
+    new_url.append("/");
+
+  RGWEnv new_env;
+  req_info new_info(cct, &new_env);
+
+  /* strip a single leading slash from the resource */
+  string new_resource;
+  if (!resource.empty() && resource.front() == '/') {
+    new_resource = resource.substr(1);
+  } else {
+    new_resource = resource;
+  }
+
+  /* everything up to the first slash is taken as the bucket name */
+  const size_t pos = new_resource.find("/");
+  const string bucket_name = new_resource.substr(0, pos);
+
+  //when dest is a bucket with out other params, uri should end up with '/'
+  if (pos == string::npos && params.size() == 0 && host_style == VirtualStyle) {
+    new_resource.append("/");
+  }
+
+  if (host_style == VirtualStyle) {
+    /* bucket moves into the host name; the resource keeps only what
+     * follows the bucket */
+    new_url = bucket_name + "." + new_url;
+    if (pos == string::npos) {
+      new_resource = "";
+    } else {
+      new_resource = new_resource.substr(pos + 1);
+    }
+  }
+
+  RGWRESTGenerateHTTPHeaders headers_gen(cct, &new_env, &new_info);
+
+  headers_gen.init(method, new_url, new_resource, params);
+
+  headers_gen.set_http_attrs(extra_headers);
+
+  if (key) {
+#if 0
+    new_info.init_meta_info(nullptr);
+#endif
+
+    int ret = headers_gen.sign(*key);
+    if (ret < 0) {
+      ldout(cct, 0) << "ERROR: failed to sign request" << dendl;
+      return ret;
+    }
+  }
+
+  for (const auto& kv : new_env.get_map()) {
+    headers.emplace_back(kv);
+  }
+
+  if (send_data) {
+    set_send_length(send_data->length());
+    set_outbl(*send_data);
+    set_send_data_hint(true);
+  }
+
+  method = new_info.method;
+  url = headers_gen.get_url();
+
+  return 0;
+}
+
+/*
+ * Prepare the request for the given resource and, if that succeeds, send it
+ * through send(mgr).
+ */
+int RGWRESTStreamRWRequest::send_request(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource,
+                                         RGWHTTPManager *mgr, bufferlist *send_data)
+{
+  const int r = send_prepare(key, extra_headers, resource, send_data);
+  return (r < 0) ? r : send(mgr);
+}
+
+
+/*
+ * Send the prepared request: through the given manager when there is one,
+ * otherwise through RGWHTTP::send().
+ */
+int RGWRESTStreamRWRequest::send(RGWHTTPManager *mgr)
+{
+  if (mgr) {
+    const int r = mgr->add_request(this);
+    return (r < 0) ? r : 0;
+  }
+
+  return RGWHTTP::send(this);
+}
+
+/*
+ * Wait for the request to finish and extract results from the response
+ * headers. All output parameters are optional.
+ *
+ * etag     - value of the ETAG header (cleared if absent)
+ * mtime    - parsed RGWX_MTIME header, or a default real_time if absent
+ * psize    - parsed RGWX_OBJECT_SIZE header
+ * pattrs   - headers starting with RGW_HTTP_RGWX_ATTR_PREFIX, keyed by the
+ *            rest of the name lower-cased and with '_' turned into '-'
+ * pheaders - receives all response headers (out_headers is moved from)
+ *
+ * mtime and psize are only filled in when status >= 0.
+ * Returns a negative error from wait() or from parsing, otherwise `status`.
+ */
+int RGWRESTStreamRWRequest::complete_request(string *etag,
+                                             real_time *mtime,
+                                             uint64_t *psize,
+                                             map<string, string> *pattrs,
+                                             map<string, string> *pheaders)
+{
+  int ret = wait();
+  if (ret < 0) {
+    return ret;
+  }
+
+  unique_lock guard(out_headers_lock);
+
+  if (etag) {
+    set_str_from_headers(out_headers, "ETAG", *etag);
+  }
+  if (status >= 0) {
+    if (mtime) {
+      string mtime_str;
+      set_str_from_headers(out_headers, "RGWX_MTIME", mtime_str);
+      if (!mtime_str.empty()) {
+        ret = parse_rgwx_mtime(cct, mtime_str, mtime);
+        if (ret < 0) {
+          return ret;
+        }
+      } else {
+        *mtime = real_time();
+      }
+    }
+    if (psize) {
+      string size_str;
+      set_str_from_headers(out_headers, "RGWX_OBJECT_SIZE", size_str);
+      string err;
+      *psize = strict_strtoll(size_str.c_str(), 10, &err);
+      if (!err.empty()) {
+        ldout(cct, 0) << "ERROR: failed parsing embedded metadata object size (" << size_str << ") to int " << dendl;
+        return -EIO;
+      }
+    }
+  }
+
+  if (pattrs) {
+    const size_t prefix_len = sizeof(RGW_HTTP_RGWX_ATTR_PREFIX) - 1;
+    for (const auto& hdr : out_headers) {
+      const string& attr_name = hdr.first;
+      if (attr_name.compare(0, prefix_len, RGW_HTTP_RGWX_ATTR_PREFIX) != 0) {
+        continue;
+      }
+      /* Convert in a std::string rather than a stack array sized by the
+       * (remote-supplied) header name. tolower() needs an unsigned char
+       * value, hence the cast. */
+      string name = attr_name.substr(prefix_len);
+      for (char& c : name) {
+        if (c == '_') {
+          c = '-';
+        } else {
+          c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
+        }
+      }
+      (*pattrs)[name] = hdr.second;
+    }
+  }
+
+  if (pheaders) {
+    *pheaders = std::move(out_headers);
+  }
+  return status;
+}
+
+/*
+ * Header hook: when the RGWX_EMBEDDED_METADATA_LEN header arrives, parse it
+ * and pass the length on to the receive callback. Other headers are ignored.
+ *
+ * Returns 0, or -EINVAL if the length cannot be parsed.
+ */
+int RGWHTTPStreamRWRequest::handle_header(const string& name, const string& val)
+{
+  if (name == "RGWX_EMBEDDED_METADATA_LEN") {
+    string err;
+    long len = strict_strtol(val.c_str(), 10, &err);
+    if (!err.empty()) {
+      ldout(cct, 0) << "ERROR: failed converting embedded metadata len (" << val << ") to int " << dendl;
+      return -EINVAL;
+    }
+
+    /* a request may be created without a receive callback */
+    if (cb) {
+      cb->set_extra_data_len(len);
+    }
+  }
+  return 0;
+}
+
+// Feeds a chunk of received body data to the receive callback, if one is set.
+// Returns a negative error from the callback, otherwise the number of bytes
+// that were passed in (orig_len).
+int RGWHTTPStreamRWRequest::receive_data(void *ptr, size_t len, bool *pause)
+{
+ size_t orig_len = len;
+
+ if (cb) {
+ // new bytes are appended to whatever is still held in in_data
+ in_data.append((const char *)ptr, len);
+
+ size_t orig_in_data_len = in_data.length();
+
+ int ret = cb->handle_data(in_data, pause);
+ if (ret < 0)
+ return ret;
+ if (ret == 0) {
+ // a zero return drops the whole buffer
+ in_data.clear();
+ } else {
+ /* partial read */
+ ceph_assert(in_data.length() <= orig_in_data_len);
+ len = ret;
+ bufferlist bl;
+ size_t left_to_read = orig_in_data_len - len;
+ // trim the front of in_data so that at most left_to_read bytes remain
+ if (in_data.length() > left_to_read) {
+ in_data.splice(0, in_data.length() - left_to_read, &bl);
+ }
+ }
+ }
+ ofs += len;
+ return orig_len;
+}
+
+// Sets the stream_writes flag under write_lock. send_data() reads the flag
+// to decide whether to pause when there is nothing queued to send.
+void RGWHTTPStreamRWRequest::set_stream_write(bool s) {
+ Mutex::Locker wl(write_lock);
+ stream_writes = s;
+}
+
+// Calls _set_read_paused(false) while holding the request lock.
+// NOTE(review): the guard tests !read_paused, so the call is only made while
+// the flag is false. No code visible here ever sets read_paused, so the
+// branch is always taken as far as this file shows. Confirm whether the
+// condition is intentional before changing it.
+void RGWHTTPStreamRWRequest::unpause_receive()
+{
+ Mutex::Locker req_locker(get_req_lock());
+ if (!read_paused) {
+ _set_read_paused(false);
+ }
+}
+
+// Queues more data for sending: the contents of bl are moved onto the end of
+// outbl (claim_append), then _set_write_paused(false) is called.
+// Takes the request lock first, then write_lock.
+void RGWHTTPStreamRWRequest::add_send_data(bufferlist& bl)
+{
+ Mutex::Locker req_locker(get_req_lock());
+ Mutex::Locker wl(write_lock);
+ outbl.claim_append(bl);
+ _set_write_paused(false);
+}
+
+// Returns the number of bytes currently queued in outbl, read under write_lock.
+uint64_t RGWHTTPStreamRWRequest::get_pending_send_size()
+{
+ Mutex::Locker wl(write_lock);
+ return outbl.length();
+}
+
+// Marks the streamed write as complete and calls _set_write_paused(false).
+// Once write_stream_complete is set, send_data() no longer pauses on an empty
+// outbl because of stream_writes. Takes the request lock, then write_lock.
+void RGWHTTPStreamRWRequest::finish_write()
+{
+ Mutex::Locker req_locker(get_req_lock());
+ Mutex::Locker wl(write_lock);
+ write_stream_complete = true;
+ _set_write_paused(false);
+}
+
+// Copies up to len bytes of queued output (outbl) into ptr and returns the
+// number of bytes copied. When nothing is queued it returns 0 and sets *pause
+// if more data is still expected.
+// NOTE(review): *pause is written without a null check; callers must pass a
+// valid pointer.
+int RGWHTTPStreamRWRequest::send_data(void *ptr, size_t len, bool *pause)
+{
+ uint64_t out_len;
+ uint64_t send_size;
+ {
+ Mutex::Locker wl(write_lock);
+
+ if (outbl.length() == 0) {
+ // more data is expected if a streamed write has not been finished yet,
+ // or fewer than send_len bytes have been written so far
+ if ((stream_writes && !write_stream_complete) ||
+ (write_ofs < send_len)) {
+ *pause = true;
+ }
+ return 0;
+ }
+
+ len = std::min(len, (size_t)outbl.length());
+
+ // move the first len bytes out of outbl and copy them to the caller's buffer
+ bufferlist bl;
+ outbl.splice(0, len, &bl);
+ send_size = bl.length();
+ if (send_size > 0) {
+ memcpy(ptr, bl.c_str(), send_size);
+ write_ofs += send_size;
+ }
+
+ // what is still queued after this call; reported to the drain callback
+ out_len = outbl.length();
+ }
+ /* don't need to be under write_lock here, avoid deadlocks in case notify callback
+ * needs to lock */
+ if (write_drain_cb) {
+ write_drain_cb->notify(out_len);
+ }
+ return send_size;
+}
+
+/*
+ * RGWGetDataCB that moves every chunk it is handed onto the end of a
+ * caller-provided bufferlist and reports bl_len as handled.
+ */
+class StreamIntoBufferlist : public RGWGetDataCB {
+  bufferlist& dest;
+public:
+  explicit StreamIntoBufferlist(bufferlist& _bl) : dest(_bl) {}
+
+  int handle_data(bufferlist& inbl, off_t bl_ofs, off_t bl_len) override {
+    dest.claim_append(inbl);
+    return bl_len;
+  }
+};
+
diff --git a/src/rgw/rgw_rest_client.h b/src/rgw/rgw_rest_client.h
new file mode 100644
index 00000000..8f9b2c16
--- /dev/null
+++ b/src/rgw/rgw_rest_client.h
@@ -0,0 +1,226 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_CLIENT_H
+#define CEPH_RGW_REST_CLIENT_H
+
+#include "rgw_http_client.h"
+
+class RGWGetDataCB;
+
+// HTTP client request that keeps the response headers in a map and buffers
+// the response body in memory.
+class RGWHTTPSimpleRequest : public RGWHTTPClient {
+protected:
+ int http_status;
+ int status;
+
+ using unique_lock = std::unique_lock<std::mutex>;
+
+ // out_headers is accessed under out_headers_lock (see complete_request())
+ std::mutex out_headers_lock;
+ map<string, string> out_headers;
+ param_vec_t params;
+
+ bufferlist::iterator *send_iter;
+
+ size_t max_response; /* we need this as we don't stream out response */
+ bufferlist response;
+
+ virtual int handle_header(const string& name, const string& val);
+ void get_params_str(map<string, string>& extra_args, string& dest);
+
+public:
+ // _headers and _params are optional; when given, they are copied.
+ RGWHTTPSimpleRequest(CephContext *_cct, const string& _method, const string& _url,
+ param_vec_t *_headers, param_vec_t *_params) : RGWHTTPClient(_cct, _method, _url),
+ http_status(0), status(0),
+ send_iter(NULL),
+ max_response(0) {
+ set_headers(_headers);
+ set_params(_params);
+ }
+
+ // Replaces the request headers with a copy of *_headers; no-op when null.
+ void set_headers(param_vec_t *_headers) {
+ if (_headers)
+ headers = *_headers;
+ }
+
+ // Replaces the request params with a copy of *_params; no-op when null.
+ void set_params(param_vec_t *_params) {
+ if (_params)
+ params = *_params;
+ }
+
+ int receive_header(void *ptr, size_t len) override;
+ int receive_data(void *ptr, size_t len, bool *pause) override;
+ int send_data(void *ptr, size_t len, bool* pause=nullptr) override;
+
+ bufferlist& get_response() { return response; }
+
+ void get_out_headers(map<string, string> *pheaders); /* modifies out_headers */
+
+ int get_http_status() { return http_status; }
+ int get_status();
+};
+
+// RGWHTTPSimpleRequest with entry points that take an access key: execute()
+// for a method/resource pair and forward_request() for an existing req_info.
+class RGWRESTSimpleRequest : public RGWHTTPSimpleRequest {
+public:
+ RGWRESTSimpleRequest(CephContext *_cct, const string& _method, const string& _url,
+ param_vec_t *_headers, param_vec_t *_params) : RGWHTTPSimpleRequest(_cct, _method, _url, _headers, _params) {}
+
+ int execute(RGWAccessKey& key, const char *method, const char *resource);
+ int forward_request(RGWAccessKey& key, req_info& info, size_t max_response, bufferlist *inbl, bufferlist *outbl);
+};
+
+// Callback interface used by RGWHTTPStreamRWRequest::send_data(): notify() is
+// called with the number of bytes still queued for sending after data has
+// been handed to the HTTP layer.
+class RGWWriteDrainCB {
+public:
+ RGWWriteDrainCB() = default;
+ virtual ~RGWWriteDrainCB() = default;
+ virtual void notify(uint64_t pending_size) = 0;
+};
+
+// Helper that fills a request environment (RGWEnv) and req_info with the
+// headers for an outgoing request and signs it. The env and info objects are
+// passed in as pointers and are not owned by this class.
+class RGWRESTGenerateHTTPHeaders {
+ CephContext *cct;
+ RGWEnv *new_env;
+ req_info *new_info;
+ string method;
+ string url;
+ string resource;
+
+public:
+ RGWRESTGenerateHTTPHeaders(CephContext *_cct, RGWEnv *_env, req_info *_info) : cct(_cct), new_env(_env), new_info(_info) {}
+ void init(const string& method, const string& url, const string& resource, const param_vec_t& params);
+ void set_extra_headers(const map<string, string>& extra_headers);
+ // Headers from an object's attribute set; returns a negative error if the
+ // policy cannot be read from the attributes.
+ int set_obj_attrs(map<string, bufferlist>& rgw_attrs);
+ void set_http_attrs(const map<string, string>& http_attrs);
+ void set_policy(RGWAccessControlPolicy& policy);
+ int sign(RGWAccessKey& key);
+
+ const string& get_url() { return url; }
+};
+
+// HTTP request that streams in both directions: received body data is passed
+// to an optional ReceiveCB, and data to send is taken from an internal buffer
+// (outbl) that callers can keep appending to.
+class RGWHTTPStreamRWRequest : public RGWHTTPSimpleRequest {
+public:
+ class ReceiveCB;
+
+private:
+ Mutex lock;
+ // guards outbl and the write-side flags (see send_data()/add_send_data())
+ Mutex write_lock;
+ // optional; may be null
+ ReceiveCB *cb{nullptr};
+ RGWWriteDrainCB *write_drain_cb{nullptr};
+ bufferlist outbl;
+ bufferlist in_data;
+ size_t chunk_ofs{0};
+ size_t ofs{0};
+ uint64_t write_ofs{0};
+ bool read_paused{false};
+ bool send_paused{false};
+ bool stream_writes{false};
+ bool write_stream_complete{false};
+protected:
+ int handle_header(const string& name, const string& val) override;
+public:
+ int send_data(void *ptr, size_t len, bool *pause) override;
+ int receive_data(void *ptr, size_t len, bool *pause) override;
+
+ // Receiver for incoming body data. receive_data() treats a negative
+ // handle_data() result as an error, zero as "buffer fully handled" and a
+ // positive value as a partial read.
+ class ReceiveCB {
+ protected:
+ uint64_t extra_data_len{0};
+ public:
+ ReceiveCB() = default;
+ virtual ~ReceiveCB() = default;
+ virtual int handle_data(bufferlist& bl, bool *pause = nullptr) = 0;
+ virtual void set_extra_data_len(uint64_t len) {
+ extra_data_len = len;
+ }
+ };
+
+ RGWHTTPStreamRWRequest(CephContext *_cct, const string& _method, const string& _url,
+ param_vec_t *_headers, param_vec_t *_params) : RGWHTTPSimpleRequest(_cct, _method, _url, _headers, _params),
+ lock("RGWHTTPStreamRWRequest"), write_lock("RGWHTTPStreamRWRequest::write_lock") {
+ }
+ RGWHTTPStreamRWRequest(CephContext *_cct, const string& _method, const string& _url, ReceiveCB *_cb,
+ param_vec_t *_headers, param_vec_t *_params) : RGWHTTPSimpleRequest(_cct, _method, _url, _headers, _params),
+ lock("RGWHTTPStreamRWRequest"), write_lock("RGWHTTPStreamRWRequest::write_lock"), cb(_cb) {
+ }
+ virtual ~RGWHTTPStreamRWRequest() override {}
+
+ // Swaps the contents of _outbl with the internal send buffer.
+ void set_outbl(bufferlist& _outbl) {
+ outbl.swap(_outbl);
+ }
+
+ void set_in_cb(ReceiveCB *_cb) { cb = _cb; }
+ void set_write_drain_cb(RGWWriteDrainCB *_cb) { write_drain_cb = _cb; }
+
+ void unpause_receive();
+
+ void add_send_data(bufferlist& bl);
+
+ void set_stream_write(bool s);
+
+ uint64_t get_pending_send_size();
+
+ /* finish streaming writes */
+ void finish_write();
+};
+
+// Streaming request that builds its url, headers and signature for a resource
+// or object. host_style selects whether the bucket goes into the host name
+// (VirtualStyle) or into the path; it defaults to PathStyle.
+class RGWRESTStreamRWRequest : public RGWHTTPStreamRWRequest {
+protected:
+ HostStyle host_style;
+public:
+ RGWRESTStreamRWRequest(CephContext *_cct, const string& _method, const string& _url, RGWHTTPStreamRWRequest::ReceiveCB *_cb,
+ param_vec_t *_headers, param_vec_t *_params, HostStyle _host_style = PathStyle) : RGWHTTPStreamRWRequest(_cct, _method, _url, _cb, _headers, _params), host_style(_host_style) {
+ }
+ virtual ~RGWRESTStreamRWRequest() override {}
+
+ // send_prepare() builds the request without sending it; send() sends a
+ // prepared request; send_request() does both.
+ int send_prepare(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource, bufferlist *send_data = nullptr /* optional input data */);
+ int send_prepare(RGWAccessKey& key, map<string, string>& extra_headers, const rgw_obj& obj);
+ int send(RGWHTTPManager *mgr);
+
+ int send_request(RGWAccessKey& key, map<string, string>& extra_headers, const rgw_obj& obj, RGWHTTPManager *mgr);
+ int send_request(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource, RGWHTTPManager *mgr, bufferlist *send_data = nullptr /* optional input data */);
+
+ // Waits for completion and extracts results from the response headers;
+ // every output parameter is optional.
+ int complete_request(string *etag = nullptr,
+ real_time *mtime = nullptr,
+ uint64_t *psize = nullptr,
+ map<string, string> *pattrs = nullptr,
+ map<string, string> *pheaders = nullptr);
+
+ void add_params(param_vec_t *params);
+
+private:
+ int do_send_prepare(RGWAccessKey *key, map<string, string>& extra_headers, const string& resource, bufferlist *send_data = nullptr /* optional input data */);
+};
+
+// RGWRESTStreamRWRequest with the method fixed to "GET".
+class RGWRESTStreamReadRequest : public RGWRESTStreamRWRequest {
+public:
+ RGWRESTStreamReadRequest(CephContext *_cct, const string& _url, ReceiveCB *_cb, param_vec_t *_headers,
+ param_vec_t *_params, HostStyle _host_style = PathStyle) : RGWRESTStreamRWRequest(_cct, "GET", _url, _cb, _headers, _params, _host_style) {}
+};
+
+// RGWRESTStreamRWRequest with the method fixed to "HEAD". Unlike the GET
+// variant this constructor takes no host style, so the base-class default
+// (PathStyle) applies.
+class RGWRESTStreamHeadRequest : public RGWRESTStreamRWRequest {
+public:
+ RGWRESTStreamHeadRequest(CephContext *_cct, const string& _url, ReceiveCB *_cb, param_vec_t *_headers,
+ param_vec_t *_params) : RGWRESTStreamRWRequest(_cct, "HEAD", _url, _cb, _headers, _params) {}
+};
+
+// Streaming PUT of an object. Usage is two-step: send_init() builds url and
+// resource, then one of the send_ready() overloads sets headers, signs and
+// optionally sends; put_obj_init() does both steps.
+class RGWRESTStreamS3PutObj : public RGWRESTStreamRWRequest {
+ // created in send_ready(); null until then
+ RGWGetDataCB *out_cb;
+ // new_info and headers_gen hold pointers to the members declared before
+ // them, so the declaration order here matters
+ RGWEnv new_env;
+ req_info new_info;
+ RGWRESTGenerateHTTPHeaders headers_gen;
+public:
+ RGWRESTStreamS3PutObj(CephContext *_cct, const string& _method, const string& _url, param_vec_t *_headers,
+ param_vec_t *_params, HostStyle _host_style) : RGWRESTStreamRWRequest(_cct, _method, _url, nullptr, _headers, _params, _host_style),
+ out_cb(NULL), new_info(cct, &new_env), headers_gen(_cct, &new_env, &new_info) {}
+ ~RGWRESTStreamS3PutObj() override;
+
+ void send_init(rgw_obj& obj);
+ int send_ready(RGWAccessKey& key, map<string, bufferlist>& rgw_attrs, bool send);
+ int send_ready(RGWAccessKey& key, const map<string, string>& http_attrs,
+ RGWAccessControlPolicy& policy, bool send);
+ int send_ready(RGWAccessKey& key, bool send);
+
+ int put_obj_init(RGWAccessKey& key, rgw_obj& obj, uint64_t obj_size, map<string, bufferlist>& attrs, bool send);
+
+ RGWGetDataCB *get_out_cb() { return out_cb; }
+};
+
+#endif
+
diff --git a/src/rgw/rgw_rest_config.cc b/src/rgw/rgw_rest_config.cc
new file mode 100644
index 00000000..e5b863d0
--- /dev/null
+++ b/src/rgw/rgw_rest_config.cc
@@ -0,0 +1,85 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/ceph_json.h"
+#include "common/strtol.h"
+#include "rgw_rest.h"
+#include "rgw_op.h"
+#include "rgw_rados.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_config.h"
+#include "rgw_client_io.h"
+#include "common/errno.h"
+#include "include/ceph_assert.h"
+
+#include "services/svc_zone.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Read the zonegroup map; the result code is left in http_ret. */
+void RGWOp_ZoneGroupMap_Get::execute() {
+  http_ret = zonegroup_map.read(g_ceph_context, store->svc.sysobj);
+  if (http_ret >= 0) {
+    return;
+  }
+  dout(5) << "failed to read zone_group map" << dendl;
+}
+
+/*
+ * Send the status line and headers, then the map as JSON: in the old format
+ * it is repackaged as an RGWRegionMap under "region-map", otherwise it is
+ * emitted as "zonegroup-map". Nothing is written after the headers on error.
+ */
+void RGWOp_ZoneGroupMap_Get::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret < 0) {
+    return;
+  }
+
+  if (!old_format) {
+    encode_json("zonegroup-map", zonegroup_map, s->formatter);
+  } else {
+    RGWRegionMap legacy_map;
+    legacy_map.regions = zonegroup_map.zonegroups;
+    legacy_map.master_region = zonegroup_map.master_zonegroup;
+    legacy_map.bucket_quota = zonegroup_map.bucket_quota;
+    legacy_map.user_quota = zonegroup_map.user_quota;
+    encode_json("region-map", legacy_map, s->formatter);
+  }
+
+  flusher.flush();
+}
+
+/*
+ * Send the status line and headers, then the zone params taken from the
+ * zone service as JSON under "zone_params". Nothing is written after the
+ * headers on error.
+ */
+void RGWOp_ZoneConfig_Get::send_response() {
+  const RGWZoneParams& zone_params = store->svc.zone->get_zone_params();
+
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret >= 0) {
+    encode_json("zone_params", zone_params, s->formatter);
+    flusher.flush();
+  }
+}
+
+/*
+ * Pick the GET op from the "type" request argument:
+ *   "zonegroup-map" -> zonegroup map, new format
+ *   "zone"          -> zone config
+ *   anything else   -> zonegroup map, old (region) format
+ */
+RGWOp* RGWHandler_Config::op_get() {
+  bool exists;
+  const string type = s->info.args.get("type", &exists);
+
+  if (type == "zonegroup-map") {
+    return new RGWOp_ZoneGroupMap_Get(false);
+  }
+  if (type == "zone") {
+    return new RGWOp_ZoneConfig_Get();
+  }
+  return new RGWOp_ZoneGroupMap_Get(true);
+}
diff --git a/src/rgw/rgw_rest_config.h b/src/rgw/rgw_rest_config.h
new file mode 100644
index 00000000..56ca129b
--- /dev/null
+++ b/src/rgw/rgw_rest_config.h
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_REST_CONFIG_H
+#define RGW_REST_CONFIG_H
+
+#include "rgw_zone.h"
+
+// REST op that returns the zonegroup map. With old_format set, the op reports
+// itself as "get_region_map"; otherwise as "get_zonegroup_map".
+// Requires the "zone" read capability.
+class RGWOp_ZoneGroupMap_Get : public RGWRESTOp {
+ RGWZoneGroupMap zonegroup_map;
+ bool old_format;
+public:
+ explicit RGWOp_ZoneGroupMap_Get(bool _old_format):old_format(_old_format) {}
+ ~RGWOp_ZoneGroupMap_Get() override {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_READ);
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps);
+ }
+ void execute() override;
+ void send_response() override;
+ const char* name() const override {
+ if (old_format) {
+ return "get_region_map";
+ } else {
+ return "get_zonegroup_map";
+ }
+ }
+};
+
+// REST op that returns the zone configuration. Requires the "zone" read
+// capability. execute() is empty; the response is produced in send_response().
+class RGWOp_ZoneConfig_Get : public RGWRESTOp {
+ RGWZoneParams zone_params;
+public:
+ RGWOp_ZoneConfig_Get() {}
+
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_READ);
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps);
+ }
+ void execute() override {} /* store already has the info we need, just need to send response */
+ void send_response() override ;
+ const char* name() const override {
+ return "get_zone_config";
+ }
+};
+
+// Handler for the config REST endpoint. Only GET is provided (op_get());
+// read_permissions() always succeeds without doing anything.
+class RGWHandler_Config : public RGWHandler_Auth_S3 {
+protected:
+ RGWOp *op_get() override;
+
+ int read_permissions(RGWOp*) override {
+ return 0;
+ }
+public:
+ using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+ ~RGWHandler_Config() override = default;
+};
+
+
+// REST manager for the config endpoint: get_handler() returns a newly
+// allocated RGWHandler_Config for every call.
+class RGWRESTMgr_Config : public RGWRESTMgr {
+public:
+ RGWRESTMgr_Config() = default;
+ ~RGWRESTMgr_Config() override = default;
+
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string&) override {
+ return new RGWHandler_Config(auth_registry);
+ }
+};
+
+#endif /* RGW_REST_CONFIG_H */
diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc
new file mode 100644
index 00000000..08eb51cd
--- /dev/null
+++ b/src/rgw/rgw_rest_conn.cc
@@ -0,0 +1,466 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_rest_conn.h"
+
+#include "services/svc_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Connection to a remote endpoint set. When a zone service is given, the
+ * access key is the zone's system key and the local zonegroup id is
+ * recorded; without one both are left default-constructed.
+ */
+RGWRESTConn::RGWRESTConn(CephContext *_cct, RGWSI_Zone *zone_svc,
+                         const string& _remote_id,
+                         const list<string>& remote_endpoints,
+                         HostStyle _host_style)
+  : cct(_cct),
+    endpoints(remote_endpoints.begin(), remote_endpoints.end()),
+    remote_id(_remote_id), host_style(_host_style)
+{
+  if (!zone_svc) {
+    return;
+  }
+  key = zone_svc->get_zone_params().system_key;
+  self_zone_group = zone_svc->get_zonegroup().get_id();
+}
+
+/*
+ * Connection to a remote endpoint set using explicitly supplied
+ * credentials. The zone service, when given, is only used to record the
+ * local zonegroup id.
+ */
+RGWRESTConn::RGWRESTConn(CephContext *_cct, RGWSI_Zone *zone_svc,
+                         const string& _remote_id,
+                         const list<string>& remote_endpoints,
+                         RGWAccessKey _cred,
+                         HostStyle _host_style)
+  : cct(_cct),
+    endpoints(remote_endpoints.begin(), remote_endpoints.end()),
+    key(std::move(_cred)),
+    remote_id(_remote_id), host_style(_host_style)
+{
+  if (!zone_svc) {
+    return;
+  }
+  self_zone_group = zone_svc->get_zonegroup().get_id();
+}
+
+/*
+ * Move constructor. The endpoint round-robin counter is copied by value.
+ * host_style is carried over as well; both regular constructors set it, so
+ * a moved-to connection must keep the source's host style.
+ */
+RGWRESTConn::RGWRESTConn(RGWRESTConn&& other)
+  : cct(other.cct),
+    endpoints(std::move(other.endpoints)),
+    key(std::move(other.key)),
+    self_zone_group(std::move(other.self_zone_group)),
+    remote_id(std::move(other.remote_id)),
+    host_style(other.host_style),
+    counter(other.counter.load())
+{
+}
+
+/*
+ * Move assignment. Takes over all connection state from `other`, including
+ * host_style, which the regular constructors set; the round-robin counter
+ * is copied by value. Self-assignment is a no-op.
+ */
+RGWRESTConn& RGWRESTConn::operator=(RGWRESTConn&& other)
+{
+  if (this == &other) {
+    return *this;
+  }
+  cct = other.cct;
+  endpoints = std::move(other.endpoints);
+  key = std::move(other.key);
+  self_zone_group = std::move(other.self_zone_group);
+  remote_id = std::move(other.remote_id);
+  host_style = other.host_style;
+  counter = other.counter.load();
+  return *this;
+}
+
+/*
+ * Pick the next endpoint in round-robin order and store it in `endpoint`.
+ * Returns 0 on success, -EIO when no endpoints are configured.
+ */
+int RGWRESTConn::get_url(string& endpoint)
+{
+  if (!endpoints.empty()) {
+    const int idx = ++counter;
+    endpoint = endpoints[idx % endpoints.size()];
+    return 0;
+  }
+
+  ldout(cct, 0) << "ERROR: endpoints not configured for upstream zone" << dendl;
+  return -EIO;
+}
+
+/*
+ * Pick the next endpoint in round-robin order and return it. Returns an
+ * empty string (after logging a warning) when no endpoints are configured.
+ */
+string RGWRESTConn::get_url()
+{
+  if (endpoints.empty()) {
+    ldout(cct, 0) << "WARNING: endpoints not configured for upstream zone" << dendl; /* we'll catch this later */
+    return string();
+  }
+
+  const int idx = ++counter;
+  return endpoints[idx % endpoints.size()];
+}
+
+/* Add the uid and zonegroup parameters (in that order) to params. */
+void RGWRESTConn::populate_params(param_vec_t& params, const rgw_user *uid, const string& zonegroup)
+{
+  populate_uid(params, uid);
+
+  populate_zonegroup(params, zonegroup);
+}
+
+/*
+ * Forward a request to the next remote endpoint.
+ *
+ * uid          - user the request is made for
+ * info         - the request being forwarded
+ * objv         - optional object version; its tag and version number are
+ *                added as system parameters
+ * max_response - limit for the buffered response
+ * inbl/outbl   - request body / response body
+ *
+ * Returns a negative error if no endpoint is available, otherwise the
+ * result of forward_request().
+ */
+int RGWRESTConn::forward(const rgw_user& uid, req_info& info, obj_version *objv, size_t max_response, bufferlist *inbl, bufferlist *outbl)
+{
+  string url;
+  int ret = get_url(url);
+  if (ret < 0)
+    return ret;
+  param_vec_t params;
+  populate_params(params, &uid, self_zone_group);
+  if (objv) {
+    params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "tag", objv->tag));
+    /* a long long needs up to 20 characters plus the terminator; a
+     * 16-byte buffer would silently truncate large versions */
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%lld", (long long)objv->ver);
+    params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "ver", buf));
+  }
+  RGWRESTSimpleRequest req(cct, info.method, url, NULL, &params);
+  return req.forward_request(key, info, max_response, inbl, outbl);
+}
+
+// RGWGetDataCB that only stores a copy of the object it was created for; it
+// overrides nothing from the base class.
+class StreamObjData : public RGWGetDataCB {
+ rgw_obj obj;
+public:
+ explicit StreamObjData(rgw_obj& _obj) : obj(_obj) {}
+};
+
+/*
+ * Create a PUT request for the object and run send_init() on it. The
+ * request is neither signed nor sent here; the caller receives it in *req
+ * and owns it.
+ *
+ * Returns 0 on success, or a negative error if no endpoint is available.
+ */
+int RGWRESTConn::put_obj_send_init(rgw_obj& obj, const rgw_http_param_pair *extra_params, RGWRESTStreamS3PutObj **req)
+{
+  string url;
+  const int ret = get_url(url);
+  if (ret < 0)
+    return ret;
+
+  /* a default-constructed user is passed along */
+  rgw_user uid;
+  param_vec_t params;
+  populate_params(params, &uid, self_zone_group);
+  if (extra_params) {
+    append_param_list(params, extra_params);
+  }
+
+  auto *wr = new RGWRESTStreamS3PutObj(cct, "PUT", url, nullptr, &params, host_style);
+  wr->send_init(obj);
+
+  *req = wr;
+  return 0;
+}
+
+/*
+ * Create a PUT request for the object and initialise it with the given
+ * attributes via put_obj_init(). On success the request is returned in *req
+ * and owned by the caller; on failure it is deleted here.
+ */
+int RGWRESTConn::put_obj_async(const rgw_user& uid, rgw_obj& obj, uint64_t obj_size,
+                               map<string, bufferlist>& attrs, bool send,
+                               RGWRESTStreamS3PutObj **req)
+{
+  string url;
+  int ret = get_url(url);
+  if (ret < 0)
+    return ret;
+
+  param_vec_t params;
+  populate_params(params, &uid, self_zone_group);
+
+  auto *wr = new RGWRESTStreamS3PutObj(cct, "PUT", url, nullptr, &params, host_style);
+  ret = wr->put_obj_init(key, obj, obj_size, attrs, send);
+  if (ret >= 0) {
+    *req = wr;
+    return 0;
+  }
+
+  delete wr;
+  return ret;
+}
+
+/*
+ * Wait for a PUT request to finish, collecting etag and mtime, then delete
+ * the request. Returns the result of the request's complete_request().
+ */
+int RGWRESTConn::complete_request(RGWRESTStreamS3PutObj *req, string& etag, real_time *mtime)
+{
+  const int result = req->complete_request(&etag, mtime);
+
+  delete req;
+  return result;
+}
+
+/*
+ * Store *t, formatted as a GMT time string, under header_name. With
+ * high_precision_time the nanosecond formatter is used. Does nothing when
+ * t is null.
+ */
+static void set_date_header(const real_time *t, map<string, string>& headers, bool high_precision_time, const string& header_name)
+{
+  if (t == nullptr) {
+    return;
+  }
+
+  utime_t tm = utime_t(*t);
+  stringstream formatted;
+  if (!high_precision_time) {
+    tm.gmtime(formatted);
+  } else {
+    tm.gmtime_nsec(formatted);
+  }
+
+  headers[header_name] = formatted.str();
+}
+
+/* Store val, rendered through operator<<, under header_name. */
+template <class T>
+static void set_header(T val, map<string, string>& headers, const string& header_name)
+{
+  stringstream rendered;
+  rendered << val;
+
+  headers[header_name] = rendered.str();
+}
+
+
+/*
+ * Positional-argument flavour of get_obj(): packs the arguments into a
+ * get_obj_params and forwards to get_obj(obj, params, send, req).
+ *
+ * Every argument is copied into the params, including unmod_ptr and
+ * mod_zone_id, which the params-based overload reads when building the
+ * If-Unmodified-Since and destination zone headers.
+ */
+int RGWRESTConn::get_obj(const rgw_user& uid, req_info *info /* optional */, const rgw_obj& obj,
+                         const real_time *mod_ptr, const real_time *unmod_ptr,
+                         uint32_t mod_zone_id, uint64_t mod_pg_ver,
+                         bool prepend_metadata, bool get_op, bool rgwx_stat,
+                         bool sync_manifest, bool skip_decrypt,
+                         bool send, RGWHTTPStreamRWRequest::ReceiveCB *cb, RGWRESTStreamRWRequest **req)
+{
+  get_obj_params params;
+  params.uid = uid;
+  params.info = info;
+  params.mod_ptr = mod_ptr;
+  params.unmod_ptr = unmod_ptr;
+  params.mod_zone_id = mod_zone_id;
+  params.mod_pg_ver = mod_pg_ver;
+  params.prepend_metadata = prepend_metadata;
+  params.get_op = get_op;
+  params.rgwx_stat = rgwx_stat;
+  params.sync_manifest = sync_manifest;
+  params.skip_decrypt = skip_decrypt;
+  params.cb = cb;
+  return get_obj(obj, params, send, req);
+}
+
+// Creates a GET (in_params.get_op) or HEAD request for the object, prepares it
+// and, if `send` is set, sends it. On success the request is returned in *req;
+// on failure it is deleted and *req is set to nullptr.
+int RGWRESTConn::get_obj(const rgw_obj& obj, const get_obj_params& in_params, bool send, RGWRESTStreamRWRequest **req)
+{
+ string url;
+ int ret = get_url(url);
+ if (ret < 0)
+ return ret;
+
+ // query parameters: uid/zonegroup plus the optional system flags
+ param_vec_t params;
+ populate_params(params, &in_params.uid, self_zone_group);
+ if (in_params.prepend_metadata) {
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "prepend-metadata", "true"));
+ }
+ if (in_params.rgwx_stat) {
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "stat", "true"));
+ }
+ if (in_params.sync_manifest) {
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "sync-manifest", ""));
+ }
+ if (in_params.skip_decrypt) {
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "skip-decrypt", ""));
+ }
+ if (!obj.key.instance.empty()) {
+ const string& instance = obj.key.instance;
+ params.push_back(param_pair_t("versionId", instance));
+ }
+ // note: only the GET request is given the connection's host style
+ if (in_params.get_op) {
+ *req = new RGWRESTStreamReadRequest(cct, url, in_params.cb, NULL, &params, host_style);
+ } else {
+ *req = new RGWRESTStreamHeadRequest(cct, url, in_params.cb, NULL, &params);
+ }
+ map<string, string> extra_headers;
+ if (in_params.info) {
+ const auto& orig_map = in_params.info->env->get_map();
+
+ /* add original headers that start with HTTP_X_AMZ_ */
+ static constexpr char SEARCH_AMZ_PREFIX[] = "HTTP_X_AMZ_";
+ // the map is ordered, so iteration can stop at the first key without the prefix
+ for (auto iter= orig_map.lower_bound(SEARCH_AMZ_PREFIX); iter != orig_map.end(); ++iter) {
+ const string& name = iter->first;
+ if (name == "HTTP_X_AMZ_DATE") /* don't forward date from original request */
+ continue;
+ if (name.compare(0, strlen(SEARCH_AMZ_PREFIX), SEARCH_AMZ_PREFIX) != 0)
+ break;
+ extra_headers[iter->first] = iter->second;
+ }
+ }
+
+ // conditional / range headers; each is only added when its input is set
+ set_date_header(in_params.mod_ptr, extra_headers, in_params.high_precision_time, "HTTP_IF_MODIFIED_SINCE");
+ set_date_header(in_params.unmod_ptr, extra_headers, in_params.high_precision_time, "HTTP_IF_UNMODIFIED_SINCE");
+ if (!in_params.etag.empty()) {
+ set_header(in_params.etag, extra_headers, "HTTP_IF_MATCH");
+ }
+ if (in_params.mod_zone_id != 0) {
+ set_header(in_params.mod_zone_id, extra_headers, "HTTP_DEST_ZONE_SHORT_ID");
+ }
+ if (in_params.mod_pg_ver != 0) {
+ set_header(in_params.mod_pg_ver, extra_headers, "HTTP_DEST_PG_VER");
+ }
+ if (in_params.range_is_set) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "bytes=%lld-%lld", (long long)in_params.range_start, (long long)in_params.range_end);
+ set_header(buf, extra_headers, "RANGE");
+ }
+
+ int r = (*req)->send_prepare(key, extra_headers, obj);
+ if (r < 0) {
+ goto done_err;
+ }
+
+ // caller only wanted the request prepared
+ if (!send) {
+ return 0;
+ }
+
+ r = (*req)->send(nullptr);
+ if (r < 0) {
+ goto done_err;
+ }
+ return 0;
+done_err:
+ // the request was allocated above; release it and clear the out pointer
+ delete *req;
+ *req = nullptr;
+ return r;
+}
+
+/*
+ * Wait for a streaming request to finish, collecting whichever outputs were
+ * asked for, then delete the request. Returns the result of the request's
+ * complete_request().
+ */
+int RGWRESTConn::complete_request(RGWRESTStreamRWRequest *req,
+                                  string *etag,
+                                  real_time *mtime,
+                                  uint64_t *psize,
+                                  map<string, string> *pattrs,
+                                  map<string, string> *pheaders)
+{
+  const int result = req->complete_request(etag, mtime, psize, pattrs, pheaders);
+
+  delete req;
+  return result;
+}
+
+// Synchronously fetches `resource` from the remote and collects the response
+// body into `bl`.
+//
+// extra_params / extra_headers are optional (may be null) and are copied, not
+// modified. send_data and mgr are passed straight through to the request.
+// Returns a negative error code if no URL could be obtained or the request
+// could not be sent; otherwise the result of completing the request.
+int RGWRESTConn::get_resource(const string& resource,
+                              param_vec_t *extra_params,
+                              map<string, string> *extra_headers,
+                              bufferlist& bl,
+                              bufferlist *send_data,
+                              RGWHTTPManager *mgr)
+{
+  string endpoint_url;
+  int r = get_url(endpoint_url);
+  if (r < 0) {
+    return r;
+  }
+
+  // caller-supplied params first, then whatever populate_params() adds
+  param_vec_t query_params;
+  if (extra_params != nullptr) {
+    query_params.insert(query_params.end(), extra_params->begin(), extra_params->end());
+  }
+  populate_params(query_params, nullptr, self_zone_group);
+
+  RGWStreamIntoBufferlist sink(bl);
+  RGWRESTStreamReadRequest req(cct, endpoint_url, &sink, NULL, &query_params, host_style);
+
+  map<string, string> request_headers;
+  if (extra_headers != nullptr) {
+    request_headers.insert(extra_headers->begin(), extra_headers->end());
+  }
+
+  r = req.send_request(&key, request_headers, resource, mgr, send_data);
+  if (r < 0) {
+    ldout(cct, 5) << __func__ << ": send_request() resource=" << resource << " returned ret=" << r << dendl;
+    return r;
+  }
+
+  return req.complete_request();
+}
+
+// Prepares a read of `_resource` through `_conn`; the query parameters come
+// from a null-terminated rgw_http_param_pair array (may be null).
+RGWRESTReadResource::RGWRESTReadResource(RGWRESTConn *_conn,
+                                         const string& _resource,
+                                         const rgw_http_param_pair *pp,
+                                         param_vec_t *extra_headers,
+                                         RGWHTTPManager *_mgr)
+  : cct(_conn->get_ctx()),
+    conn(_conn),
+    resource(_resource),
+    params(make_param_list(pp)),
+    cb(bl),
+    mgr(_mgr),
+    req(cct, conn->get_url(), &cb, NULL, NULL)
+{
+  // params are attached to req here, after populate_params() has run
+  init_common(extra_headers);
+}
+
+// Prepares a read of `_resource` through `_conn`; the query parameters are
+// copied from an already-built parameter list.
+RGWRESTReadResource::RGWRESTReadResource(RGWRESTConn *_conn,
+                                         const string& _resource,
+                                         param_vec_t& _params,
+                                         param_vec_t *extra_headers,
+                                         RGWHTTPManager *_mgr)
+  : cct(_conn->get_ctx()),
+    conn(_conn),
+    resource(_resource),
+    params(_params),
+    cb(bl),
+    mgr(_mgr),
+    req(cct, conn->get_url(), &cb, NULL, NULL)
+{
+  // params are attached to req here, after populate_params() has run
+  init_common(extra_headers);
+}
+
+// Shared constructor tail: lets the connection add its own query parameters,
+// copies the optional extra headers, then hands the final parameter list to
+// the underlying request.
+void RGWRESTReadResource::init_common(param_vec_t *extra_headers)
+{
+  conn->populate_params(params, nullptr, conn->get_self_zonegroup());
+
+  if (extra_headers != nullptr) {
+    headers.insert(extra_headers->begin(), extra_headers->end());
+  }
+
+  // req keeps a pointer to the member list, so this must come last
+  req.set_params(&params);
+}
+
+// Blocking read: sends the request and waits for it to complete.
+// Returns a negative error if sending failed, otherwise the completion result.
+int RGWRESTReadResource::read()
+{
+  const int send_ret = req.send_request(&conn->get_key(), headers, resource, mgr);
+  if (send_ret >= 0) {
+    return req.complete_request();
+  }
+
+  ldout(cct, 5) << __func__ << ": send_request() resource=" << resource << " returned ret=" << send_ret << dendl;
+  return send_ret;
+}
+
+// Non-blocking read: only sends the request. The caller collects the result
+// later through one of the wait() overloads.
+// Returns a negative error if sending failed, 0 otherwise.
+int RGWRESTReadResource::aio_read()
+{
+  const int send_ret = req.send_request(&conn->get_key(), headers, resource, mgr);
+  if (send_ret >= 0) {
+    return 0;
+  }
+
+  ldout(cct, 5) << __func__ << ": send_request() resource=" << resource << " returned ret=" << send_ret << dendl;
+  return send_ret;
+}
+
+// Prepares a `_method` request for `_resource` through `_conn`; the query
+// parameters come from a null-terminated rgw_http_param_pair array.
+RGWRESTSendResource::RGWRESTSendResource(RGWRESTConn *_conn,
+                                         const string& _method,
+                                         const string& _resource,
+                                         const rgw_http_param_pair *pp,
+                                         param_vec_t *extra_headers,
+                                         RGWHTTPManager *_mgr)
+  : cct(_conn->get_ctx()),
+    conn(_conn),
+    method(_method),
+    resource(_resource),
+    params(make_param_list(pp)),
+    cb(bl),
+    mgr(_mgr),
+    // uses the member copy of the method string, not the constructor argument
+    req(cct, method.c_str(), conn->get_url(), &cb, NULL, NULL, _conn->get_host_style())
+{
+  init_common(extra_headers);
+}
+
+// Prepares a `_method` request for `_resource` through `_conn`; the query
+// parameters are copied from an already-built parameter list.
+RGWRESTSendResource::RGWRESTSendResource(RGWRESTConn *_conn,
+                                         const string& _method,
+                                         const string& _resource,
+                                         param_vec_t& params,
+                                         param_vec_t *extra_headers,
+                                         RGWHTTPManager *_mgr)
+  : cct(_conn->get_ctx()),
+    conn(_conn),
+    method(_method),
+    resource(_resource),
+    params(params),  // member initialised from the same-named argument
+    cb(bl),
+    mgr(_mgr),
+    // uses the member copy of the method string, not the constructor argument
+    req(cct, method.c_str(), conn->get_url(), &cb, NULL, NULL, _conn->get_host_style())
+{
+  init_common(extra_headers);
+}
+
+// Shared constructor tail: lets the connection add its own query parameters,
+// copies the optional extra headers, then hands the final parameter list to
+// the underlying request.
+void RGWRESTSendResource::init_common(param_vec_t *extra_headers)
+{
+  conn->populate_params(params, nullptr, conn->get_self_zonegroup());
+
+  if (extra_headers != nullptr) {
+    headers.insert(extra_headers->begin(), extra_headers->end());
+  }
+
+  // req keeps a pointer to the member list, so this must come last
+  req.set_params(&params);
+}
+
+// Blocking send: uploads `outbl` as the request body and waits for completion.
+// Returns a negative error if sending failed, otherwise the completion result.
+int RGWRESTSendResource::send(bufferlist& outbl)
+{
+  req.set_send_length(outbl.length());
+  req.set_outbl(outbl);
+
+  const int send_ret = req.send_request(&conn->get_key(), headers, resource, mgr);
+  if (send_ret >= 0) {
+    return req.complete_request();
+  }
+
+  ldout(cct, 5) << __func__ << ": send_request() resource=" << resource << " returned ret=" << send_ret << dendl;
+  return send_ret;
+}
+
+// Non-blocking send: queues `outbl` as the request body and sends the request.
+// The caller collects the result later through one of the wait() overloads.
+// Returns a negative error if sending failed, 0 otherwise.
+int RGWRESTSendResource::aio_send(bufferlist& outbl)
+{
+  req.set_send_length(outbl.length());
+  req.set_outbl(outbl);
+
+  const int send_ret = req.send_request(&conn->get_key(), headers, resource, mgr);
+  if (send_ret >= 0) {
+    return 0;
+  }
+
+  ldout(cct, 5) << __func__ << ": send_request() resource=" << resource << " returned ret=" << send_ret << dendl;
+  return send_ret;
+}
diff --git a/src/rgw/rgw_rest_conn.h b/src/rgw/rgw_rest_conn.h
new file mode 100644
index 00000000..9a210292
--- /dev/null
+++ b/src/rgw/rgw_rest_conn.h
@@ -0,0 +1,521 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_CONN_H
+#define CEPH_RGW_REST_CONN_H
+
+#include "rgw_rados.h"
+#include "rgw_rest_client.h"
+#include "common/ceph_json.h"
+#include "common/RefCountedObj.h"
+
+#include <atomic>
+
+class CephContext;
+class RGWSI_Zone;
+
+// Parses the contents of `bl` as JSON and decodes the result into `t`.
+// Returns 0 on success, -EINVAL if the text is not valid JSON or if decoding
+// raises JSONDecoder::err.
+template <class T>
+static int parse_decode_json(T& t, bufferlist& bl)
+{
+  JSONParser parser;
+  if (!parser.parse(bl.c_str(), bl.length())) {
+    return -EINVAL;
+  }
+
+  try {
+    decode_json_obj(t, &parser);
+    return 0;
+  } catch (JSONDecoder::err& e) {
+    return -EINVAL;
+  }
+}
+
+// One query parameter expressed as C strings. Lists of these are terminated
+// by an entry whose key is null; append_param_list() treats a null val as an
+// empty string.
+struct rgw_http_param_pair {
+ const char *key;
+ const char *val;
+};
+
+// append a null-terminated rgw_http_param_pair list into a list of string pairs
+inline void append_param_list(param_vec_t& params, const rgw_http_param_pair* pp)
+{
+  // a null list is allowed and appends nothing
+  if (!pp) {
+    return;
+  }
+
+  // walk until the terminating entry (null key); a null value becomes ""
+  for (; pp->key; ++pp) {
+    string name = pp->key;
+    string value = (pp->val ? pp->val : "");
+    params.emplace_back(make_pair(std::move(name), std::move(value)));
+  }
+}
+
+// copy a null-terminated rgw_http_param_pair list into a list of string pairs
+inline param_vec_t make_param_list(const rgw_http_param_pair* pp)
+{
+  // start empty and reuse the append helper; a null `pp` yields an empty list
+  param_vec_t result;
+  append_param_list(result, pp);
+  return result;
+}
+
+// Copies a string->string map into a list of string pairs, in the map's
+// iteration order. A null map yields an empty list.
+inline param_vec_t make_param_list(const map<string, string> *pp)
+{
+  param_vec_t params;
+  if (!pp) {
+    return params;
+  }
+  // iterate by const reference: iterating by value copied each entry once
+  // just to copy it again into the vector
+  for (const auto& entry : *pp) {
+    params.emplace_back(entry.first, entry.second);
+  }
+  return params;
+}
+
+// Handle for issuing REST requests to a remote RGW. It holds the candidate
+// endpoints, the access key handed to outgoing requests, the local zonegroup
+// name, the remote's id and the URL host style.
+class RGWRESTConn
+{
+ CephContext *cct;
+ vector<string> endpoints;
+ RGWAccessKey key;
+ string self_zone_group;
+ string remote_id;
+ HostStyle host_style;
+ // NOTE(review): presumably used by get_url() to rotate through endpoints -- confirm in the .cc
+ std::atomic<int64_t> counter = { 0 };
+
+public:
+
+ RGWRESTConn(CephContext *_cct, RGWSI_Zone *zone_svc, const string& _remote_id, const list<string>& endpoints, HostStyle _host_style = PathStyle);
+ RGWRESTConn(CephContext *_cct, RGWSI_Zone *zone_svc, const string& _remote_id, const list<string>& endpoints, RGWAccessKey _cred, HostStyle _host_style = PathStyle);
+
+ // custom move needed for atomic
+ RGWRESTConn(RGWRESTConn&& other);
+ RGWRESTConn& operator=(RGWRESTConn&& other);
+ virtual ~RGWRESTConn() = default;
+
+ // Writes an endpoint URL into `endpoint`; a negative return is an error.
+ int get_url(string& endpoint);
+ // Same, but returns the URL by value with no error channel.
+ string get_url();
+ const string& get_self_zonegroup() {
+ return self_zone_group;
+ }
+ const string& get_remote_id() {
+ return remote_id;
+ }
+ // Returns a mutable reference to the stored key.
+ RGWAccessKey& get_key() {
+ return key;
+ }
+
+ HostStyle get_host_style() {
+ return host_style;
+ }
+
+ CephContext *get_ctx() {
+ return cct;
+ }
+ size_t get_endpoint_count() const { return endpoints.size(); }
+
+ // Adds connection-specific query parameters to `params`. Virtual so that
+ // subclasses can change what is added (S3RESTConn adds nothing).
+ virtual void populate_params(param_vec_t& params, const rgw_user *uid, const string& zonegroup);
+
+ /* sync request */
+ int forward(const rgw_user& uid, req_info& info, obj_version *objv, size_t max_response, bufferlist *inbl, bufferlist *outbl);
+
+
+ /* async requests */
+ int put_obj_send_init(rgw_obj& obj, const rgw_http_param_pair *extra_params, RGWRESTStreamS3PutObj **req);
+ int put_obj_async(const rgw_user& uid, rgw_obj& obj, uint64_t obj_size,
+ map<string, bufferlist>& attrs, bool send, RGWRESTStreamS3PutObj **req);
+ int complete_request(RGWRESTStreamS3PutObj *req, string& etag, ceph::real_time *mtime);
+
+ // Options for get_obj(). Comments describe how get_obj() uses each field.
+ struct get_obj_params {
+ rgw_user uid;
+ // when set, the original request's HTTP_X_AMZ_* headers (except the date) are forwarded
+ req_info *info{nullptr};
+ // sent as HTTP_IF_MODIFIED_SINCE / HTTP_IF_UNMODIFIED_SINCE
+ const ceph::real_time *mod_ptr{nullptr};
+ const ceph::real_time *unmod_ptr{nullptr};
+ bool high_precision_time{true};
+
+ // sent as HTTP_IF_MATCH when non-empty
+ string etag;
+
+ // sent as HTTP_DEST_ZONE_SHORT_ID / HTTP_DEST_PG_VER when non-zero
+ uint32_t mod_zone_id{0};
+ uint64_t mod_pg_ver{0};
+
+ bool prepend_metadata{false};
+ // true: issue a stream read request; false: issue a head request
+ bool get_op{false};
+ bool rgwx_stat{false};
+ bool sync_manifest{false};
+
+ bool skip_decrypt{true};
+ // receive callback handed to the request object
+ RGWHTTPStreamRWRequest::ReceiveCB *cb{nullptr};
+
+ // when range_is_set, a "bytes=<range_start>-<range_end>" RANGE header is sent
+ bool range_is_set{false};
+ uint64_t range_start{0};
+ uint64_t range_end{0};
+ };
+
+ // On success *req points to a newly allocated request; on failure it is
+ // deleted and *req is reset to nullptr. When `send` is false the request is
+ // only prepared, not sent.
+ int get_obj(const rgw_obj& obj, const get_obj_params& params, bool send, RGWRESTStreamRWRequest **req);
+
+ int get_obj(const rgw_user& uid, req_info *info /* optional */, const rgw_obj& obj,
+ const ceph::real_time *mod_ptr, const ceph::real_time *unmod_ptr,
+ uint32_t mod_zone_id, uint64_t mod_pg_ver,
+ bool prepend_metadata, bool get_op, bool rgwx_stat, bool sync_manifest,
+ bool skip_decrypt, bool send, RGWHTTPStreamRWRequest::ReceiveCB *cb, RGWRESTStreamRWRequest **req);
+ // Completes the request and deletes `req`.
+ int complete_request(RGWRESTStreamRWRequest *req,
+ string *etag,
+ ceph::real_time *mtime,
+ uint64_t *psize,
+ map<string, string> *pattrs,
+ map<string, string> *pheaders);
+
+ // Synchronous fetch of `resource`; the response body is collected into `bl`.
+ int get_resource(const string& resource,
+ param_vec_t *extra_params,
+ map<string, string>* extra_headers,
+ bufferlist& bl,
+ bufferlist *send_data = nullptr,
+ RGWHTTPManager *mgr = nullptr);
+
+ // get_resource() followed by JSON decoding of the response into `t`.
+ template <class T>
+ int get_json_resource(const string& resource, param_vec_t *params, bufferlist *in_data, T& t);
+ template <class T>
+ int get_json_resource(const string& resource, param_vec_t *params, T& t);
+ template <class T>
+ int get_json_resource(const string& resource, const rgw_http_param_pair *pp, T& t);
+
+private:
+ // Appends the zonegroup system parameter unless `zonegroup` is empty.
+ void populate_zonegroup(param_vec_t& params, const string& zonegroup) {
+ if (!zonegroup.empty()) {
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", zonegroup));
+ }
+ }
+ // Appends the uid system parameter when `uid` is non-null and non-empty.
+ void populate_uid(param_vec_t& params, const rgw_user *uid) {
+ if (uid) {
+ // the string is built before the empty() check and only used if it passes
+ string uid_str = uid->to_str();
+ if (!uid->empty()){
+ params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "uid", uid_str));
+ }
+ }
+ }
+};
+
+// Connection flavour that behaves like RGWRESTConn but never adds any extra
+// query parameters to outgoing requests.
+class S3RESTConn : public RGWRESTConn {
+public:
+  S3RESTConn(CephContext *_cct, RGWSI_Zone *svc_zone, const string& _remote_id,
+             const list<string>& endpoints, HostStyle _host_style = PathStyle)
+    : RGWRESTConn(_cct, svc_zone, _remote_id, endpoints, _host_style)
+  {}
+
+  S3RESTConn(CephContext *_cct, RGWSI_Zone *svc_zone, const string& _remote_id,
+             const list<string>& endpoints, RGWAccessKey _cred,
+             HostStyle _host_style = PathStyle)
+    : RGWRESTConn(_cct, svc_zone, _remote_id, endpoints, _cred, _host_style)
+  {}
+
+  ~S3RESTConn() override = default;
+
+  void populate_params(param_vec_t& params, const rgw_user *uid, const string& zonegroup) override {
+    // deliberately a no-op: no params are populated for an S3 REST connection
+  }
+};
+
+
+// Fetches `resource` (optionally sending `in_data`) and decodes the JSON
+// response body into `t`. Returns 0 on success, or the negative error from
+// either the fetch or the decode step.
+template<class T>
+int RGWRESTConn::get_json_resource(const string& resource, param_vec_t *params, bufferlist *in_data, T& t)
+{
+  bufferlist response;
+
+  const int fetch_ret = get_resource(resource, params, nullptr, response, in_data);
+  if (fetch_ret < 0) {
+    return fetch_ret;
+  }
+
+  const int decode_ret = parse_decode_json(t, response);
+  return decode_ret < 0 ? decode_ret : 0;
+}
+
+// Convenience overload: same as the four-argument form with no request body.
+template<class T>
+int RGWRESTConn::get_json_resource(const string& resource, param_vec_t *params, T& t)
+{
+  bufferlist *no_body = nullptr;
+  return get_json_resource(resource, params, no_body, t);
+}
+
+// Convenience overload taking a null-terminated rgw_http_param_pair array,
+// which is first converted into a parameter list.
+template<class T>
+int RGWRESTConn::get_json_resource(const string& resource, const rgw_http_param_pair *pp, T& t)
+{
+  param_vec_t converted = make_param_list(pp);
+  return get_json_resource(resource, &converted, t);
+}
+
+// Receive callback that accumulates everything it is given into a
+// caller-owned bufferlist (held by reference, so the target must outlive
+// this object).
+class RGWStreamIntoBufferlist : public RGWHTTPStreamRWRequest::ReceiveCB {
+ bufferlist& bl;
+public:
+ explicit RGWStreamIntoBufferlist(bufferlist& _bl) : bl(_bl) {}
+ // Moves the incoming data onto the end of the target; `pause` is not used.
+ int handle_data(bufferlist& inbl, bool *pause) override {
+ bl.claim_append(inbl);
+ // NOTE(review): the length is read after claim_append(); if that call leaves
+ // inbl empty this always returns 0 -- confirm callers expect that before changing it.
+ return inbl.length();
+ }
+};
+
+// A single read request against a resource of a remote RGWRESTConn. The
+// response body is collected into the internal bufferlist `bl` and can be
+// returned raw or decoded from JSON.
+class RGWRESTReadResource : public RefCountedObject, public RGWIOProvider {
+ CephContext *cct;
+ RGWRESTConn *conn;
+ string resource;
+ param_vec_t params;
+ map<string, string> headers;
+ // bl and cb are declared before req: the constructors build cb from bl and
+ // pass &cb to req
+ bufferlist bl;
+ RGWStreamIntoBufferlist cb;
+
+ RGWHTTPManager *mgr;
+ RGWRESTStreamReadRequest req;
+
+ // shared constructor tail: populates params/headers and attaches params to req
+ void init_common(param_vec_t *extra_headers);
+
+public:
+ RGWRESTReadResource(RGWRESTConn *_conn,
+ const string& _resource,
+ const rgw_http_param_pair *pp,
+ param_vec_t *extra_headers,
+ RGWHTTPManager *_mgr);
+
+ RGWRESTReadResource(RGWRESTConn *_conn,
+ const string& _resource,
+ param_vec_t& _params,
+ param_vec_t *extra_headers,
+ RGWHTTPManager *_mgr);
+ ~RGWRESTReadResource() = default;
+
+ // the io id and user-info accessors below all forward to the underlying request
+ rgw_io_id get_io_id(int io_type) {
+ return req.get_io_id(io_type);
+ }
+
+ void set_io_user_info(void *user_info) override {
+ req.set_io_user_info(user_info);
+ }
+
+ void *get_io_user_info() override {
+ return req.get_io_user_info();
+ }
+
+ // Decodes the collected body as JSON into *dest, unless the request status is negative.
+ template <class T>
+ int decode_resource(T *dest);
+
+ // Blocking: send the request and complete it.
+ int read();
+
+ // Non-blocking: only send the request; collect the result with wait().
+ int aio_read();
+
+ string to_str() {
+ return req.to_str();
+ }
+
+ int get_http_status() {
+ return req.get_http_status();
+ }
+
+ // Waits for the request and copies the collected body into *pbl.
+ // Returns the wait error or a negative request status without touching
+ // *pbl; 0 on success. pbl must not be null.
+ int wait(bufferlist *pbl) {
+ int ret = req.wait();
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (req.get_status() < 0) {
+ return req.get_status();
+ }
+ *pbl = bl;
+ return 0;
+ }
+
+ // Waits for the request, then JSON-decodes the body into *dest.
+ template <class T>
+ int wait(T *dest);
+
+ // read() followed by decode_resource().
+ template <class T>
+ int fetch(T *dest);
+};
+
+
+// Decodes the collected response body as JSON into *dest.
+// Returns the request status if it is negative, the decode error if decoding
+// fails, 0 otherwise.
+template <class T>
+int RGWRESTReadResource::decode_resource(T *dest)
+{
+  const int status = req.get_status();
+  if (status < 0) {
+    return status;
+  }
+
+  const int decode_ret = parse_decode_json(*dest, bl);
+  return decode_ret < 0 ? decode_ret : 0;
+}
+
+// Blocking convenience: performs read() and, if that succeeds, decodes the
+// body into *dest. Returns the first negative result, or 0.
+template <class T>
+int RGWRESTReadResource::fetch(T *dest)
+{
+  const int read_ret = read();
+  if (read_ret < 0) {
+    return read_ret;
+  }
+
+  const int decode_ret = decode_resource(dest);
+  return decode_ret < 0 ? decode_ret : 0;
+}
+
+// Waits for a request started with aio_read() and, if waiting succeeds,
+// decodes the body into *dest. Returns the first negative result, or 0.
+template <class T>
+int RGWRESTReadResource::wait(T *dest)
+{
+  const int wait_ret = req.wait();
+  if (wait_ret < 0) {
+    return wait_ret;
+  }
+
+  const int decode_ret = decode_resource(dest);
+  return decode_ret < 0 ? decode_ret : 0;
+}
+
+// A single request with a body (the HTTP method is chosen by the caller)
+// against a resource of a remote RGWRESTConn. The response body is collected
+// into the internal bufferlist `bl`.
+class RGWRESTSendResource : public RefCountedObject, public RGWIOProvider {
+ CephContext *cct;
+ RGWRESTConn *conn;
+ string method;
+ string resource;
+ param_vec_t params;
+ map<string, string> headers;
+ // bl and cb are declared before req: the constructors build cb from bl and
+ // pass &cb to req
+ bufferlist bl;
+ RGWStreamIntoBufferlist cb;
+
+ RGWHTTPManager *mgr;
+ RGWRESTStreamRWRequest req;
+
+ // shared constructor tail: populates params/headers and attaches params to req
+ void init_common(param_vec_t *extra_headers);
+
+public:
+ RGWRESTSendResource(RGWRESTConn *_conn,
+ const string& _method,
+ const string& _resource,
+ const rgw_http_param_pair *pp,
+ param_vec_t *extra_headers,
+ RGWHTTPManager *_mgr);
+
+ RGWRESTSendResource(RGWRESTConn *_conn,
+ const string& _method,
+ const string& _resource,
+ param_vec_t& params,
+ param_vec_t *extra_headers,
+ RGWHTTPManager *_mgr);
+
+ ~RGWRESTSendResource() = default;
+
+ // the io id and user-info accessors below all forward to the underlying request
+ rgw_io_id get_io_id(int io_type) {
+ return req.get_io_id(io_type);
+ }
+
+ void set_io_user_info(void *user_info) override {
+ req.set_io_user_info(user_info);
+ }
+
+ void *get_io_user_info() override {
+ return req.get_io_user_info();
+ }
+
+ // Blocking: send `bl` as the request body and complete the request.
+ int send(bufferlist& bl);
+
+ // Non-blocking: only send; collect the result with wait().
+ int aio_send(bufferlist& bl);
+
+ string to_str() {
+ return req.to_str();
+ }
+
+ int get_http_status() {
+ return req.get_http_status();
+ }
+
+ // Waits for the request and copies the collected body into *pbl whether or
+ // not the wait succeeded. If the wait failed and err_result is given, the
+ // body is also JSON-decoded into *err_result. Always returns req.get_status().
+ template <class E = int>
+ int wait(bufferlist *pbl, E *err_result = nullptr) {
+ int ret = req.wait();
+ *pbl = bl;
+
+ if (ret < 0 && err_result ) {
+ // NOTE(review): the decode result stored in ret is never read; the
+ // function returns req.get_status() regardless.
+ ret = parse_decode_json(*err_result, bl);
+ }
+
+ return req.get_status();
+ }
+
+ // Waits for the request and JSON-decodes the body into *dest (or into
+ // *err_result on failure).
+ template <class T, class E = int>
+ int wait(T *dest, E *err_result = nullptr);
+};
+
+// Waits for a request started with aio_send() and decodes its response.
+//
+// dest:       receives the JSON-decoded response body on success.
+// err_result: optional; on failure the response body is decoded into it on a
+//             best-effort basis.
+//
+// Returns 0 on success. On failure returns the negative error from the wait
+// or from the request status. The outcome of decoding the error body never
+// replaces that error: previously a cleanly decoded error body reset the
+// result to 0, so the failure was lost and the error body was then decoded as
+// if it were the success payload. If the request succeeded but its body
+// cannot be decoded into *dest, the decode error is returned.
+template <class T, class E>
+int RGWRESTSendResource::wait(T *dest, E *err_result)
+{
+  int ret = req.wait();
+  if (ret >= 0) {
+    ret = req.get_status();
+  }
+
+  if (ret < 0) {
+    if (err_result) {
+      // best effort only; the original error is what the caller needs to see
+      parse_decode_json(*err_result, bl);
+    }
+    return ret;
+  }
+
+  ret = parse_decode_json(*dest, bl);
+  if (ret < 0) {
+    return ret;
+  }
+  return 0;
+}
+
+// RGWRESTSendResource preset to the "POST" method.
+class RGWRESTPostResource : public RGWRESTSendResource {
+public:
+  // query parameters given as a null-terminated rgw_http_param_pair array
+  RGWRESTPostResource(RGWRESTConn *_conn,
+                      const string& _resource,
+                      const rgw_http_param_pair *pp,
+                      param_vec_t *extra_headers,
+                      RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "POST", _resource, pp, extra_headers, _mgr)
+  {}
+
+  // query parameters given as an already-built list
+  RGWRESTPostResource(RGWRESTConn *_conn,
+                      const string& _resource,
+                      param_vec_t& params,
+                      param_vec_t *extra_headers,
+                      RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "POST", _resource, params, extra_headers, _mgr)
+  {}
+};
+
+// RGWRESTSendResource preset to the "PUT" method.
+class RGWRESTPutResource : public RGWRESTSendResource {
+public:
+  // query parameters given as a null-terminated rgw_http_param_pair array
+  RGWRESTPutResource(RGWRESTConn *_conn,
+                     const string& _resource,
+                     const rgw_http_param_pair *pp,
+                     param_vec_t *extra_headers,
+                     RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "PUT", _resource, pp, extra_headers, _mgr)
+  {}
+
+  // query parameters given as an already-built list
+  RGWRESTPutResource(RGWRESTConn *_conn,
+                     const string& _resource,
+                     param_vec_t& params,
+                     param_vec_t *extra_headers,
+                     RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "PUT", _resource, params, extra_headers, _mgr)
+  {}
+};
+
+// RGWRESTSendResource preset to the "DELETE" method.
+class RGWRESTDeleteResource : public RGWRESTSendResource {
+public:
+  // query parameters given as a null-terminated rgw_http_param_pair array
+  RGWRESTDeleteResource(RGWRESTConn *_conn,
+                        const string& _resource,
+                        const rgw_http_param_pair *pp,
+                        param_vec_t *extra_headers,
+                        RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "DELETE", _resource, pp, extra_headers, _mgr)
+  {}
+
+  // query parameters given as an already-built list
+  RGWRESTDeleteResource(RGWRESTConn *_conn,
+                        const string& _resource,
+                        param_vec_t& params,
+                        param_vec_t *extra_headers,
+                        RGWHTTPManager *_mgr)
+    : RGWRESTSendResource(_conn, "DELETE", _resource, params, extra_headers, _mgr)
+  {}
+};
+
+
+
+#endif
diff --git a/src/rgw/rgw_rest_iam.cc b/src/rgw/rgw_rest_iam.cc
new file mode 100644
index 00000000..ef0e958d
--- /dev/null
+++ b/src/rgw/rgw_rest_iam.cc
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/tokenizer.hpp>
+
+#include "rgw_rest.h"
+#include "rgw_rest_iam.h"
+
+#include "rgw_request.h"
+#include "rgw_process.h"
+
+#include "rgw_rest_role.h"
+#include "rgw_rest_user_policy.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+// Splits the form-encoded POST body into key=value pairs and adds them to the
+// request arguments, then records the payload hash of the body under
+// "PayloadHash". Only bodies that mention "Action" are parsed, and only the
+// policy-document and path values are URL-decoded.
+void RGWHandler_REST_IAM::rgw_iam_parse_input()
+{
+  if (!post_body.empty()) {
+    ldout(s->cct, 10) << "Content of POST: " << post_body << dendl;
+
+    if (post_body.find("Action") != string::npos) {
+      boost::char_separator<char> sep("&");
+      boost::tokenizer<boost::char_separator<char>> tokens(post_body, sep);
+      for (const auto& t : tokens) {
+        const auto eq = t.find("=");
+        if (eq == string::npos) {
+          // tokens without '=' carry no key/value pair and are ignored
+          continue;
+        }
+        std::string key = t.substr(0, eq);
+        std::string value = t.substr(eq + 1);
+        const bool needs_decode = (key == "AssumeRolePolicyDocument" ||
+                                   key == "Path" ||
+                                   key == "PolicyDocument");
+        if (needs_decode) {
+          value = url_decode(value);
+        }
+        s->info.args.append(key, value);
+      }
+    }
+  }
+
+  // the hash is appended even when the body is empty
+  auto payload_hash = rgw::auth::s3::calc_v4_payload_hash(post_body);
+  s->info.args.append("PayloadHash", payload_hash);
+}
+
+// Maps the "Action" argument of a POST request to a newly allocated operation
+// object. Returns nullptr when there is no Action or it is not recognised;
+// the caller takes ownership of the returned op.
+RGWOp *RGWHandler_REST_IAM::op_post()
+{
+  rgw_iam_parse_input();
+
+  if (!s->info.args.exists("Action")) {
+    return nullptr;
+  }
+
+  // action name -> factory for the matching op
+  struct ActionEntry {
+    const char *name;
+    RGWOp *(*make)();
+  };
+  static const ActionEntry actions[] = {
+    {"CreateRole",             []() -> RGWOp* { return new RGWCreateRole; }},
+    {"DeleteRole",             []() -> RGWOp* { return new RGWDeleteRole; }},
+    {"GetRole",                []() -> RGWOp* { return new RGWGetRole; }},
+    {"UpdateAssumeRolePolicy", []() -> RGWOp* { return new RGWModifyRole; }},
+    {"ListRoles",              []() -> RGWOp* { return new RGWListRoles; }},
+    {"PutRolePolicy",          []() -> RGWOp* { return new RGWPutRolePolicy; }},
+    {"GetRolePolicy",          []() -> RGWOp* { return new RGWGetRolePolicy; }},
+    {"ListRolePolicies",       []() -> RGWOp* { return new RGWListRolePolicies; }},
+    {"DeleteRolePolicy",       []() -> RGWOp* { return new RGWDeleteRolePolicy; }},
+    {"PutUserPolicy",          []() -> RGWOp* { return new RGWPutUserPolicy; }},
+    {"GetUserPolicy",          []() -> RGWOp* { return new RGWGetUserPolicy; }},
+    {"ListUserPolicies",       []() -> RGWOp* { return new RGWListUserPolicies; }},
+    {"DeleteUserPolicy",       []() -> RGWOp* { return new RGWDeleteUserPolicy; }},
+  };
+
+  const string action = s->info.args.get("Action");
+  for (const auto& entry : actions) {
+    if (action == entry.name) {
+      return entry.make();
+    }
+  }
+
+  return nullptr;
+}
+
+// Marks the request as using the "iam" dialect, parses its arguments and
+// formatter (XML by default, client-configurable), then defers to the base
+// handler's init. Returns a negative error from either step.
+int RGWHandler_REST_IAM::init(RGWRados *store,
+                              struct req_state *s,
+                              rgw::io::BasicClient *cio)
+{
+  s->dialect = "iam";
+
+  const int ret = RGWHandler_REST_IAM::init_from_header(s, RGW_FORMAT_XML, true);
+  if (ret < 0) {
+    ldout(s->cct, 10) << "init_from_header returned err=" << ret << dendl;
+    return ret;
+  }
+
+  return RGWHandler_REST::init(store, s, cio);
+}
+
+// IAM requests are authenticated exactly like S3 requests: delegate to the S3
+// authorizer with this handler's auth registry.
+int RGWHandler_REST_IAM::authorize(const DoutPrefixProvider* dpp)
+{
+  const int auth_ret = RGW_Auth_S3::authorize(dpp, store, auth_registry, s);
+  return auth_ret;
+}
+
+// Prepares request state for an IAM request: sets the protocol flag, parses
+// the query arguments and allocates the response formatter.
+//
+// The arguments are taken from the relative URI when it starts with '?',
+// otherwise from the request parameters.
+//
+// Returns 0 on success or the negative error from allocate_formatter().
+//
+// The previous version went on to split the first path component out of the
+// URI into locals that were never read; every one of those paths returned 0,
+// so that dead code (and its size_t -> int narrowing) has been dropped.
+int RGWHandler_REST_IAM::init_from_header(struct req_state* s,
+                                          int default_formatter,
+                                          bool configurable_format)
+{
+  s->prot_flags = RGW_REST_IAM;
+
+  const char *req_name = s->relative_uri.c_str();
+  const char *p = (*req_name == '?') ? req_name
+                                     : s->info.request_params.c_str();
+
+  s->info.args.set(p);
+  s->info.args.parse();
+
+  /* must be called after the args parsing */
+  if (int ret = allocate_formatter(s, default_formatter, configurable_format); ret < 0)
+    return ret;
+
+  return 0;
+}
+
+// Every request routed to the IAM manager gets a fresh IAM handler; the
+// request state and frontend prefix are not consulted. The caller owns the
+// returned handler.
+RGWHandler_REST*
+RGWRESTMgr_IAM::get_handler(struct req_state* const s,
+                            const rgw::auth::StrategyRegistry& auth_registry,
+                            const std::string& frontend_prefix)
+{
+  RGWHandler_REST_IAM *handler = new RGWHandler_REST_IAM(auth_registry);
+  return handler;
+}
diff --git a/src/rgw/rgw_rest_iam.h b/src/rgw/rgw_rest_iam.h
new file mode 100644
index 00000000..e9dbfcd0
--- /dev/null
+++ b/src/rgw/rgw_rest_iam.h
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_IAM_H
+#define CEPH_RGW_REST_IAM_H
+
+#include "rgw_auth.h"
+#include "rgw_auth_filters.h"
+
+// REST handler for the IAM API (role and user-policy operations). Requests
+// are authorized with the S3 auth strategies from `auth_registry`.
+class RGWHandler_REST_IAM : public RGWHandler_REST {
+  const rgw::auth::StrategyRegistry& auth_registry;
+  // Held by value on purpose. As a reference member it was bound to the
+  // constructor's default-argument temporary whenever the caller omitted
+  // post_body, and that temporary is destroyed as soon as the constructor
+  // returns, leaving rgw_iam_parse_input() to read a dangling reference.
+  const string post_body;
+  RGWOp *op_post() override;
+  // parses post_body into the request arguments
+  void rgw_iam_parse_input();
+public:
+
+  // Sets the protocol flag, parses the arguments and allocates the formatter.
+  static int init_from_header(struct req_state *s, int default_formatter, bool configurable_format);
+
+  // auth_registry must outlive the handler; post_body is copied.
+  RGWHandler_REST_IAM(const rgw::auth::StrategyRegistry& auth_registry, const string& post_body="")
+    : RGWHandler_REST(),
+      auth_registry(auth_registry),
+      post_body(post_body) {}
+  ~RGWHandler_REST_IAM() override = default;
+
+  int init(RGWRados *store,
+           struct req_state *s,
+           rgw::io::BasicClient *cio) override;
+  int authorize(const DoutPrefixProvider* dpp) override;
+  int postauth_init() override { return 0; }
+};
+
+// REST manager for the IAM endpoint. It has no sub-managers: every URI
+// resolves to this manager, which hands out RGWHandler_REST_IAM handlers.
+class RGWRESTMgr_IAM : public RGWRESTMgr {
+public:
+  RGWRESTMgr_IAM() = default;
+  ~RGWRESTMgr_IAM() override = default;
+
+  // the URI is not inspected and out_uri is left untouched
+  RGWRESTMgr *get_resource_mgr(struct req_state* const s,
+                               const std::string& uri,
+                               std::string* const out_uri) override
+  {
+    RGWRESTMgr *self = this;
+    return self;
+  }
+
+  RGWHandler_REST* get_handler(struct req_state*,
+                               const rgw::auth::StrategyRegistry&,
+                               const std::string&) override;
+};
+
+#endif /* CEPH_RGW_REST_IAM_H */
+
diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc
new file mode 100644
index 00000000..6daeca16
--- /dev/null
+++ b/src/rgw/rgw_rest_log.cc
@@ -0,0 +1,1060 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/ceph_json.h"
+#include "common/strtol.h"
+#include "rgw_rest.h"
+#include "rgw_op.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_log.h"
+#include "rgw_client_io.h"
+#include "rgw_sync.h"
+#include "rgw_data_sync.h"
+#include "rgw_common.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include "common/errno.h"
+#include "include/ceph_assert.h"
+
+#define dout_context g_ceph_context
+#define LOG_CLASS_LIST_MAX_ENTRIES (1000)
+#define dout_subsys ceph_subsys_rgw
+
+// Converts a date string into a real_time. An empty string is accepted and
+// produces the time built from epoch 0 / nsec 0. Returns 0 on success and
+// -EINVAL (leaving `out` untouched) if the string cannot be parsed.
+static int parse_date_str(string& in, real_time& out) {
+  uint64_t epoch = 0;
+  uint64_t nsec = 0;
+
+  if (!in.empty() && utime_t::parse_date(in, &epoch, &nsec) < 0) {
+    dout(5) << "Error parsing date " << in << dendl;
+    return -EINVAL;
+  }
+
+  out = utime_t(epoch, nsec).to_real_time();
+  return 0;
+}
+
+// Lists entries of one metadata-log shard.
+//
+// Request arguments: id (shard, required), period (defaults to the current
+// period), start-time / end-time, marker and max-entries (capped at
+// LOG_CLASS_LIST_MAX_ENTRIES). Results land in entries / last_marker /
+// truncated; http_ret is -EINVAL for any malformed argument.
+void RGWOp_MDLog_List::execute() {
+  auto& args = s->info.args;
+  string period = args.get("period");
+  string shard = args.get("id");
+  string max_entries_str = args.get("max-entries");
+  string st = args.get("start-time");
+  string et = args.get("end-time");
+  string marker = args.get("marker");
+  string err;
+
+  unsigned shard_id = (unsigned)strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // start time is parsed first; end time only if the start time was valid
+  real_time ut_st;
+  real_time ut_et;
+  if (parse_date_str(st, ut_st) < 0 || parse_date_str(et, ut_et) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  unsigned max_entries = LOG_CLASS_LIST_MAX_ENTRIES;
+  if (!max_entries_str.empty()) {
+    max_entries = (unsigned)strict_strtol(max_entries_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      dout(5) << "Error parsing max-entries " << max_entries_str << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+    if (max_entries > LOG_CLASS_LIST_MAX_ENTRIES) {
+      max_entries = LOG_CLASS_LIST_MAX_ENTRIES;
+    }
+  }
+
+  if (period.empty()) {
+    ldout(s->cct, 5) << "Missing period id trying to use current" << dendl;
+    period = store->svc.zone->get_current_period_id();
+    if (period.empty()) {
+      ldout(s->cct, 5) << "Missing period id" << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  RGWMetadataLog meta_log{s->cct, store, period};
+
+  // the listing handle is filled in by init and released by complete
+  void *handle;
+  meta_log.init_list_entries(shard_id, ut_st, ut_et, marker, &handle);
+  http_ret = meta_log.list_entries(handle, max_entries, entries,
+                                   &last_marker, &truncated);
+  meta_log.complete_list_entries(handle);
+}
+
+// Emits the listing produced by execute(): the headers always, and on success
+// a "log_entries" object holding the marker, the truncated flag and the
+// entries array. Output is flushed after every entry.
+void RGWOp_MDLog_List::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret < 0)
+    return;
+
+  auto& f = s->formatter;
+  f->open_object_section("log_entries");
+  f->dump_string("marker", last_marker);
+  f->dump_bool("truncated", truncated);
+
+  f->open_array_section("entries");
+  for (cls_log_entry& entry : entries) {
+    store->meta_mgr->dump_log_entry(entry, s->formatter);
+    flusher.flush();
+  }
+  f->close_section();  // entries
+
+  f->close_section();  // log_entries
+  flusher.flush();
+}
+
+// Gathers the data reported by send_response(): the configured number of
+// metadata-log shards and the period returned by read_oldest_log_period().
+// The error carried by that period becomes the op result.
+void RGWOp_MDLog_Info::execute() {
+ // shard count comes straight from the rgw_md_log_max_shards config value
+ num_objects = s->cct->_conf->rgw_md_log_max_shards;
+ period = store->meta_mgr->read_oldest_log_period();
+ http_ret = period.get_error();
+}
+
+// Emits an "mdlog" object with the shard count and, when a period is
+// available, its id and realm epoch. The body is written even when http_ret
+// reports an error.
+void RGWOp_MDLog_Info::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  auto& f = s->formatter;
+  f->open_object_section("mdlog");
+  f->dump_unsigned("num_objects", num_objects);
+  if (period) {
+    f->dump_string("period", period.get_period().get_id());
+    f->dump_unsigned("realm_epoch", period.get_epoch());
+  }
+  f->close_section();
+  flusher.flush();
+}
+
+// Reads the info of one metadata-log shard.
+//
+// Request arguments: id (shard, required) and period (defaults to the current
+// period). http_ret is -EINVAL for a malformed shard id or when no period can
+// be determined, otherwise the result of the info lookup.
+void RGWOp_MDLog_ShardInfo::execute() {
+  auto& args = s->info.args;
+  string period = args.get("period");
+  string shard = args.get("id");
+  string err;
+
+  const unsigned shard_id = (unsigned)strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (period.empty()) {
+    ldout(s->cct, 5) << "Missing period id trying to use current" << dendl;
+    period = store->svc.zone->get_current_period_id();
+
+    if (period.empty()) {
+      ldout(s->cct, 5) << "Missing period id" << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  RGWMetadataLog meta_log{s->cct, store, period};
+  http_ret = meta_log.get_info(shard_id, &info);
+}
+
+// Emits the headers followed by the shard info encoded as JSON under "info".
+// The body is written even when http_ret reports an error.
+void RGWOp_MDLog_ShardInfo::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  auto& f = s->formatter;
+  encode_json("info", info, f);
+  flusher.flush();
+}
+
+// Trims one metadata-log shard.
+//
+// Request arguments: id (shard, required), period (defaults to the current
+// period), start-time / end-time and start-marker / end-marker. At least one
+// of end-time and end-marker must be given so the trim is bounded. http_ret is
+// -EINVAL for any malformed or missing argument, otherwise the trim result.
+void RGWOp_MDLog_Delete::execute() {
+  auto& args = s->info.args;
+  string st = args.get("start-time");
+  string et = args.get("end-time");
+  string start_marker = args.get("start-marker");
+  string end_marker = args.get("end-marker");
+  string period = args.get("period");
+  string shard = args.get("id");
+  string err;
+
+  http_ret = 0;
+
+  const unsigned shard_id = (unsigned)strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  /* bounding end */
+  if (et.empty() && end_marker.empty()) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // start time is parsed first; end time only if the start time was valid
+  real_time ut_st;
+  real_time ut_et;
+  if (parse_date_str(st, ut_st) < 0 || parse_date_str(et, ut_et) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (period.empty()) {
+    ldout(s->cct, 5) << "Missing period id trying to use current" << dendl;
+    period = store->svc.zone->get_current_period_id();
+
+    if (period.empty()) {
+      ldout(s->cct, 5) << "Missing period id" << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  RGWMetadataLog meta_log{s->cct, store, period};
+  http_ret = meta_log.trim(shard_id, ut_st, ut_et, start_marker, end_marker);
+}
+
+// Takes an exclusive lock on one metadata-log shard.
+//
+// Request arguments (all required except period, which defaults to the
+// current period): id (shard), length (lock duration, a positive integer),
+// locker-id and zone-id.
+//
+// http_ret is -EINVAL for missing or malformed arguments, -ERR_LOCKED when
+// the shard is already locked (-EBUSY from the log), otherwise the lock
+// result.
+void RGWOp_MDLog_Lock::execute() {
+  string period, shard_id_str, duration_str, locker_id, zone_id;
+  unsigned shard_id;
+
+  http_ret = 0;
+
+  period = s->info.args.get("period");
+  shard_id_str = s->info.args.get("id");
+  duration_str = s->info.args.get("length");
+  locker_id = s->info.args.get("locker-id");
+  zone_id = s->info.args.get("zone-id");
+
+  if (period.empty()) {
+    ldout(s->cct, 5) << "Missing period id trying to use current" << dendl;
+    period = store->svc.zone->get_current_period_id();
+  }
+
+  if (period.empty() ||
+      shard_id_str.empty() ||
+      (duration_str.empty()) ||
+      locker_id.empty() ||
+      zone_id.empty()) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  string err;
+  shard_id = (unsigned)strict_strtol(shard_id_str.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "Error parsing shard_id param " << shard_id_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  RGWMetadataLog meta_log{s->cct, store, period};
+
+  // Keep the parsed duration signed so the "<= 0" check can reject negative
+  // input. Casting to unsigned first turned e.g. "-5" into a huge positive
+  // lock duration that slipped past the check.
+  const auto dur = strict_strtol(duration_str.c_str(), 10, &err);
+  if (!err.empty() || dur <= 0) {
+    dout(5) << "invalid length param " << duration_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  http_ret = meta_log.lock_exclusive(shard_id, make_timespan(dur), zone_id,
+                                     locker_id);
+  if (http_ret == -EBUSY)
+    http_ret = -ERR_LOCKED;
+}
+
+// Releases the lock on one metadata-log shard.
+//
+// Request arguments (all required except period, which defaults to the
+// current period): id (shard), locker-id and zone-id. http_ret is -EINVAL for
+// missing or malformed arguments, otherwise the unlock result.
+void RGWOp_MDLog_Unlock::execute() {
+  http_ret = 0;
+
+  auto& args = s->info.args;
+  string period = args.get("period");
+  string shard_id_str = args.get("id");
+  string locker_id = args.get("locker-id");
+  string zone_id = args.get("zone-id");
+
+  if (period.empty()) {
+    ldout(s->cct, 5) << "Missing period id trying to use current" << dendl;
+    period = store->svc.zone->get_current_period_id();
+  }
+
+  const bool missing_arg = period.empty() ||
+                           shard_id_str.empty() ||
+                           locker_id.empty() ||
+                           zone_id.empty();
+  if (missing_arg) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  string err;
+  const unsigned shard_id = (unsigned)strict_strtol(shard_id_str.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "Error parsing shard_id param " << shard_id_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  RGWMetadataLog meta_log{s->cct, store, period};
+  http_ret = meta_log.unlock(shard_id, zone_id, locker_id);
+}
+
+// Handles a metadata-log change notification: reads a JSON array of shard ids
+// from the request body (at most LARGE_ENOUGH_BUF bytes) and wakes up
+// metadata sync for those shards.
+//
+// http_ret is the read error if the body cannot be read, -EINVAL if the body
+// is not valid JSON or cannot be decoded into a set of ints, and 0 otherwise.
+void RGWOp_MDLog_Notify::execute() {
+#define LARGE_ENOUGH_BUF (128 * 1024)
+
+  int r = 0;
+  bufferlist data;
+  std::tie(r, data) = rgw_rest_read_all_input(s, LARGE_ENOUGH_BUF);
+  if (r < 0) {
+    http_ret = r;
+    return;
+  }
+
+  char* buf = data.c_str();
+  ldout(s->cct, 20) << __func__ << "(): read data: " << buf << dendl;
+
+  // JSONParser::parse() reports success as true/false. The old "r < 0" test
+  // on its result could never fire, so malformed input was not rejected here.
+  JSONParser p;
+  if (!p.parse(buf, data.length())) {
+    ldout(s->cct, 0) << "ERROR: failed to parse JSON" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  set<int> updated_shards;
+  try {
+    decode_json_obj(updated_shards, &p);
+  } catch (JSONDecoder::err& err) {
+    ldout(s->cct, 0) << "ERROR: failed to decode JSON" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // only walk the set when level-20 logging is actually being gathered
+  if (store->ctx()->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    for (const int shard : updated_shards) {
+      ldout(s->cct, 20) << __func__ << "(): updated shard=" << shard << dendl;
+    }
+  }
+
+  store->wakeup_meta_sync_shards(updated_shards);
+
+  http_ret = 0;
+}
+
+// Streams bucket index log entries for one bucket (or bucket instance) to the
+// client.
+//
+// Query parameters: tenant, bucket, bucket-instance, marker, max-entries.
+// At least one of bucket / bucket-instance must be given. A max-entries value
+// that fails to parse falls back to LOG_CLASS_LIST_MAX_ENTRIES.
+//
+// The response header is sent before listing starts, so a failure inside the
+// listing loop only logs and returns; it is not reflected in http_ret.
+void RGWOp_BILog_List::execute() {
+  string tenant_name = s->info.args.get("tenant");
+  string bucket_name = s->info.args.get("bucket");
+  string marker = s->info.args.get("marker");
+  string max_entries_str = s->info.args.get("max-entries");
+  string bucket_instance = s->info.args.get("bucket-instance");
+
+  if (bucket_name.empty() && bucket_instance.empty()) {
+    dout(5) << "ERROR: neither bucket nor bucket instance specified" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // Splits an optional shard suffix off the instance string.
+  int shard_id;
+  http_ret = rgw_bucket_parse_bucket_instance(bucket_instance, &bucket_instance, &shard_id);
+  if (http_ret < 0) {
+    return;
+  }
+
+  RGWBucketInfo bucket_info;
+  if (bucket_instance.empty()) {
+    /* bucket_name is known to be non-empty here */
+    http_ret = store->get_bucket_info(*s->sysobj_ctx, tenant_name, bucket_name, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket info for bucket=" << bucket_name << dendl;
+      return;
+    }
+  } else {
+    http_ret = store->get_bucket_instance_info(*s->sysobj_ctx, bucket_instance, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket instance info for bucket instance id=" << bucket_instance << dendl;
+      return;
+    }
+  }
+
+  string parse_err;
+  unsigned max_entries = (unsigned)strict_strtol(max_entries_str.c_str(), 10, &parse_err);
+  if (!parse_err.empty()) {
+    max_entries = LOG_CLASS_LIST_MAX_ENTRIES;
+  }
+
+  // Header (and the opening of the "entries" array) goes out first; entries
+  // are then streamed batch by batch.
+  send_response();
+
+  unsigned listed = 0;
+  bool truncated;
+  do {
+    list<rgw_bi_log_entry> batch;
+    const int ret = store->list_bi_log_entries(bucket_info, shard_id,
+                                               marker, max_entries - listed,
+                                               batch, &truncated);
+    if (ret < 0) {
+      dout(5) << "ERROR: list_bi_log_entries()" << dendl;
+      return;
+    }
+
+    listed += batch.size();
+
+    // Also advances `marker` to the id of the last entry written.
+    send_response(batch, marker);
+  } while (truncated && listed < max_entries);
+
+  send_response_end();
+}
+
+// Sends the response header once and, on success, opens the "entries" array
+// that the streaming overload fills in. Later calls are no-ops.
+void RGWOp_BILog_List::send_response() {
+  if (!sent_header) {
+    set_req_state_err(s, http_ret);
+    dump_errno(s);
+    end_header(s);
+
+    sent_header = true;
+
+    if (http_ret >= 0) {
+      s->formatter->open_array_section("entries");
+    }
+  }
+}
+
+// Writes one batch of bucket index log entries to the formatter, flushing
+// after each entry. On return `marker` holds the id of the last entry written
+// (it is left unchanged when the batch is empty).
+void RGWOp_BILog_List::send_response(list<rgw_bi_log_entry>& entries, string& marker)
+{
+  for (rgw_bi_log_entry& log_entry : entries) {
+    encode_json("entry", log_entry, s->formatter);
+
+    marker = log_entry.id;
+    flusher.flush();
+  }
+}
+
+// Closes the formatter section opened by send_response() and flushes the
+// remaining output.
+void RGWOp_BILog_List::send_response_end()
+{
+  s->formatter->close_section();
+  flusher.flush();
+}
+
+// Looks up a bucket (by name or by instance id) and reads its stats into the
+// members reported by send_response(): bucket_ver, master_ver, max_marker and
+// syncstopped.
+//
+// Query parameters: tenant, bucket, bucket-instance. A -ENOENT result from
+// get_bucket_stats() is tolerated; any other error becomes http_ret.
+void RGWOp_BILog_Info::execute() {
+  string tenant_name = s->info.args.get("tenant");
+  string bucket_name = s->info.args.get("bucket");
+  string bucket_instance = s->info.args.get("bucket-instance");
+
+  if (bucket_name.empty() && bucket_instance.empty()) {
+    dout(5) << "ERROR: neither bucket nor bucket instance specified" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  int shard_id;
+  http_ret = rgw_bucket_parse_bucket_instance(bucket_instance, &bucket_instance, &shard_id);
+  if (http_ret < 0) {
+    return;
+  }
+
+  RGWBucketInfo bucket_info;
+  if (bucket_instance.empty()) {
+    /* bucket_name is known to be non-empty here */
+    http_ret = store->get_bucket_info(*s->sysobj_ctx, tenant_name, bucket_name, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket info for bucket=" << bucket_name << dendl;
+      return;
+    }
+  } else {
+    http_ret = store->get_bucket_instance_info(*s->sysobj_ctx, bucket_instance, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket instance info for bucket instance id=" << bucket_instance << dendl;
+      return;
+    }
+  }
+
+  map<RGWObjCategory, RGWStorageStats> stats;
+  const int stats_ret = store->get_bucket_stats(bucket_info, shard_id, &bucket_ver, &master_ver, stats, &max_marker, &syncstopped);
+  if (stats_ret < 0 && stats_ret != -ENOENT) {
+    http_ret = stats_ret;
+  }
+}
+
+// Sends the header and, on success, an "info" object carrying the values
+// gathered by execute(). Nothing further is written when http_ret is negative.
+void RGWOp_BILog_Info::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret >= 0) {
+    s->formatter->open_object_section("info");
+    encode_json("bucket_ver", bucket_ver, s->formatter);
+    encode_json("master_ver", master_ver, s->formatter);
+    encode_json("max_marker", max_marker, s->formatter);
+    encode_json("syncstopped", syncstopped, s->formatter);
+    s->formatter->close_section();
+
+    flusher.flush();
+  }
+}
+
+// Trims bucket index log entries between start-marker and end-marker for one
+// bucket (or bucket instance).
+//
+// Query parameters: tenant, bucket, bucket-instance, start-marker, end-marker.
+// end-marker and at least one of bucket / bucket-instance are mandatory.
+// http_ret carries the result of the lookup or of the trim call.
+void RGWOp_BILog_Delete::execute() {
+  string tenant_name = s->info.args.get("tenant");
+  string bucket_name = s->info.args.get("bucket");
+  string start_marker = s->info.args.get("start-marker");
+  string end_marker = s->info.args.get("end-marker");
+  string bucket_instance = s->info.args.get("bucket-instance");
+
+  http_ret = 0;
+
+  const bool no_target = bucket_name.empty() && bucket_instance.empty();
+  if (no_target || end_marker.empty()) {
+    dout(5) << "ERROR: one of bucket and bucket instance, and also end-marker is mandatory" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  int shard_id;
+  http_ret = rgw_bucket_parse_bucket_instance(bucket_instance, &bucket_instance, &shard_id);
+  if (http_ret < 0) {
+    return;
+  }
+
+  RGWBucketInfo bucket_info;
+  if (bucket_instance.empty()) {
+    /* bucket_name is known to be non-empty here */
+    http_ret = store->get_bucket_info(*s->sysobj_ctx, tenant_name, bucket_name, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket info for bucket=" << bucket_name << dendl;
+      return;
+    }
+  } else {
+    http_ret = store->get_bucket_instance_info(*s->sysobj_ctx, bucket_instance, bucket_info, NULL, NULL);
+    if (http_ret < 0) {
+      dout(5) << "could not get bucket instance info for bucket instance id=" << bucket_instance << dendl;
+      return;
+    }
+  }
+
+  http_ret = store->trim_bi_log_entries(bucket_info, shard_id, start_marker, end_marker);
+  if (http_ret < 0) {
+    dout(5) << "ERROR: trim_bi_log_entries() " << dendl;
+  }
+}
+
+// Lists entries of one data changes log shard into `entries`, recording
+// `last_marker` and `truncated` for send_response().
+//
+// Query parameters:
+//   id          - shard id, a non-negative decimal integer (required)
+//   start-time  - parsed with parse_date_str()
+//   end-time    - parsed with parse_date_str()
+//   max-entries - optional; capped at LOG_CLASS_LIST_MAX_ENTRIES
+//   marker      - position to resume listing from
+//   extra-info  - boolean; selects the entry encoding used by send_response()
+//
+// http_ret is -EINVAL for any malformed parameter, otherwise the result of
+// the list_entries() call.
+void RGWOp_DATALog_List::execute() {
+  string shard = s->info.args.get("id");
+
+  string st = s->info.args.get("start-time"),
+         et = s->info.args.get("end-time"),
+         max_entries_str = s->info.args.get("max-entries"),
+         marker = s->info.args.get("marker"),
+         err;
+  real_time ut_st,
+            ut_et;
+  unsigned max_entries = LOG_CLASS_LIST_MAX_ENTRIES;
+
+  s->info.args.get_bool("extra-info", &extra_info, false);
+
+  // Keep the parsed value signed until it has been range-checked: casting a
+  // negative result straight to unsigned would wrap it into a huge shard id.
+  const auto parsed_shard = strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty() || parsed_shard < 0) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  unsigned shard_id = static_cast<unsigned>(parsed_shard);
+
+  if (parse_date_str(st, ut_st) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (parse_date_str(et, ut_et) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (!max_entries_str.empty()) {
+    max_entries = (unsigned)strict_strtol(max_entries_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      dout(5) << "Error parsing max-entries " << max_entries_str << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+    // A negative value wraps to a large unsigned and is clamped here too.
+    if (max_entries > LOG_CLASS_LIST_MAX_ENTRIES) {
+      max_entries = LOG_CLASS_LIST_MAX_ENTRIES;
+    }
+  }
+
+  // Note that last_marker is updated to be the marker of the last
+  // entry listed
+  http_ret = store->data_log->list_entries(shard_id, ut_st, ut_et,
+                                           max_entries, entries, marker,
+                                           &last_marker, &truncated);
+}
+
+// Sends the header and, on success, a "log_entries" object holding the last
+// marker, the truncated flag and the listed entries. With extra_info unset
+// only each entry's inner `entry` member is encoded; otherwise the whole
+// record is.
+void RGWOp_DATALog_List::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret < 0) {
+    return;
+  }
+
+  s->formatter->open_object_section("log_entries");
+  s->formatter->dump_string("marker", last_marker);
+  s->formatter->dump_bool("truncated", truncated);
+
+  s->formatter->open_array_section("entries");
+  for (rgw_data_change_log_entry& log_entry : entries) {
+    if (extra_info) {
+      encode_json("entry", log_entry, s->formatter);
+    } else {
+      encode_json("entry", log_entry.entry, s->formatter);
+    }
+    flusher.flush();
+  }
+  s->formatter->close_section();  // entries
+
+  s->formatter->close_section();  // log_entries
+  flusher.flush();
+}
+
+
+// Reports the configured number of data log shards
+// (rgw_data_log_num_shards); always succeeds.
+void RGWOp_DATALog_Info::execute()
+{
+  http_ret = 0;
+  num_objects = s->cct->_conf->rgw_data_log_num_shards;
+}
+
+// Sends the header followed by an object whose single field, "num_objects",
+// is the shard count captured by execute().
+void RGWOp_DATALog_Info::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  s->formatter->open_object_section("num_objects");
+  s->formatter->dump_unsigned("num_objects", num_objects);
+  s->formatter->close_section();
+
+  flusher.flush();
+}
+
+// Fetches the info record of one data changes log shard into `info`.
+//
+// Query parameter `id` is the shard id and must be a non-negative decimal
+// integer; otherwise http_ret is -EINVAL. On a valid id http_ret is the
+// result of get_info().
+void RGWOp_DATALog_ShardInfo::execute() {
+  string shard = s->info.args.get("id");
+  string err;
+
+  // Keep the parsed value signed until it has been range-checked: casting a
+  // negative result straight to unsigned would wrap it into a huge shard id.
+  const auto parsed_shard = strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty() || parsed_shard < 0) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  unsigned shard_id = static_cast<unsigned>(parsed_shard);
+
+  http_ret = store->data_log->get_info(shard_id, &info);
+}
+
+// Sends the header and, on success, the shard info gathered by execute().
+//
+// The body is skipped when http_ret is negative so that an error response
+// does not also carry a default-constructed "info" record; this matches the
+// *_Status::send_response() handlers in this file. The flush still runs on
+// both paths.
+void RGWOp_DATALog_ShardInfo::send_response() {
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  if (http_ret >= 0) {
+    encode_json("info", info, s->formatter);
+  }
+  flusher.flush();
+}
+
+// Handles POST ?type=data&lock: takes an exclusive lock on one data changes
+// log shard for a limited time.
+//
+// Query parameters (all required):
+//   id         - shard id, a non-negative decimal integer
+//   length     - lock duration, a strictly positive decimal integer that is
+//                handed to make_timespan()
+//   locker-id  - forwarded to lock_exclusive()
+//   zone-id    - forwarded to lock_exclusive()
+//
+// http_ret is -EINVAL for a missing or malformed parameter, -ERR_LOCKED when
+// the lock call reports -EBUSY, otherwise the result of the lock call.
+void RGWOp_DATALog_Lock::execute() {
+  http_ret = 0;
+
+  const string shard_id_str = s->info.args.get("id");
+  const string duration_str = s->info.args.get("length");
+  string locker_id = s->info.args.get("locker-id");
+  string zone_id = s->info.args.get("zone-id");
+
+  if (shard_id_str.empty() ||
+      duration_str.empty() ||
+      locker_id.empty() ||
+      zone_id.empty()) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // Both numbers are validated while still signed. Casting to unsigned first
+  // turns "-1" into a huge value, which made the old "dur <= 0" test catch
+  // only a literal zero.
+  string err;
+  const auto parsed_shard = strict_strtol(shard_id_str.c_str(), 10, &err);
+  if (!err.empty() || parsed_shard < 0) {
+    dout(5) << "Error parsing shard_id param " << shard_id_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  unsigned shard_id = static_cast<unsigned>(parsed_shard);
+
+  const auto parsed_dur = strict_strtol(duration_str.c_str(), 10, &err);
+  if (!err.empty() || parsed_dur <= 0) {
+    dout(5) << "invalid length param " << duration_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  const unsigned dur = static_cast<unsigned>(parsed_dur);
+
+  http_ret = store->data_log->lock_exclusive(shard_id, make_timespan(dur), zone_id, locker_id);
+  if (http_ret == -EBUSY)
+    http_ret = -ERR_LOCKED;
+}
+
+// Handles POST ?type=data&unlock: releases the lock on one data changes log
+// shard.
+//
+// Query parameters (all required):
+//   id         - shard id, a non-negative decimal integer
+//   locker-id  - forwarded to unlock()
+//   zone-id    - forwarded to unlock()
+//
+// http_ret is -EINVAL for a missing or malformed parameter, otherwise the
+// result of the unlock call.
+void RGWOp_DATALog_Unlock::execute() {
+  http_ret = 0;
+
+  const string shard_id_str = s->info.args.get("id");
+  string locker_id = s->info.args.get("locker-id");
+  string zone_id = s->info.args.get("zone-id");
+
+  if (shard_id_str.empty() ||
+      locker_id.empty() ||
+      zone_id.empty()) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // Keep the parsed value signed until it has been range-checked: casting a
+  // negative result straight to unsigned would wrap it into a huge shard id.
+  string err;
+  const auto parsed_shard = strict_strtol(shard_id_str.c_str(), 10, &err);
+  if (!err.empty() || parsed_shard < 0) {
+    dout(5) << "Error parsing shard_id param " << shard_id_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  unsigned shard_id = static_cast<unsigned>(parsed_shard);
+
+  http_ret = store->data_log->unlock(shard_id, zone_id, locker_id);
+}
+
+// Handles POST ?type=data&notify: reads a JSON request body mapping data log
+// shard ids to sets of modified keys and wakes up data sync from the zone
+// named by the "source-zone" query parameter for those shards.
+//
+// http_ret is the read error if the body cannot be read, -EINVAL if the body
+// is not valid JSON or cannot be decoded, and 0 otherwise.
+void RGWOp_DATALog_Notify::execute() {
+  string source_zone = s->info.args.get("source-zone");
+#define LARGE_ENOUGH_BUF (128 * 1024)
+
+  int r = 0;
+  bufferlist data;
+  std::tie(r, data) = rgw_rest_read_all_input(s, LARGE_ENOUGH_BUF);
+  if (r < 0) {
+    http_ret = r;
+    return;
+  }
+
+  char* buf = data.c_str();
+  ldout(s->cct, 20) << __func__ << "(): read data: " << buf << dendl;
+
+  // JSONParser::parse() reports success as a bool. Storing it in an int and
+  // testing "< 0" can never detect a failure, which let malformed bodies fall
+  // through to the decoder; test the result for falseness instead.
+  JSONParser p;
+  if (!p.parse(buf, data.length())) {
+    ldout(s->cct, 0) << "ERROR: failed to parse JSON" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  map<int, set<string> > updated_shards;
+  try {
+    decode_json_obj(updated_shards, &p);
+  } catch (JSONDecoder::err& err) {
+    ldout(s->cct, 0) << "ERROR: failed to decode JSON" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // Only walk the map when level-20 rgw logging is actually being gathered.
+  if (store->ctx()->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    for (const auto& shard : updated_shards) {
+      ldout(s->cct, 20) << __func__ << "(): updated shard=" << shard.first << dendl;
+      for (const string& key : shard.second) {
+        ldout(s->cct, 20) << __func__ << "(): modified key=" << key << dendl;
+      }
+    }
+  }
+
+  store->wakeup_data_sync_shards(source_zone, updated_shards);
+
+  http_ret = 0;
+}
+
+// Trims entries from one data changes log shard.
+//
+// Query parameters:
+//   id            - shard id, a non-negative decimal integer (required)
+//   start-time / end-time     - parsed with parse_date_str()
+//   start-marker / end-marker - forwarded to trim_entries()
+// At least one of end-time / end-marker must be supplied so that the trim
+// has an upper bound.
+//
+// http_ret is -EINVAL for a missing or malformed parameter, otherwise the
+// result of the trim call.
+void RGWOp_DATALog_Delete::execute() {
+  string st = s->info.args.get("start-time"),
+         et = s->info.args.get("end-time"),
+         start_marker = s->info.args.get("start-marker"),
+         end_marker = s->info.args.get("end-marker"),
+         shard = s->info.args.get("id"),
+         err;
+  real_time ut_st,
+            ut_et;
+
+  http_ret = 0;
+
+  // Keep the parsed value signed until it has been range-checked: casting a
+  // negative result straight to unsigned would wrap it into a huge shard id.
+  const auto parsed_shard = strict_strtol(shard.c_str(), 10, &err);
+  if (!err.empty() || parsed_shard < 0) {
+    dout(5) << "Error parsing shard_id " << shard << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+  unsigned shard_id = static_cast<unsigned>(parsed_shard);
+
+  if (et.empty() && end_marker.empty()) { /* bounding end */
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (parse_date_str(st, ut_st) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  if (parse_date_str(et, ut_et) < 0) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  http_ret = store->data_log->trim_entries(shard_id, ut_st, ut_et, start_marker, end_marker);
+}
+
+// REST op returning the metadata sync status; requires the "mdlog" read cap.
+// not in header to avoid pulling in rgw_sync.h
+class RGWOp_MDLog_Status : public RGWRESTOp {
+  rgw_meta_sync_status status;  // filled by execute(), encoded by send_response()
+
+public:
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override
+  {
+    return "get_metadata_log_status";
+  }
+};
+
+// Reads the metadata sync status through the store's meta sync manager.
+// http_ret is -ENOENT when the store has no such manager.
+void RGWOp_MDLog_Status::execute()
+{
+  auto sync_mgr = store->get_meta_sync_manager();
+  if (sync_mgr != nullptr) {
+    http_ret = sync_mgr->read_sync_status(&status);
+    return;
+  }
+
+  ldout(s->cct, 1) << "no sync manager" << dendl;
+  http_ret = -ENOENT;
+}
+
+// Sends the header, then the "status" object when execute() succeeded.
+// Output is flushed on both the success and the error path.
+void RGWOp_MDLog_Status::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  const bool succeeded = (http_ret >= 0);
+  if (succeeded) {
+    encode_json("status", status, s->formatter);
+  }
+
+  flusher.flush();
+}
+
+// REST op returning per-shard bucket sync status; requires the "bilog" read
+// cap.
+// not in header to avoid pulling in rgw_data_sync.h
+class RGWOp_BILog_Status : public RGWRESTOp {
+  std::vector<rgw_bucket_shard_sync_info> status;  // filled by execute()
+
+public:
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("bilog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override
+  {
+    return "get_bucket_index_log_status";
+  }
+};
+
+// Reads the sync status of a bucket relative to a source zone.
+//
+// Query parameters:
+//   bucket      - bucket key, parsed with rgw_bucket_parse_bucket_key()
+//                 (required)
+//   source-zone - forwarded to rgw_bucket_sync_status()
+//
+// http_ret is -EINVAL when the bucket key is missing or unparsable, the
+// lookup error when the bucket instance info cannot be read, otherwise the
+// result of rgw_bucket_sync_status().
+void RGWOp_BILog_Status::execute()
+{
+  const auto source_zone = s->info.args.get("source-zone");
+  const auto key = s->info.args.get("bucket");
+  if (key.empty()) {
+    ldout(s->cct, 4) << "no 'bucket' provided" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  rgw_bucket bucket;
+  int shard_id{-1}; // unused
+  http_ret = rgw_bucket_parse_bucket_key(s->cct, key, &bucket, &shard_id);
+  if (http_ret < 0) {
+    // A key was supplied but could not be parsed; say so rather than
+    // repeating the "missing parameter" message used above.
+    ldout(s->cct, 4) << "invalid 'bucket' provided: " << key << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // read the bucket instance info for num_shards
+  auto ctx = store->svc.sysobj->init_obj_ctx();
+  RGWBucketInfo info;
+  http_ret = store->get_bucket_instance_info(ctx, bucket, info, nullptr, nullptr);
+  if (http_ret < 0) {
+    ldout(s->cct, 4) << "failed to read bucket info: " << cpp_strerror(http_ret) << dendl;
+    return;
+  }
+  http_ret = rgw_bucket_sync_status(this, store, source_zone, info, &status);
+}
+
+// Sends the header, then the "status" array when execute() succeeded.
+// Output is flushed on both the success and the error path.
+void RGWOp_BILog_Status::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  const bool succeeded = (http_ret >= 0);
+  if (succeeded) {
+    encode_json("status", status, s->formatter);
+  }
+
+  flusher.flush();
+}
+
+// REST op returning the data sync status for a source zone; requires the
+// "datalog" read cap.
+// not in header to avoid pulling in rgw_data_sync.h
+class RGWOp_DATALog_Status : public RGWRESTOp {
+  rgw_data_sync_status status;  // filled by execute(), encoded by send_response()
+
+public:
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override
+  {
+    return "get_data_changes_log_status";
+  }
+};
+
+// Reads the data sync status for the zone named by the "source-zone" query
+// parameter. http_ret is -ENOENT when the store has no data sync manager for
+// that zone.
+void RGWOp_DATALog_Status::execute()
+{
+  const auto source_zone = s->info.args.get("source-zone");
+
+  auto sync_mgr = store->get_data_sync_manager(source_zone);
+  if (sync_mgr != nullptr) {
+    http_ret = sync_mgr->read_sync_status(&status);
+    return;
+  }
+
+  ldout(s->cct, 1) << "no sync manager for source-zone " << source_zone << dendl;
+  http_ret = -ENOENT;
+}
+
+// Sends the header, then the "status" object when execute() succeeded.
+// Output is flushed on both the success and the error path.
+void RGWOp_DATALog_Status::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+  end_header(s);
+
+  const bool succeeded = (http_ret >= 0);
+  if (succeeded) {
+    encode_json("status", status, s->formatter);
+  }
+
+  flusher.flush();
+}
+
+
+// Selects the op for a GET on the log endpoint from the "type" query
+// parameter and the presence of "id", "info" and "status".
+//
+//   type=metadata     : id+info -> shard info, id -> list,
+//                       status -> sync status, otherwise -> log info
+//   type=bucket-index : info -> info, status -> sync status, otherwise -> list
+//   type=data         : same layout as metadata
+//
+// Returns NULL when "type" is absent or not one of the above. The returned
+// op is allocated with new.
+RGWOp *RGWHandler_Log::op_get() {
+  auto& args = s->info.args;
+
+  bool have_type;
+  const string type = args.get("type", &have_type);
+  if (!have_type) {
+    return NULL;
+  }
+
+  if (type == "metadata") {
+    if (args.exists("id")) {
+      if (args.exists("info")) {
+        return new RGWOp_MDLog_ShardInfo;
+      }
+      return new RGWOp_MDLog_List;
+    }
+    if (args.exists("status")) {
+      return new RGWOp_MDLog_Status;
+    }
+    return new RGWOp_MDLog_Info;
+  }
+
+  if (type == "bucket-index") {
+    if (args.exists("info")) {
+      return new RGWOp_BILog_Info;
+    }
+    if (args.exists("status")) {
+      return new RGWOp_BILog_Status;
+    }
+    return new RGWOp_BILog_List;
+  }
+
+  if (type == "data") {
+    if (args.exists("id")) {
+      if (args.exists("info")) {
+        return new RGWOp_DATALog_ShardInfo;
+      }
+      return new RGWOp_DATALog_List;
+    }
+    if (args.exists("status")) {
+      return new RGWOp_DATALog_Status;
+    }
+    return new RGWOp_DATALog_Info;
+  }
+
+  return NULL;
+}
+
+// Selects the trim op for a DELETE on the log endpoint from the "type" query
+// parameter ("metadata", "bucket-index" or "data"). Returns NULL when "type"
+// is absent or unrecognised. The returned op is allocated with new.
+RGWOp *RGWHandler_Log::op_delete() {
+  bool have_type;
+  const string type = s->info.args.get("type", &have_type);
+  if (!have_type) {
+    return NULL;
+  }
+
+  if (type == "metadata") {
+    return new RGWOp_MDLog_Delete;
+  }
+  if (type == "bucket-index") {
+    return new RGWOp_BILog_Delete;
+  }
+  if (type == "data") {
+    return new RGWOp_DATALog_Delete;
+  }
+  return NULL;
+}
+
+// Selects the op for a POST on the log endpoint. "type" must be "metadata"
+// or "data"; the first of "lock", "unlock", "notify" present in the query
+// string picks the action. Returns NULL when nothing matches. The returned
+// op is allocated with new.
+RGWOp *RGWHandler_Log::op_post() {
+  auto& args = s->info.args;
+
+  bool have_type;
+  const string type = args.get("type", &have_type);
+  if (!have_type) {
+    return NULL;
+  }
+
+  if (type == "metadata") {
+    if (args.exists("lock")) {
+      return new RGWOp_MDLog_Lock;
+    }
+    if (args.exists("unlock")) {
+      return new RGWOp_MDLog_Unlock;
+    }
+    if (args.exists("notify")) {
+      return new RGWOp_MDLog_Notify;
+    }
+    return NULL;
+  }
+
+  if (type == "data") {
+    if (args.exists("lock")) {
+      return new RGWOp_DATALog_Lock;
+    }
+    if (args.exists("unlock")) {
+      return new RGWOp_DATALog_Unlock;
+    }
+    if (args.exists("notify")) {
+      return new RGWOp_DATALog_Notify;
+    }
+  }
+
+  return NULL;
+}
+
diff --git a/src/rgw/rgw_rest_log.h b/src/rgw/rgw_rest_log.h
new file mode 100644
index 00000000..d5fbf814
--- /dev/null
+++ b/src/rgw/rgw_rest_log.h
@@ -0,0 +1,336 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_REST_LOG_H
+#define RGW_REST_LOG_H
+
+#include "rgw_metadata.h"
+
+// REST op "list_bucket_index_log"; requires the "bilog" read cap.
+// The response is produced in three steps: header, entry batches, end.
+class RGWOp_BILog_List : public RGWRESTOp {
+  bool sent_header{false};  // set once the header has been written
+
+public:
+  RGWOp_BILog_List() = default;
+  ~RGWOp_BILog_List() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("bilog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+  virtual void send_response(list<rgw_bi_log_entry>& entries, string& marker);
+  virtual void send_response_end();
+
+  const char* name() const override { return "list_bucket_index_log"; }
+};
+
+// REST op "bucket_index_log_info"; requires the "bilog" read cap.
+// The members below are what the response reports.
+class RGWOp_BILog_Info : public RGWRESTOp {
+  string bucket_ver;
+  string master_ver;
+  string max_marker;
+  bool syncstopped{false};
+
+public:
+  RGWOp_BILog_Info() = default;
+  ~RGWOp_BILog_Info() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("bilog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "bucket_index_log_info"; }
+};
+
+// REST op "trim_bucket_index_log"; requires the "bilog" write cap.
+class RGWOp_BILog_Delete : public RGWRESTOp {
+public:
+  RGWOp_BILog_Delete() = default;
+  ~RGWOp_BILog_Delete() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("bilog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "trim_bucket_index_log"; }
+};
+
+// REST op "list_metadata_log"; requires the "mdlog" read cap.
+class RGWOp_MDLog_List : public RGWRESTOp {
+  list<cls_log_entry> entries;
+  string last_marker;
+  bool truncated{false};
+
+public:
+  RGWOp_MDLog_List() = default;
+  ~RGWOp_MDLog_List() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "list_metadata_log"; }
+};
+
+// REST op "get_metadata_log_info"; requires the "mdlog" read cap.
+class RGWOp_MDLog_Info : public RGWRESTOp {
+  unsigned num_objects{0};
+  RGWPeriodHistory::Cursor period;
+
+public:
+  RGWOp_MDLog_Info() = default;
+  ~RGWOp_MDLog_Info() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "get_metadata_log_info"; }
+};
+
+// REST op "get_metadata_log_shard_info"; requires the "mdlog" read cap.
+class RGWOp_MDLog_ShardInfo : public RGWRESTOp {
+  RGWMetadataLogInfo info;
+
+public:
+  RGWOp_MDLog_ShardInfo() = default;
+  ~RGWOp_MDLog_ShardInfo() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "get_metadata_log_shard_info"; }
+};
+
+// REST op "lock_mdlog_object"; requires the "mdlog" write cap.
+class RGWOp_MDLog_Lock : public RGWRESTOp {
+public:
+  RGWOp_MDLog_Lock() = default;
+  ~RGWOp_MDLog_Lock() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "lock_mdlog_object"; }
+};
+
+// REST op "unlock_mdlog_object"; requires the "mdlog" write cap.
+class RGWOp_MDLog_Unlock : public RGWRESTOp {
+public:
+  RGWOp_MDLog_Unlock() = default;
+  ~RGWOp_MDLog_Unlock() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "unlock_mdlog_object"; }
+};
+
+// REST op "mdlog_notify"; requires the "mdlog" write cap.
+class RGWOp_MDLog_Notify : public RGWRESTOp {
+public:
+  RGWOp_MDLog_Notify() = default;
+  ~RGWOp_MDLog_Notify() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "mdlog_notify"; }
+};
+
+// REST op "trim_metadata_log"; requires the "mdlog" write cap.
+class RGWOp_MDLog_Delete : public RGWRESTOp {
+public:
+  RGWOp_MDLog_Delete() = default;
+  ~RGWOp_MDLog_Delete() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("mdlog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "trim_metadata_log"; }
+};
+
+// REST op "list_data_changes_log"; requires the "datalog" read cap.
+class RGWOp_DATALog_List : public RGWRESTOp {
+  list<rgw_data_change_log_entry> entries;
+  string last_marker;
+  bool truncated{false};
+  bool extra_info{false};  // when set, whole records are encoded, not just .entry
+
+public:
+  RGWOp_DATALog_List() = default;
+  ~RGWOp_DATALog_List() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "list_data_changes_log"; }
+};
+
+// REST op "get_data_changes_log_info"; requires the "datalog" read cap.
+class RGWOp_DATALog_Info : public RGWRESTOp {
+  unsigned num_objects{0};
+
+public:
+  RGWOp_DATALog_Info() = default;
+  ~RGWOp_DATALog_Info() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "get_data_changes_log_info"; }
+};
+
+// REST op "get_data_changes_log_shard_info"; requires the "datalog" read cap.
+class RGWOp_DATALog_ShardInfo : public RGWRESTOp {
+  RGWDataChangesLogInfo info;
+
+public:
+  RGWOp_DATALog_ShardInfo() = default;
+  ~RGWOp_DATALog_ShardInfo() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_READ);
+  }
+
+  int verify_permission() override
+  {
+    return check_caps(s->user->caps);
+  }
+
+  void execute() override;
+  void send_response() override;
+
+  const char* name() const override { return "get_data_changes_log_shard_info"; }
+};
+
+// REST op "lock_datalog_object"; requires the "datalog" write cap.
+class RGWOp_DATALog_Lock : public RGWRESTOp {
+public:
+  RGWOp_DATALog_Lock() = default;
+  ~RGWOp_DATALog_Lock() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "lock_datalog_object"; }
+};
+
+// REST op "unlock_datalog_object"; requires the "datalog" write cap.
+class RGWOp_DATALog_Unlock : public RGWRESTOp {
+public:
+  RGWOp_DATALog_Unlock() = default;
+  ~RGWOp_DATALog_Unlock() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "unlock_datalog_object"; }
+};
+
+// REST op "datalog_notify"; requires the "datalog" write cap.
+class RGWOp_DATALog_Notify : public RGWRESTOp {
+public:
+  RGWOp_DATALog_Notify() = default;
+  ~RGWOp_DATALog_Notify() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "datalog_notify"; }
+};
+
+// REST op "trim_data_changes_log"; requires the "datalog" write cap.
+class RGWOp_DATALog_Delete : public RGWRESTOp {
+public:
+  RGWOp_DATALog_Delete() = default;
+  ~RGWOp_DATALog_Delete() override = default;
+
+  int check_caps(RGWUserCaps& caps) override
+  {
+    return caps.check_cap("datalog", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+
+  const char* name() const override { return "trim_data_changes_log"; }
+};
+
+// Request handler for the log REST endpoint. It maps GET / DELETE / POST
+// requests to the matching RGWOp_* op and inherits its constructors from
+// RGWHandler_Auth_S3.
+class RGWHandler_Log : public RGWHandler_Auth_S3 {
+public:
+  using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+  ~RGWHandler_Log() override = default;
+
+protected:
+  RGWOp *op_get() override;
+  RGWOp *op_delete() override;
+  RGWOp *op_post() override;
+
+  // Nothing is read here; always reports success.
+  int read_permissions(RGWOp*) override
+  {
+    return 0;
+  }
+};
+
+// REST manager for the log endpoint: every request gets a freshly allocated
+// RGWHandler_Log built from the auth registry. The request state and the
+// frontend prefix are not consulted.
+class RGWRESTMgr_Log : public RGWRESTMgr {
+public:
+  RGWRESTMgr_Log() = default;
+  ~RGWRESTMgr_Log() override = default;
+
+  RGWHandler_REST* get_handler(struct req_state* const,
+                               const rgw::auth::StrategyRegistry& auth_registry,
+                               const std::string& frontend_prefixs) override
+  {
+    return new RGWHandler_Log(auth_registry);
+  }
+};
+
+#endif /* RGW_REST_LOG_H */
diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc
new file mode 100644
index 00000000..0f81d54c
--- /dev/null
+++ b/src/rgw/rgw_rest_metadata.cc
@@ -0,0 +1,363 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/page.h"
+
+#include "rgw_rest.h"
+#include "rgw_op.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_metadata.h"
+#include "rgw_client_io.h"
+#include "common/errno.h"
+#include "common/strtol.h"
+#include "rgw/rgw_b64.h"
+#include "include/ceph_assert.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+// Build the metadata key for a request into `out`.
+// When the URL carries a bucket component it is used as the section and the
+// "key" argument (if non-empty) is appended as "<section>:<key>".
+// Otherwise the "key" argument alone becomes the result.
+static inline void frame_metadata_key(req_state *s, string& out) {
+  bool key_present;
+  string entry = s->info.args.get("key", &key_present);
+
+  if (s->init_state.url_bucket.empty()) {
+    // no section in the URL: the "key" argument itself is the whole key
+    out = entry;
+    return;
+  }
+
+  string section = s->init_state.url_bucket;
+  out = section;
+  if (!entry.empty()) {
+    out += string(":") + entry;
+  }
+}
+
+// Look up the metadata entry addressed by the request.
+// http_ret is 0 on success, otherwise the negative code returned by the
+// metadata manager.
+void RGWOp_Metadata_Get::execute() {
+  string full_key;
+  frame_metadata_key(s, full_key);
+
+  /* query the metadata manager, handing it the request formatter */
+  http_ret = store->meta_mgr->get(full_key, s->formatter);
+  if (http_ret >= 0) {
+    http_ret = 0;
+    return;
+  }
+
+  dout(5) << "ERROR: can't get key: " << cpp_strerror(http_ret) << dendl;
+}
+
+// Append the request owner's id as the "key" argument, then run the
+// generic metadata get.
+void RGWOp_Metadata_Get_Myself::execute() {
+  string self_id = s->owner.get_id().to_str();
+  s->info.args.append("key", self_id);
+
+  RGWOp_Metadata_Get::execute();
+}
+
+// List metadata keys for the section addressed by the request.
+//
+// Request arguments read here:
+//   marker      - optional, base64 encoded; a marker that fails to decode
+//                 is treated as empty
+//   max-entries - optional; when present the extended response format
+//                 (object with "keys", "truncated", "count" and, if
+//                 truncated, "marker") is produced, otherwise only the
+//                 "keys" array is sent for backward compatibility
+//
+// http_ret is 0 on success, -EINVAL for an unparsable max-entries, or the
+// negative code returned by the metadata manager.
+void RGWOp_Metadata_List::execute() {
+  string marker;
+  ldout(s->cct, 16) << __func__
+                    << " raw marker " << s->info.args.get("marker")
+                    << dendl;
+
+  try {
+    marker = s->info.args.get("marker");
+    if (!marker.empty()) {
+      marker = rgw::from_base64(marker);
+    }
+    ldout(s->cct, 16) << __func__
+                      << " marker " << marker << dendl;
+  } catch (...) {
+    // decoding failed: fall back to listing from the beginning
+    marker = std::string("");
+  }
+
+  bool max_entries_specified;
+  string max_entries_str =
+    s->info.args.get("max-entries", &max_entries_specified);
+
+  bool extended_response = (max_entries_specified); /* for backward compatibility, if max-entries is not specified
+                                                       we will send the old response format */
+  uint64_t max_entries = 0;
+
+  if (max_entries_specified) {
+    string err;
+    max_entries = (unsigned)strict_strtol(max_entries_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      dout(5) << "Error parsing max-entries " << max_entries_str << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  string metadata_key;
+
+  frame_metadata_key(s, metadata_key);
+  /* List keys */
+  void *handle;
+  int max = 1000;
+
+  /* example markers:
+     marker = "3:b55a9110:root::bu_9:head";
+     marker = "3:b9a8b2a6:root::sorry_janefonda_890:head";
+     marker = "3:bf885d8f:root::sorry_janefonda_665:head";
+  */
+
+  http_ret = store->meta_mgr->list_keys_init(metadata_key, marker, &handle);
+  if (http_ret < 0) {
+    dout(5) << "ERROR: can't get key: " << cpp_strerror(http_ret) << dendl;
+    return;
+  }
+
+  bool truncated = false;
+  uint64_t count = 0;
+
+  if (extended_response) {
+    s->formatter->open_object_section("result");
+  }
+
+  s->formatter->open_array_section("keys");
+
+  uint64_t left;
+  do {
+    list<string> keys;
+    // with max-entries: ask only for what is still missing;
+    // without it: fetch in batches of `max` until the listing is exhausted
+    left = (max_entries_specified ? max_entries - count : max);
+    http_ret = store->meta_mgr->list_keys_next(handle, left, keys, &truncated);
+    if (http_ret < 0) {
+      dout(5) << "ERROR: lists_keys_next(): " << cpp_strerror(http_ret)
+              << dendl;
+      // release the listing handle on the error path as well, otherwise it
+      // is never handed back to the metadata manager
+      store->meta_mgr->list_keys_complete(handle);
+      return;
+    }
+
+    for (list<string>::iterator iter = keys.begin(); iter != keys.end();
+         ++iter) {
+      s->formatter->dump_string("key", *iter);
+      ++count;
+    }
+
+  } while (truncated && left > 0);
+
+  s->formatter->close_section();
+
+  if (extended_response) {
+    encode_json("truncated", truncated, s->formatter);
+    encode_json("count", count, s->formatter);
+    if (truncated) {
+      // the continuation marker is returned base64 encoded, matching the
+      // decoding applied to the incoming "marker" argument
+      string esc_marker =
+        rgw::to_base64(store->meta_mgr->get_marker(handle));
+      encode_json("marker", esc_marker, s->formatter);
+    }
+    s->formatter->close_section();
+  }
+  store->meta_mgr->list_keys_complete(handle);
+
+  http_ret = 0;
+}
+
+// Read the request body into `bl`.
+//
+// With a Content-Length the body is read in one recv_body() call; without
+// one the request must use "chunked" transfer encoding and is read in
+// CEPH_PAGE_SIZE pieces until a short read.
+//
+// Returns 0 on success, -EINVAL for a negative Content-Length, -ENOMEM if
+// the buffer cannot be allocated, -ERR_LENGTH_REQUIRED when neither a
+// length nor chunked encoding is present, or the negative value returned by
+// recv_body().
+int RGWOp_Metadata_Put::get_data(bufferlist& bl) {
+  size_t cl = 0;
+  char *data;
+  int read_len;
+
+  if (s->length) {
+    const long long declared_len = atoll(s->length);
+    if (declared_len < 0) {
+      // A negative value would wrap to a huge size_t. For -1, cl + 1 is 0,
+      // so malloc(0) could succeed and recv_body() would then be asked to
+      // fill SIZE_MAX bytes.
+      dout(5) << "invalid content length " << s->length << dendl;
+      return -EINVAL;
+    }
+    cl = declared_len;
+  }
+  if (cl) {
+    data = (char *)malloc(cl + 1);
+    if (!data) {
+      return -ENOMEM;
+    }
+    read_len = recv_body(s, data, cl);
+    if (cl != (size_t)read_len) {
+      // a short read is only logged; whatever was received is still used
+      dout(10) << "recv_body incomplete" << dendl;
+    }
+    if (read_len < 0) {
+      free(data);
+      return read_len;
+    }
+    bl.append(data, read_len);
+  } else {
+    int chunk_size = CEPH_PAGE_SIZE;
+    const char *enc = s->info.env->get("HTTP_TRANSFER_ENCODING");
+    if (!enc || strcmp(enc, "chunked")) {
+      return -ERR_LENGTH_REQUIRED;
+    }
+    data = (char *)malloc(chunk_size);
+    if (!data) {
+      return -ENOMEM;
+    }
+    // the same buffer is reused for every chunk; a read shorter than
+    // chunk_size ends the loop
+    do {
+      read_len = recv_body(s, data, chunk_size);
+      if (read_len < 0) {
+        free(data);
+        return read_len;
+      }
+      bl.append(data, read_len);
+    } while (read_len == chunk_size);
+  }
+
+  free(data);
+  return 0;
+}
+
+// Store the request body as the metadata entry addressed by the request.
+// The optional "update-type" argument selects the sync type (default
+// APPLY_ALWAYS); an unrecognized value yields -EINVAL.
+// On success http_ret keeps the code returned by the metadata manager and
+// update_status is set for the two internal status codes.
+void RGWOp_Metadata_Put::execute() {
+  bufferlist payload;
+  http_ret = get_data(payload);
+  if (http_ret < 0) {
+    return;
+  }
+
+  // runs after the body has been read
+  http_ret = do_aws4_auth_completion();
+  if (http_ret < 0) {
+    return;
+  }
+
+  string metadata_key;
+  frame_metadata_key(s, metadata_key);
+
+  RGWMetadataHandler::sync_type_t sync_type = RGWMetadataHandler::APPLY_ALWAYS;
+
+  bool has_update_type = false;
+  string update_type = s->info.args.get("update-type", &has_update_type);
+  if (has_update_type &&
+      !RGWMetadataHandler::string_to_sync_type(update_type, sync_type)) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  http_ret = store->meta_mgr->put(metadata_key, payload, sync_type,
+                                  &ondisk_version);
+  if (http_ret < 0) {
+    dout(5) << "ERROR: can't put key: " << cpp_strerror(http_ret) << dendl;
+    return;
+  }
+
+  // translate internal codes into return header
+  if (http_ret == STATUS_NO_APPLY) {
+    update_status = "skipped";
+  } else if (http_ret == STATUS_APPLIED) {
+    update_status = "applied";
+  }
+}
+
+// Send the response for a metadata put: the two internal status codes are
+// mapped to STATUS_NO_CONTENT, and the update status and on-disk version
+// are emitted as RGWX_* headers when non-empty.
+void RGWOp_Metadata_Put::send_response() {
+  const bool internal_status =
+    (http_ret == STATUS_NO_APPLY) || (http_ret == STATUS_APPLIED);
+  int http_return_code = internal_status ? STATUS_NO_CONTENT : http_ret;
+
+  set_req_state_err(s, http_return_code);
+  dump_errno(s);
+
+  stringstream version_hdr;
+  version_hdr << "ver:" << ondisk_version.ver
+              << ",tag:" << ondisk_version.tag;
+
+  dump_header_if_nonempty(s, "RGWX_UPDATE_STATUS", update_status);
+  dump_header_if_nonempty(s, "RGWX_UPDATE_VERSION", version_hdr.str());
+  end_header(s);
+}
+
+// Remove the metadata entry addressed by the request.
+// http_ret is 0 on success, otherwise the negative code returned by the
+// metadata manager.
+void RGWOp_Metadata_Delete::execute() {
+  string full_key;
+  frame_metadata_key(s, full_key);
+
+  http_ret = store->meta_mgr->remove(full_key);
+  if (http_ret >= 0) {
+    http_ret = 0;
+    return;
+  }
+
+  dout(5) << "ERROR: can't remove key: " << cpp_strerror(http_ret) << dendl;
+}
+
+// Take an exclusive lock on the metadata entry addressed by the request.
+// Requires the "key", "length" (positive decimal integer) and "lock_id"
+// arguments; anything else yields -EINVAL. -EBUSY from the metadata manager
+// is reported as -ERR_LOCKED.
+void RGWOp_Metadata_Lock::execute() {
+  string metadata_key;
+  frame_metadata_key(s, metadata_key);
+
+  http_ret = 0;
+
+  string duration_str = s->info.args.get("length");
+  string lock_id = s->info.args.get("lock_id");
+
+  const bool args_complete = s->info.args.exists("key") &&
+                             !duration_str.empty() &&
+                             !lock_id.empty();
+  if (!args_complete) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  string err;
+  int dur = strict_strtol(duration_str.c_str(), 10, &err);
+  const bool dur_valid = err.empty() && dur > 0;
+  if (!dur_valid) {
+    dout(5) << "invalid length param " << duration_str << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  http_ret = store->meta_mgr->lock_exclusive(metadata_key, make_timespan(dur), lock_id);
+  if (http_ret == -EBUSY) {
+    http_ret = -ERR_LOCKED;
+  }
+}
+
+// Release the lock on the metadata entry addressed by the request.
+// Requires the "key" and "lock_id" arguments (-EINVAL otherwise); http_ret
+// then carries the result of the metadata manager's unlock().
+void RGWOp_Metadata_Unlock::execute() {
+  string metadata_key;
+  frame_metadata_key(s, metadata_key);
+
+  http_ret = 0;
+
+  string lock_id = s->info.args.get("lock_id");
+
+  const bool args_complete = s->info.args.exists("key") && !lock_id.empty();
+  if (!args_complete) {
+    dout(5) << "Error invalid parameter list" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  http_ret = store->meta_mgr->unlock(metadata_key, lock_id);
+}
+
+// Pick the GET op: "myself" wins over "key"; with neither argument the
+// request is a listing.
+RGWOp *RGWHandler_Metadata::op_get() {
+  auto& args = s->info.args;
+
+  if (args.exists("myself")) {
+    return new RGWOp_Metadata_Get_Myself;
+  }
+  if (!args.exists("key")) {
+    return new RGWOp_Metadata_List;
+  }
+  return new RGWOp_Metadata_Get;
+}
+
+RGWOp *RGWHandler_Metadata::op_put() { // PUT always maps to the metadata put op
+ return new RGWOp_Metadata_Put; // raw pointer returned to the caller
+}
+
+RGWOp *RGWHandler_Metadata::op_delete() { // DELETE always maps to the metadata delete op
+ return new RGWOp_Metadata_Delete; // raw pointer returned to the caller
+}
+
+// Pick the POST op from the "lock" / "unlock" arguments ("lock" is checked
+// first). Returns a null pointer when neither is present.
+RGWOp *RGWHandler_Metadata::op_post() {
+  auto& args = s->info.args;
+
+  if (args.exists("lock")) {
+    return new RGWOp_Metadata_Lock;
+  }
+  if (args.exists("unlock")) {
+    return new RGWOp_Metadata_Unlock;
+  }
+  return nullptr;
+}
diff --git a/src/rgw/rgw_rest_metadata.h b/src/rgw/rgw_rest_metadata.h
new file mode 100644
index 00000000..728813c7
--- /dev/null
+++ b/src/rgw/rgw_rest_metadata.h
@@ -0,0 +1,135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_REST_METADATA_H
+#define RGW_REST_METADATA_H
+
+// REST op registered under the name "list_metadata".
+// Gated on the "metadata" capability with read permission.
+class RGWOp_Metadata_List : public RGWRESTOp {
+public:
+  RGWOp_Metadata_List() {}
+  ~RGWOp_Metadata_List() override = default;
+
+  const char* name() const override {
+    return "list_metadata";
+  }
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_READ);
+    return allowed;
+  }
+  void execute() override;
+};
+
+// REST op registered under the name "get_metadata".
+// Gated on the "metadata" capability with read permission.
+class RGWOp_Metadata_Get : public RGWRESTOp {
+public:
+  RGWOp_Metadata_Get() {}
+  ~RGWOp_Metadata_Get() override = default;
+
+  const char* name() const override {
+    return "get_metadata";
+  }
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_READ);
+    return allowed;
+  }
+  void execute() override;
+};
+
+// Variant of RGWOp_Metadata_Get that overrides only execute(); the
+// capability check and the op name are inherited.
+class RGWOp_Metadata_Get_Myself : public RGWOp_Metadata_Get {
+public:
+  RGWOp_Metadata_Get_Myself() {}
+  ~RGWOp_Metadata_Get_Myself() override = default;
+
+  void execute() override;
+};
+
+// REST op registered under the name "set_metadata" (op type
+// RGW_OP_ADMIN_SET_METADATA). Gated on the "metadata" capability with write
+// permission.
+class RGWOp_Metadata_Put : public RGWRESTOp {
+  // reads the request body into bl
+  int get_data(bufferlist& bl);
+  // value for the RGWX_UPDATE_STATUS response header
+  string update_status;
+  // version reported in the RGWX_UPDATE_VERSION response header
+  obj_version ondisk_version;
+
+public:
+  RGWOp_Metadata_Put() {}
+  ~RGWOp_Metadata_Put() override = default;
+
+  const char* name() const override {
+    return "set_metadata";
+  }
+  RGWOpType get_type() override {
+    return RGW_OP_ADMIN_SET_METADATA;
+  }
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_WRITE);
+    return allowed;
+  }
+  void execute() override;
+  void send_response() override;
+};
+
+// REST op registered under the name "remove_metadata".
+// Gated on the "metadata" capability with write permission.
+class RGWOp_Metadata_Delete : public RGWRESTOp {
+public:
+  RGWOp_Metadata_Delete() {}
+  ~RGWOp_Metadata_Delete() override = default;
+
+  const char* name() const override {
+    return "remove_metadata";
+  }
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_WRITE);
+    return allowed;
+  }
+  void execute() override;
+};
+
+// REST op registered under the name "lock_metadata_object".
+// Gated on the "metadata" capability with write permission.
+class RGWOp_Metadata_Lock : public RGWRESTOp {
+public:
+  RGWOp_Metadata_Lock() {}
+  ~RGWOp_Metadata_Lock() override = default;
+
+  const char* name() const override { return "lock_metadata_object"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_WRITE);
+    return allowed;
+  }
+
+  void execute() override;
+};
+
+// REST op registered under the name "unlock_metadata_object".
+// Gated on the "metadata" capability with write permission.
+class RGWOp_Metadata_Unlock : public RGWRESTOp {
+public:
+  RGWOp_Metadata_Unlock() {}
+  ~RGWOp_Metadata_Unlock() override = default;
+
+  const char* name() const override { return "unlock_metadata_object"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    const int allowed = caps.check_cap("metadata", RGW_CAP_WRITE);
+    return allowed;
+  }
+
+  void execute() override;
+};
+
+// REST handler for the metadata endpoints. It supplies the ops for GET,
+// PUT, DELETE and POST; their definitions live in the matching .cc file.
+class RGWHandler_Metadata : public RGWHandler_Auth_S3 {
+public:
+  using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+  ~RGWHandler_Metadata() override = default;
+
+protected:
+  RGWOp* op_get() override;
+  RGWOp* op_put() override;
+  RGWOp* op_delete() override;
+  RGWOp* op_post() override;
+
+  // Nothing is loaded here; the call always reports success.
+  int read_permissions(RGWOp*) override { return 0; }
+};
+
+// REST manager for the metadata endpoints.
+class RGWRESTMgr_Metadata : public RGWRESTMgr {
+public:
+  RGWRESTMgr_Metadata() = default;
+  ~RGWRESTMgr_Metadata() override = default;
+
+  // Returns a newly allocated RGWHandler_Metadata constructed from the auth
+  // registry. The request state and the frontend prefix are not used.
+  RGWHandler_REST* get_handler(
+      struct req_state* const s,
+      const rgw::auth::StrategyRegistry& auth_registry,
+      const std::string& frontend_prefix) override
+  {
+    RGWHandler_REST* const handler = new RGWHandler_Metadata(auth_registry);
+    return handler;
+  }
+};
+
+#endif /* RGW_REST_METADATA_H */
diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc
new file mode 100644
index 00000000..de4babd4
--- /dev/null
+++ b/src/rgw/rgw_rest_pubsub.cc
@@ -0,0 +1,729 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <algorithm>
+#include <boost/tokenizer.hpp>
+#include <optional>
+#include "rgw_rest_pubsub_common.h"
+#include "rgw_rest_pubsub.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_pubsub.h"
+#include "rgw_sync_module_pubsub.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_arn.h"
+#include "rgw_auth_s3.h"
+#include "services/svc_zone.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+
+// command (AWS compliant):
+// POST
+// Action=CreateTopic&Name=<topic-name>[&push-endpoint=<endpoint>[&<arg1>=<value1>]]
+class RGWPSCreateTopic_ObjStore_AWS : public RGWPSCreateTopicOp {
+public:
+  // Collect the topic parameters from the request arguments.
+  // Reads "Name" (mandatory), "OpaqueData" and "push-endpoint"; every other
+  // argument except "Action", "Name" and "PayloadHash" is joined into
+  // dest.push_endpoint_args as "k=v&k=v". Also builds the topic ARN string
+  // that send_response() returns.
+  // Returns 0 on success, -EINVAL when "Name" is missing or
+  // validate_and_update_endpoint_secret() rejects the destination.
+  int get_params() override {
+    topic_name = s->info.args.get("Name");
+    if (topic_name.empty()) {
+      ldout(s->cct, 1) << "CreateTopic Action 'Name' argument is missing" << dendl;
+      return -EINVAL;
+    }
+
+    opaque_data = s->info.args.get("OpaqueData");
+
+    dest.push_endpoint = s->info.args.get("push-endpoint");
+
+    if (!validate_and_update_endpoint_secret(dest, s->cct, *(s->info.env))) {
+      return -EINVAL;
+    }
+    // iterate by reference: copying every key/value pair is needless work
+    for (const auto& param : s->info.args.get_params()) {
+      if (param.first == "Action" || param.first == "Name" || param.first == "PayloadHash") {
+        continue;
+      }
+      dest.push_endpoint_args.append(param.first+"="+param.second+"&");
+    }
+
+    if (!dest.push_endpoint_args.empty()) {
+      // remove last separator
+      dest.push_endpoint_args.pop_back();
+    }
+
+    // dest object only stores endpoint info
+    // bucket to store events/records will be set only when subscription is created
+    dest.bucket_name = "";
+    dest.oid_prefix = "";
+    dest.arn_topic = topic_name;
+    // the topic ARN will be sent in the reply
+    const rgw::ARN arn(rgw::Partition::aws, rgw::Service::sns,
+                       store->svc.zone->get_zonegroup().get_name(),
+                       s->user->user_id.tenant, topic_name);
+    topic_arn = arn.to_string();
+    return 0;
+  }
+
+  // Emit status and headers; on success follow with the XML
+  // CreateTopicResponse carrying the topic ARN and the request id.
+  void send_response() override {
+    if (op_ret) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto f = s->formatter;
+    f->open_object_section_in_ns("CreateTopicResponse", "https://sns.amazonaws.com/doc/2010-03-31/");
+    f->open_object_section("CreateTopicResult");
+    encode_xml("TopicArn", topic_arn, f);
+    f->close_section();
+    f->open_object_section("ResponseMetadata");
+    encode_xml("RequestId", s->req_id, f);
+    f->close_section();
+    f->close_section();
+    rgw_flush_formatter_and_reset(s, f);
+  }
+};
+
+// command (AWS compliant):
+// POST
+// Action=ListTopics
+class RGWPSListTopics_ObjStore_AWS : public RGWPSListTopicsOp {
+public:
+  // Emit status and headers; on success follow with the XML
+  // ListTopicsResponse carrying the topics and the request id.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto fmt = s->formatter;
+    fmt->open_object_section_in_ns("ListTopicsResponse", "https://sns.amazonaws.com/doc/2010-03-31/");
+    {
+      fmt->open_object_section("ListTopicsResult");
+      encode_xml("Topics", result, fmt);
+      fmt->close_section();
+    }
+    {
+      fmt->open_object_section("ResponseMetadata");
+      encode_xml("RequestId", s->req_id, fmt);
+      fmt->close_section();
+    }
+    fmt->close_section();
+    rgw_flush_formatter_and_reset(s, fmt);
+  }
+};
+
+// command (extension to AWS):
+// POST
+// Action=GetTopic&TopicArn=<topic-arn>
+class RGWPSGetTopic_ObjStore_AWS : public RGWPSGetTopicOp {
+public:
+  // Take the topic name from the resource part of the "TopicArn" argument.
+  // Returns -EINVAL when the ARN does not parse or has an empty resource.
+  int get_params() override {
+    const auto parsed_arn = rgw::ARN::parse(s->info.args.get("TopicArn"));
+    const bool usable = parsed_arn && !parsed_arn->resource.empty();
+    if (!usable) {
+      ldout(s->cct, 1) << "GetTopic Action 'TopicArn' argument is missing or invalid" << dendl;
+      return -EINVAL;
+    }
+
+    topic_name = parsed_arn->resource;
+    return 0;
+  }
+
+  // Emit status and headers; on success follow with the XML
+  // GetTopicResponse carrying the topic and the request id.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto fmt = s->formatter;
+    fmt->open_object_section("GetTopicResponse");
+    {
+      fmt->open_object_section("GetTopicResult");
+      encode_xml("Topic", result.topic, fmt);
+      fmt->close_section();
+    }
+    {
+      fmt->open_object_section("ResponseMetadata");
+      encode_xml("RequestId", s->req_id, fmt);
+      fmt->close_section();
+    }
+    fmt->close_section();
+    rgw_flush_formatter_and_reset(s, fmt);
+  }
+};
+
+// command (AWS compliant):
+// POST
+// Action=DeleteTopic&TopicArn=<topic-arn>
+class RGWPSDeleteTopic_ObjStore_AWS : public RGWPSDeleteTopicOp {
+public:
+  // Take the topic name from the resource part of the "TopicArn" argument.
+  // Returns -EINVAL when the ARN does not parse or has an empty resource.
+  int get_params() override {
+    const auto parsed_arn = rgw::ARN::parse(s->info.args.get("TopicArn"));
+    const bool usable = parsed_arn && !parsed_arn->resource.empty();
+    if (!usable) {
+      ldout(s->cct, 1) << "DeleteTopic Action 'TopicArn' argument is missing or invalid" << dendl;
+      return -EINVAL;
+    }
+
+    topic_name = parsed_arn->resource;
+    return 0;
+  }
+
+  // Emit status and headers; on success follow with the XML
+  // DeleteTopicResponse carrying only the request id.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+    if (op_ret < 0) {
+      return;
+    }
+
+    const auto fmt = s->formatter;
+    fmt->open_object_section_in_ns("DeleteTopicResponse", "https://sns.amazonaws.com/doc/2010-03-31/");
+    {
+      fmt->open_object_section("ResponseMetadata");
+      encode_xml("RequestId", s->req_id, fmt);
+      fmt->close_section();
+    }
+    fmt->close_section();
+    rgw_flush_formatter_and_reset(s, fmt);
+  }
+};
+
+namespace {
+// utility classes and functions for handling parameters with the following format:
+// Attributes.entry.{N}.{key|value}={VALUE}
+// N - any unsigned number
+// VALUE - url encoded string
+
+// an Attribute holds a key and a value
+// ctor and set are done according to the "type" argument
+// if type is not "key" or "value" it's a no-op
+class Attribute {
+  std::string key;
+  std::string value;
+public:
+  Attribute(const std::string& type, const std::string& key_or_value) {
+    set(type, key_or_value);
+  }
+  void set(const std::string& type, const std::string& key_or_value) {
+    if (type == "key") {
+      key = key_or_value;
+    } else if (type == "value") {
+      value = key_or_value;
+    }
+  }
+  const std::string& get_key() const { return key; }
+  const std::string& get_value() const { return value; }
+};
+
+using AttributeMap = std::map<unsigned, Attribute>;
+
+// aggregate the attributes into a map
+// the key and value are associated by the index (N)
+// no assumptions are made on the order in which these parameters are added
+//
+// input that does not follow the "Attributes.entry.{N}.{key|value}={VALUE}"
+// shape (missing parts, non-numeric or out-of-range N, nothing after N) is
+// silently ignored and leaves the map untouched
+void update_attribute_map(const std::string& input, AttributeMap& map) {
+  const boost::char_separator<char> sep(".");
+  const boost::tokenizer<boost::char_separator<char>> tokens(input, sep);
+  auto token = tokens.begin();
+  const auto end = tokens.end();
+
+  // every dereference is guarded: the input may have fewer parts than expected
+  if (token == end || *token != "Attributes") {
+    return;
+  }
+  ++token;
+
+  if (token == end || *token != "entry") {
+    return;
+  }
+  ++token;
+
+  if (token == end) {
+    return;
+  }
+  unsigned idx;
+  try {
+    idx = std::stoul(*token);
+  } catch (const std::invalid_argument&) {
+    return;
+  } catch (const std::out_of_range&) {
+    // std::stoul also throws for numbers that do not fit; the input comes
+    // from the request body, so this must not escape
+    return;
+  }
+  ++token;
+
+  std::string key_or_value;
+  // get the rest of the string regardless of dots
+  // this is to allow dots in the value
+  while (token != end) {
+    key_or_value.append(*token+".");
+    ++token;
+  }
+  if (key_or_value.empty()) {
+    // nothing followed the index (e.g. "Attributes.entry.1=x");
+    // pop_back() on an empty string would be undefined behavior
+    return;
+  }
+  // remove last separator
+  key_or_value.pop_back();
+
+  const auto pos = key_or_value.find("=");
+  if (pos == std::string::npos) {
+    return;
+  }
+  const auto key_or_value_lhs = key_or_value.substr(0, pos);
+  const auto key_or_value_rhs = url_decode(key_or_value.substr(pos + 1));
+  const auto map_it = map.find(idx);
+  if (map_it == map.end()) {
+    // new entry
+    map.emplace(idx, Attribute(key_or_value_lhs, key_or_value_rhs));
+  } else {
+    // existing entry
+    map_it->second.set(key_or_value_lhs, key_or_value_rhs);
+  }
+}
+}
+
+// Turn the POST body into request arguments.
+// When the body contains "Action" it is split on '&': "Action" is appended
+// as is, "Name" and "TopicArn" are url-decoded first, and every other
+// "k=v" pair is offered to update_attribute_map(); the collected attributes
+// are then appended as regular arguments. For any non-empty body its V4
+// payload hash is appended as "PayloadHash".
+void RGWHandler_REST_PSTopic_AWS::rgw_topic_parse_input() {
+  if (post_body.size() > 0) {
+    ldout(s->cct, 10) << "Content of POST: " << post_body << dendl;
+
+    if (post_body.find("Action") != string::npos) {
+      const boost::char_separator<char> sep("&");
+      const boost::tokenizer<boost::char_separator<char>> tokens(post_body, sep);
+      AttributeMap map;
+      for (const auto& t : tokens) {
+        auto pos = t.find("=");
+        if (pos != string::npos) {
+          const auto key = t.substr(0, pos);
+          if (key == "Action") {
+            s->info.args.append(key, t.substr(pos + 1, t.size() - 1));
+          } else if (key == "Name" || key == "TopicArn") {
+            const auto value = url_decode(t.substr(pos + 1, t.size() - 1));
+            s->info.args.append(key, value);
+          } else {
+            update_attribute_map(t, map);
+          }
+        }
+      }
+      // update the regular args with the content of the attribute map
+      // (by reference: no need to copy each index/Attribute pair)
+      for (const auto& attr : map) {
+        s->info.args.append(attr.second.get_key(), attr.second.get_value());
+      }
+    }
+    const auto payload_hash = rgw::auth::s3::calc_v4_payload_hash(post_body);
+    s->info.args.append("PayloadHash", payload_hash);
+  }
+}
+
+// Parse the POST body, then pick the op named by the "Action" argument.
+// Returns a null pointer when "Action" is absent or not one of the four
+// supported topic actions.
+RGWOp* RGWHandler_REST_PSTopic_AWS::op_post() {
+  rgw_topic_parse_input();
+
+  if (!s->info.args.exists("Action")) {
+    return nullptr;
+  }
+
+  const auto action = s->info.args.get("Action");
+  if (action == "CreateTopic") {
+    return new RGWPSCreateTopic_ObjStore_AWS();
+  }
+  if (action == "DeleteTopic") {
+    return new RGWPSDeleteTopic_ObjStore_AWS;
+  }
+  if (action == "ListTopics") {
+    return new RGWPSListTopics_ObjStore_AWS();
+  }
+  if (action == "GetTopic") {
+    return new RGWPSGetTopic_ObjStore_AWS();
+  }
+  return nullptr;
+}
+
+// Authorize the request through the regular S3 authorization.
+int RGWHandler_REST_PSTopic_AWS::authorize(const DoutPrefixProvider* dpp) {
+  // The disabled draft below would return success without calling the S3
+  // authorization whenever the "Action" argument contains "Topic".
+  /*if (s->info.args.exists("Action") && s->info.args.get("Action").find("Topic") != std::string::npos) {
+    // TODO: some topic specific authorization
+    return 0;
+  }*/
+  const int ret = RGW_Auth_S3::authorize(dpp, store, auth_registry, s);
+  return ret;
+}
+
+
+namespace {
+// return a unique topic by prefixing with the notification name: <notification>_<topic>
+std::string topic_to_unique(const std::string& topic, const std::string& notification) {
+  return notification + "_" + topic;
+}
+
+// extract the topic from a unique topic of the form: <notification>_<topic>
+// returns an empty string when unique_topic does not start with "<notification>_"
+[[maybe_unused]] std::string unique_to_topic(const std::string& unique_topic, const std::string& notification) {
+  const std::string prefix = notification + "_";
+  // the marker must be a real prefix: a match further inside the string
+  // would otherwise be cut at the wrong offset
+  if (unique_topic.compare(0, prefix.size(), prefix) != 0) {
+    return "";
+  }
+  return unique_topic.substr(prefix.size());
+}
+
+// from list of bucket topics, find the one that was auto-generated by a notification
+// (the first entry whose s3_id equals notif_name); std::nullopt when there is none
+auto find_unique_topic(const rgw_pubsub_bucket_topics& bucket_topics, const std::string& notif_name) {
+  auto it = std::find_if(bucket_topics.topics.begin(), bucket_topics.topics.end(), [&](const auto& val) { return notif_name == val.second.s3_id; });
+  return it != bucket_topics.topics.end() ?
+    std::optional<std::reference_wrapper<const rgw_pubsub_topic_filter>>(it->second):
+    std::nullopt;
+}
+}
+
+// command (S3 compliant): PUT /<bucket name>?notification
+// a "notification" and a subscription will be auto-generated
+// actual configuration is XML encoded in the body of the message
+class RGWPSCreateNotif_ObjStore_S3 : public RGWPSCreateNotifOp {
+  // notification configurations decoded from the XML request body
+  rgw_pubsub_s3_notifications configurations;
+
+  // Read the request body (bounded by rgw_max_put_param_size) and decode
+  // the mandatory "NotificationConfiguration" element into `configurations`.
+  // Returns 0 on success, the read error, -EINVAL for an empty body or a
+  // parser that fails to initialize, -ERR_MALFORMED_XML for bad XML.
+  int get_params_from_body() {
+    const auto body_limit = s->cct->_conf->rgw_max_put_param_size;
+    auto [ret, body] = rgw_rest_read_all_input(s, body_limit, false);
+    if (ret < 0) {
+      ldout(s->cct, 1) << "failed to read XML payload" << dendl;
+      return ret;
+    }
+    if (body.length() == 0) {
+      ldout(s->cct, 1) << "XML payload missing" << dendl;
+      return -EINVAL;
+    }
+
+    RGWXMLDecoder::XMLParser xml;
+    if (!xml.init()) {
+      ldout(s->cct, 1) << "failed to initialize XML parser" << dendl;
+      return -EINVAL;
+    }
+    if (!xml.parse(body.c_str(), body.length(), 1)) {
+      ldout(s->cct, 1) << "failed to parse XML payload" << dendl;
+      return -ERR_MALFORMED_XML;
+    }
+
+    try {
+      // NotificationConfigurations is mandatory
+      RGWXMLDecoder::decode_xml("NotificationConfiguration", configurations, &xml, true);
+    } catch (RGWXMLDecoder::err& err) {
+      ldout(s->cct, 1) << "failed to parse XML payload. error: " << err << dendl;
+      return -ERR_MALFORMED_XML;
+    }
+    return 0;
+  }
+
+  // Validate the URL form: a value-less "notification" argument on a
+  // bucket. Records the bucket name; returns -EINVAL otherwise.
+  int get_params() override {
+    bool has_notification;
+    const auto arg_value = s->info.args.get("notification", &has_notification);
+    if (!has_notification) {
+      ldout(s->cct, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (!arg_value.empty()) {
+      ldout(s->cct, 1) << "param 'notification' should not have any value" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldout(s->cct, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+
+public:
+  void execute() override;
+  const char* name() const override { return "pubsub_notification_create_s3"; }
+};
+
+// Create every notification listed in the request body.
+// For each configuration: validate id, topic ARN and event types, look up
+// the referenced topic, create an internal "<notification>_<topic>" topic,
+// create the bucket notification on it and, unless running push-only,
+// create a subscription named after the notification.
+// Processing stops at the first failure with op_ret set. The objects
+// created for the failing configuration are rolled back; earlier
+// configurations are left in place.
+void RGWPSCreateNotif_ObjStore_S3::execute() {
+  op_ret = get_params_from_body();
+  if (op_ret < 0) {
+    return;
+  }
+
+  ups.emplace(store, s->owner.get_id());
+  auto b = ups->get_bucket(bucket_info.bucket);
+  ceph_assert(b);
+  std::string data_bucket_prefix = "";
+  std::string data_oid_prefix = "";
+  bool push_only = true;
+  // subscriptions are only generated when the store's sync module is a
+  // pubsub sync module instance
+  if (store->get_sync_module()) {
+    const auto psmodule = dynamic_cast<RGWPSSyncModuleInstance*>(store->get_sync_module().get());
+    if (psmodule) {
+      const auto& conf = psmodule->get_effective_conf();
+      data_bucket_prefix = conf["data_bucket_prefix"];
+      data_oid_prefix = conf["data_oid_prefix"];
+      // TODO: allow "push-only" on PS zone as well
+      push_only = false;
+    }
+  }
+
+  for (const auto& c : configurations.list) {
+    const auto& notif_name = c.id;
+    if (notif_name.empty()) {
+      ldout(s->cct, 1) << "missing notification id" << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+    if (c.topic_arn.empty()) {
+      ldout(s->cct, 1) << "missing topic ARN in notification: '" << notif_name << "'" << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+
+    const auto arn = rgw::ARN::parse(c.topic_arn);
+    if (!arn || arn->resource.empty()) {
+      ldout(s->cct, 1) << "topic ARN has invalid format: '" << c.topic_arn << "' in notification: '" << notif_name << "'" << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+
+    if (std::find(c.events.begin(), c.events.end(), rgw::notify::UnknownEvent) != c.events.end()) {
+      ldout(s->cct, 1) << "unknown event type in notification: '" << notif_name << "'" << dendl;
+      op_ret = -EINVAL;
+      return;
+    }
+
+    const auto topic_name = arn->resource;
+
+    // get topic information. destination information is stored in the topic
+    rgw_pubsub_topic topic_info;
+    op_ret = ups->get_topic(topic_name, &topic_info);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
+      return;
+    }
+    // make sure that full topic configuration match
+    // TODO: use ARN match function
+
+    // create unique topic name. this has 2 reasons:
+    // (1) topics cannot be shared between different S3 notifications because they hold the filter information
+    // (2) make topic cleanup easier, when notification is removed
+    const auto unique_topic_name = topic_to_unique(topic_name, notif_name);
+    // generate the internal topic. destination is stored here for the "push-only" case
+    // when no subscription exists
+    // ARN is cached to make the "GET" method faster
+    op_ret = ups->create_topic(unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to auto-generate unique topic '" << unique_topic_name <<
+        "', ret=" << op_ret << dendl;
+      return;
+    }
+    ldout(s->cct, 20) << "successfully auto-generated unique topic '" << unique_topic_name << "'" << dendl;
+    // generate the notification
+    op_ret = b->create_notification(unique_topic_name, c.events, std::make_optional(c.filter), notif_name);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to auto-generate notification for unique topic '" << unique_topic_name <<
+        "', ret=" << op_ret << dendl;
+      // rollback generated topic (ignore return value)
+      ups->remove_topic(unique_topic_name);
+      return;
+    }
+    ldout(s->cct, 20) << "successfully auto-generated notification for unique topic '" << unique_topic_name << "'" << dendl;
+
+    if (!push_only) {
+      // generate the subscription with destination information from the original topic
+      rgw_pubsub_sub_dest dest = topic_info.dest;
+      dest.bucket_name = data_bucket_prefix + s->owner.get_id().to_str() + "-" + unique_topic_name;
+      dest.oid_prefix = data_oid_prefix + notif_name + "/";
+      auto sub = ups->get_sub(notif_name);
+      op_ret = sub->subscribe(unique_topic_name, dest, notif_name);
+      if (op_ret < 0) {
+        ldout(s->cct, 1) << "failed to auto-generate subscription '" << notif_name << "', ret=" << op_ret << dendl;
+        // rollback generated notification (ignore return value)
+        b->remove_notification(unique_topic_name);
+        // rollback generated topic (ignore return value)
+        ups->remove_topic(unique_topic_name);
+        return;
+      }
+      ldout(s->cct, 20) << "successfully auto-generated subscription '" << notif_name << "'" << dendl;
+    }
+  }
+}
+
+// command (extension to S3): DELETE /bucket?notification[=<notification-id>]
+class RGWPSDeleteNotif_ObjStore_S3 : public RGWPSDeleteNotifOp {
+public:
+  const char* name() const override { return "pubsub_notification_delete_s3"; }
+  void execute() override;
+
+private:
+  // value of the "notification" argument; empty means "all notifications"
+  std::string notif_name;
+
+  // Require the "notification" argument (its value may be empty) and a
+  // bucket in the request. Records both; returns -EINVAL otherwise.
+  int get_params() override {
+    bool has_notification;
+    notif_name = s->info.args.get("notification", &has_notification);
+    if (!has_notification) {
+      ldout(s->cct, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldout(s->cct, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+
+  // Remove the bucket notification bound to topic_name, then the topic
+  // itself. Failures are only logged.
+  // NOTE(review): op_ret ends up holding the result of remove_topic() only;
+  // a failed remove_notification() is overwritten - confirm this is intended.
+  void remove_notification_by_topic(const std::string& topic_name, const RGWUserPubSub::BucketRef& b) {
+    op_ret = b->remove_notification(topic_name);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to remove notification of topic '" << topic_name << "', ret=" << op_ret << dendl;
+    }
+
+    op_ret = ups->remove_topic(topic_name);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to remove auto-generated topic '" << topic_name << "', ret=" << op_ret << dendl;
+    }
+  }
+};
+
+void RGWPSDeleteNotif_ObjStore_S3::execute() { // delete one named notification, or all notifications of the bucket when no name was given
+ op_ret = get_params();
+ if (op_ret < 0) {
+ return;
+ }
+
+ ups.emplace(store, s->owner.get_id());
+ auto b = ups->get_bucket(bucket_info.bucket);
+ ceph_assert(b);
+
+ // get all topics on a bucket
+ rgw_pubsub_bucket_topics bucket_topics;
+ op_ret = b->get_topics(&bucket_topics);
+ if (op_ret < 0) {
+ ldout(s->cct, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl;
+ return;
+ }
+
+ if (!notif_name.empty()) {
+ // delete a specific notification
+ const auto unique_topic = find_unique_topic(bucket_topics, notif_name);
+ if (unique_topic) {
+ // remove the auto generated subscription according to notification name (if exist)
+ const auto unique_topic_name = unique_topic->get().topic.name;
+ auto sub = ups->get_sub(notif_name);
+ op_ret = sub->unsubscribe(unique_topic_name);
+ if (op_ret < 0 && op_ret != -ENOENT) { // a missing subscription (-ENOENT) is tolerated
+ ldout(s->cct, 1) << "failed to remove auto-generated subscription '" << notif_name << "', ret=" << op_ret << dendl;
+ return;
+ }
+ remove_notification_by_topic(unique_topic_name, b);
+ return;
+ }
+ // notification to be removed is not found - considered success
+ ldout(s->cct, 20) << "notification '" << notif_name << "' already removed" << dendl;
+ return;
+ }
+
+ // delete all notifications on a bucket
+ for (const auto& topic : bucket_topics.topics) {
+ // remove the auto generated subscription of the topic (if exist)
+ rgw_pubsub_topic_subs topic_subs;
+ op_ret = ups->get_topic(topic.first, &topic_subs); // NOTE(review): this result is never checked before being overwritten - confirm a failed lookup should be ignored
+ for (const auto& topic_sub_name : topic_subs.subs) {
+ auto sub = ups->get_sub(topic_sub_name);
+ rgw_pubsub_sub_config sub_conf;
+ op_ret = sub->get_conf(&sub_conf);
+ if (op_ret < 0) {
+ ldout(s->cct, 1) << "failed to get subscription '" << topic_sub_name << "' info, ret=" << op_ret << dendl;
+ return;
+ }
+ if (!sub_conf.s3_id.empty()) {
+ // S3 notification, has autogenerated subscription
+ const auto& sub_topic_name = sub_conf.topic;
+ op_ret = sub->unsubscribe(sub_topic_name);
+ if (op_ret < 0) { // unlike the single-notification path, -ENOENT is not tolerated here
+ ldout(s->cct, 1) << "failed to remove auto-generated subscription '" << topic_sub_name << "', ret=" << op_ret << dendl;
+ return;
+ }
+ }
+ }
+ remove_notification_by_topic(topic.first, b);
+ }
+}
+
+// command (S3 compliant): GET /bucket?notification[=<notification-id>]
+class RGWPSListNotifs_ObjStore_S3 : public RGWPSListNotifsOp {
+public:
+  const char* name() const override { return "pubsub_notifications_get_s3"; }
+  void execute() override;
+
+  // Emit status and headers; on success dump the collected notifications
+  // as XML.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/xml");
+    if (op_ret < 0) {
+      return;
+    }
+
+    notifications.dump_xml(s->formatter);
+    rgw_flush_formatter_and_reset(s, s->formatter);
+  }
+
+private:
+  // value of the "notification" argument; empty means "all notifications"
+  std::string notif_name;
+  // result filled in by execute()
+  rgw_pubsub_s3_notifications notifications;
+
+  // Require the "notification" argument (its value may be empty) and a
+  // bucket in the request. Records both; returns -EINVAL otherwise.
+  int get_params() override {
+    bool has_notification;
+    notif_name = s->info.args.get("notification", &has_notification);
+    if (!has_notification) {
+      ldout(s->cct, 1) << "missing required param 'notification'" << dendl;
+      return -EINVAL;
+    }
+    if (s->bucket_name.empty()) {
+      ldout(s->cct, 1) << "request must be on a bucket" << dendl;
+      return -EINVAL;
+    }
+
+    bucket_name = s->bucket_name;
+    return 0;
+  }
+};
+
+// Collect the S3 notifications of the bucket into `notifications`.
+// With a notification name only that one is returned (-ENOENT if absent);
+// without a name every bucket topic with a non-empty s3_id is returned.
+void RGWPSListNotifs_ObjStore_S3::execute() {
+  ups.emplace(store, s->owner.get_id());
+  auto b = ups->get_bucket(bucket_info.bucket);
+  ceph_assert(b);
+
+  // get all topics on a bucket
+  rgw_pubsub_bucket_topics bucket_topics;
+  op_ret = b->get_topics(&bucket_topics);
+  if (op_ret < 0) {
+    ldout(s->cct, 1) << "failed to get list of topics from bucket '" << bucket_info.bucket.name << "', ret=" << op_ret << dendl;
+    return;
+  }
+
+  if (notif_name.empty()) {
+    // no name given: report every topic that belongs to an s3 notification
+    for (const auto& entry : bucket_topics.topics) {
+      if (!entry.second.s3_id.empty()) {
+        notifications.list.emplace_back(entry.second);
+      }
+    }
+    return;
+  }
+
+  // get info of a specific notification
+  const auto match = find_unique_topic(bucket_topics, notif_name);
+  if (!match) {
+    op_ret = -ENOENT;
+    ldout(s->cct, 1) << "failed to get notification info for '" << notif_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  notifications.list.emplace_back(match->get());
+}
+
+// GET: same op as the static factory hands out.
+RGWOp* RGWHandler_REST_PSNotifs_S3::op_get() {
+  return create_get_op();
+}
+
+// PUT: same op as the static factory hands out.
+RGWOp* RGWHandler_REST_PSNotifs_S3::op_put() {
+  return create_put_op();
+}
+
+// DELETE: same op as the static factory hands out.
+RGWOp* RGWHandler_REST_PSNotifs_S3::op_delete() {
+  return create_delete_op();
+}
+
+RGWOp* RGWHandler_REST_PSNotifs_S3::create_get_op() { // factory for the "list notifications" op
+ return new RGWPSListNotifs_ObjStore_S3(); // raw pointer returned to the caller
+}
+
+RGWOp* RGWHandler_REST_PSNotifs_S3::create_put_op() { // factory for the "create notification" op
+ return new RGWPSCreateNotif_ObjStore_S3(); // raw pointer returned to the caller
+}
+
+RGWOp* RGWHandler_REST_PSNotifs_S3::create_delete_op() { // factory for the "delete notification" op
+ return new RGWPSDeleteNotif_ObjStore_S3(); // raw pointer returned to the caller
+}
+
diff --git a/src/rgw/rgw_rest_pubsub.h b/src/rgw/rgw_rest_pubsub.h
new file mode 100644
index 00000000..f2f63356
--- /dev/null
+++ b/src/rgw/rgw_rest_pubsub.h
@@ -0,0 +1,41 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+
+#include "rgw_rest_s3.h"
+
+// s3 compliant notification handler factory
+class RGWHandler_REST_PSNotifs_S3 : public RGWHandler_REST_S3 {
+protected:
+ int init_permissions(RGWOp* op) override {return 0;} // no handler-level permission setup: always returns 0
+ int read_permissions(RGWOp* op) override {return 0;} // likewise always returns 0
+ bool supports_quota() override {return false;} // this handler reports quota as unsupported
+ RGWOp* op_get() override; // GET: list-notifications op
+ RGWOp* op_put() override; // PUT: create-notification op
+ RGWOp* op_delete() override; // DELETE: delete-notification op
+public:
+ using RGWHandler_REST_S3::RGWHandler_REST_S3;
+ virtual ~RGWHandler_REST_PSNotifs_S3() = default;
+ // following are used to generate the operations when invoked by another REST handler
+ static RGWOp* create_get_op(); // each factory returns a newly allocated op
+ static RGWOp* create_put_op();
+ static RGWOp* create_delete_op();
+};
+
+// AWS compliant topics handler factory
+class RGWHandler_REST_PSTopic_AWS : public RGWHandler_REST {
+ const rgw::auth::StrategyRegistry& auth_registry; // held by reference: the registry must outlive this handler
+ const std::string& post_body; // held by reference as well: the body is not copied by the handler
+ void rgw_topic_parse_input();
+ //static int init_from_header(struct req_state *s, int default_formatter, bool configurable_format);
+protected:
+ RGWOp* op_post() override;
+public:
+ RGWHandler_REST_PSTopic_AWS(const rgw::auth::StrategyRegistry& _auth_registry, const std::string& _post_body) :
+ auth_registry(_auth_registry),
+ post_body(_post_body) {}
+ virtual ~RGWHandler_REST_PSTopic_AWS() = default;
+ int postauth_init() override { return 0; } // nothing to do here: always returns 0
+ int authorize(const DoutPrefixProvider* dpp) override;
+};
+
diff --git a/src/rgw/rgw_rest_pubsub_common.cc b/src/rgw/rgw_rest_pubsub_common.cc
new file mode 100644
index 00000000..3b5de53f
--- /dev/null
+++ b/src/rgw/rgw_rest_pubsub_common.cc
@@ -0,0 +1,259 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_rest_pubsub_common.h"
+#include "common/dout.h"
+#include "rgw_url.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+bool validate_and_update_endpoint_secret(rgw_pubsub_sub_dest& dest, CephContext *cct, const RGWEnv& env) { // true when the endpoint is acceptable
+  if (dest.push_endpoint.empty()) {
+    return true; // no endpoint: nothing to validate
+  }
+  std::string url_user;
+  std::string url_password;
+  if (!rgw::parse_url_userinfo(dest.push_endpoint, url_user, url_password)) {
+    ldout(cct, 1) << "endpoint validation error: malformed endpoint URL:" << dest.push_endpoint << dendl;
+    return false;
+  }
+  ceph_assert(url_user.empty() == url_password.empty()); // this should be verified inside parse_url()
+  if (url_user.empty()) {
+    return true; // no credentials embedded in the URL
+  }
+  dest.stored_secret = true;
+  if (rgw_transport_is_secure(cct, env)) {
+    return true;
+  }
+  ldout(cct, 1) << "endpoint validation error: sending password over insecure transport" << dendl;
+  return false;
+}
+
+bool subscription_has_endpoint_secret(const rgw_pubsub_sub_config& sub) { // does the subscription's destination hold a secret?
+ return sub.dest.stored_secret; // the flag validate_and_update_endpoint_secret() sets when the URL carries user/password
+}
+
+bool topic_has_endpoint_secret(const rgw_pubsub_topic_subs& topic) { // does the topic's destination hold a secret?
+ return topic.topic.dest.stored_secret; // the flag validate_and_update_endpoint_secret() sets when the URL carries user/password
+}
+
+bool topics_has_endpoint_secret(const rgw_pubsub_user_topics& topics) { // true when at least one topic holds a secret
+ for (const auto& topic : topics.topics) {
+ if (topic_has_endpoint_secret(topic.second)) return true; // stop at the first topic with a stored secret
+ }
+ return false;
+}
+void RGWPSCreateTopicOp::execute() {
+  // parse the request first; a negative code from get_params() ends the op
+  if (op_ret = get_params(); op_ret < 0) {
+    return;
+  }
+
+  ups.emplace(store, s->owner.get_id());
+  op_ret = ups->create_topic(topic_name, dest, topic_arn, opaque_data);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully created topic '" << topic_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl;
+}
+
+void RGWPSListTopicsOp::execute() { // loads all topics of the request owner into 'result'
+  ups.emplace(store, s->owner.get_id());
+  op_ret = ups->get_user_topics(&result);
+  if (op_ret == -ENOENT) op_ret = 0; // if there are no topics it is not considered an error
+  if (op_ret < 0) {
+    ldout(s->cct, 1) << "failed to get topics, ret=" << op_ret << dendl;
+    return;
+  }
+  const bool would_leak_secret = topics_has_endpoint_secret(result) && !rgw_transport_is_secure(s->cct, *(s->info.env));
+  if (would_leak_secret) {
+    ldout(s->cct, 1) << "topics contain secret and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM;
+    return;
+  }
+  ldout(s->cct, 20) << "successfully got topics" << dendl;
+}
+
+void RGWPSGetTopicOp::execute() { // loads the named topic of the request owner into 'result'
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  op_ret = ups->get_topic(topic_name, &result);
+  if (op_ret < 0) { // check the lookup first: 'result' is only meaningful on success
+    ldout(s->cct, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  if (topic_has_endpoint_secret(result) && !rgw_transport_is_secure(s->cct, *(s->info.env))) {
+    ldout(s->cct, 1) << "topic '" << topic_name << "' contain secret and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM; // never reply with a stored secret over an insecure transport
+    return;
+  }
+  ldout(s->cct, 1) << "successfully got topic '" << topic_name << "'" << dendl;
+}
+
+void RGWPSDeleteTopicOp::execute() { // removes the named topic of the request owner
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return; // parameter parsing failed; op_ret carries the error
+  }
+  ups.emplace(store, s->owner.get_id());
+  op_ret = ups->remove_topic(topic_name);
+  if (op_ret < 0) {
+    ldout(s->cct, 1) << "failed to remove topic '" << topic_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "successfully removed topic '" << topic_name << "'" << dendl;
+}
+
+void RGWPSCreateSubOp::execute() {
+  // parse the request first; a negative code from get_params() ends the op
+  if (op_ret = get_params(); op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  const auto subscription = ups->get_sub(sub_name);
+  op_ret = subscription->subscribe(topic_name, dest);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully created subscription '" << sub_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to create subscription '" << sub_name << "', ret=" << op_ret << dendl;
+}
+
+void RGWPSGetSubOp::execute() { // loads the configuration of the named subscription into 'result'
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  auto sub = ups->get_sub(sub_name);
+  op_ret = sub->get_conf(&result);
+  if (op_ret < 0) { // check the lookup first: 'result' is only meaningful on success
+    ldout(s->cct, 1) << "failed to get subscription '" << sub_name << "', ret=" << op_ret << dendl;
+    return;
+  }
+  if (subscription_has_endpoint_secret(result) && !rgw_transport_is_secure(s->cct, *(s->info.env))) {
+    ldout(s->cct, 1) << "subscription '" << sub_name << "' contain secret and cannot be sent over insecure transport" << dendl;
+    op_ret = -EPERM; // never reply with a stored secret over an insecure transport
+    return;
+  }
+  ldout(s->cct, 20) << "successfully got subscription '" << sub_name << "'" << dendl;
+}
+
+void RGWPSDeleteSubOp::execute() {
+  // parse the request first; a negative code from get_params() ends the op
+  if (op_ret = get_params(); op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  const auto subscription = ups->get_sub(sub_name);
+  op_ret = subscription->unsubscribe(topic_name);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully removed subscription '" << sub_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to remove subscription '" << sub_name << "', ret=" << op_ret << dendl;
+}
+
+void RGWPSAckSubEventOp::execute() {
+  // parse the request first; a negative code from get_params() ends the op
+  if (op_ret = get_params(); op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  const auto subscription = ups->get_sub_with_events(sub_name);
+  op_ret = subscription->remove_event(event_id);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully acked event on subscription '" << sub_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to ack event on subscription '" << sub_name << "', ret=" << op_ret << dendl;
+}
+
+void RGWPSPullSubEventsOp::execute() {
+  // parse the request first; a negative code from get_params() ends the op
+  if (op_ret = get_params(); op_ret < 0) {
+    return;
+  }
+  ups.emplace(store, s->owner.get_id());
+  sub = ups->get_sub_with_events(sub_name);
+  if (sub) {
+    op_ret = sub->list_events(marker, max_entries);
+    if (op_ret < 0) {
+      ldout(s->cct, 1) << "failed to get events from subscription '" << sub_name << "', ret=" << op_ret << dendl;
+      return;
+    }
+    ldout(s->cct, 20) << "successfully got events from subscription '" << sub_name << "'" << dendl;
+    return;
+  }
+  op_ret = -ENOENT;
+  ldout(s->cct, 1) << "failed to get subscription '" << sub_name << "' for events, ret=" << op_ret << dendl;
+}
+
+
+int RGWPSCreateNotifOp::verify_permission() {
+  // parse the request first; a negative code from get_params() is returned unchanged
+  if (const int params_ret = get_params(); params_ret < 0) {
+    return params_ret;
+  }
+
+  const auto& requester = s->owner.get_id();
+  const int lookup_ret = store->get_bucket_info(*s->sysobj_ctx, requester.tenant, bucket_name,
+                                                bucket_info, nullptr, nullptr);
+  if (lookup_ret < 0) {
+    ldout(s->cct, 1) << "failed to get bucket info, cannot verify ownership" << dendl;
+    return lookup_ret;
+  }
+
+  // only the owner of the bucket is allowed to create a notification on it
+  if (bucket_info.owner != requester) {
+    ldout(s->cct, 1) << "user doesn't own bucket, not allowed to create notification" << dendl;
+    return -EPERM;
+  }
+  return 0;
+}
+
+int RGWPSDeleteNotifOp::verify_permission() {
+  if (const int params_ret = get_params(); params_ret < 0) {
+    return params_ret; // request parsing failed
+  }
+
+  const auto& requester = s->owner.get_id();
+  const int lookup_ret = store->get_bucket_info(*s->sysobj_ctx, requester.tenant, bucket_name,
+                                                bucket_info, nullptr, nullptr);
+  if (lookup_ret < 0) {
+    return lookup_ret; // bucket lookup failed
+  }
+
+  if (bucket_info.owner != requester) { // only the owner of the bucket may remove a notification
+    ldout(s->cct, 1) << "user doesn't own bucket, cannot remove notification" << dendl;
+    return -EPERM;
+  }
+  return 0;
+}
+
+int RGWPSListNotifsOp::verify_permission() {
+  if (const int params_ret = get_params(); params_ret < 0) {
+    return params_ret; // request parsing failed
+  }
+
+  const auto& requester = s->owner.get_id();
+  const int lookup_ret = store->get_bucket_info(*s->sysobj_ctx, requester.tenant, bucket_name,
+                                                bucket_info, nullptr, nullptr);
+  if (lookup_ret < 0) {
+    return lookup_ret; // bucket lookup failed
+  }
+
+  // only the owner of the bucket may list its notifications
+  if (bucket_info.owner != requester) {
+    ldout(s->cct, 1) << "user doesn't own bucket, cannot get notification list" << dendl;
+    return -EPERM;
+  }
+  return 0;
+}
+
diff --git a/src/rgw/rgw_rest_pubsub_common.h b/src/rgw/rgw_rest_pubsub_common.h
new file mode 100644
index 00000000..d472fa40
--- /dev/null
+++ b/src/rgw/rgw_rest_pubsub_common.h
@@ -0,0 +1,287 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+#include <string>
+#include <optional>
+#include "rgw_op.h"
+#include "rgw_pubsub.h"
+
+// make sure that endpoint is a valid URL
+// make sure that if user/password are passed inside URL, it is over secure connection
+// update rgw_pubsub_sub_dest to indicate that a password is stored in the URL
+bool validate_and_update_endpoint_secret(rgw_pubsub_sub_dest& dest, CephContext *cct, const RGWEnv& env);
+
+// create a topic
+class RGWPSCreateTopicOp : public RGWDefaultResponseOp {
+protected:
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ std::string topic_name; // the four members below are the arguments execute() passes to create_topic()
+ rgw_pubsub_sub_dest dest;
+ std::string topic_arn;
+ std::string opaque_data;
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_topic_create"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_CREATE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+// list all topics
+class RGWPSListTopicsOp : public RGWOp {
+protected:
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ rgw_pubsub_user_topics result; // filled by execute() through get_user_topics()
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_topics_list"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPICS_LIST; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// get topic information
+class RGWPSGetTopicOp : public RGWOp {
+protected:
+ std::string topic_name; // name of the topic that execute() looks up
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ rgw_pubsub_topic_subs result; // filled by execute() through get_topic()
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_topic_get"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_GET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// delete a topic
+class RGWPSDeleteTopicOp : public RGWDefaultResponseOp {
+protected:
+  std::string topic_name; // name of the topic that execute() removes
+  std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+
+  virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+  int verify_permission() override { // no permission check at this level: always 0
+    return 0;
+  }
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+  void execute() override;
+
+  const char* name() const override { return "pubsub_topic_delete"; }
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_TOPIC_DELETE; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+// create a subscription
+class RGWPSCreateSubOp : public RGWDefaultResponseOp {
+protected:
+ std::string sub_name; // subscription that execute() obtains through get_sub()
+ std::string topic_name; // topic passed to subscribe()
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ rgw_pubsub_sub_dest dest; // destination passed to subscribe()
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_subscription_create"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_SUB_CREATE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+// get subscription information (including the push-endpoint, if one exists)
+class RGWPSGetSubOp : public RGWOp {
+protected:
+ std::string sub_name; // subscription that execute() obtains through get_sub()
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ rgw_pubsub_sub_config result; // filled by execute() through get_conf()
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_subscription_get"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_SUB_GET; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// delete subscription
+class RGWPSDeleteSubOp : public RGWDefaultResponseOp {
+protected:
+ std::string sub_name; // subscription that execute() obtains through get_sub()
+ std::string topic_name; // topic passed to unsubscribe()
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_subscription_delete"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_SUB_DELETE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+// acking of an event
+class RGWPSAckSubEventOp : public RGWDefaultResponseOp {
+protected:
+ std::string sub_name; // subscription that execute() obtains through get_sub_with_events()
+ std::string event_id; // event passed to remove_event()
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ RGWPSAckSubEventOp() {}
+
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_subscription_ack"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_SUB_ACK; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+// fetching events from a subscription
+// depending on whether the subscription was created via s3 compliant API or not
+// the matching events will be returned
+class RGWPSPullSubEventsOp : public RGWOp {
+protected:
+ int max_entries{0}; // passed to list_events() together with 'marker'
+ std::string sub_name; // subscription that execute() obtains through get_sub_with_events()
+ std::string marker;
+ std::optional<RGWUserPubSub> ups; // emplaced by execute() for the request owner
+ RGWUserPubSub::SubRef sub; // set by execute(); execute() fails with -ENOENT when it is empty
+
+ virtual int get_params() = 0; // called first by execute(); a negative return ends the op
+
+public:
+ RGWPSPullSubEventsOp() {}
+
+ int verify_permission() override { // no permission check at this level: always 0
+ return 0;
+ }
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+ void execute() override;
+
+ const char* name() const override { return "pubsub_subscription_pull"; }
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_SUB_PULL; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+// notification creation
+class RGWPSCreateNotifOp : public RGWDefaultResponseOp {
+protected:
+  std::optional<RGWUserPubSub> ups;
+  std::string bucket_name; // bucket looked up by verify_permission()
+  RGWBucketInfo bucket_info; // filled by verify_permission() through get_bucket_info()
+
+  virtual int get_params() = 0; // called first by verify_permission(); a negative return is passed on
+
+public:
+  int verify_permission() override; // succeeds only when the request owner owns the bucket
+
+  void pre_exec() override {
+    rgw_bucket_object_pre_exec(s);
+  }
+
+  RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_CREATE; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
+};
+
+// delete a notification
+class RGWPSDeleteNotifOp : public RGWDefaultResponseOp {
+protected:
+ std::optional<RGWUserPubSub> ups;
+ std::string bucket_name; // bucket looked up by verify_permission()
+ RGWBucketInfo bucket_info; // filled by verify_permission() through get_bucket_info()
+
+ virtual int get_params() = 0; // called first by verify_permission(); a negative return is passed on
+
+public:
+ int verify_permission() override; // succeeds only when the request owner owns the bucket
+
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_DELETE; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_DELETE; }
+};
+
+// get topics/notifications on a bucket
+class RGWPSListNotifsOp : public RGWOp {
+protected:
+ std::string bucket_name; // bucket looked up by verify_permission()
+ RGWBucketInfo bucket_info; // filled by verify_permission() through get_bucket_info()
+ std::optional<RGWUserPubSub> ups;
+
+ virtual int get_params() = 0; // called first by verify_permission(); a negative return is passed on
+
+public:
+ int verify_permission() override; // succeeds only when the request owner owns the bucket
+
+ void pre_exec() override {
+ rgw_bucket_object_pre_exec(s);
+ }
+
+ RGWOpType get_type() override { return RGW_OP_PUBSUB_NOTIF_LIST; }
+ uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
diff --git a/src/rgw/rgw_rest_realm.cc b/src/rgw/rgw_rest_realm.cc
new file mode 100644
index 00000000..18e37676
--- /dev/null
+++ b/src/rgw/rgw_rest_realm.cc
@@ -0,0 +1,367 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+#include "rgw_rest_realm.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_config.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// reject 'period push' if we would have to fetch too many intermediate periods
+static const uint32_t PERIOD_HISTORY_FETCH_MAX = 64; // max realm-epoch distance ahead of the current period
+
+// base period op, shared between Get and Post
+class RGWOp_Period_Base : public RGWRESTOp {
+ protected:
+ RGWPeriod period; // encoded into the reply by send_response() on success
+ std::ostringstream error_stream; // its text is handed to set_req_state_err() by send_response()
+ public:
+ int verify_permission() override { return 0; } // the Get/Post subclasses override this with a caps check
+ void send_response() override;
+};
+
+// reply with the period object on success
+void RGWOp_Period_Base::send_response()
+{
+  set_req_state_err(s, http_ret, error_stream.str());
+  dump_errno(s);
+
+  if (http_ret >= 0) {
+    // success: the body is the period object encoded as json
+    encode_json("period", period, s->formatter);
+    end_header(s, NULL, "application/json", s->formatter->get_len());
+    flusher.flush();
+    return;
+  }
+
+  if (!s->err.message.empty()) {
+    ldout(s->cct, 4) << "Request failed with " << http_ret << ": " << s->err.message << dendl;
+  }
+  end_header(s); // failure: nothing is encoded after the headers
+}
+
+// GET /admin/realm/period
+class RGWOp_Period_Get : public RGWOp_Period_Base {
+ public:
+ void execute() override;
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_READ); // requires the "zone" read capability
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps); // checked against the caps of the requesting user
+ }
+ const char* name() const override { return "get_period"; }
+};
+
+void RGWOp_Period_Get::execute()
+{
+  std::string period_id; // selects the period to read
+  RESTArgs::get_string(s, "period_id", period_id, &period_id);
+  period.set_id(period_id);
+  epoch_t epoch = 0;
+  RESTArgs::get_uint32(s, "epoch", 0, &epoch);
+  period.set_epoch(epoch);
+
+  std::string realm_id, realm_name; // handed to period.init() below
+  RESTArgs::get_string(s, "realm_id", realm_id, &realm_id);
+  RESTArgs::get_string(s, "realm_name", realm_name, &realm_name);
+  http_ret = period.init(store->ctx(), store->svc.sysobj, realm_id, realm_name);
+  if (http_ret < 0)
+    ldout(store->ctx(), 5) << "failed to read period" << dendl;
+}
+
+// POST /admin/realm/period
+class RGWOp_Period_Post : public RGWOp_Period_Base {
+ public:
+ void execute() override;
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_WRITE); // requires the "zone" write capability
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps); // checked against the caps of the requesting user
+ }
+ const char* name() const override { return "post_period"; }
+};
+
+void RGWOp_Period_Post::execute() // handles a posted period: 'period commit' when its id is empty, a period push otherwise
+{
+ auto cct = store->ctx();
+
+ // initialize the period without reading from rados
+ period.init(cct, store->svc.sysobj, false);
+
+ // decode the period from input
+ const auto max_size = cct->_conf->rgw_max_put_param_size;
+ bool empty; // out-parameter of rgw_rest_get_json_input(); not read afterwards
+ http_ret = rgw_rest_get_json_input(cct, s, period, max_size, &empty);
+ if (http_ret < 0) {
+ lderr(cct) << "failed to decode period" << dendl;
+ return;
+ }
+
+ // require period.realm_id to match our realm
+ if (period.get_realm() != store->svc.zone->get_realm().get_id()) {
+ error_stream << "period with realm id " << period.get_realm()
+ << " doesn't match current realm " << store->svc.zone->get_realm().get_id() << std::endl;
+ http_ret = -EINVAL;
+ return;
+ }
+
+ // load the realm and current period from rados; there may be a more recent
+ // period that we haven't restarted with yet. we also don't want to modify
+ // the objects in use by RGWRados
+ RGWRealm realm(period.get_realm());
+ http_ret = realm.init(cct, store->svc.sysobj);
+ if (http_ret < 0) {
+ lderr(cct) << "failed to read current realm: "
+ << cpp_strerror(-http_ret) << dendl;
+ return;
+ }
+
+ RGWPeriod current_period;
+ http_ret = current_period.init(cct, store->svc.sysobj, realm.get_id());
+ if (http_ret < 0) {
+ lderr(cct) << "failed to read current period: "
+ << cpp_strerror(-http_ret) << dendl;
+ return;
+ }
+
+ // if period id is empty, handle as 'period commit'
+ if (period.get_id().empty()) {
+ http_ret = period.commit(store, realm, current_period, error_stream);
+ if (http_ret < 0) {
+ lderr(cct) << "master zone failed to commit period" << dendl;
+ }
+ return; // commit path ends here whether it succeeded or failed
+ }
+
+ // if it's not period commit, nobody is allowed to push to the master zone
+ if (period.get_master_zone() == store->svc.zone->get_zone_params().get_id()) {
+ ldout(cct, 10) << "master zone rejecting period id="
+ << period.get_id() << " epoch=" << period.get_epoch() << dendl;
+ http_ret = -EINVAL; // XXX: error code
+ return;
+ }
+
+ // write the period to rados
+ http_ret = period.store_info(false);
+ if (http_ret < 0) {
+ lderr(cct) << "failed to store period " << period.get_id() << dendl;
+ return;
+ }
+ // set as latest epoch
+ http_ret = period.update_latest_epoch(period.get_epoch());
+ if (http_ret == -EEXIST) {
+ // already have this epoch (or a more recent one)
+ ldout(cct, 4) << "already have epoch >= " << period.get_epoch()
+ << " for period " << period.get_id() << dendl;
+ http_ret = 0; // treated as success
+ return;
+ }
+ if (http_ret < 0) {
+ lderr(cct) << "failed to set latest epoch" << dendl;
+ return;
+ }
+
+ // decide whether we can set_current_period() or set_latest_epoch()
+ if (period.get_id() != current_period.get_id()) {
+ auto current_epoch = current_period.get_realm_epoch();
+ // discard periods in the past
+ if (period.get_realm_epoch() < current_epoch) {
+ ldout(cct, 10) << "discarding period " << period.get_id()
+ << " with realm epoch " << period.get_realm_epoch()
+ << " older than current epoch " << current_epoch << dendl;
+ // return success to ack that we have this period
+ return;
+ }
+ // discard periods too far in the future
+ if (period.get_realm_epoch() > current_epoch + PERIOD_HISTORY_FETCH_MAX) {
+ lderr(cct) << "discarding period " << period.get_id()
+ << " with realm epoch " << period.get_realm_epoch() << " too far in "
+ "the future from current epoch " << current_epoch << dendl;
+ http_ret = -ENOENT; // XXX: error code
+ return;
+ }
+ // attach a copy of the period into the period history
+ auto cursor = store->period_history->attach(RGWPeriod{period});
+ if (!cursor) {
+ // we're missing some history between the new period and current_period
+ http_ret = cursor.get_error();
+ lderr(cct) << "failed to collect the periods between current period "
+ << current_period.get_id() << " (realm epoch " << current_epoch
+ << ") and the new period " << period.get_id()
+ << " (realm epoch " << period.get_realm_epoch()
+ << "): " << cpp_strerror(-http_ret) << dendl;
+ return;
+ }
+ if (cursor.has_next()) {
+ // don't switch if we have a newer period in our history
+ ldout(cct, 4) << "attached period " << period.get_id()
+ << " to history, but the history contains newer periods" << dendl;
+ return;
+ }
+ // set as current period
+ http_ret = realm.set_current_period(period);
+ if (http_ret < 0) {
+ lderr(cct) << "failed to update realm's current period" << dendl;
+ return;
+ }
+ ldout(cct, 4) << "period " << period.get_id()
+ << " is newer than current period " << current_period.get_id()
+ << ", updating realm's current period and notifying zone" << dendl;
+ realm.notify_new_period(period);
+ return; // a new period id was handled; the same-id path below is skipped
+ }
+ // reflect the period into our local objects
+ http_ret = period.reflect();
+ if (http_ret < 0) {
+ lderr(cct) << "failed to update local objects: "
+ << cpp_strerror(-http_ret) << dendl;
+ return;
+ }
+ ldout(cct, 4) << "period epoch " << period.get_epoch()
+ << " is newer than current epoch " << current_period.get_epoch()
+ << ", updating period's latest epoch and notifying zone" << dendl;
+ realm.notify_new_period(period);
+ // update the period history
+ store->period_history->insert(RGWPeriod{period});
+}
+
+class RGWHandler_Period : public RGWHandler_Auth_S3 {
+ protected:
+ using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+
+ RGWOp *op_get() override { return new RGWOp_Period_Get; } // GET /admin/realm/period
+ RGWOp *op_post() override { return new RGWOp_Period_Post; } // POST /admin/realm/period
+};
+
+class RGWRESTMgr_Period : public RGWRESTMgr {
+ public:
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string&) override {
+ return new RGWHandler_Period(auth_registry); // only auth_registry is used; the other arguments are ignored
+ }
+};
+
+
+// GET /admin/realm
+class RGWOp_Realm_Get : public RGWRESTOp {
+ std::unique_ptr<RGWRealm> realm; // created by execute(), encoded by send_response()
+public:
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_READ); // requires the "zone" read capability
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps); // checked against the caps of the requesting user
+ }
+ void execute() override;
+ void send_response() override;
+ const char* name() const override { return "get_realm"; }
+};
+
+void RGWOp_Realm_Get::execute()
+{
+  string id; // realm is selected by the "id" and/or "name" request arguments
+  RESTArgs::get_string(s, "id", id, &id);
+  string name;
+  RESTArgs::get_string(s, "name", name, &name);
+
+  // read realm, using the store's own context like the rest of this file
+  realm = std::make_unique<RGWRealm>(id, name);
+  http_ret = realm->init(store->ctx(), store->svc.sysobj);
+  if (http_ret < 0)
+    lderr(store->ctx()) << "failed to read realm id=" << id
+      << " name=" << name << dendl;
+}
+
+void RGWOp_Realm_Get::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+
+  if (http_ret >= 0) {
+    // success: the body is the realm object encoded as json
+    encode_json("realm", *realm, s->formatter);
+    end_header(s, NULL, "application/json", s->formatter->get_len());
+    flusher.flush();
+    return;
+  }
+  end_header(s); // failure: nothing is encoded after the headers
+}
+
+// GET /admin/realm?list
+class RGWOp_Realm_List : public RGWRESTOp {
+ std::string default_id; // passed to read_default_id() by execute(); sent as "default_info"
+ std::list<std::string> realms; // filled by execute() through list_realms(); sent as "realms"
+public:
+ int check_caps(RGWUserCaps& caps) override {
+ return caps.check_cap("zone", RGW_CAP_READ); // requires the "zone" read capability
+ }
+ int verify_permission() override {
+ return check_caps(s->user->caps); // checked against the caps of the requesting user
+ }
+ void execute() override;
+ void send_response() override;
+ const char* name() const override { return "list_realms"; }
+};
+
+void RGWOp_Realm_List::execute()
+{
+  {
+    // read default realm; the result code is deliberately ignored
+    RGWRealm default_realm(store->ctx(), store->svc.sysobj);
+    static_cast<void>(default_realm.read_default_id(default_id));
+  }
+  http_ret = store->svc.zone->list_realms(realms);
+  if (http_ret < 0)
+    lderr(store->ctx()) << "failed to list realms" << dendl;
+}
+
+void RGWOp_Realm_List::send_response()
+{
+  set_req_state_err(s, http_ret);
+  dump_errno(s);
+
+  if (http_ret >= 0) {
+    // success: one json object holding the default realm id and the list of realms
+    s->formatter->open_object_section("realms_list");
+    encode_json("default_info", default_id, s->formatter);
+    encode_json("realms", realms, s->formatter);
+    s->formatter->close_section();
+    end_header(s, NULL, "application/json", s->formatter->get_len());
+    flusher.flush();
+    return;
+  }
+  end_header(s); // failure: nothing is encoded after the headers
+}
+
+class RGWHandler_Realm : public RGWHandler_Auth_S3 {
+protected:
+ using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+ RGWOp *op_get() override {
+ if (s->info.args.sub_resource_exists("list")) // GET /admin/realm?list
+ return new RGWOp_Realm_List;
+ return new RGWOp_Realm_Get; // plain GET /admin/realm
+ }
+};
+
+RGWRESTMgr_Realm::RGWRESTMgr_Realm()
+{
+ // add the /admin/realm/period resource
+ register_resource("period", new RGWRESTMgr_Period); // NOTE(review): presumably register_resource() takes ownership of the manager - confirm
+}
+
+RGWHandler_REST*
+RGWRESTMgr_Realm::get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string&)
+{
+ return new RGWHandler_Realm(auth_registry); // only auth_registry is used; the other arguments are ignored
+}
diff --git a/src/rgw/rgw_rest_realm.h b/src/rgw/rgw_rest_realm.h
new file mode 100644
index 00000000..68566bcb
--- /dev/null
+++ b/src/rgw/rgw_rest_realm.h
@@ -0,0 +1,18 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_REALM_H
+#define CEPH_RGW_REST_REALM_H
+
+#include "rgw_rest.h"
+
+class RGWRESTMgr_Realm : public RGWRESTMgr {
+public:
+ RGWRESTMgr_Realm(); // defined in rgw_rest_realm.cc
+
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string&) override; // defined in rgw_rest_realm.cc
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_role.cc b/src/rgw/rgw_rest_role.cc
new file mode 100644
index 00000000..dbcb718d
--- /dev/null
+++ b/src/rgw/rgw_rest_role.cc
@@ -0,0 +1,489 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+
+#include "include/types.h"
+#include "rgw_string.h"
+
+#include "rgw_common.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_role.h"
+#include "rgw_rest_role.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Default permission check for role operations that act on an existing role.
+// Loads the role named by the "RoleName" argument, then accepts the request
+// either through the operation's caps check or through policy evaluation
+// against the role's ARN. On success the loaded role is stored in _role.
+// Returns 0 when allowed, -EACCES when denied, or the (mapped) load error.
+int RGWRestRole::verify_permission()
+{
+  if (s->auth.identity->is_anonymous()) {
+    return -EACCES;
+  }
+
+  // The local name is deliberately not the role_name member; that member is
+  // filled in later by each operation's get_params().
+  string name = s->info.args.get("RoleName");
+  RGWRole target(s->cct, store, name, s->user->user_id.tenant);
+  op_ret = target.get();
+  if (op_ret < 0) {
+    if (op_ret == -ENOENT) {
+      op_ret = -ERR_NO_ROLE_FOUND;
+    }
+    return op_ret;
+  }
+
+  // A matching "roles" capability is sufficient on its own.
+  const int caps_ret = check_caps(s->user->caps);
+  if (caps_ret == 0) {
+    _role = std::move(target);
+    return caps_ret;
+  }
+
+  // Otherwise evaluate the permission against the role resource.
+  string resource_name = target.get_path() + name;
+  uint64_t op = get_op();
+  const bool allowed = verify_user_permission(this,
+                                              s,
+                                              rgw::ARN(resource_name,
+                                                       "role",
+                                                       s->user->user_id.tenant, true),
+                                              op);
+  if (!allowed) {
+    return -EACCES;
+  }
+
+  _role = std::move(target);
+  return 0;
+}
+
+// Send the status line and headers for a role operation. Any body written
+// by execute() is already sitting in the formatter.
+void RGWRestRole::send_response()
+{
+  const bool has_error = (op_ret != 0);
+  if (has_error) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Read-type role operations require the "roles" capability with read
+// permission. Returns the result of RGWUserCaps::check_cap().
+int RGWRoleRead::check_caps(RGWUserCaps& caps)
+{
+ return caps.check_cap("roles", RGW_CAP_READ);
+}
+
+// Write-type role operations require the "roles" capability with write
+// permission. Returns the result of RGWUserCaps::check_cap().
+int RGWRoleWrite::check_caps(RGWUserCaps& caps)
+{
+ return caps.check_cap("roles", RGW_CAP_WRITE);
+}
+
+// Permission check for CreateRole. No role is loaded here; the resource ARN
+// is built from the "Path" and "RoleName" request arguments instead.
+// Returns 0 when allowed and -EACCES otherwise.
+int RGWCreateRole::verify_permission()
+{
+  if (s->auth.identity->is_anonymous()) {
+    return -EACCES;
+  }
+
+  // A matching "roles" capability is sufficient on its own.
+  const int caps_ret = check_caps(s->user->caps);
+  if (caps_ret == 0) {
+    return caps_ret;
+  }
+
+  string name = s->info.args.get("RoleName");
+  string path = s->info.args.get("Path");
+  string resource_name = path + name;
+
+  const bool allowed = verify_user_permission(this,
+                                              s,
+                                              rgw::ARN(resource_name,
+                                                       "role",
+                                                       s->user->user_id.tenant, true),
+                                              get_op());
+  return allowed ? 0 : -EACCES;
+}
+
+// Read and validate the CreateRole arguments.
+// Returns -EINVAL when the role name or the trust policy is missing,
+// -ERR_MALFORMED_DOC when the trust policy fails to parse, 0 otherwise.
+int RGWCreateRole::get_params()
+{
+  auto& args = s->info.args;
+  role_name = args.get("RoleName");
+  role_path = args.get("Path");
+  trust_policy = args.get("AssumeRolePolicyDocument");
+  max_session_duration = args.get("MaxSessionDuration");
+
+  const bool missing_required = role_name.empty() || trust_policy.empty();
+  if (missing_required) {
+    ldout(s->cct, 20) << "ERROR: one of role name or assume role policy document is empty"
+    << dendl;
+    return -EINVAL;
+  }
+
+  // Constructing the policy object is used purely as a syntax check; the
+  // parsed result is discarded.
+  bufferlist policy_bl = bufferlist::static_from_string(trust_policy);
+  try {
+    const rgw::IAM::Policy parsed(s->cct, s->user->user_id.tenant, policy_bl);
+  }
+  catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << dendl;
+    return -ERR_MALFORMED_DOC;
+  }
+
+  return 0;
+}
+
+// Create the role described by the request and, on success, write the
+// CreateRoleResponse document to the formatter. -EEXIST from the store is
+// reported as -ERR_ROLE_EXISTS.
+void RGWCreateRole::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  RGWRole new_role(s->cct, store, role_name, role_path, trust_policy,
+                   s->user->user_id.tenant, max_session_duration);
+  op_ret = new_role.create(true);
+
+  if (op_ret == -EEXIST) {
+    op_ret = -ERR_ROLE_EXISTS;
+  }
+  if (op_ret != 0) {
+    return;
+  }
+
+  auto* const f = s->formatter;
+  f->open_object_section("CreateRoleResponse");
+  f->open_object_section("CreateRoleResult");
+  f->open_object_section("Role");
+  new_role.dump(f);
+  f->close_section();  // Role
+  f->close_section();  // CreateRoleResult
+  f->open_object_section("ResponseMetadata");
+  f->dump_string("RequestId", s->trans_id);
+  f->close_section();  // ResponseMetadata
+  f->close_section();  // CreateRoleResponse
+}
+
+// Read the DeleteRole arguments. Returns -EINVAL when "RoleName" is empty.
+int RGWDeleteRole::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+
+  if (!role_name.empty()) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: Role name is empty"<< dendl;
+  return -EINVAL;
+}
+
+// Delete the role loaded by verify_permission() (held in _role).
+// -ENOENT from the store is reported as -ERR_NO_ROLE_FOUND.
+//
+// The DeleteRoleResponse document is written only on success, matching the
+// other role operations. Writing it unconditionally left a success body in
+// the formatter when the delete had failed.
+void RGWDeleteRole::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  op_ret = _role.delete_obj();
+
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_ROLE_FOUND;
+  }
+  if (op_ret != 0) {
+    return;
+  }
+
+  s->formatter->open_object_section("DeleteRoleResponse");
+  s->formatter->open_object_section("ResponseMetadata");
+  s->formatter->dump_string("RequestId", s->trans_id);
+  s->formatter->close_section();
+  s->formatter->close_section();
+}
+
+// Always succeeds at this stage. The actual check is performed by
+// _verify_permission(), which execute() calls once the role is loaded.
+int RGWGetRole::verify_permission()
+{
+ return 0;
+}
+
+// Permission check for GetRole against an already loaded role.
+// Returns 0 when the caller passes the caps check or the policy evaluation
+// for the role's ARN, and -EACCES otherwise (including anonymous callers).
+int RGWGetRole::_verify_permission(const RGWRole& role)
+{
+  if (s->auth.identity->is_anonymous()) {
+    return -EACCES;
+  }
+
+  // A matching "roles" capability is sufficient on its own.
+  const int caps_ret = check_caps(s->user->caps);
+  if (caps_ret == 0) {
+    return caps_ret;
+  }
+
+  string resource_name = role.get_path() + role.get_name();
+  const bool allowed = verify_user_permission(this,
+                                              s,
+                                              rgw::ARN(resource_name,
+                                                       "role",
+                                                       s->user->user_id.tenant, true),
+                                              get_op());
+  return allowed ? 0 : -EACCES;
+}
+
+// Read the GetRole arguments. Returns -EINVAL when "RoleName" is empty.
+int RGWGetRole::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+
+  if (!role_name.empty()) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: Role name is empty"<< dendl;
+  return -EINVAL;
+}
+
+// Load the requested role, check the caller's permission on it and, when
+// allowed, write the GetRoleResponse document to the formatter.
+//
+// -ENOENT from the store is reported as -ERR_NO_ROLE_FOUND. Any other load
+// failure is returned as-is; it used to be overwritten by the permission
+// check, which could end with a role that was never loaded being dumped.
+void RGWGetRole::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+  RGWRole role(s->cct, store, role_name, s->user->user_id.tenant);
+  op_ret = role.get();
+
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_ROLE_FOUND;
+    return;
+  }
+  if (op_ret < 0) {
+    // Do not evaluate permissions on, or dump, a role that failed to load.
+    return;
+  }
+
+  op_ret = _verify_permission(role);
+
+  if (op_ret == 0) {
+    s->formatter->open_object_section("GetRoleResponse");
+    s->formatter->open_object_section("ResponseMetadata");
+    s->formatter->dump_string("RequestId", s->trans_id);
+    s->formatter->close_section();
+    s->formatter->open_object_section("GetRoleResult");
+    s->formatter->open_object_section("Role");
+    role.dump(s->formatter);
+    s->formatter->close_section();
+    s->formatter->close_section();
+    s->formatter->close_section();
+  }
+}
+
+// Read and validate the arguments for updating a role's trust policy.
+// Returns -EINVAL when "RoleName" or "PolicyDocument" is empty and
+// -ERR_MALFORMED_DOC when the document is not parseable JSON.
+int RGWModifyRole::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+  trust_policy = s->info.args.get("PolicyDocument");
+
+  const bool missing_required = role_name.empty() || trust_policy.empty();
+  if (missing_required) {
+    ldout(s->cct, 20) << "ERROR: One of role name or trust policy is empty"<< dendl;
+    return -EINVAL;
+  }
+
+  // Only JSON well-formedness is checked here.
+  JSONParser json;
+  const bool parsed = json.parse(trust_policy.c_str(), trust_policy.length());
+  if (!parsed) {
+    ldout(s->cct, 20) << "ERROR: failed to parse assume role policy doc" << dendl;
+    return -ERR_MALFORMED_DOC;
+  }
+
+  return 0;
+}
+
+// Replace the trust policy of the role loaded by verify_permission() and
+// persist it.
+//
+// The UpdateAssumeRolePolicyResponse document is written only when the
+// update succeeded, matching the other role operations. Writing it
+// unconditionally left a success body in the formatter on failure.
+void RGWModifyRole::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  _role.update_trust_policy(trust_policy);
+  op_ret = _role.update();
+  if (op_ret != 0) {
+    return;
+  }
+
+  s->formatter->open_object_section("UpdateAssumeRolePolicyResponse");
+  s->formatter->open_object_section("ResponseMetadata");
+  s->formatter->dump_string("RequestId", s->trans_id);
+  s->formatter->close_section();
+  s->formatter->close_section();
+}
+
+// Permission check for ListRoles. There is no single target role, so the
+// policy evaluation uses a default-constructed ARN.
+// Returns 0 when allowed and -EACCES otherwise.
+int RGWListRoles::verify_permission()
+{
+  if (s->auth.identity->is_anonymous()) {
+    return -EACCES;
+  }
+
+  // A matching "roles" capability is sufficient on its own.
+  const int caps_ret = check_caps(s->user->caps);
+  if (caps_ret == 0) {
+    return caps_ret;
+  }
+
+  const bool allowed = verify_user_permission(this,
+                                              s,
+                                              rgw::ARN(),
+                                              get_op());
+  return allowed ? 0 : -EACCES;
+}
+
+// Read the optional "PathPrefix" filter into path_prefix. Always returns 0.
+int RGWListRoles::get_params()
+{
+ path_prefix = s->info.args.get("PathPrefix");
+
+ return 0;
+}
+
+// Fetch the caller's tenant roles whose path matches path_prefix and, on
+// success, write them to the formatter as a ListRolesResponse document.
+void RGWListRoles::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  vector<RGWRole> roles;
+  op_ret = RGWRole::get_roles_by_path_prefix(store, s->cct, path_prefix, s->user->user_id.tenant, roles);
+  if (op_ret != 0) {
+    return;
+  }
+
+  auto* const f = s->formatter;
+  f->open_array_section("ListRolesResponse");
+  f->open_object_section("ResponseMetadata");
+  f->dump_string("RequestId", s->trans_id);
+  f->close_section();  // ResponseMetadata
+  f->open_array_section("ListRolesResult");
+  f->open_object_section("Roles");
+  for (const auto& role : roles) {
+    f->open_object_section("member");
+    role.dump(f);
+    f->close_section();  // member
+  }
+  f->close_section();  // Roles
+  f->close_section();  // ListRolesResult
+  f->close_section();  // ListRolesResponse
+}
+
+// Read and validate the PutRolePolicy arguments.
+// Returns -EINVAL when any of "RoleName", "PolicyName" or "PolicyDocument"
+// is empty, -ERR_MALFORMED_DOC when the policy fails to parse, 0 otherwise.
+int RGWPutRolePolicy::get_params()
+{
+  auto& args = s->info.args;
+  role_name = args.get("RoleName");
+  policy_name = args.get("PolicyName");
+  perm_policy = args.get("PolicyDocument");
+
+  const bool missing_required =
+    role_name.empty() || policy_name.empty() || perm_policy.empty();
+  if (missing_required) {
+    ldout(s->cct, 20) << "ERROR: One of role name, policy name or perm policy is empty"<< dendl;
+    return -EINVAL;
+  }
+
+  // Constructing the policy object is used purely as a syntax check.
+  bufferlist policy_bl = bufferlist::static_from_string(perm_policy);
+  try {
+    const rgw::IAM::Policy parsed(s->cct, s->user->user_id.tenant, policy_bl);
+  }
+  catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << dendl;
+    return -ERR_MALFORMED_DOC;
+  }
+  return 0;
+}
+
+// Attach (or replace) the named permission policy on the role loaded by
+// verify_permission(), persist the role, and write the
+// PutRolePolicyResponse document on success.
+void RGWPutRolePolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  _role.set_perm_policy(policy_name, perm_policy);
+  op_ret = _role.update();
+  if (op_ret != 0) {
+    return;
+  }
+
+  auto* const f = s->formatter;
+  f->open_object_section("PutRolePolicyResponse");
+  f->open_object_section("ResponseMetadata");
+  f->dump_string("RequestId", s->trans_id);
+  f->close_section();  // ResponseMetadata
+  f->close_section();  // PutRolePolicyResponse
+}
+
+// Read the GetRolePolicy arguments. Returns -EINVAL when "RoleName" or
+// "PolicyName" is empty.
+int RGWGetRolePolicy::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+  policy_name = s->info.args.get("PolicyName");
+
+  const bool complete = !role_name.empty() && !policy_name.empty();
+  if (complete) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: One of role name or policy name is empty"<< dendl;
+  return -EINVAL;
+}
+
+// Look up one permission policy on the role loaded by verify_permission()
+// and, on success, write it as a GetRolePolicyResponse document.
+// -ENOENT from the lookup is reported as -ERR_NO_SUCH_ENTITY.
+void RGWGetRolePolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  // Local result buffer; the perm_policy member is not used by this op.
+  string policy_doc;
+  op_ret = _role.get_role_policy(policy_name, policy_doc);
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+  }
+  if (op_ret != 0) {
+    return;
+  }
+
+  auto* const f = s->formatter;
+  f->open_object_section("GetRolePolicyResponse");
+  f->open_object_section("ResponseMetadata");
+  f->dump_string("RequestId", s->trans_id);
+  f->close_section();  // ResponseMetadata
+  f->open_object_section("GetRolePolicyResult");
+  f->dump_string("PolicyName", policy_name);
+  f->dump_string("RoleName", role_name);
+  f->dump_string("PolicyDocument", policy_doc);
+  f->close_section();  // GetRolePolicyResult
+  f->close_section();  // GetRolePolicyResponse
+}
+
+// Read the ListRolePolicies arguments. Returns -EINVAL when "RoleName" is
+// empty.
+int RGWListRolePolicies::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+
+  if (!role_name.empty()) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: Role name is empty"<< dendl;
+  return -EINVAL;
+}
+
+// Write the names of all permission policies attached to the role loaded
+// by verify_permission() as a ListRolePoliciesResponse document.
+void RGWListRolePolicies::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  std::vector<string> names = _role.get_role_policy_names();
+
+  auto* const f = s->formatter;
+  f->open_object_section("ListRolePoliciesResponse");
+  f->open_object_section("ResponseMetadata");
+  f->dump_string("RequestId", s->trans_id);
+  f->close_section();  // ResponseMetadata
+  f->open_object_section("ListRolePoliciesResult");
+  f->open_array_section("PolicyNames");
+  for (const auto& policy : names) {
+    f->dump_string("member", policy);
+  }
+  f->close_section();  // PolicyNames
+  f->close_section();  // ListRolePoliciesResult
+  f->close_section();  // ListRolePoliciesResponse
+}
+
+// Read the DeleteRolePolicy arguments. Returns -EINVAL when "RoleName" or
+// "PolicyName" is empty.
+int RGWDeleteRolePolicy::get_params()
+{
+  role_name = s->info.args.get("RoleName");
+  policy_name = s->info.args.get("PolicyName");
+
+  const bool complete = !role_name.empty() && !policy_name.empty();
+  if (complete) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: One of role name or policy name is empty"<< dendl;
+  return -EINVAL;
+}
+
+// Remove the named permission policy from the role loaded by
+// verify_permission() and persist the role.
+// -ENOENT from the removal is reported as -ERR_NO_ROLE_FOUND.
+//
+// The DeleteRolePoliciesResponse document is written only on success,
+// matching the other role operations. Writing it unconditionally left a
+// success body in the formatter on failure.
+void RGWDeleteRolePolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  op_ret = _role.delete_policy(policy_name);
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_ROLE_FOUND;
+  }
+
+  // Persist the role only when the policy was actually removed.
+  if (op_ret == 0) {
+    op_ret = _role.update();
+  }
+  if (op_ret != 0) {
+    return;
+  }
+
+  s->formatter->open_object_section("DeleteRolePoliciesResponse");
+  s->formatter->open_object_section("ResponseMetadata");
+  s->formatter->dump_string("RequestId", s->trans_id);
+  s->formatter->close_section();
+  s->formatter->close_section();
+}
diff --git a/src/rgw/rgw_rest_role.h b/src/rgw/rgw_rest_role.h
new file mode 100644
index 00000000..24e6bba6
--- /dev/null
+++ b/src/rgw/rgw_rest_role.h
@@ -0,0 +1,131 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_ROLE_H
+#define CEPH_RGW_REST_ROLE_H
+
+#include "rgw_role.h"
+
+// Common base of the role REST operations. It holds the request arguments
+// that the individual get_params() implementations fill in, plus the role
+// object loaded by the default verify_permission().
+class RGWRestRole : public RGWRESTOp {
+protected:
+ string role_name;
+ string role_path;
+ string trust_policy;
+ string policy_name;
+ string perm_policy;
+ string path_prefix;
+ string max_session_duration;
+ // Set by RGWRestRole::verify_permission() once the role has been loaded.
+ RGWRole _role;
+public:
+ int verify_permission() override;
+ void send_response() override;
+ // Action identifier passed to verify_user_permission() for this op.
+ virtual uint64_t get_op() = 0;
+};
+
+// Base for read-type role operations; check_caps() requires RGW_CAP_READ on
+// the "roles" capability.
+class RGWRoleRead : public RGWRestRole {
+public:
+ RGWRoleRead() = default;
+ int check_caps(RGWUserCaps& caps) override;
+};
+
+// Base for write-type role operations; check_caps() requires RGW_CAP_WRITE
+// on the "roles" capability.
+class RGWRoleWrite : public RGWRestRole {
+public:
+ RGWRoleWrite() = default;
+ int check_caps(RGWUserCaps& caps) override;
+};
+
+// CreateRole operation. Uses its own verify_permission() because the role
+// does not exist yet and therefore cannot be loaded by the base class.
+class RGWCreateRole : public RGWRoleWrite {
+public:
+  RGWCreateRole() = default;
+  int verify_permission() override;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "create_role"; }
+  RGWOpType get_type() override { return RGW_OP_CREATE_ROLE; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamCreateRole; }
+};
+
+// DeleteRole operation; acts on the role loaded by the base class's
+// verify_permission().
+class RGWDeleteRole : public RGWRoleWrite {
+public:
+  RGWDeleteRole() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "delete_role"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_ROLE; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamDeleteRole; }
+};
+
+// GetRole operation. verify_permission() is a no-op; the real check runs in
+// _verify_permission() from execute(), after the role has been loaded.
+class RGWGetRole : public RGWRoleRead {
+  int _verify_permission(const RGWRole& role);
+public:
+  RGWGetRole() = default;
+  int verify_permission() override;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "get_role"; }
+  RGWOpType get_type() override { return RGW_OP_GET_ROLE; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamGetRole; }
+};
+
+// Operation that replaces a role's trust policy; acts on the role loaded by
+// the base class's verify_permission().
+class RGWModifyRole : public RGWRoleWrite {
+public:
+  RGWModifyRole() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "modify_role"; }
+  RGWOpType get_type() override { return RGW_OP_MODIFY_ROLE; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamModifyRole; }
+};
+
+// ListRoles operation. Uses its own verify_permission() because there is no
+// single target role to load.
+class RGWListRoles : public RGWRoleRead {
+public:
+  RGWListRoles() = default;
+  int verify_permission() override;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "list_roles"; }
+  RGWOpType get_type() override { return RGW_OP_LIST_ROLES; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamListRoles; }
+};
+
+// PutRolePolicy operation; acts on the role loaded by the base class's
+// verify_permission().
+class RGWPutRolePolicy : public RGWRoleWrite {
+public:
+  RGWPutRolePolicy() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "put_role_policy"; }
+  RGWOpType get_type() override { return RGW_OP_PUT_ROLE_POLICY; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamPutRolePolicy; }
+};
+
+// GetRolePolicy operation; acts on the role loaded by the base class's
+// verify_permission().
+class RGWGetRolePolicy : public RGWRoleRead {
+public:
+  RGWGetRolePolicy() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "get_role_policy"; }
+  RGWOpType get_type() override { return RGW_OP_GET_ROLE_POLICY; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamGetRolePolicy; }
+};
+
+// ListRolePolicies operation; acts on the role loaded by the base class's
+// verify_permission().
+class RGWListRolePolicies : public RGWRoleRead {
+public:
+  RGWListRolePolicies() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "list_role_policies"; }
+  RGWOpType get_type() override { return RGW_OP_LIST_ROLE_POLICIES; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamListRolePolicies; }
+};
+
+// DeleteRolePolicy operation; acts on the role loaded by the base class's
+// verify_permission().
+class RGWDeleteRolePolicy : public RGWRoleWrite {
+public:
+  RGWDeleteRolePolicy() = default;
+  void execute() override;
+  int get_params();
+  const char* name() const override { return "delete_role_policy"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_ROLE_POLICY; }
+  // Implements the pure virtual RGWRestRole::get_op().
+  uint64_t get_op() override { return rgw::IAM::iamDeleteRolePolicy; }
+};
+#endif /* CEPH_RGW_REST_ROLE_H */
+
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
new file mode 100644
index 00000000..f25890f5
--- /dev/null
+++ b/src/rgw/rgw_rest_s3.cc
@@ -0,0 +1,5133 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <array>
+#include <string.h>
+
+#include "common/ceph_crypto.h"
+#include "common/Formatter.h"
+#include "common/utf8.h"
+#include "common/ceph_json.h"
+#include "common/safe_io.h"
+#include "auth/Crypto.h"
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/utility/string_view.hpp>
+#include <boost/tokenizer.hpp>
+
+#include <liboath/oath.h>
+
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_rest_s3website.h"
+#include "rgw_rest_pubsub.h"
+#include "rgw_auth_s3.h"
+#include "rgw_acl.h"
+#include "rgw_policy_s3.h"
+#include "rgw_user.h"
+#include "rgw_cors.h"
+#include "rgw_cors_s3.h"
+#include "rgw_tag_s3.h"
+
+#include "rgw_client_io.h"
+
+#include "rgw_keystone.h"
+#include "rgw_auth_keystone.h"
+#include "rgw_auth_registry.h"
+
+#include "rgw_es_query.h"
+
+#include <typeinfo> // for 'typeid'
+
+#include "rgw_ldap.h"
+#include "rgw_token.h"
+#include "rgw_rest_role.h"
+#include "rgw_crypt.h"
+#include "rgw_crypt_sanitize.h"
+#include "rgw_rest_user_policy.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include "include/ceph_assert.h"
+#include "rgw_role.h"
+#include "rgw_rest_sts.h"
+#include "rgw_rest_iam.h"
+#include "rgw_sts.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw;
+using namespace ceph::crypto;
+
+using std::get;
+
+// Open the top-level "ListAllMyBucketsResult" section in the S3 namespace.
+// Paired with list_all_buckets_end().
+void list_all_buckets_start(struct req_state *s)
+{
+ s->formatter->open_array_section_in_ns("ListAllMyBucketsResult", XMLNS_AWS_S3);
+}
+
+// Close the section opened by list_all_buckets_start().
+void list_all_buckets_end(struct req_state *s)
+{
+ s->formatter->close_section();
+}
+
+// Write one "Bucket" entry (its name and creation date) to the request's
+// formatter.
+void dump_bucket(struct req_state *s, RGWBucketEnt& obj)
+{
+  auto* const f = s->formatter;
+
+  f->open_object_section("Bucket");
+  f->dump_string("Name", obj.bucket.name);
+  dump_time(s, "CreationDate", &obj.creation_time);
+  f->close_section();  // Bucket
+}
+
+// Fill *e with the HTTP status and S3 error code registered for err_no in
+// rgw_http_s3_errors. Unregistered values yield 500 / "UnknownError".
+void rgw_get_errno_s3(rgw_http_error *e , int err_no)
+{
+  const auto hit = rgw_http_s3_errors.find(err_no);
+
+  if (hit == rgw_http_s3_errors.end()) {
+    e->http_ret = 500;
+    e->s3_code = "UnknownError";
+    return;
+  }
+
+  e->http_ret = hit->second.first;
+  e->s3_code = hit->second.second;
+}
+
+// Convenience wrapper that calls rgw::lc::s3_expiration_header() with the
+// request's object, tag set and bucket attributes plus the given mtime.
+// send_response_data() emits the result as "x-amz-expiration" when it is
+// non-empty.
+static inline std::string get_s3_expiration_header(
+ struct req_state* s,
+ const ceph::real_time& mtime)
+{
+ return rgw::lc::s3_expiration_header(
+ s, s->object, s->tagset, mtime, s->bucket_attrs);
+}
+
+// Maps a "response-*" query parameter to the HTTP response header that it
+// overrides.
+struct response_attr_param {
+ const char *param;
+ const char *http_attr;
+};
+
+// Response-header overrides honoured by GetObject. The table ends with a
+// {NULL, NULL} sentinel; consumers iterate until param is NULL.
+static struct response_attr_param resp_attr_params[] = {
+ {"response-content-type", "Content-Type"},
+ {"response-content-language", "Content-Language"},
+ {"response-expires", "Expires"},
+ {"response-cache-control", "Cache-Control"},
+ {"response-content-disposition", "Content-Disposition"},
+ {"response-content-encoding", "Content-Encoding"},
+ {NULL, NULL},
+};
+
+// Website flavour of GetObject. When the object carries the
+// website-redirect-location attribute, a 301 redirect is sent instead of
+// the object data and -ERR_WEBSITE_REDIRECT is returned. Otherwise the
+// call is forwarded to the plain S3 implementation.
+int RGWGetObj_ObjStore_S3Website::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
+  const auto redirect_attr = attrs.find(RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION);
+  if (redirect_attr == attrs.end()) {
+    return RGWGetObj_ObjStore_S3::send_response_data(bl, bl_ofs, bl_len);
+  }
+
+  // NOTE(review): c_str() is used as a NUL-terminated string here; confirm
+  // that the stored attribute value is always terminated.
+  bufferlist& location = redirect_attr->second;
+  s->redirect = location.c_str();
+  s->err.http_ret = 301;
+  ldout(s->cct, 20) << __CEPH_ASSERT_FUNCTION << " redirecting per x-amz-website-redirect-location=" << s->redirect << dendl;
+
+  op_ret = -ERR_WEBSITE_REDIRECT;
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  dump_content_length(s, 0);
+  dump_redirect(s, s->redirect);
+  end_header(s, this);
+  return op_ret;
+}
+
+// Forwards directly to the plain S3 implementation, skipping the redirect
+// handling done by this class's send_response_data().
+int RGWGetObj_ObjStore_S3Website::send_response_data_error()
+{
+ return RGWGetObj_ObjStore_S3::send_response_data_error();
+}
+
+// Pick up the multisite-sync flags from the query string, then defer to the
+// generic RGWGetObj_ObjStore::get_params(), whose result is returned.
+int RGWGetObj_ObjStore_S3::get_params()
+{
+ // for multisite sync requests, only read the slo manifest itself, rather than
+ // all of the data from its parts. the parts will sync as separate objects
+ skip_manifest = s->info.args.exists(RGW_SYS_PARAM_PREFIX "sync-manifest");
+
+ // multisite sync requests should fetch encrypted data, along with the
+ // attributes needed to support decryption on the other zone
+ // (only honoured for system requests)
+ if (s->system_request) {
+ skip_decrypt = s->info.args.exists(RGW_SYS_PARAM_PREFIX "skip-decrypt");
+ }
+
+ return RGWGetObj_ObjStore::get_params();
+}
+
+// Send the response through send_response_data() with an empty buffer and
+// zero offset/length.
+int RGWGetObj_ObjStore_S3::send_response_data_error()
+{
+ bufferlist bl;
+ return send_response_data(bl, 0 , 0);
+}
+
+// Decode a single encoded value of type T from the attribute attr_name.
+// A missing or zero-length attribute yields def_val and returns 0.
+// Returns -EIO when decoding throws buffer::error (*result is then left
+// in whatever state the failed decode produced) and 0 on success.
+template <class T>
+int decode_attr_bl_single_value(map<string, bufferlist>& attrs, const char *attr_name, T *result, T def_val)
+{
+  const auto found = attrs.find(attr_name);
+  const bool absent = (found == attrs.end()) || (found->second.length() == 0);
+  if (absent) {
+    *result = def_val;
+    return 0;
+  }
+
+  auto cursor = found->second.cbegin();
+  try {
+    decode(*result, cursor);
+  } catch (buffer::error& err) {
+    return -EIO;
+  }
+  return 0;
+}
+
+// Return true when s contains at least one control character as classified
+// by iscntrl(); this includes CR and LF.
+//
+// The string is taken by const reference to avoid a copy per call. Each
+// character is converted to unsigned char before classification, because
+// passing a negative plain char to iscntrl() is undefined behaviour.
+inline bool str_has_cntrl(const std::string& s) {
+  return std::any_of(s.begin(), s.end(),
+                     [](unsigned char c) { return ::iscntrl(c) != 0; });
+}
+
+// C-string overload. A null pointer is treated as an empty string and
+// yields false.
+inline bool str_has_cntrl(const char* s) {
+  if (s == nullptr) {
+    return false;
+  }
+  return str_has_cntrl(std::string(s));
+}
+
+// Send (part of) a GetObject response.
+//
+// The first call emits the status line and all response headers (range,
+// multisite "Rgwx-*" headers, encryption headers, etag, response-* overrides,
+// stored HTTP attributes, user metadata, tag count, object-lock headers),
+// followed by any prepended metadata blob. Every call then writes bl_len
+// bytes of bl starting at bl_ofs when get_data is set and op_ret is 0.
+//
+// Returns 0 on success, -ERR_INVALID_REQUEST when a response-* override is
+// used anonymously or contains control characters, or the negative result
+// of dump_body().
+int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,
+                                              off_t bl_len)
+{
+  const char *content_type = NULL;
+  string content_type_str;
+  map<string, string> response_attrs;
+  map<string, string>::iterator riter;
+  bufferlist metadata_bl;
+
+  string expires = get_s3_expiration_header(s, lastmod);
+
+  // Headers are sent once; later calls only stream body data.
+  if (sent_header)
+    goto send_data;
+
+  if (custom_http_ret) {
+    set_req_state_err(s, 0);
+    dump_errno(s, custom_http_ret);
+  } else {
+    set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT
+                      : op_ret);
+    dump_errno(s);
+  }
+
+  // On error skip the object-specific headers.
+  if (op_ret)
+    goto done;
+
+  if (range_str)
+    dump_range(s, start, end, s->obj_size);
+
+  if (s->system_request &&
+      s->info.args.exists(RGW_SYS_PARAM_PREFIX "prepend-metadata")) {
+
+    dump_header(s, "Rgwx-Object-Size", (long long)total_len);
+
+    if (rgwx_stat) {
+      /*
+       * in this case, we're not returning the object's content, only the prepended
+       * extra metadata
+       */
+      total_len = 0;
+    }
+
+    /* JSON encode object metadata */
+    JSONFormatter jf;
+    jf.open_object_section("obj_metadata");
+    encode_json("attrs", attrs, &jf);
+    utime_t ut(lastmod);
+    encode_json("mtime", ut, &jf);
+    jf.close_section();
+    stringstream ss;
+    jf.flush(ss);
+    metadata_bl.append(ss.str());
+    dump_header(s, "Rgwx-Embedded-Metadata-Len", metadata_bl.length());
+    total_len += metadata_bl.length();
+  }
+
+  if (s->system_request && !real_clock::is_zero(lastmod)) {
+    /* we end up dumping mtime in two different methods, a bit redundant */
+    dump_epoch_header(s, "Rgwx-Mtime", lastmod);
+    uint64_t pg_ver = 0;
+    int r = decode_attr_bl_single_value(attrs, RGW_ATTR_PG_VER, &pg_ver, (uint64_t)0);
+    if (r < 0) {
+      ldout(s->cct, 0) << "ERROR: failed to decode pg ver attr, ignoring" << dendl;
+    }
+    dump_header(s, "Rgwx-Obj-PG-Ver", pg_ver);
+
+    uint32_t source_zone_short_id = 0;
+    r = decode_attr_bl_single_value(attrs, RGW_ATTR_SOURCE_ZONE, &source_zone_short_id, (uint32_t)0);
+    if (r < 0) {
+      // Name the attribute that actually failed; this message used to be a
+      // copy of the "pg ver" one above.
+      ldout(s->cct, 0) << "ERROR: failed to decode source zone short id attr, ignoring" << dendl;
+    }
+    if (source_zone_short_id != 0) {
+      dump_header(s, "Rgwx-Source-Zone-Short-Id", source_zone_short_id);
+    }
+  }
+
+  for (auto &it : crypt_http_responses)
+    dump_header(s, it.first, it.second);
+
+  dump_content_length(s, total_len);
+  dump_last_modified(s, lastmod);
+  dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+  dump_header_if_nonempty(s, "x-amz-expiration", expires);
+
+  if (attrs.find(RGW_ATTR_APPEND_PART_NUM) != attrs.end()) {
+    dump_header(s, "x-rgw-object-type", "Appendable");
+    dump_header(s, "x-rgw-next-append-position", s->obj_size);
+  } else {
+    dump_header(s, "x-rgw-object-type", "Normal");
+  }
+
+  if (! op_ret) {
+    if (! lo_etag.empty()) {
+      /* Handle etag of Swift API's large objects (DLO/SLO). It's entirely
+       * legit to perform GET on them through S3 API. In such situation,
+       * a client should receive the composited content with corresponding
+       * etag value. */
+      dump_etag(s, lo_etag);
+    } else {
+      auto iter = attrs.find(RGW_ATTR_ETAG);
+      if (iter != attrs.end()) {
+        dump_etag(s, iter->second.to_str());
+      }
+    }
+
+    for (struct response_attr_param *p = resp_attr_params; p->param; p++) {
+      bool exists;
+      string val = s->info.args.get(p->param, &exists);
+      if (exists) {
+        /* reject unauthenticated response header manipulation, see
+         * https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html */
+        if (s->auth.identity->is_anonymous()) {
+          return -ERR_INVALID_REQUEST;
+        }
+        /* HTTP specification says no control characters should be present in
+         * header values: https://tools.ietf.org/html/rfc7230#section-3.2
+         * field-vchar = VCHAR / obs-text
+         *
+         * Failure to validate this permits a CRLF injection in HTTP headers,
+         * whereas S3 GetObject only permits specific headers.
+         */
+        if(str_has_cntrl(val)) {
+          /* TODO: return a more distinct error in future;
+           * stating what the problem is */
+          return -ERR_INVALID_REQUEST;
+        }
+
+        if (strcmp(p->param, "response-content-type") != 0) {
+          response_attrs[p->http_attr] = val;
+        } else {
+          content_type_str = val;
+          content_type = content_type_str.c_str();
+        }
+      }
+    }
+
+    for (auto iter = attrs.begin(); iter != attrs.end(); ++iter) {
+      const char *name = iter->first.c_str();
+      map<string, string>::iterator aiter = rgw_to_http_attrs.find(name);
+      if (aiter != rgw_to_http_attrs.end()) {
+        if (response_attrs.count(aiter->second) == 0) {
+          /* Was not already overridden by a response param. */
+
+          // Copy the raw value and strip trailing NUL bytes. The local is
+          // named "value" so that it no longer shadows the request state s.
+          size_t len = iter->second.length();
+          string value(iter->second.c_str(), len);
+          while (len && !value[len - 1]) {
+            --len;
+            value.resize(len);
+          }
+          response_attrs[aiter->second] = value;
+        }
+      } else if (iter->first.compare(RGW_ATTR_CONTENT_TYPE) == 0) {
+        /* Special handling for content_type. */
+        if (!content_type) {
+          content_type_str = rgw_bl_str(iter->second);
+          content_type = content_type_str.c_str();
+        }
+      } else if (strcmp(name, RGW_ATTR_SLO_UINDICATOR) == 0) {
+        // this attr has an extra length prefix from encode() in prior versions
+        dump_header(s, "X-Object-Meta-Static-Large-Object", "True");
+      } else if (strncmp(name, RGW_ATTR_META_PREFIX,
+                         sizeof(RGW_ATTR_META_PREFIX)-1) == 0) {
+        /* User custom metadata. */
+        name += sizeof(RGW_ATTR_PREFIX) - 1;
+        dump_header(s, name, iter->second);
+      } else if (iter->first.compare(RGW_ATTR_TAGS) == 0) {
+        RGWObjTags obj_tags;
+        try{
+          auto it = iter->second.cbegin();
+          obj_tags.decode(it);
+        } catch (buffer::error &err) {
+          ldout(s->cct,0) << "Error caught buffer::error couldn't decode TagSet " << dendl;
+        }
+        dump_header(s, RGW_AMZ_TAG_COUNT, obj_tags.count());
+      } else if (iter->first.compare(RGW_ATTR_OBJECT_RETENTION) == 0 && get_retention){
+        RGWObjectRetention retention;
+        try {
+          decode(retention, iter->second);
+          dump_header(s, "x-amz-object-lock-mode", retention.get_mode());
+          dump_time_header(s, "x-amz-object-lock-retain-until-date", retention.get_retain_until_date());
+        } catch (buffer::error& err) {
+          ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl;
+        }
+      } else if (iter->first.compare(RGW_ATTR_OBJECT_LEGAL_HOLD) == 0 && get_legal_hold) {
+        RGWObjectLegalHold legal_hold;
+        try {
+          decode(legal_hold, iter->second);
+          dump_header(s, "x-amz-object-lock-legal-hold",legal_hold.get_status());
+        } catch (buffer::error& err) {
+          ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl;
+        }
+      }
+    }
+  }
+
+done:
+  for (riter = response_attrs.begin(); riter != response_attrs.end();
+       ++riter) {
+    dump_header(s, riter->first, riter->second);
+  }
+
+  if (op_ret == -ERR_NOT_MODIFIED) {
+    end_header(s, this);
+  } else {
+    if (!content_type)
+      content_type = "binary/octet-stream";
+
+    end_header(s, this, content_type);
+  }
+
+  // The prepended metadata blob goes out before any object data.
+  if (metadata_bl.length()) {
+    dump_body(s, metadata_bl);
+  }
+  sent_header = true;
+
+send_data:
+  if (get_data && !op_ret) {
+    int r = dump_body(s, bl.c_str() + bl_ofs, bl_len);
+    if (r < 0)
+      return r;
+  }
+
+  return 0;
+}
+
+// Install a decrypting filter in front of cb when the object's attributes
+// call for one.
+// Returns 0 when no filter is needed or the filter was installed, and
+// otherwise the error from rgw_s3_prepare_decrypt() or read_manifest().
+int RGWGetObj_ObjStore_S3::get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter> *filter, RGWGetObj_Filter* cb, bufferlist* manifest_bl)
+{
+  if (skip_decrypt) { // bypass decryption for multisite sync requests
+    return 0;
+  }
+
+  std::unique_ptr<BlockCrypt> block_crypt;
+  int res = rgw_s3_prepare_decrypt(s, attrs, &block_crypt, crypt_http_responses);
+  if (res != 0) {
+    return res;
+  }
+  if (block_crypt == nullptr) {
+    // Nothing to decrypt.
+    return res;
+  }
+
+  auto decryptor = std::make_unique<RGWGetObj_BlockDecrypt>(s->cct, cb, std::move(block_crypt));
+  if (manifest_bl == nullptr) {
+    // NOTE(review): with no manifest supplied the filter is dropped and *filter
+    // stays unset even though a BlockCrypt exists; confirm this is intended.
+    return res;
+  }
+
+  res = decryptor->read_manifest(*manifest_bl);
+  if (res == 0) {
+    *filter = std::move(decryptor);
+  }
+  return res;
+}
+
+// Send the object's tag set as a "Tagging" XML document. bl holds the
+// encoded tag set and is decoded only when has_tags is set; otherwise an
+// empty TagSet element is produced.
+void RGWGetObjTags_ObjStore_S3::send_response_data(bufferlist& bl)
+{
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+ dump_start(s);
+
+ s->formatter->open_object_section_in_ns("Tagging", XMLNS_AWS_S3);
+ s->formatter->open_object_section("TagSet");
+ if (has_tags){
+ RGWObjTagSet_S3 tagset;
+ auto iter = bl.cbegin();
+ try {
+ tagset.decode(iter);
+ } catch (buffer::error& err) {
+ ldout(s->cct,0) << "ERROR: caught buffer::error, couldn't decode TagSet" << dendl;
+ // NOTE(review): the headers have already been sent at this point and the
+ // two open sections are neither closed nor flushed on this path.
+ op_ret= -EIO;
+ return;
+ }
+ tagset.dump_xml(s->formatter);
+ }
+ s->formatter->close_section();
+ s->formatter->close_section();
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+
+// Read the request body, parse it as a "Tagging" XML document and store the
+// encoded tag set in tags_bl.
+// Returns -EINVAL when the parser cannot be initialised, -ERR_MALFORMED_XML
+// when the body is not a valid Tagging document, or the error from reading
+// the input or from RGWObjTagging_S3::rebuild().
+int RGWPutObjTags_ObjStore_S3::get_params()
+{
+  RGWXMLParser parser;
+
+  if (!parser.init()){
+    return -EINVAL;
+  }
+
+  // The body size is capped by the rgw_max_put_param_size option.
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+
+  int r = 0;
+  bufferlist data;
+  std::tie(r, data) = rgw_rest_read_all_input(s, max_size, false);
+
+  if (r < 0)
+    return r;
+
+  if (!parser.parse(data.c_str(), data.length(), 1)) {
+    return -ERR_MALFORMED_XML;
+  }
+
+  RGWObjTagging_S3 tagging;
+
+  try {
+    RGWXMLDecoder::decode_xml("Tagging", tagging, &parser);
+  } catch (RGWXMLDecoder::err& err) {
+    ldout(s->cct, 5) << "Malformed tagging request: " << err << dendl;
+    return -ERR_MALFORMED_XML;
+  }
+
+  RGWObjTags obj_tags;
+  r = tagging.rebuild(obj_tags);
+  if (r < 0)
+    return r;
+
+  obj_tags.encode(tags_bl);
+  // A space was missing before "tags", so the log used to read e.g. "Read 3tags".
+  ldout(s->cct, 20) << "Read " << obj_tags.count() << " tags" << dendl;
+
+  return 0;
+}
+
+// Send the status line and headers for PutObjectTagging; this function
+// writes no body content of its own.
+void RGWPutObjTags_ObjStore_S3::send_response()
+{
+ if (op_ret)
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+ dump_start(s);
+
+}
+
+// Send the status for DeleteObjectTagging. Both success and -ENOENT are
+// reported as STATUS_NO_CONTENT; any other error is passed through.
+void RGWDeleteObjTags_ObjStore_S3::send_response()
+{
+  int status = op_ret;
+  if (status == -ENOENT || status == 0) {
+    status = STATUS_NO_CONTENT;
+  }
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Start a ListBuckets response: send the status and headers and, when the
+// operation succeeded, open the result document and its "Buckets" array.
+// sent_data records that the document was opened so that the data and end
+// steps know whether to write anything.
+void RGWListBuckets_ObjStore_S3::send_response_begin(bool has_buckets)
+{
+  const bool failed = (op_ret != 0);
+  if (failed) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  dump_start(s);
+  // Explicitly use chunked transfer encoding so that we can stream the result
+  // to the user without having to wait for the full length of it.
+  end_header(s, NULL, "application/xml", CHUNKED_TRANSFER_ENCODING);
+
+  if (failed) {
+    return;
+  }
+
+  list_all_buckets_start(s);
+  dump_owner(s, s->user->user_id, s->user->display_name);
+  s->formatter->open_array_section("Buckets");
+  sent_data = true;
+}
+
+// Append one batch of buckets to the listing opened by
+// send_response_begin() and flush it to the client. Does nothing when the
+// listing was never opened (sent_data is false).
+void RGWListBuckets_ObjStore_S3::send_response_data(RGWUserBuckets& buckets)
+{
+  if (!sent_data)
+    return;
+
+  // Dump each entry in place. dump_bucket() only reads the entry, so the
+  // per-bucket copy that used to be made here was unnecessary.
+  for (auto& entry : buckets.get_buckets()) {
+    dump_bucket(s, entry.second);
+  }
+  rgw_flush_formatter(s, s->formatter);
+}
+
+// Close the "Buckets" array and the result document opened by
+// send_response_begin(), then flush. Does nothing when the listing was
+// never opened.
+void RGWListBuckets_ObjStore_S3::send_response_end()
+{
+  if (!sent_data) {
+    return;
+  }
+
+  s->formatter->close_section();  // Buckets
+  list_all_buckets_end(s);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Read the "start-date" and "end-date" query arguments. No validation is
+// done here; always returns 0.
+int RGWGetUsage_ObjStore_S3::get_params()
+{
+ start_date = s->info.args.get("start-date");
+ end_date = s->info.args.get("end-date");
+ return 0;
+}
+
+// Write the per-category counters of one usage log entry as a "categories"
+// array. When categories is non-null and non-empty, only the categories it
+// contains are written.
+static void dump_usage_categories_info(Formatter *formatter, const rgw_usage_log_entry& entry, map<string, bool> *categories)
+{
+  formatter->open_array_section("categories");
+  for (const auto& item : entry.usage_map) {
+    const bool filtered_out =
+      categories && !categories->empty() && !categories->count(item.first);
+    if (filtered_out) {
+      continue;
+    }
+
+    const rgw_usage_data& counters = item.second;
+    formatter->open_object_section("Entry");
+    formatter->dump_string("Category", item.first);
+    formatter->dump_int("BytesSent", counters.bytes_sent);
+    formatter->dump_int("BytesReceived", counters.bytes_received);
+    formatter->dump_int("Ops", counters.ops);
+    formatter->dump_int("SuccessfulOps", counters.successful_ops);
+    formatter->close_section(); // Entry
+  }
+  formatter->close_section(); // categories
+}
+
+// Dumps a single "Entry" object holding the bucket name together with its
+// size and rounded size as recorded in `entry`.
+static void dump_usage_bucket_info(Formatter *formatter, const std::string& name, const cls_user_bucket_entry& entry)
+{
+  Formatter& out = *formatter;
+
+  out.open_object_section("Entry");
+  out.dump_string("Bucket", name);
+  out.dump_int("Bytes", entry.size);
+  out.dump_int("Bytes_Rounded", entry.size_rounded);
+  out.close_section(); // Entry
+}
+
+void RGWGetUsage_ObjStore_S3::send_response() // Writes the usage report through s->formatter.
+{
+ if (op_ret < 0)
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ // Status and headers are always sent; the report body only when op_ret >= 0.
+ // Explicitly use chunked transfer encoding so that we can stream the result
+ // to the user without having to wait for the full length of it.
+ end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+ dump_start(s);
+ if (op_ret < 0)
+ return;
+ // last_owner / user_section_open track the "User" section currently open.
+ Formatter *formatter = s->formatter;
+ string last_owner;
+ bool user_section_open = false;
+ // Layout: Usage { [Entries], [Summary], CapacityUsed }.
+ formatter->open_object_section("Usage");
+ if (show_log_entries) {
+ formatter->open_array_section("Entries");
+ }
+ map<rgw_user_bucket, rgw_usage_log_entry>::iterator iter;
+ for (iter = usage.begin(); iter != usage.end(); ++iter) {
+ const rgw_user_bucket& ub = iter->first;
+ const rgw_usage_log_entry& entry = iter->second;
+ // A new "User" section is opened each time ub.user differs from the previous entry's user.
+ if (show_log_entries) {
+ if (ub.user.compare(last_owner) != 0) {
+ if (user_section_open) {
+ formatter->close_section(); // Buckets of the previous user
+ formatter->close_section(); // previous User
+ }
+ formatter->open_object_section("User");
+ formatter->dump_string("Owner", ub.user);
+ formatter->open_array_section("Buckets");
+ user_section_open = true;
+ last_owner = ub.user;
+ }
+ formatter->open_object_section("Bucket");
+ formatter->dump_string("Bucket", ub.bucket);
+ utime_t ut(entry.epoch, 0);
+ ut.gmtime(formatter->dump_stream("Time"));
+ formatter->dump_int("Epoch", entry.epoch);
+ dump_usage_categories_info(formatter, entry, &categories);
+ formatter->close_section(); // bucket
+ }
+ // Aggregation runs for every entry, whether or not log entries are shown.
+ summary_map[ub.user].aggregate(entry, &categories);
+ }
+ // Close whatever the loop left open before ending the "Entries" array.
+ if (show_log_entries) {
+ if (user_section_open) {
+ formatter->close_section(); // buckets
+ formatter->close_section(); //user
+ }
+ formatter->close_section(); // entries
+ }
+ // Per-user totals built from summary_map.
+ if (show_log_sum) {
+ formatter->open_array_section("Summary");
+ map<string, rgw_usage_log_entry>::iterator siter;
+ for (siter = summary_map.begin(); siter != summary_map.end(); ++siter) {
+ const rgw_usage_log_entry& entry = siter->second;
+ formatter->open_object_section("User");
+ formatter->dump_string("User", siter->first);
+ dump_usage_categories_info(formatter, entry, &categories);
+ rgw_usage_data total_usage;
+ entry.sum(total_usage, categories);
+ formatter->open_object_section("Total");
+ formatter->dump_int("BytesSent", total_usage.bytes_sent);
+ formatter->dump_int("BytesReceived", total_usage.bytes_received);
+ formatter->dump_int("Ops", total_usage.ops);
+ formatter->dump_int("SuccessfulOps", total_usage.successful_ops);
+ formatter->close_section(); // total
+ formatter->close_section(); // user
+ }
+ // The header.stats totals are written inside "Summary"; the compat option wraps them in "Stats".
+ if (s->cct->_conf->rgw_rest_getusage_op_compat) {
+ formatter->open_object_section("Stats");
+ }
+ // Totals come from header.stats, not from summary_map.
+ formatter->dump_int("TotalBytes", header.stats.total_bytes);
+ formatter->dump_int("TotalBytesRounded", header.stats.total_bytes_rounded);
+ formatter->dump_int("TotalEntries", header.stats.total_entries);
+ // Must mirror the conditional open above.
+ if (s->cct->_conf->rgw_rest_getusage_op_compat) {
+ formatter->close_section(); //Stats
+ }
+
+ formatter->close_section(); // summary
+ }
+ // "CapacityUsed" is written unconditionally: one Entry per element of buckets_usage.
+ formatter->open_array_section("CapacityUsed");
+ formatter->open_object_section("User");
+ formatter->open_array_section("Buckets");
+ for (const auto& biter : buckets_usage) {
+ const cls_user_bucket_entry& entry = biter.second;
+ dump_usage_bucket_info(formatter, biter.first, entry);
+ }
+ formatter->close_section(); // Buckets
+ formatter->close_section(); // User
+ formatter->close_section(); // CapacityUsed
+
+ formatter->close_section(); // usage
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Reads the listing arguments used by both listing variants defined in this
+// file. Returns a negative value when parse_max_keys() fails or when a system
+// request carries an unparsable HTTP_RGWX_SHARD_ID; 0 otherwise.
+int RGWListBucket_ObjStore_S3::get_common_params()
+{
+  auto& args = s->info.args;
+
+  list_versions = args.exists("versions");
+  prefix = args.get("prefix");
+
+  // non-standard
+  args.get_bool("allow-unordered", &allow_unordered, false);
+  delimiter = args.get("delimiter");
+  max_keys = args.get("max-keys");
+  op_ret = parse_max_keys();
+  if (op_ret < 0) {
+    return op_ret;
+  }
+  encoding_type = args.get("encoding-type");
+
+  if (!s->system_request) {
+    return 0;
+  }
+
+  // The remaining options are honoured for system requests only.
+  args.get_bool("objs-container", &objs_container, false);
+  const char *shard_hdr = s->info.env->get("HTTP_RGWX_SHARD_ID");
+  if (!shard_hdr) {
+    shard_id = s->bucket_instance_shard_id;
+    return 0;
+  }
+
+  string parse_err;
+  shard_id = strict_strtol(shard_hdr, 10, &parse_err);
+  if (!parse_err.empty()) {
+    ldout(s->cct, 5) << "bad shard id specified: " << shard_hdr << dendl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+// Reads the common listing arguments, then the marker: "key-marker" plus
+// "version-id-marker" when versions are listed, otherwise plain "marker".
+int RGWListBucket_ObjStore_S3::get_params()
+{
+  const int ret = get_common_params();
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (list_versions) {
+    marker.name = s->info.args.get("key-marker");
+    marker.instance = s->info.args.get("version-id-marker");
+  } else {
+    marker = s->info.args.get("marker");
+  }
+  return 0;
+}
+
+// Reads the common listing arguments plus "fetch-owner", "start-after" and
+// "continuation-token". The marker is the continuation token when one was
+// supplied, otherwise the start-after value.
+int RGWListBucket_ObjStore_S3v2::get_params()
+{
+  const int ret = get_common_params();
+  if (ret < 0) {
+    return ret;
+  }
+
+  s->info.args.get_bool("fetch-owner", &fetchOwner, false);
+  startAfter = s->info.args.get("start-after", &start_after_exist);
+  continuation_token = s->info.args.get("continuation-token", &continuation_token_exist);
+
+  if (continuation_token_exist) {
+    marker = continuation_token;
+  } else {
+    marker = startAfter;
+  }
+  return 0;
+}
+
+// Writes the listing fields shared by the versioned listings: optional
+// Tenant, Name, Prefix, MaxKeys, optional Delimiter, IsTruncated and one
+// CommonPrefixes section per common prefix.
+//
+// The output is exactly what send_common_response() produces, so this simply
+// forwards to it instead of keeping a second copy of the same statements. The
+// call is class-qualified so it always runs this class's implementation.
+void RGWListBucket_ObjStore_S3::send_common_versioned_response()
+{
+  RGWListBucket_ObjStore_S3::send_common_response();
+}
+
+void RGWListBucket_ObjStore_S3::send_versioned_response() // Writes the "ListVersionsResult" document for a versions listing.
+{
+ s->formatter->open_object_section_in_ns("ListVersionsResult", XMLNS_AWS_S3);
+ if (strcasecmp(encoding_type.c_str(), "url") == 0) { // case-insensitive match on the encoding-type argument
+ s->formatter->dump_string("EncodingType", "url");
+ encode_key = true; // set before the common part so prefixes and keys are URL-encoded
+ }
+ RGWListBucket_ObjStore_S3::send_common_versioned_response();
+ s->formatter->dump_string("KeyMarker", marker.name);
+ s->formatter->dump_string("VersionIdMarker", marker.instance);
+ if (is_truncated && !next_marker.empty()) {
+ s->formatter->dump_string("NextKeyMarker", next_marker.name);
+ if (next_marker.instance.empty()) {
+ s->formatter->dump_string("NextVersionIdMarker", "null"); // an empty instance is reported as the literal "null"
+ }
+ else {
+ s->formatter->dump_string("NextVersionIdMarker", next_marker.instance);
+ }
+ }
+ // Entries are written, and the root section closed, only when op_ret >= 0.
+ if (op_ret >= 0) {
+ if (objs_container) {
+ s->formatter->open_array_section("Entries");
+ }
+ // One "Version" or "DeleteMarker" section per listed entry.
+ vector<rgw_bucket_dir_entry>::iterator iter;
+ for (iter = objs.begin(); iter != objs.end(); ++iter) {
+ const char *section_name = (iter->is_delete_marker() ? "DeleteMarker"
+ : "Version");
+ s->formatter->open_object_section(section_name);
+ if (objs_container) {
+ s->formatter->dump_bool("IsDeleteMarker", iter->is_delete_marker());
+ }
+ rgw_obj_key key(iter->key);
+ if (encode_key) {
+ string key_name;
+ url_encode(key.name, key_name);
+ s->formatter->dump_string("Key", key_name);
+ }
+ else {
+ s->formatter->dump_string("Key", key.name);
+ }
+ string version_id = key.instance;
+ if (version_id.empty()) {
+ version_id = "null";
+ }
+ if (s->system_request) { // extra fields emitted for system requests only
+ if (iter->versioned_epoch > 0) {
+ s->formatter->dump_int("VersionedEpoch", iter->versioned_epoch);
+ }
+ s->formatter->dump_string("RgwxTag", iter->tag);
+ utime_t ut(iter->meta.mtime);
+ ut.gmtime_nsec(s->formatter->dump_stream("RgwxMtime"));
+ }
+ s->formatter->dump_string("VersionId", version_id);
+ s->formatter->dump_bool("IsLatest", iter->is_current());
+ dump_time(s, "LastModified", &iter->meta.mtime);
+ if (!iter->is_delete_marker()) { // delete markers get no ETag, Size or StorageClass
+ s->formatter->dump_format("ETag", "\"%s\"", iter->meta.etag.c_str());
+ s->formatter->dump_int("Size", iter->meta.accounted_size);
+ auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
+ s->formatter->dump_string("StorageClass", storage_class.c_str());
+ }
+ dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
+ if (iter->meta.appendable) {
+ s->formatter->dump_string("Type", "Appendable");
+ } else {
+ s->formatter->dump_string("Type", "Normal");
+ }
+ s->formatter->close_section(); // Version/DeleteMarker
+ }
+ if (objs_container) {
+ s->formatter->close_section(); // Entries
+ }
+ s->formatter->close_section(); // ListVersionsResult
+ }
+ rgw_flush_formatter_and_reset(s, s->formatter); // flushed regardless of op_ret
+}
+
+
+// Writes the listing fields shared by the listing replies: optional Tenant,
+// Name, Prefix, MaxKeys, optional Delimiter, IsTruncated and one
+// CommonPrefixes section per common prefix (URL-encoded when encode_key is
+// set).
+void RGWListBucket_ObjStore_S3::send_common_response()
+{
+  Formatter *f = s->formatter;
+
+  if (!s->bucket_tenant.empty()) {
+    f->dump_string("Tenant", s->bucket_tenant);
+  }
+  f->dump_string("Name", s->bucket_name);
+  f->dump_string("Prefix", prefix);
+  f->dump_int("MaxKeys", max);
+  if (!delimiter.empty()) {
+    f->dump_string("Delimiter", delimiter);
+  }
+  const char *truncated = (max && is_truncated) ? "true" : "false";
+  f->dump_string("IsTruncated", truncated);
+
+  // An empty map simply yields no sections.
+  for (const auto& pref : common_prefixes) {
+    f->open_array_section("CommonPrefixes");
+    if (encode_key) {
+      f->dump_string("Prefix", url_encode(pref.first, false));
+    } else {
+      f->dump_string("Prefix", pref.first);
+    }
+    f->close_section();
+  }
+}
+
+void RGWListBucket_ObjStore_S3::send_response() // Sends the listing reply; versions listings are handed to send_versioned_response().
+{
+ if (op_ret < 0) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ // Status and headers are always sent; a body follows only when op_ret >= 0.
+ // Explicitly use chunked transfer encoding so that we can stream the result
+ // to the user without having to wait for the full length of it.
+ end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+ dump_start(s);
+ if (op_ret < 0) {
+ return;
+ }
+ if (list_versions) {
+ send_versioned_response();
+ return;
+ }
+ // Plain listing: "ListBucketResult" with one "Contents" section per object.
+ s->formatter->open_object_section_in_ns("ListBucketResult", XMLNS_AWS_S3);
+ if (strcasecmp(encoding_type.c_str(), "url") == 0) { // case-insensitive match on the encoding-type argument
+ s->formatter->dump_string("EncodingType", "url");
+ encode_key = true;
+ }
+ RGWListBucket_ObjStore_S3::send_common_response();
+ if (op_ret >= 0) { // always true here: the op_ret < 0 case returned above
+ vector<rgw_bucket_dir_entry>::iterator iter;
+ for (iter = objs.begin(); iter != objs.end(); ++iter) {
+ rgw_obj_key key(iter->key);
+ s->formatter->open_array_section("Contents");
+ if (encode_key) {
+ string key_name;
+ url_encode(key.name, key_name);
+ s->formatter->dump_string("Key", key_name);
+ } else {
+ s->formatter->dump_string("Key", key.name);
+ }
+ dump_time(s, "LastModified", &iter->meta.mtime);
+ s->formatter->dump_format("ETag", "\"%s\"", iter->meta.etag.c_str());
+ s->formatter->dump_int("Size", iter->meta.accounted_size);
+ auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
+ s->formatter->dump_string("StorageClass", storage_class.c_str());
+ dump_owner(s, iter->meta.owner, iter->meta.owner_display_name);
+ if (s->system_request) { // RgwxTag is emitted for system requests only
+ s->formatter->dump_string("RgwxTag", iter->tag);
+ }
+ if (iter->meta.appendable) {
+ s->formatter->dump_string("Type", "Appendable");
+ } else {
+ s->formatter->dump_string("Type", "Normal");
+ }
+ s->formatter->close_section(); // Contents
+ }
+ }
+ s->formatter->dump_string("Marker", marker.name);
+ if (is_truncated && !next_marker.empty()) {
+ s->formatter->dump_string("NextMarker", next_marker.name);
+ }
+ s->formatter->close_section(); // ListBucketResult
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+void RGWListBucket_ObjStore_S3v2::send_versioned_response() // Writes "ListVersionsResult" using continuation-token element names.
+{
+ s->formatter->open_object_section_in_ns("ListVersionsResult", XMLNS_AWS_S3);
+ RGWListBucket_ObjStore_S3v2::send_common_versioned_response();
+ s->formatter->dump_string("KeyContinuationToken", marker.name);
+ s->formatter->dump_string("VersionIdContinuationToken", marker.instance);
+ if (is_truncated && !next_marker.empty()) {
+ s->formatter->dump_string("NextKeyContinuationToken", next_marker.name);
+ s->formatter->dump_string("NextVersionIdContinuationToken", next_marker.instance);
+ }
+ // NOTE(review): encode_key is only set here, after the common part above already dumped CommonPrefixes - confirm whether that is intended.
+ if (strcasecmp(encoding_type.c_str(), "url") == 0) {
+ s->formatter->dump_string("EncodingType", "url");
+ encode_key = true;
+ }
+ // Everything below, including the final close and flush, runs only when op_ret >= 0.
+ if (op_ret >= 0) {
+ if (objs_container) {
+ s->formatter->open_array_section("Entries");
+ }
+ // One "Version" or "DeleteContinuationToken" section per listed entry.
+ vector<rgw_bucket_dir_entry>::iterator iter;
+ for (iter = objs.begin(); iter != objs.end(); ++iter) {
+ const char *section_name = (iter->is_delete_marker() ? "DeleteContinuationToken"
+ : "Version");
+ s->formatter->open_object_section(section_name);
+ if (objs_container) {
+ s->formatter->dump_bool("IsDeleteContinuationToken", iter->is_delete_marker());
+ }
+ rgw_obj_key key(iter->key);
+ if (encode_key) {
+ string key_name;
+ url_encode(key.name, key_name);
+ s->formatter->dump_string("Key", key_name);
+ }
+ else {
+ s->formatter->dump_string("Key", key.name);
+ }
+ string version_id = key.instance;
+ if (version_id.empty()) {
+ version_id = "null"; // an empty instance is reported as the literal "null"
+ }
+ if (s->system_request) { // extra fields emitted for system requests only
+ if (iter->versioned_epoch > 0) {
+ s->formatter->dump_int("VersionedEpoch", iter->versioned_epoch);
+ }
+ s->formatter->dump_string("RgwxTag", iter->tag);
+ utime_t ut(iter->meta.mtime);
+ ut.gmtime_nsec(s->formatter->dump_stream("RgwxMtime"));
+ }
+ s->formatter->dump_string("VersionId", version_id);
+ s->formatter->dump_bool("IsLatest", iter->is_current());
+ dump_time(s, "LastModified", &iter->meta.mtime);
+ if (!iter->is_delete_marker()) {
+ s->formatter->dump_format("ETag", "\"%s\"", iter->meta.etag.c_str());
+ s->formatter->dump_int("Size", iter->meta.accounted_size);
+ auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
+ s->formatter->dump_string("StorageClass", storage_class.c_str());
+ }
+ if (fetchOwner == true) { // dumps the requesting user (s->user), not iter->meta.owner
+ dump_owner(s, s->user->user_id, s->user->display_name);
+ }
+ s->formatter->close_section(); // Version/DeleteContinuationToken
+ }
+
+
+ if (objs_container) {
+ s->formatter->close_section(); // Entries
+ }
+ // NOTE(review): CommonPrefixes were already written by send_common_versioned_response() above, so they appear twice - confirm.
+ if (!common_prefixes.empty()) {
+ map<string, bool>::iterator pref_iter;
+ for (pref_iter = common_prefixes.begin();
+ pref_iter != common_prefixes.end(); ++pref_iter) {
+ s->formatter->open_array_section("CommonPrefixes");
+ if (encode_key) {
+ s->formatter->dump_string("Prefix", url_encode(pref_iter->first, false));
+ } else {
+ s->formatter->dump_string("Prefix", pref_iter->first);
+ }
+ // NOTE(review): KeyCount and StartAfter are written inside every CommonPrefixes section, and not at all when there are no prefixes.
+ s->formatter->dump_int("KeyCount",objs.size());
+ if (start_after_exist) {
+ s->formatter->dump_string("StartAfter", startAfter);
+ }
+ s->formatter->close_section(); // CommonPrefixes
+ }
+ }
+
+ s->formatter->close_section(); // ListVersionsResult
+ rgw_flush_formatter_and_reset(s, s->formatter);
+ }
+}
+
+void RGWListBucket_ObjStore_S3v2::send_response() // Sends the continuation-token style listing reply.
+{
+ if (op_ret < 0) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ // Status and headers are always sent; a body follows only when op_ret >= 0.
+ // Explicitly use chunked transfer encoding so that we can stream the result
+ // to the user without having to wait for the full length of it.
+ end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+ dump_start(s);
+ if (op_ret < 0) {
+ return;
+ }
+ if (list_versions) {
+ send_versioned_response();
+ return;
+ }
+ // Plain listing: "ListBucketResult" with one "Contents" section per object.
+ s->formatter->open_object_section_in_ns("ListBucketResult", XMLNS_AWS_S3);
+ if (strcasecmp(encoding_type.c_str(), "url") == 0) { // case-insensitive match on the encoding-type argument
+ s->formatter->dump_string("EncodingType", "url");
+ encode_key = true;
+ }
+ // encode_key is decided before the common part, so CommonPrefixes honour it.
+ RGWListBucket_ObjStore_S3::send_common_response();
+ if (op_ret >= 0) { // always true here: the op_ret < 0 case returned above
+ vector<rgw_bucket_dir_entry>::iterator iter;
+ for (iter = objs.begin(); iter != objs.end(); ++iter) {
+ rgw_obj_key key(iter->key);
+ s->formatter->open_array_section("Contents");
+ if (encode_key) {
+ string key_name;
+ url_encode(key.name, key_name);
+ s->formatter->dump_string("Key", key_name);
+ }
+ else {
+ s->formatter->dump_string("Key", key.name);
+ }
+ dump_time(s, "LastModified", &iter->meta.mtime);
+ s->formatter->dump_format("ETag", "\"%s\"", iter->meta.etag.c_str());
+ s->formatter->dump_int("Size", iter->meta.accounted_size);
+ auto& storage_class = rgw_placement_rule::get_canonical_storage_class(iter->meta.storage_class);
+ s->formatter->dump_string("StorageClass", storage_class.c_str());
+ if (fetchOwner == true) { // dumps the requesting user (s->user), not iter->meta.owner
+ dump_owner(s, s->user->user_id, s->user->display_name);
+ }
+ if (s->system_request) { // RgwxTag is emitted for system requests only
+ s->formatter->dump_string("RgwxTag", iter->tag);
+ }
+ if (iter->meta.appendable) {
+ s->formatter->dump_string("Type", "Appendable");
+ } else {
+ s->formatter->dump_string("Type", "Normal");
+ }
+ s->formatter->close_section(); // Contents
+ }
+ }
+ if (continuation_token_exist) { // echo the token only when the request supplied one
+ s->formatter->dump_string("ContinuationToken", continuation_token);
+ }
+ if (is_truncated && !next_marker.empty()) {
+ s->formatter->dump_string("NextContinuationToken", next_marker.name);
+ }
+ s->formatter->dump_int("KeyCount", objs.size() + common_prefixes.size()); // objects plus common prefixes
+ if (start_after_exist) {
+ s->formatter->dump_string("StartAfter", startAfter);
+ }
+ s->formatter->close_section(); // ListBucketResult
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Replies with an empty "BucketLoggingStatus" element; op_ret is not
+// consulted here.
+void RGWGetBucketLogging_ObjStore_S3::send_response()
+{
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  Formatter *f = s->formatter;
+  f->open_object_section_in_ns("BucketLoggingStatus", XMLNS_AWS_S3);
+  f->close_section();
+  rgw_flush_formatter_and_reset(s, f);
+}
+
+// Replies with a "LocationConstraint" element. Its value is the api_name of
+// the bucket's zonegroup when the lookup succeeds; otherwise the zonegroup
+// identifier itself, except that "default" is reported as an empty string.
+void RGWGetBucketLocation_ObjStore_S3::send_response()
+{
+  dump_errno(s);
+  end_header(s, this);
+  dump_start(s);
+
+  RGWZoneGroup zonegroup;
+  string api_name;
+
+  const int ret = store->svc.zone->get_zonegroup(s->bucket_info.zonegroup, zonegroup);
+  if (ret >= 0) {
+    api_name = zonegroup.api_name;
+  } else if (s->bucket_info.zonegroup != "default") {
+    api_name = s->bucket_info.zonegroup;
+  }
+
+  s->formatter->dump_format_ns("LocationConstraint", XMLNS_AWS_S3,
+                               "%s", api_name.c_str());
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Replies with a "VersioningConfiguration" element. Status and MfaDelete are
+// included only when `versioned` is set.
+void RGWGetBucketVersioning_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  Formatter *f = s->formatter;
+  f->open_object_section_in_ns("VersioningConfiguration", XMLNS_AWS_S3);
+  if (versioned) {
+    f->dump_string("Status", versioning_enabled ? "Enabled" : "Suspended");
+    f->dump_string("MfaDelete", mfa_enabled ? "Enabled" : "Disabled");
+  }
+  f->close_section();
+  rgw_flush_formatter_and_reset(s, f);
+}
+
+// Decoded form of a "VersioningConfiguration" request body.
+struct ver_config_status {
+  // Remains VersioningSuspended unless "Status" decodes to something else.
+  int status{VersioningSuspended};
+
+  enum MFAStatus {
+    MFA_UNKNOWN,
+    MFA_DISABLED,
+    MFA_ENABLED,
+  } mfa_status{MFA_UNKNOWN};
+  // Becomes -EINVAL when "MfaDelete" is decoded with an unrecognised value.
+  int retcode{0};
+
+  // Fills the fields above from the "Status" and "MfaDelete" children of obj.
+  void decode_xml(XMLObj *obj) {
+    string status_text;
+    RGWXMLDecoder::decode_xml("Status", status_text, obj);
+    if (status_text == "Enabled") {
+      status = VersioningEnabled;
+    } else if (status_text != "Suspended") {
+      status = VersioningStatusInvalid;
+    }
+
+    string mfa_text;
+    if (!RGWXMLDecoder::decode_xml("MfaDelete", mfa_text, obj)) {
+      // Nothing decoded: mfa_status and retcode keep their current values.
+      return;
+    }
+    if (mfa_text == "Enabled") {
+      mfa_status = MFA_ENABLED;
+    } else if (mfa_text == "Disabled") {
+      mfa_status = MFA_DISABLED;
+    } else {
+      retcode = -EINVAL;
+    }
+  }
+};
+
+int RGWSetBucketVersioning_ObjStore_S3::get_params() // Reads and decodes the VersioningConfiguration body; returns 0 or a negative error.
+{
+ int r = 0;
+ bufferlist data;
+ std::tie(r, data) =
+ rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+ if (r < 0) {
+ return r;
+ }
+ // The body has been read; complete the AWS4 auth check before using it.
+ r = do_aws4_auth_completion();
+ if (r < 0) {
+ return r;
+ }
+ // Parse the body as XML.
+ RGWXMLDecoder::XMLParser parser;
+ if (!parser.init()) {
+ ldout(s->cct, 0) << "ERROR: failed to initialize parser" << dendl;
+ return -EIO;
+ }
+ // buf is streamed into the log as a C string if parsing fails.
+ char* buf = data.c_str();
+ if (!parser.parse(buf, data.length(), 1)) {
+ ldout(s->cct, 10) << "NOTICE: failed to parse data: " << buf << dendl;
+ r = -EINVAL;
+ return r;
+ }
+ // Default-constructed: status suspended, MFA unknown, retcode 0.
+ ver_config_status status_conf;
+ // A false return from decode_xml is treated as bad input.
+ if (!RGWXMLDecoder::decode_xml("VersioningConfiguration", status_conf, &parser)) {
+ ldout(s->cct, 10) << "NOTICE: bad versioning config input" << dendl;
+ return -EINVAL;
+ }
+
+ if (!store->svc.zone->is_meta_master()) {
+ /* only need to keep this data around if we're not meta master */
+ in_data.append(data);
+ }
+ // From here on errors are accumulated in r and returned at the end.
+ versioning_status = status_conf.status;
+ if (versioning_status == VersioningStatusInvalid) {
+ r = -EINVAL;
+ }
+ // MFA handling still runs after an invalid status; the default branch can replace r with -EIO.
+ if (status_conf.mfa_status != ver_config_status::MFA_UNKNOWN) {
+ mfa_set_status = true;
+ switch (status_conf.mfa_status) {
+ case ver_config_status::MFA_DISABLED:
+ mfa_status = false;
+ break;
+ case ver_config_status::MFA_ENABLED:
+ mfa_status = true;
+ break;
+ default:
+ ldout(s->cct, 0) << "ERROR: RGWSetBucketVersioning_ObjStore_S3::get_params(): unexpected switch case mfa_status=" << status_conf.mfa_status << dendl;
+ r = -EIO;
+ }
+ } else if (status_conf.retcode < 0) { // retcode is consulted only while mfa_status is MFA_UNKNOWN
+ r = status_conf.retcode;
+ }
+ return r;
+}
+
+void RGWSetBucketVersioning_ObjStore_S3::send_response()
+{
+ if (op_ret)
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+}
+
+int RGWSetBucketWebsite_ObjStore_S3::get_params() // Reads, decodes and validates the WebsiteConfiguration body; returns 0 or a negative error.
+{
+ const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+ // Read the whole body, limited to max_size.
+ int r = 0;
+ bufferlist data;
+ std::tie(r, data) = rgw_rest_read_all_input(s, max_size, false);
+
+ if (r < 0) {
+ return r;
+ }
+ // The body has been read; complete the AWS4 auth check before using it.
+ r = do_aws4_auth_completion();
+ if (r < 0) {
+ return r;
+ }
+ // The raw body is kept in in_data unconditionally.
+ in_data.append(data);
+ // Parse the body as XML.
+ RGWXMLDecoder::XMLParser parser;
+ if (!parser.init()) {
+ ldout(s->cct, 0) << "ERROR: failed to initialize parser" << dendl;
+ return -EIO;
+ }
+ // buf is streamed into the log as a C string on the failure paths below.
+ char* buf = data.c_str();
+ if (!parser.parse(buf, data.length(), 1)) {
+ ldout(s->cct, 5) << "failed to parse xml: " << buf << dendl;
+ return -EINVAL;
+ }
+ // Decode failures surface as RGWXMLDecoder::err and are mapped to -EINVAL.
+ try {
+ RGWXMLDecoder::decode_xml("WebsiteConfiguration", website_conf, &parser, true);
+ } catch (RGWXMLDecoder::err& err) {
+ ldout(s->cct, 5) << "unexpected xml: " << buf << dendl;
+ return -EINVAL;
+ }
+ // Semantic checks: redirect-all needs a hostname; otherwise a non-empty index document suffix is required.
+ if (website_conf.is_redirect_all && website_conf.redirect_all.hostname.empty()) {
+ s->err.message = "A host name must be provided to redirect all requests (e.g. \"example.com\").";
+ ldout(s->cct, 5) << s->err.message << dendl;
+ return -EINVAL;
+ } else if (!website_conf.is_redirect_all && !website_conf.is_set_index_doc) {
+ s->err.message = "A value for IndexDocument Suffix must be provided if RedirectAllRequestsTo is empty";
+ ldout(s->cct, 5) << s->err.message << dendl;
+ return -EINVAL;
+ } else if (!website_conf.is_redirect_all && website_conf.is_set_index_doc &&
+ website_conf.index_doc_suffix.empty()) {
+ s->err.message = "The IndexDocument Suffix is not well formed";
+ ldout(s->cct, 5) << s->err.message << dendl;
+ return -EINVAL;
+ }
+ // Routing-rule limit: a negative configured value falls back to the built-in default of 50.
+#define WEBSITE_ROUTING_RULES_MAX_NUM 50
+ int max_num = s->cct->_conf->rgw_website_routing_rules_max_num;
+ if (max_num < 0) {
+ max_num = WEBSITE_ROUTING_RULES_MAX_NUM;
+ }
+ int routing_rules_num = website_conf.routing_rules.rules.size();
+ if (routing_rules_num > max_num) {
+ ldout(s->cct, 4) << "An website routing config can have up to "
+ << max_num
+ << " rules, request website routing rules num: "
+ << routing_rules_num << dendl;
+ op_ret = -ERR_INVALID_WEBSITE_ROUTING_RULES_ERROR; // NOTE(review): differs from the code returned below - confirm which one the caller ends up using
+ s->err.message = std::to_string(routing_rules_num) +" routing rules provided, the number of routing rules in a website configuration is limited to "
+ + std::to_string(max_num)
+ + ".";
+ return -ERR_INVALID_REQUEST;
+ }
+ // All checks passed.
+ return 0;
+}
+
+void RGWSetBucketWebsite_ObjStore_S3::send_response()
+{
+ if (op_ret < 0)
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+}
+
+// Replies to a website-configuration DELETE. A zero op_ret is rewritten in
+// place to STATUS_NO_CONTENT before the status is emitted.
+void RGWDeleteBucketWebsite_ObjStore_S3::send_response()
+{
+  if (!op_ret) {
+    op_ret = STATUS_NO_CONTENT;
+  }
+
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+}
+
+// Replies with the bucket's website configuration wrapped in a
+// "WebsiteConfiguration" element; no body is written when op_ret is negative.
+void RGWGetBucketWebsite_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  if (op_ret >= 0) {
+    s->formatter->open_object_section_in_ns("WebsiteConfiguration", XMLNS_AWS_S3);
+    s->bucket_info.website_conf.dump_xml(s->formatter);
+    s->formatter->close_section(); // WebsiteConfiguration
+    rgw_flush_formatter_and_reset(s, s->formatter);
+  }
+}
+
+// Emits the bucket's object count and size as the X-RGW-Object-Count and
+// X-RGW-Bytes-Used headers.
+static void dump_bucket_metadata(struct req_state *s, RGWBucketEnt& bucket)
+{
+  const auto object_count = static_cast<long long>(bucket.count);
+  const auto bytes_used = static_cast<long long>(bucket.size);
+
+  dump_header(s, "X-RGW-Object-Count", object_count);
+  dump_header(s, "X-RGW-Bytes-Used", bytes_used);
+}
+
+// Replies to a bucket stat: the metadata headers are added only when op_ret
+// is non-negative; the status is always derived from op_ret.
+void RGWStatBucket_ObjStore_S3::send_response()
+{
+  const bool have_stats = (op_ret >= 0);
+  if (have_stats) {
+    dump_bucket_metadata(s, bucket);
+  }
+
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+
+  end_header(s, this);
+  dump_start(s);
+}
+
+// Builds `s3policy` for the request. Without ACL headers the canned ACL is
+// used; with ACL headers the policy comes from those headers, and supplying a
+// canned ACL at the same time is rejected with -ERR_INVALID_REQUEST.
+static int create_s3_policy(struct req_state *s, RGWRados *store,
+                            RGWAccessControlPolicy_S3& s3policy,
+                            ACLOwner& owner)
+{
+  if (!s->has_acl_header) {
+    return s3policy.create_canned(owner, s->bucket_owner, s->canned_acl);
+  }
+
+  if (!s->canned_acl.empty()) {
+    return -ERR_INVALID_REQUEST;
+  }
+  return s3policy.create_from_headers(store, s->info.env, owner);
+}
+
+// XML node that captures its own data as `location_constraint` when the
+// element ends.
+class RGWLocationConstraint : public XMLObj
+{
+public:
+  RGWLocationConstraint() {}
+  ~RGWLocationConstraint() override {}
+
+  // Returns false for a null element name; otherwise stores the node's data
+  // and returns true.
+  bool xml_end(const char *el) override {
+    if (el == nullptr) {
+      return false;
+    }
+    location_constraint = get_data();
+    return true;
+  }
+
+  string location_constraint;
+};
+
+// XML node type that adds nothing on top of XMLObj.
+class RGWCreateBucketConfig : public XMLObj
+{
+public:
+  RGWCreateBucketConfig() {}
+
+  ~RGWCreateBucketConfig() override {}
+};
+
+// Parser for the create-bucket request body. Every element is represented by
+// a plain XMLObj.
+class RGWCreateBucketParser : public RGWXMLParser
+{
+  XMLObj *alloc_obj(const char *el) override {
+    return new XMLObj;
+  }
+
+public:
+  RGWCreateBucketParser() {}
+  ~RGWCreateBucketParser() override {}
+
+  // Copies the data of CreateBucketConfiguration/LocationConstraint into
+  // zone_group. Returns false (leaving zone_group untouched) when either
+  // element is missing.
+  bool get_location_constraint(string& zone_group) {
+    XMLObj *config = find_first("CreateBucketConfiguration");
+    XMLObj *constraint = config ? config->find_first("LocationConstraint")
+                                : nullptr;
+    if (!constraint) {
+      return false;
+    }
+
+    zone_group = constraint->get_data();
+    return true;
+  }
+};
+
+// Collects the parameters of a bucket-creation request: the ACL policy, the
+// optional CreateBucketConfiguration body (location constraint), the
+// placement rule / storage class and the object-lock header.
+// Returns 0 on success or a negative error code.
+int RGWCreateBucket_ObjStore_S3::get_params()
+{
+  RGWAccessControlPolicy_S3 s3policy(s->cct);
+
+  int r = create_s3_policy(s, store, s3policy, s->owner);
+  if (r < 0)
+    return r;
+
+  policy = s3policy;
+
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+
+  // Named read_ret rather than op_ret so the local no longer shadows the
+  // op_ret member used elsewhere in this class.
+  int read_ret = 0;
+  bufferlist data;
+  std::tie(read_ret, data) = rgw_rest_read_all_input(s, max_size, false);
+
+  // -ERR_LENGTH_REQUIRED is tolerated: processing continues with whatever
+  // data was returned.
+  if ((read_ret < 0) && (read_ret != -ERR_LENGTH_REQUIRED))
+    return read_ret;
+
+  const int auth_ret = do_aws4_auth_completion();
+  if (auth_ret < 0) {
+    return auth_ret;
+  }
+
+  in_data.append(data);
+
+  if (data.length()) {
+    RGWCreateBucketParser parser;
+
+    if (!parser.init()) {
+      ldout(s->cct, 0) << "ERROR: failed to initialize parser" << dendl;
+      return -EIO;
+    }
+
+    char* buf = data.c_str();
+    bool success = parser.parse(buf, data.length(), 1);
+    ldout(s->cct, 20) << "create bucket input data=" << buf << dendl;
+
+    if (!success) {
+      ldout(s->cct, 0) << "failed to parse input: " << buf << dendl;
+      return -EINVAL;
+    }
+
+    if (!parser.get_location_constraint(location_constraint)) {
+      ldout(s->cct, 0) << "provided input did not specify location constraint correctly" << dendl;
+      return -EINVAL;
+    }
+
+    ldout(s->cct, 10) << "create bucket location constraint: "
+                      << location_constraint << dendl;
+  }
+
+  // A constraint of the form "<location>:<placement>" carries a placement
+  // rule after the first ':'; split it off.
+  size_t pos = location_constraint.find(':');
+  if (pos != string::npos) {
+    placement_rule.init(location_constraint.substr(pos + 1), s->info.storage_class);
+    location_constraint = location_constraint.substr(0, pos);
+  } else {
+    placement_rule.storage_class = s->info.storage_class;
+  }
+
+  // The object-lock header must be "true" or "false" (case-insensitive).
+  auto iter = s->info.x_meta_map.find("x-amz-bucket-object-lock-enabled");
+  if (iter != s->info.x_meta_map.end()) {
+    const bool lock_requested = boost::algorithm::iequals(iter->second, "true");
+    if (!lock_requested && !boost::algorithm::iequals(iter->second, "false")) {
+      return -EINVAL;
+    }
+    obj_lock_enabled = lock_requested;
+  }
+  return 0;
+}
+
+// Replies to a bucket creation. -ERR_BUCKET_EXISTS is reset to 0 first. For a
+// system request with a non-negative result, the bucket info is returned as
+// JSON.
+void RGWCreateBucket_ObjStore_S3::send_response()
+{
+  if (op_ret == -ERR_BUCKET_EXISTS) {
+    op_ret = 0;
+  }
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s);
+
+  if (op_ret < 0 || !s->system_request) {
+    return;
+  }
+
+  JSONFormatter f; /* use json formatter for system requests output */
+
+  f.open_object_section("info");
+  encode_json("entry_point_object_ver", ep_objv, &f);
+  encode_json("object_ver", info.objv_tracker.read_version, &f);
+  encode_json("bucket_info", info, &f);
+  f.close_section();
+  rgw_flush_formatter_and_reset(s, &f);
+}
+
+// Replies to a bucket DELETE: a zero op_ret is reported as
+// STATUS_NO_CONTENT, anything else is passed through. op_ret itself is not
+// modified.
+void RGWDeleteBucket_ObjStore_S3::send_response()
+{
+  const int status = (op_ret == 0) ? STATUS_NO_CONTENT : op_ret;
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Copies query-string parameters whose lower-cased name starts with
+// "x-amz-meta-" into s->info.x_meta_map via add_amz_meta_header().
+static inline void map_qs_metadata(struct req_state* s)
+{
+  static const char meta_prefix[] = "x-amz-meta-";
+  const size_t meta_prefix_len = sizeof(meta_prefix) - 1;
+
+  /* merge S3 valid user metadata from the query-string into
+   * x_meta_map, which maps them to attributes */
+  const auto& params = const_cast<RGWHTTPArgs&>(s->info.args).get_params();
+  for (const auto& param : params) {
+    const std::string lowered = boost::algorithm::to_lower_copy(param.first);
+    // Anchored comparison at offset 0: true exactly when the name begins with
+    // the prefix.
+    if (lowered.compare(0, meta_prefix_len, meta_prefix) != 0) {
+      continue;
+    }
+    add_amz_meta_header(s->info.x_meta_map, lowered, param.second);
+  }
+}
+
+/*
+ * Gather and validate the S3-specific parameters of a PUT object request.
+ *
+ * In order this: requires a non-zero s->length, merges query-string metadata
+ * via map_qs_metadata(), builds the object ACL policy, records the
+ * conditional headers, parses x-amz-copy-source and x-amz-copy-source-range,
+ * x-amz-tagging, the object-lock headers, the uploadId/partNumber arguments
+ * and the append/position arguments, and finally defers to
+ * RGWPutObj_ObjStore::get_params().
+ *
+ * Returns the base class result on success, or a negative error code for the
+ * first check that fails.
+ */
+int RGWPutObj_ObjStore_S3::get_params()
+{
+  if (!s->length)
+    return -ERR_LENGTH_REQUIRED;
+
+  map<string, bufferlist> src_attrs;
+  size_t pos;
+  int ret;
+
+  map_qs_metadata(s);
+
+  RGWAccessControlPolicy_S3 s3policy(s->cct);
+  ret = create_s3_policy(s, store, s3policy, s->owner);
+  if (ret < 0)
+    return ret;
+
+  policy = s3policy;
+
+  if_match = s->info.env->get("HTTP_IF_MATCH");
+  if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH");
+  copy_source = url_decode(s->info.env->get("HTTP_X_AMZ_COPY_SOURCE", ""));
+  copy_source_range = s->info.env->get("HTTP_X_AMZ_COPY_SOURCE_RANGE");
+
+  /* handle x-amz-copy-source: [/][tenant:]bucket/object[?versionId=id] */
+  boost::string_view cs_view(copy_source);
+  if (! cs_view.empty()) {
+    if (cs_view[0] == '/')
+      cs_view.remove_prefix(1);
+    copy_source_bucket_name = cs_view.to_string();
+    pos = copy_source_bucket_name.find("/");
+    if (pos == std::string::npos) {
+      ldout(s->cct, 5) << "x-amz-copy-source bad format" << dendl;
+      return -EINVAL;
+    }
+    /* everything after the first '/' is the object (plus optional version) */
+    copy_source_object_name = copy_source_bucket_name.substr(pos + 1);
+    copy_source_bucket_name = copy_source_bucket_name.substr(0, pos);
+#define VERSION_ID_STR "?versionId="
+    pos = copy_source_object_name.find(VERSION_ID_STR);
+    if (pos == std::string::npos) {
+      copy_source_object_name = url_decode(copy_source_object_name);
+    } else {
+      /* sizeof() counts the terminating NUL, hence the - 1 */
+      copy_source_version_id =
+        copy_source_object_name.substr(pos + sizeof(VERSION_ID_STR) - 1);
+      copy_source_object_name =
+        url_decode(copy_source_object_name.substr(0, pos));
+    }
+    /* an optional "tenant:" prefix may qualify the bucket name */
+    pos = copy_source_bucket_name.find(":");
+    if (pos == std::string::npos) {
+      copy_source_tenant_name = s->src_tenant_name;
+    } else {
+      copy_source_tenant_name = copy_source_bucket_name.substr(0, pos);
+      copy_source_bucket_name = copy_source_bucket_name.substr(pos + 1);
+      if (copy_source_bucket_name.empty()) {
+        ldout(s->cct, 5) << "source bucket name is empty" << dendl;
+        return -EINVAL;
+      }
+    }
+    ret = store->get_bucket_info(*s->sysobj_ctx,
+                                 copy_source_tenant_name,
+                                 copy_source_bucket_name,
+                                 copy_source_bucket_info,
+                                 NULL, &src_attrs);
+    if (ret < 0) {
+      ldout(s->cct, 5) << __func__ << "(): get_bucket_info() returned ret=" << ret << dendl;
+      return ret;
+    }
+
+    /* handle x-amz-copy-source-range: "bytes=<first>-<last>" */
+
+    if (copy_source_range) {
+      string range = copy_source_range;
+      /* the value has to start with "bytes=" */
+      pos = range.find("bytes=");
+      if (pos != 0) {
+        ldout(s->cct, 5) << "x-amz-copy-source-range bad format" << dendl;
+        return -EINVAL;
+      }
+      /* 6 is the length of "bytes=" */
+      range = range.substr(6);
+      pos = range.find("-");
+      if (pos == std::string::npos) {
+        ldout(s->cct, 5) << "x-amz-copy-source-range bad format" << dendl;
+        return -EINVAL;
+      }
+      string first = range.substr(0, pos);
+      string last = range.substr(pos + 1);
+      /* Both bounds must be non-empty runs of decimal digits; an empty bound
+       * would otherwise be turned into 0 by strtoull() below. */
+      if (first.empty() || last.empty() ||
+          first.find_first_not_of("0123456789") != std::string::npos ||
+          last.find_first_not_of("0123456789") != std::string::npos)
+      {
+        ldpp_dout(this, 5) << "x-amz-copy-source-range bad format not an integer" << dendl;
+        return -EINVAL;
+      }
+      copy_source_range_fst = strtoull(first.c_str(), NULL, 10);
+      copy_source_range_lst = strtoull(last.c_str(), NULL, 10);
+      if (copy_source_range_fst > copy_source_range_lst)
+      {
+        ldpp_dout(this, 5) << "x-amz-copy-source-range bad format first number bigger than second" << dendl;
+        return -ERANGE;
+      }
+    }
+
+  } /* copy_source */
+
+  /* handle object tagging */
+  auto tag_str = s->info.env->get("HTTP_X_AMZ_TAGGING");
+  if (tag_str){
+    obj_tags = std::make_unique<RGWObjTags>();
+    ret = obj_tags->set_from_string(tag_str);
+    if (ret < 0){
+      ldout(s->cct,0) << "setting obj tags failed with " << ret << dendl;
+      if (ret == -ERR_INVALID_TAG){
+        ret = -EINVAL; //s3 returns only -EINVAL for PUT requests
+      }
+
+      return ret;
+    }
+  }
+
+  //handle object lock
+  auto obj_lock_mode_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_MODE");
+  auto obj_lock_date_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_RETAIN_UNTIL_DATE");
+  auto obj_legal_hold_str = s->info.env->get("HTTP_X_AMZ_OBJECT_LOCK_LEGAL_HOLD");
+  if (obj_lock_mode_str && obj_lock_date_str) {
+    /* the retain-until date must parse and must lie in the future */
+    boost::optional<ceph::real_time> date = ceph::from_iso_8601(obj_lock_date_str);
+    if (boost::none == date || ceph::real_clock::to_time_t(*date) <= ceph_clock_now()) {
+      ldpp_dout(this,0) << "invalid x-amz-object-lock-retain-until-date value" << dendl;
+      return -EINVAL;
+    }
+    if (strcmp(obj_lock_mode_str, "GOVERNANCE") != 0 && strcmp(obj_lock_mode_str, "COMPLIANCE") != 0) {
+      ldpp_dout(this,0) << "invalid x-amz-object-lock-mode value" << dendl;
+      return -EINVAL;
+    }
+    obj_retention = new RGWObjectRetention(obj_lock_mode_str, *date);
+  } else if ((obj_lock_mode_str && !obj_lock_date_str) || (!obj_lock_mode_str && obj_lock_date_str)) {
+    /* exactly one of the two headers was supplied */
+    ldpp_dout(this,0) << "need both x-amz-object-lock-mode and x-amz-object-lock-retain-until-date " << dendl;
+    return -EINVAL;
+  }
+  if (obj_legal_hold_str) {
+    if (strcmp(obj_legal_hold_str, "ON") != 0 && strcmp(obj_legal_hold_str, "OFF") != 0) {
+      ldpp_dout(this,0) << "invalid x-amz-object-lock-legal-hold value" << dendl;
+      return -EINVAL;
+    }
+    obj_legal_hold = new RGWObjectLegalHold(obj_legal_hold_str);
+  }
+  if (!s->bucket_info.obj_lock_enabled() && (obj_retention || obj_legal_hold)) {
+    ldpp_dout(this, 0) << "ERROR: object retention or legal hold can't be set if bucket object lock not configured" << dendl;
+    return -ERR_INVALID_REQUEST;
+  }
+
+  /* multipart part upload: uploadId and partNumber go together */
+  multipart_upload_id = s->info.args.get("uploadId");
+  multipart_part_str = s->info.args.get("partNumber");
+  if (!multipart_part_str.empty()) {
+    string err;
+    multipart_part_num = strict_strtol(multipart_part_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      ldpp_dout(s, 10) << "bad part number: " << multipart_part_str << ": " << err << dendl;
+      return -EINVAL;
+    }
+  } else if (!multipart_upload_id.empty()) {
+    /* this branch means an uploadId arrived without a partNumber */
+    ldpp_dout(s, 10) << "multipart upload id with no part number" << dendl;
+    return -EINVAL;
+  }
+
+  /* append upload: requires a numeric "position" argument */
+  append = s->info.args.exists("append");
+  if (append) {
+    string pos_str = s->info.args.get("position");
+    /* strtoull() accepts junk ("abc" -> 0) and wraps negative input
+     * ("-1" -> ULLONG_MAX), so only a plain run of digits is allowed. */
+    if (pos_str.empty() ||
+        pos_str.find_first_not_of("0123456789") != std::string::npos) {
+      ldpp_dout(this, 10) << "bad append position: " << pos_str << dendl;
+      return -EINVAL;
+    }
+    position = strtoull(pos_str.c_str(), NULL, 10);
+  }
+
+  return RGWPutObj_ObjStore::get_params();
+}
+
+/*
+ * Read request body data into bl through the base class. When the base
+ * class returns 0, do_aws4_auth_completion() is run and a negative result
+ * from it replaces the return value.
+ */
+int RGWPutObj_ObjStore_S3::get_data(bufferlist& bl)
+{
+  const int read_ret = RGWPutObj_ObjStore::get_data(bl);
+  if (read_ret != 0) {
+    /* non-zero results are passed through untouched */
+    return read_ret;
+  }
+
+  const int auth_ret = do_aws4_auth_completion();
+  return (auth_ret < 0) ? auth_ret : read_ret;
+}
+
+/*
+ * Translate a configured HTTP success status (201 or 204) into the matching
+ * internal STATUS_* code; any other value yields 0.
+ */
+static int get_success_retcode(int code)
+{
+  if (code == 201) {
+    return STATUS_CREATED;
+  }
+  if (code == 204) {
+    return STATUS_NO_CONTENT;
+  }
+  return 0;
+}
+
+/*
+ * Emit the response for a PUT object request.
+ *
+ * On error only the error status is dumped. On success the headers differ
+ * between a plain upload (ETag, zero content length, version id, expiration,
+ * encryption headers) and a request carrying a copy source, which gets an
+ * XML "CopyPartResult" body and returns early.
+ */
+void RGWPutObj_ObjStore_S3::send_response()
+{
+  if (op_ret) {
+    set_req_state_err(s, op_ret);
+    dump_errno(s);
+  } else {
+    /* an operator-configured success status overrides the default */
+    if (s->cct->_conf->rgw_s3_success_create_obj_status) {
+      op_ret = get_success_retcode(
+        s->cct->_conf->rgw_s3_success_create_obj_status);
+      set_req_state_err(s, op_ret);
+    }
+
+    string expiration = get_s3_expiration_header(s, mtime);
+
+    if (!copy_source.empty()) {
+      /* request with a copy source: reply with an XML document */
+      dump_errno(s);
+      dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+      dump_header_if_nonempty(s, "x-amz-expiration", expiration);
+      end_header(s, this, "application/xml");
+      dump_start(s);
+
+      /* render mtime as an ISO 8601 UTC timestamp */
+      utime_t mtime_ut(mtime);
+      time_t epoch_secs = (time_t)mtime_ut.sec();
+      struct tm utc_tm;
+      gmtime_r(&epoch_secs, &utc_tm);
+      char time_buf[TIME_BUF_SIZE];
+
+      s->formatter->open_object_section_in_ns("CopyPartResult",
+                                              "http://s3.amazonaws.com/doc/2006-03-01/");
+      if (strftime(time_buf, sizeof(time_buf), "%Y-%m-%dT%T.000Z", &utc_tm) > 0) {
+        s->formatter->dump_string("LastModified", time_buf);
+      }
+      s->formatter->dump_string("ETag", etag);
+      s->formatter->close_section();
+      rgw_flush_formatter_and_reset(s, s->formatter);
+      return;
+    }
+
+    /* plain upload: headers only, no body */
+    dump_errno(s);
+    dump_etag(s, etag);
+    dump_content_length(s, 0);
+    dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+    dump_header_if_nonempty(s, "x-amz-expiration", expiration);
+    for (auto &hdr : crypt_http_responses)
+      dump_header(s, hdr.first, hdr.second);
+  }
+
+  /* append uploads report where the next append has to start */
+  const bool report_append_pos =
+    append && (op_ret == 0 || op_ret == -ERR_POSITION_NOT_EQUAL_TO_LENGTH);
+  if (report_append_pos) {
+    dump_header(s, "x-rgw-next-append-position", cur_accounted_size);
+  }
+  if (s->system_request && !real_clock::is_zero(mtime)) {
+    dump_epoch_header(s, "Rgwx-Mtime", mtime);
+  }
+  end_header(s, this);
+}
+
+/*
+ * Prepare a read operation on obj with attrs registered as the destination
+ * for the object's attributes; returns the result of Read::prepare().
+ */
+static inline int get_obj_attrs(RGWRados *store, struct req_state *s, rgw_obj& obj, map<string, bufferlist>& attrs)
+{
+  RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+  RGWRados::Object target(store, s->bucket_info, obj_ctx, obj);
+
+  RGWRados::Object::Read reader(&target);
+  reader.params.attrs = &attrs;
+  return reader.prepare();
+}
+
+/*
+ * Encode a std::string value into a bufferlist and emplace it into attrs
+ * under key. std::map::emplace leaves an already present key untouched.
+ */
+static inline void set_attr(map<string, bufferlist>& attrs, const char* key, const std::string& value)
+{
+  bufferlist encoded;
+  encode(value, encoded);
+  attrs.emplace(key, std::move(encoded));
+}
+
+/*
+ * C-string overload: encode value into a bufferlist and emplace it into
+ * attrs under key. std::map::emplace leaves an already present key untouched.
+ */
+static inline void set_attr(map<string, bufferlist>& attrs, const char* key, const char* value)
+{
+  bufferlist encoded;
+  encode(value, encoded);
+  attrs.emplace(key, std::move(encoded));
+}
+
+/*
+ * Build a decrypting read filter in front of cb, based on the encryption
+ * information found in attrs.
+ *
+ * *filter is assigned only when rgw_s3_prepare_decrypt() succeeds, it yields
+ * a block cipher, manifest_bl is non-null and the manifest is read
+ * successfully. Returns the result of the last step that ran.
+ */
+int RGWPutObj_ObjStore_S3::get_decrypt_filter(
+    std::unique_ptr<RGWGetObj_Filter>* filter,
+    RGWGetObj_Filter* cb,
+    map<string, bufferlist>& attrs,
+    bufferlist* manifest_bl)
+{
+  /* response headers produced while preparing are not wanted here */
+  std::map<std::string, std::string> crypt_http_responses_unused;
+
+  std::unique_ptr<BlockCrypt> block_crypt;
+  int res = rgw_s3_prepare_decrypt(s, attrs, &block_crypt, crypt_http_responses_unused);
+  if (res != 0 || block_crypt == nullptr) {
+    return res;
+  }
+
+  auto decrypt = std::make_unique<RGWGetObj_BlockDecrypt>(s->cct, cb, std::move(block_crypt));
+  if (manifest_bl == nullptr) {
+    /* NOTE(review): without a manifest no filter is installed at all */
+    return res;
+  }
+
+  res = decrypt->read_manifest(*manifest_bl);
+  if (res == 0) {
+    *filter = std::move(decrypt);
+  }
+  return res;
+}
+
+/*
+ * Install an encrypting write filter in front of cb when encryption applies.
+ *
+ * For a multipart part, the attributes of the upload's meta object are read
+ * and the cipher is set up through rgw_s3_prepare_decrypt(); otherwise
+ * rgw_s3_prepare_encrypt() is used with this request's attrs. *filter is
+ * left untouched when no block cipher results.
+ */
+int RGWPutObj_ObjStore_S3::get_encrypt_filter(
+    std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+    rgw::putobj::DataProcessor *cb)
+{
+  std::unique_ptr<BlockCrypt> block_crypt;
+  int res = 0;
+
+  if (multipart_upload_id.empty()) {
+    res = rgw_s3_prepare_encrypt(s, attrs, nullptr, &block_crypt, crypt_http_responses);
+  } else {
+    RGWMPObj mp(s->object.name, multipart_upload_id);
+    rgw_obj meta_obj;
+    meta_obj.init_ns(s->bucket, mp.get_meta(), RGW_OBJ_NS_MULTIPART);
+    meta_obj.set_in_extra_data(true);
+
+    map<string, bufferlist> xattrs;
+    res = get_obj_attrs(store, s, meta_obj, xattrs);
+    if (res != 0) {
+      return res;
+    }
+    /* We are adding to existing object.
+     * We use crypto mode that configured as if we were decrypting. */
+    res = rgw_s3_prepare_decrypt(s, xattrs, &block_crypt, crypt_http_responses);
+    /* it is ok, to not have encryption at all */
+  }
+
+  if (res == 0 && block_crypt != nullptr) {
+    filter->reset(new RGWPutObj_BlockEncrypt(s->cct, cb, std::move(block_crypt)));
+  }
+  return res;
+}
+
+/*
+ * Replace the first "${filename}" placeholder in key with the uploaded
+ * file's name (the filename member). key is left unchanged when it contains
+ * no placeholder.
+ */
+void RGWPostObj_ObjStore_S3::rebuild_key(string& key)
+{
+  static const string var = "${filename}";
+  /* keep the unsigned result of find(): narrowing it to int and testing
+   * "< 0" only works by accident of the npos -> -1 conversion */
+  const string::size_type pos = key.find(var);
+  if (pos == string::npos)
+    return;
+
+  key.replace(pos, var.size(), filename);
+}
+
+/* Returns the name of the object key currently held in the request state. */
+std::string RGWPostObj_ObjStore_S3::get_current_filename() const
+{
+  const std::string& object_name = s->object.name;
+  return object_name;
+}
+
+/* Returns the content_type member (filled from the form's Content-Type part). */
+std::string RGWPostObj_ObjStore_S3::get_current_content_type() const
+{
+  const std::string& type = content_type;
+  return type;
+}
+
+/*
+ * Parse the multipart/form-data fields of a browser-based POST upload.
+ *
+ * Form parts are read until the "file" part is reached. Every earlier part
+ * is stored in parts and exported to the policy environment env. Afterwards
+ * the object key is resolved (including "${filename}" substitution), the
+ * Content-Type, x-amz-meta-* and website-redirect parts are turned into
+ * attributes, and get_policy() / get_tags() are run.
+ *
+ * Returns 0 on success or a negative error code; err_msg is set for the
+ * failures detected directly in this function.
+ */
+int RGWPostObj_ObjStore_S3::get_params()
+{
+  op_ret = RGWPostObj_ObjStore::get_params();
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  map_qs_metadata(s);
+
+  ldout(s->cct, 20) << "adding bucket to policy env: " << s->bucket.name
+                    << dendl;
+  env.add_var("bucket", s->bucket.name);
+
+  bool done;
+  do {
+    struct post_form_part part;
+    int r = read_form_part_header(&part, done);
+    if (r < 0)
+      return r;
+
+    /* only walk the fields when the output would actually be gathered */
+    if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+      ldout(s->cct, 20) << "read part header -- part.name="
+                        << part.name << dendl;
+
+      for (const auto& pair : part.fields) {
+        ldout(s->cct, 20) << "field.name=" << pair.first << dendl;
+        ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl;
+        ldout(s->cct, 20) << "field.params:" << dendl;
+
+        for (const auto& param_pair : pair.second.params) {
+          ldout(s->cct, 20) << " " << param_pair.first
+                            << " -> " << param_pair.second << dendl;
+        }
+      }
+    }
+
+    if (done) { /* unexpected here */
+      err_msg = "Malformed request";
+      return -EINVAL;
+    }
+
+    if (stringcasecmp(part.name, "file") == 0) { /* beginning of data transfer */
+      struct post_part_field& field = part.fields["Content-Disposition"];
+      auto iter = field.params.find("filename");
+      if (iter != field.params.end()) {
+        filename = iter->second;
+      }
+      parts[part.name] = part;
+      break;
+    }
+
+    bool boundary;
+    uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+    r = read_data(part.data, chunk_size, boundary, done);
+    if (r < 0 || !boundary) {
+      err_msg = "Couldn't find boundary";
+      return -EINVAL;
+    }
+    parts[part.name] = part;
+    /* named part_value so it does not shadow the part_str() helper */
+    string part_value(part.data.c_str(), part.data.length());
+    env.add_var(part.name, part_value);
+  } while (!done);
+
+  string object_str;
+  if (!part_str(parts, "key", &object_str)) {
+    err_msg = "Key not specified";
+    return -EINVAL;
+  }
+
+  s->object = rgw_obj_key(object_str);
+
+  rebuild_key(s->object.name);
+
+  if (s->object.empty()) {
+    err_msg = "Empty object name";
+    return -EINVAL;
+  }
+
+  env.add_var("key", s->object.name);
+
+  part_str(parts, "Content-Type", &content_type);
+
+  /* AWS permits POST without Content-Type: http://tracker.ceph.com/issues/20201 */
+  if (! content_type.empty()) {
+    env.add_var("Content-Type", content_type);
+  }
+
+  /* Store one form part as the attribute RGW_ATTR_PREFIX + <part name>.
+   * The value is copied into a string first so that it can be appended
+   * together with its terminating NUL. */
+  auto store_part_as_attr = [this](const string& part_name, bufferlist& data) {
+    string attr_name = RGW_ATTR_PREFIX;
+    attr_name.append(part_name);
+
+    /* need to null terminate it */
+    const string value(data.c_str(), data.length());
+
+    bufferlist attr_bl;
+    attr_bl.append(value.c_str(), value.size() + 1);
+
+    attrs[attr_name] = attr_bl;
+  };
+
+  /* parts is ordered case-insensitively, so all x-amz-meta-* parts follow
+   * the bare prefix contiguously; stop at the first name without it */
+  for (auto piter = parts.upper_bound(RGW_AMZ_META_PREFIX);
+       piter != parts.end(); ++piter) {
+    const string& n = piter->first;
+    if (strncasecmp(n.c_str(), RGW_AMZ_META_PREFIX,
+                    sizeof(RGW_AMZ_META_PREFIX) - 1) != 0)
+      break;
+
+    store_part_as_attr(n, piter->second.data);
+  }
+
+  auto redirect_iter = parts.find(RGW_AMZ_WEBSITE_REDIRECT_LOCATION);
+  if (redirect_iter != parts.end()) {
+    store_part_as_attr(redirect_iter->first, redirect_iter->second.data);
+  }
+
+  int r = get_policy();
+  if (r < 0)
+    return r;
+
+  r = get_tags();
+  if (r < 0)
+    return r;
+
+  min_len = post_policy.min_length;
+  max_len = post_policy.max_length;
+
+  return 0;
+}
+
+/*
+ * Parse the optional "tagging" form part (an XML Tagging document) and
+ * store the encoded tag set in attrs[RGW_ATTR_TAGS].
+ *
+ * Returns 0 when the part is absent or was stored, and a negative error
+ * code (with err_msg set) when the XML cannot be parsed or converted.
+ */
+int RGWPostObj_ObjStore_S3::get_tags()
+{
+  string tags_str;
+  if (!part_str(parts, "tagging", &tags_str)) {
+    /* no tagging part supplied: nothing to do */
+    return 0;
+  }
+
+  RGWXMLParser parser;
+  if (!parser.init()){
+    ldout(s->cct, 0) << "Couldn't init RGWObjTags XML parser" << dendl;
+    err_msg = "Server couldn't process the request";
+    return -EINVAL; // TODO: This class of errors in rgw code should be a 5XX error
+  }
+  if (!parser.parse(tags_str.c_str(), tags_str.size(), 1)) {
+    ldout(s->cct,0 ) << "Invalid Tagging XML" << dendl;
+    err_msg = "Invalid Tagging XML";
+    return -EINVAL;
+  }
+
+  RGWObjTagging_S3 tagging;
+
+  try {
+    RGWXMLDecoder::decode_xml("Tagging", tagging, &parser);
+  } catch (RGWXMLDecoder::err& err) {
+    ldout(s->cct, 5) << "Malformed tagging request: " << err << dendl;
+    /* set a message here too, as the sibling failure paths above do */
+    err_msg = "Malformed tagging request";
+    return -EINVAL;
+  }
+
+  RGWObjTags obj_tags;
+  int r = tagging.rebuild(obj_tags);
+  if (r < 0) {
+    if (err_msg.empty()) {
+      err_msg = "Invalid tag set";
+    }
+    return r;
+  }
+
+  bufferlist tags_bl;
+  obj_tags.encode(tags_bl);
+  ldout(s->cct, 20) << "Read " << obj_tags.count() << " tags" << dendl;
+  attrs[RGW_ATTR_TAGS] = tags_bl;
+
+  return 0;
+}
+
+/*
+ * Authenticate a browser-based POST upload and evaluate its POST policy.
+ *
+ * When a "policy" form part exists, the signature-related parts (AWS v4 or
+ * v2) are collected, the request is authenticated through the S3 POST auth
+ * strategy, the base64 policy is decoded and parsed, and the policy is
+ * checked against env. When no policy part exists this is only logged.
+ * In both cases a canned ACL is finally built from the optional "acl" part.
+ *
+ * Returns 0 on success, -EACCES when authentication fails, the result of
+ * post_policy.check() when the policy check fails, and -EINVAL otherwise.
+ */
+int RGWPostObj_ObjStore_S3::get_policy()
+{
+  if (part_bl(parts, "policy", &s->auth.s3_postobj_creds.encoded_policy)) {
+    /* x-amz-algorithm handling */
+    using rgw::auth::s3::AWS4_HMAC_SHA256_STR;
+    bool use_sigv4 = false;
+    if ((part_str(parts, "x-amz-algorithm", &s->auth.s3_postobj_creds.x_amz_algorithm)) &&
+        (s->auth.s3_postobj_creds.x_amz_algorithm == AWS4_HMAC_SHA256_STR)) {
+      ldout(s->cct, 0) << "Signature verification algorithm AWS v4 (AWS4-HMAC-SHA256)" << dendl;
+      use_sigv4 = true;
+    } else {
+      ldout(s->cct, 0) << "Signature verification algorithm AWS v2" << dendl;
+    }
+
+    // collect the form parts the chosen signature version requires
+    if (!use_sigv4) {
+      /* AWS2 */
+
+      if (!part_str(parts, "AWSAccessKeyId",
+                    &s->auth.s3_postobj_creds.access_key)) {
+        ldout(s->cct, 0) << "No S3 aws2 access key found!" << dendl;
+        err_msg = "Missing aws2 access key";
+        return -EINVAL;
+      }
+
+      if (!part_str(parts, "signature", &s->auth.s3_postobj_creds.signature)) {
+        ldout(s->cct, 0) << "No aws2 signature found!" << dendl;
+        err_msg = "Missing aws2 signature";
+        return -EINVAL;
+      }
+    } else {
+      /* AWS4 */
+
+      /* x-amz-credential handling */
+      if (!part_str(parts, "x-amz-credential",
+                    &s->auth.s3_postobj_creds.x_amz_credential)) {
+        ldout(s->cct, 0) << "No S3 aws4 credential found!" << dendl;
+        err_msg = "Missing aws4 credential";
+        return -EINVAL;
+      }
+
+      /* x-amz-signature handling */
+      if (!part_str(parts, "x-amz-signature",
+                    &s->auth.s3_postobj_creds.signature)) {
+        ldout(s->cct, 0) << "No aws4 signature found!" << dendl;
+        err_msg = "Missing aws4 signature";
+        return -EINVAL;
+      }
+
+      /* x-amz-date handling: only its presence is verified here */
+      std::string amz_date;
+      if (!part_str(parts, "x-amz-date", &amz_date)) {
+        ldout(s->cct, 0) << "No aws4 date found!" << dendl;
+        err_msg = "Missing aws4 date";
+        return -EINVAL;
+      }
+    }
+
+    /* the security token part is optional */
+    part_str(parts, "x-amz-security-token", &s->auth.s3_postobj_creds.x_amz_security_token);
+
+    /* FIXME: this is a makeshift solution. The browser upload authentication will be
+     * handled by an instance of rgw::auth::Completer spawned in Handler's authorize()
+     * method. */
+    const int auth_ret = rgw::auth::Strategy::apply(this, auth_registry_ptr->get_s3_post(), s);
+    if (auth_ret != 0) {
+      return -EACCES;
+    }
+
+    /* Populate the owner info. */
+    s->owner.set_id(s->user->user_id);
+    s->owner.set_name(s->user->display_name);
+    ldout(s->cct, 20) << "Successful Signature Verification!" << dendl;
+
+    ceph::bufferlist policy_bl;
+    try {
+      policy_bl.decode_base64(s->auth.s3_postobj_creds.encoded_policy);
+    } catch (buffer::error& err) {
+      ldout(s->cct, 0) << "failed to decode_base64 policy" << dendl;
+      err_msg = "Could not decode policy";
+      return -EINVAL;
+    }
+
+    policy_bl.append('\0'); // NULL terminate
+    ldout(s->cct, 20) << "POST policy: " << policy_bl.c_str() << dendl;
+
+    int rc = post_policy.from_json(policy_bl, err_msg);
+    if (rc < 0) {
+      if (err_msg.empty()) {
+        err_msg = "Failed to parse policy";
+      }
+      ldout(s->cct, 0) << "failed to parse policy" << dendl;
+      return -EINVAL;
+    }
+
+    /* mark the signature-related variables as checked before evaluating */
+    if (use_sigv4) {
+      /* AWS4 */
+      post_policy.set_var_checked("x-amz-signature");
+    } else {
+      /* AWS2 */
+      post_policy.set_var_checked("AWSAccessKeyId");
+      post_policy.set_var_checked("signature");
+    }
+    post_policy.set_var_checked("policy");
+
+    rc = post_policy.check(&env, err_msg);
+    if (rc < 0) {
+      if (err_msg.empty()) {
+        err_msg = "Policy check failed";
+      }
+      ldout(s->cct, 0) << "policy check failed" << dendl;
+      return rc;
+    }
+
+  } else {
+    ldout(s->cct, 0) << "No attached policy found!" << dendl;
+  }
+
+  /* build the object ACL from the optional "acl" form part */
+  string acl_name;
+  part_str(parts, "acl", &acl_name);
+
+  RGWAccessControlPolicy_S3 s3policy(s->cct);
+  ldout(s->cct, 20) << "canned_acl=" << acl_name << dendl;
+  if (s3policy.create_canned(s->owner, s->bucket_owner, acl_name) < 0) {
+    err_msg = "Bad canned ACLs";
+    return -EINVAL;
+  }
+
+  policy = s3policy;
+
+  return 0;
+}
+
+/*
+ * Drain the form parts that follow the file data: each remaining part's
+ * header and data are read and discarded until the form is done.
+ *
+ * Returns 0 on success, the error from read_form_part_header(), or -EINVAL
+ * when a part's data cannot be read up to a boundary.
+ */
+int RGWPostObj_ObjStore_S3::complete_get_params()
+{
+  bool done;
+  do {
+    struct post_form_part part;
+    int r = read_form_part_header(&part, done);
+    if (r < 0) {
+      return r;
+    }
+
+    bool boundary;
+    uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+    r = read_data(part.data, chunk_size, boundary, done);
+    if (r < 0 || !boundary) {
+      return -EINVAL;
+    }
+
+    /* Just reading the data but not storing any results of that. */
+  } while (!done);
+
+  return 0;
+}
+
+/*
+ * Read up to one chunk of the uploaded file's data into bl.
+ *
+ * again is set to true while no boundary has been reached. When the
+ * boundary is hit but the form is not finished, the trailing parts are
+ * drained through complete_get_params(). Returns bl.length() or a negative
+ * error code.
+ */
+int RGWPostObj_ObjStore_S3::get_data(ceph::bufferlist& bl, bool& again)
+{
+  const uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+
+  bool hit_boundary;
+  bool form_done;
+  int r = read_data(bl, chunk_size, hit_boundary, form_done);
+  if (r < 0) {
+    return r;
+  }
+
+  if (hit_boundary && !form_done) {
+    /* Reached end of data, let's drain the rest of the params */
+    r = complete_get_params();
+    if (r < 0) {
+      return r;
+    }
+  }
+
+  again = !hit_boundary;
+  return bl.length();
+}
+
+/*
+ * Emit the response for a browser-based POST upload.
+ *
+ * On success the status is chosen from the form: "success_action_redirect"
+ * produces a redirect carrying bucket/key/etag query parameters,
+ * "success_action_status" selects 200/201 (anything else becomes 204), and
+ * without either the reply is 204. Only a 201 reply carries the XML
+ * "PostResponse" body.
+ */
+void RGWPostObj_ObjStore_S3::send_response()
+{
+  if (op_ret == 0 && parts.count("success_action_redirect")) {
+    string redirect;
+    part_str(parts, "success_action_redirect", &redirect);
+
+    /* the etag is passed along wrapped in double quotes */
+    string quoted_etag = "\"";
+    quoted_etag.append(etag);
+    quoted_etag.append("\"");
+
+    string tenant_enc;
+    string bucket_enc;
+    string key_enc;
+    string etag_enc;
+
+    url_encode(s->bucket_tenant, tenant_enc); /* surely overkill, but cheap */
+    url_encode(s->bucket_name, bucket_enc);
+    url_encode(s->object.name, key_enc);
+    url_encode(quoted_etag, etag_enc);
+
+    if (s->bucket_tenant.empty()) {
+      redirect.append("?bucket=");
+      redirect.append(bucket_enc);
+    } else {
+      /*
+       * What we really would like is to qualify the bucket name, so
+       * that the client could simply copy it and paste into next request.
+       * Unfortunately, in S3 we cannot know if the client will decide
+       * to come through DNS, with "bucket.tenant" syntax, or through
+       * URL with "tenant\bucket" syntax. Therefore, we provide the
+       * tenant separately.
+       */
+      redirect.append("?tenant=");
+      redirect.append(tenant_enc);
+      redirect.append("&bucket=");
+      redirect.append(bucket_enc);
+    }
+    redirect.append("&key=");
+    redirect.append(key_enc);
+    redirect.append("&etag=");
+    redirect.append(etag_enc);
+
+    int utf8_ret = check_utf8(redirect.c_str(), redirect.size());
+    if (utf8_ret < 0) {
+      op_ret = utf8_ret;
+      goto emit;
+    }
+    dump_redirect(s, redirect);
+    op_ret = STATUS_REDIRECT;
+  } else if (op_ret == 0 && parts.count("success_action_status")) {
+    string status_string;
+    part_str(parts, "success_action_status", &status_string);
+
+    uint32_t status_int;
+    int conv_ret = stringtoul(status_string, &status_int);
+    if (conv_ret < 0) {
+      op_ret = conv_ret;
+      goto emit;
+    }
+
+    /* 200 keeps op_ret at 0, 201 -> created, everything else -> 204 */
+    if (status_int == 201) {
+      op_ret = STATUS_CREATED;
+    } else if (status_int != 200) {
+      op_ret = STATUS_NO_CONTENT;
+    }
+  } else if (! op_ret) {
+    op_ret = STATUS_NO_CONTENT;
+  }
+
+emit:
+  if (op_ret == STATUS_CREATED) {
+    for (auto &hdr : crypt_http_responses)
+      dump_header(s, hdr.first, hdr.second);
+
+    s->formatter->open_object_section("PostResponse");
+    std::string base_uri = compute_domain_uri(s);
+    if (s->bucket_tenant.empty()) {
+      s->formatter->dump_format("Location", "%s/%s/%s",
+                                base_uri.c_str(),
+                                url_encode(s->bucket_name).c_str(),
+                                url_encode(s->object.name).c_str());
+    } else {
+      s->formatter->dump_format("Location", "%s/%s:%s/%s",
+                                base_uri.c_str(),
+                                url_encode(s->bucket_tenant).c_str(),
+                                url_encode(s->bucket_name).c_str(),
+                                url_encode(s->object.name).c_str());
+      s->formatter->dump_string("Tenant", s->bucket_tenant);
+    }
+    s->formatter->dump_string("Bucket", s->bucket_name);
+    s->formatter->dump_string("Key", s->object.name);
+    s->formatter->dump_string("ETag", etag);
+    s->formatter->close_section();
+  }
+
+  s->err.message = err_msg;
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  if (op_ret >= 0) {
+    dump_content_length(s, s->formatter->get_len());
+  }
+  end_header(s, this);
+
+  /* only the 201 reply has a body to flush */
+  if (op_ret == STATUS_CREATED) {
+    rgw_flush_formatter_and_reset(s, s->formatter);
+  }
+}
+
+/*
+ * Install an encrypting write filter in front of cb when
+ * rgw_s3_prepare_encrypt(), fed with this request's attrs and form parts,
+ * yields a block cipher. Returns the prepare call's result.
+ */
+int RGWPostObj_ObjStore_S3::get_encrypt_filter(
+    std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+    rgw::putobj::DataProcessor *cb)
+{
+  std::unique_ptr<BlockCrypt> block_crypt;
+  const int res = rgw_s3_prepare_encrypt(s, attrs, &parts, &block_crypt,
+                                         crypt_http_responses);
+  if (res != 0 || block_crypt == nullptr) {
+    /* failure, or no encryption requested: leave *filter untouched */
+    return res;
+  }
+
+  filter->reset(new RGWPutObj_BlockEncrypt(s->cct, cb, std::move(block_crypt)));
+  return res;
+}
+
+/*
+ * Read the optional parameters of a DELETE object request: the system-only
+ * "no-precondition-error" argument, the
+ * x-amz-delete-if-unmodified-since header and the
+ * x-amz-bypass-governance-retention header.
+ *
+ * Returns 0, or -EINVAL when the if-unmodified-since date cannot be parsed.
+ */
+int RGWDeleteObj_ObjStore_S3::get_params()
+{
+  if (s->system_request) {
+    s->info.args.get_bool(RGW_SYS_PARAM_PREFIX "no-precondition-error", &no_precondition_error, false);
+  }
+
+  if (const char *if_unmod = s->info.env->get("HTTP_X_AMZ_DELETE_IF_UNMODIFIED_SINCE")) {
+    const std::string date_str = url_decode(if_unmod);
+    uint64_t epoch;
+    uint64_t nsec;
+    if (utime_t::parse_date(date_str, &epoch, &nsec) < 0) {
+      ldout(s->cct, 10) << "failed to parse time: " << date_str << dendl;
+      return -EINVAL;
+    }
+    unmod_since = utime_t(epoch, nsec).to_real_time();
+  }
+
+  if (const char *bypass_hdr = s->info.env->get("HTTP_X_AMZ_BYPASS_GOVERNANCE_RETENTION")) {
+    /* only a case-insensitive "true" enables the bypass */
+    const std::string bypass_val = url_decode(bypass_hdr);
+    bypass_governance_mode = boost::algorithm::iequals(bypass_val, "true");
+  }
+
+  return 0;
+}
+
+/*
+ * Emit the DELETE object response. Success and -ENOENT are both reported
+ * as STATUS_NO_CONTENT; the version id and delete-marker headers are added
+ * when set.
+ */
+void RGWDeleteObj_ObjStore_S3::send_response()
+{
+  const bool treat_as_success = (op_ret == 0 || op_ret == -ENOENT);
+  const int status = treat_as_success ? STATUS_NO_CONTENT : op_ret;
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+  if (delete_marker) {
+    dump_header(s, "x-amz-delete-marker", "true");
+  }
+  end_header(s, this);
+}
+
+/*
+ * Build the ACL policy for the copy's target object via create_s3_policy()
+ * and store it in dest_policy. Returns 0 or the negative error from
+ * create_s3_policy().
+ */
+int RGWCopyObj_ObjStore_S3::init_dest_policy()
+{
+  RGWAccessControlPolicy_S3 target_policy(s->cct);
+
+  const int r = create_s3_policy(s, store, target_policy, s->owner);
+  if (r >= 0) {
+    dest_policy = target_policy;
+    return 0;
+  }
+  return r;
+}
+
+/*
+ * Collect the parameters of a COPY object request: the x-amz-copy-source-if-*
+ * style conditions, source and destination names, the system-only
+ * source-zone / copy-if-newer arguments and the metadata directive.
+ *
+ * Returns 0, or -EINVAL for an unknown x-amz-metadata-directive value on a
+ * request without a source zone.
+ */
+int RGWCopyObj_ObjStore_S3::get_params()
+{
+  const auto *env = s->info.env;
+
+  if_mod = env->get("HTTP_X_AMZ_COPY_IF_MODIFIED_SINCE");
+  if_unmod = env->get("HTTP_X_AMZ_COPY_IF_UNMODIFIED_SINCE");
+  if_match = env->get("HTTP_X_AMZ_COPY_IF_MATCH");
+  if_nomatch = env->get("HTTP_X_AMZ_COPY_IF_NONE_MATCH");
+
+  /* source comes from the parsed request state, destination is the target */
+  src_tenant_name = s->src_tenant_name;
+  src_bucket_name = s->src_bucket_name;
+  src_object = s->src_object;
+  dest_tenant_name = s->bucket.tenant;
+  dest_bucket_name = s->bucket.name;
+  dest_object = s->object.name;
+
+  if (s->system_request) {
+    source_zone = s->info.args.get(RGW_SYS_PARAM_PREFIX "source-zone");
+    s->info.args.get_bool(RGW_SYS_PARAM_PREFIX "copy-if-newer", &copy_if_newer, false);
+  }
+
+  copy_source = env->get("HTTP_X_AMZ_COPY_SOURCE");
+
+  auto directive = env->get("HTTP_X_AMZ_METADATA_DIRECTIVE");
+  if (directive) {
+    if (strcasecmp(directive, "COPY") == 0) {
+      attrs_mod = RGWRados::ATTRSMOD_NONE;
+    } else if (strcasecmp(directive, "REPLACE") == 0) {
+      attrs_mod = RGWRados::ATTRSMOD_REPLACE;
+    } else if (!source_zone.empty()) {
+      attrs_mod = RGWRados::ATTRSMOD_NONE; // default for intra-zone_group copy
+    } else {
+      s->err.message = "Unknown metadata directive.";
+      ldout(s->cct, 0) << s->err.message << dendl;
+      return -EINVAL;
+    }
+    md_directive = directive;
+  }
+
+  /* Copying an object onto itself without replacing its attributes needs
+   * the storage-class check performed by check_storage_class(). */
+  const bool same_object =
+    (dest_tenant_name.compare(src_tenant_name) == 0) &&
+    (dest_bucket_name.compare(src_bucket_name) == 0) &&
+    (dest_object.compare(src_object.name) == 0) &&
+    src_object.instance.empty();
+  if (source_zone.empty() && same_object &&
+      (attrs_mod != RGWRados::ATTRSMOD_REPLACE)) {
+    need_to_check_storage_class = true;
+  }
+
+  return 0;
+}
+
+/*
+ * Reject a copy when the source placement equals the destination placement
+ * (see message below). Returns 0 when the placements differ,
+ * -ERR_INVALID_REQUEST otherwise.
+ */
+int RGWCopyObj_ObjStore_S3::check_storage_class(const rgw_placement_rule& src_placement)
+{
+  if (!(src_placement == s->dest_placement)) {
+    return 0;
+  }
+
+  /* can only copy object into itself if replacing attrs */
+  s->err.message = "This copy request is illegal because it is trying to copy "
+                   "an object to itself without changing the object's metadata, "
+                   "storage class, website redirect location or encryption attributes.";
+  ldout(s->cct, 0) << s->err.message << dendl;
+  return -ERR_INVALID_REQUEST;
+}
+
+/*
+ * Stream part of the COPY response. The first call sends the headers (with
+ * chunked transfer encoding) and, on success, opens the CopyObjectResult
+ * section; later calls emit a "Progress" element carrying ofs.
+ */
+void RGWCopyObj_ObjStore_S3::send_partial_response(off_t ofs)
+{
+  if (sent_header) {
+    /* Send progress field. Note that this diverge from the original S3
+     * spec. We do this in order to keep connection alive.
+     */
+    s->formatter->dump_int("Progress", (uint64_t)ofs);
+  } else {
+    if (op_ret)
+      set_req_state_err(s, op_ret);
+    dump_errno(s);
+
+    // Explicitly use chunked transfer encoding so that we can stream the result
+    // to the user without having to wait for the full length of it.
+    end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+    dump_start(s);
+    if (op_ret == 0) {
+      s->formatter->open_object_section_in_ns("CopyObjectResult", XMLNS_AWS_S3);
+    }
+    sent_header = true;
+  }
+  rgw_flush_formatter(s, s->formatter);
+}
+
+/*
+ * Finish the COPY response: make sure the header part went out, then, on
+ * success, write LastModified and (when set) ETag and close the result
+ * section.
+ */
+void RGWCopyObj_ObjStore_S3::send_response()
+{
+  if (!sent_header) {
+    send_partial_response(0);
+  }
+
+  if (op_ret != 0) {
+    return;
+  }
+
+  dump_time(s, "LastModified", &mtime);
+  if (!etag.empty()) {
+    s->formatter->dump_string("ETag", std::move(etag));
+  }
+  s->formatter->close_section();
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/*
+ * Emit the GET ACL response: status, XML header, then the acls member as
+ * the body.
+ */
+void RGWGetACLs_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+  rgw_flush_formatter(s, s->formatter);
+  dump_body(s, acls);
+}
+
+/*
+ * Read the PUT ACL parameters through the base class; on a non-negative
+ * result run do_aws4_auth_completion(), whose negative result replaces the
+ * return value.
+ */
+int RGWPutACLs_ObjStore_S3::get_params()
+{
+  const int base_ret = RGWPutACLs_ObjStore::get_params();
+  if (base_ret < 0) {
+    return base_ret;
+  }
+
+  const int auth_ret = do_aws4_auth_completion();
+  return (auth_ret < 0) ? auth_ret : base_ret;
+}
+
+/*
+ * Build an ACL policy from the request state (canned ACL / grant headers)
+ * and serialise it as XML into ss. Returns 0 or the negative error from
+ * create_s3_policy().
+ */
+int RGWPutACLs_ObjStore_S3::get_policy_from_state(RGWRados *store,
+                                                  struct req_state *s,
+                                                  stringstream& ss)
+{
+  RGWAccessControlPolicy_S3 acl_policy(s->cct);
+
+  // bucket-* canned acls do not apply to bucket
+  const bool is_bucket_request = s->object.empty();
+  if (is_bucket_request && s->canned_acl.find("bucket") != string::npos) {
+    s->canned_acl.clear();
+  }
+
+  const int r = create_s3_policy(s, store, acl_policy, owner);
+  if (r < 0) {
+    return r;
+  }
+
+  acl_policy.to_xml(ss);
+  return 0;
+}
+
+/* Emit the PUT ACL response: status and an XML header, no body content. */
+void RGWPutACLs_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+}
+
+/*
+ * Load the bucket's lifecycle configuration from the RGW_ATTR_LC bucket
+ * attribute into config. Sets op_ret to -ENOENT when the attribute is
+ * missing and to -EIO when it cannot be decoded.
+ */
+void RGWGetLC_ObjStore_S3::execute()
+{
+  config.set_ctx(s->cct);
+
+  auto aiter = s->bucket_attrs.find(RGW_ATTR_LC);
+  if (aiter == s->bucket_attrs.end()) {
+    op_ret = -ENOENT;
+    return;
+  }
+
+  bufferlist::const_iterator iter{&aiter->second};
+  try {
+    config.decode(iter);
+  } catch (const buffer::error& e) {
+    /* separate the function name from the text, otherwise the two run
+     * together in the log line */
+    ldout(s->cct, 0) << __func__ << "(): decode life cycle config failed" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+}
+
+/*
+ * Emit the GET lifecycle response. -ENOENT is reported as ERR_NO_SUCH_LC;
+ * on success the configuration is written as a LifecycleConfiguration
+ * XML document.
+ */
+void RGWGetLC_ObjStore_S3::send_response()
+{
+  if (op_ret == -ENOENT) {
+    set_req_state_err(s, ERR_NO_SUCH_LC);
+  } else if (op_ret) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  if (op_ret >= 0) {
+    encode_xml("LifecycleConfiguration", XMLNS_AWS_S3, config, s->formatter);
+    rgw_flush_formatter_and_reset(s, s->formatter);
+  }
+}
+
+/* Emit the PUT lifecycle response: status and an XML header, no body content. */
+void RGWPutLC_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+}
+
+/*
+ * Emit the DELETE lifecycle response. A successful delete is turned into
+ * STATUS_NO_CONTENT (stored back into op_ret) before the status is dumped.
+ */
+void RGWDeleteLC_ObjStore_S3::send_response()
+{
+  if (!op_ret) {
+    op_ret = STATUS_NO_CONTENT;
+  }
+  /* op_ret is normally non-zero by now; the guard mirrors the original */
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+}
+
+/*
+ * Emit the GET CORS response. -ENOENT is reported as
+ * ERR_NO_SUCH_CORS_CONFIGURATION; on success the bucket's CORS
+ * configuration is serialised to XML and sent as the body.
+ */
+void RGWGetCORS_ObjStore_S3::send_response()
+{
+  if (op_ret == -ENOENT) {
+    set_req_state_err(s, ERR_NO_SUCH_CORS_CONFIGURATION);
+  } else if (op_ret) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, NULL, "application/xml");
+  dump_start(s);
+
+  if (op_ret) {
+    return;
+  }
+
+  /* view the stored configuration through its S3 (XML-capable) subclass */
+  RGWCORSConfiguration_S3 *s3cors =
+    static_cast<RGWCORSConfiguration_S3 *>(&bucket_cors);
+  stringstream xml_out;
+  s3cors->to_xml(xml_out);
+
+  string cors = xml_out.str();
+  dump_body(s, cors);
+}
+
+/*
+ * Read and validate the CORSConfiguration XML body of a PUT CORS request.
+ *
+ * The body is read (bounded by rgw_max_put_param_size), AWS4 auth is
+ * completed, the XML is parsed, the number of rules is checked against
+ * rgw_cors_rules_max_num (CORS_RULES_MAX_NUM when that is negative) and the
+ * configuration is encoded into cors_bl. The raw body is kept in in_data
+ * when this zone is not the metadata master.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int RGWPutCORS_ObjStore_S3::get_params()
+{
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+
+  int r = 0;
+  bufferlist data;
+  std::tie(r, data) = rgw_rest_read_all_input(s, max_size, false);
+  if (r < 0) {
+    return r;
+  }
+
+  r = do_aws4_auth_completion();
+  if (r < 0) {
+    return r;
+  }
+
+  RGWCORSXMLParser_S3 parser(s->cct);
+  if (!parser.init()) {
+    return -EINVAL;
+  }
+
+  char* raw_xml = data.c_str();
+  if (!raw_xml || !parser.parse(raw_xml, data.length(), 1)) {
+    return -ERR_MALFORMED_XML;
+  }
+
+  RGWCORSConfiguration_S3 *cors_config =
+    static_cast<RGWCORSConfiguration_S3 *>(parser.find_first(
+                                             "CORSConfiguration"));
+  if (!cors_config) {
+    return -ERR_MALFORMED_XML;
+  }
+
+#define CORS_RULES_MAX_NUM 100
+  /* a negative configured limit falls back to the built-in default */
+  int rules_limit = s->cct->_conf->rgw_cors_rules_max_num;
+  if (rules_limit < 0) {
+    rules_limit = CORS_RULES_MAX_NUM;
+  }
+  int rules_count = cors_config->get_rules().size();
+  if (rules_count > rules_limit) {
+    ldout(s->cct, 4) << "An cors config can have up to "
+                     << rules_limit
+                     << " rules, request cors rules num: "
+                     << rules_count << dendl;
+    /* NOTE(review): op_ret is set to one error code while a different one
+     * is returned - confirm which of the two the caller is meant to see. */
+    op_ret = -ERR_INVALID_CORS_RULES_ERROR;
+    s->err.message = "The number of CORS rules should not exceed allowed limit of "
+                     + std::to_string(rules_limit) + " rules.";
+    return -ERR_INVALID_REQUEST;
+  }
+
+  // forward bucket cors requests to meta master zone
+  if (!store->svc.zone->is_meta_master()) {
+    /* only need to keep this data around if we're not meta master */
+    in_data.append(data);
+  }
+
+  if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
+    ldout(s->cct, 15) << "CORSConfiguration";
+    cors_config->to_xml(*_dout);
+    *_dout << dendl;
+  }
+
+  cors_config->encode(cors_bl);
+
+  return 0;
+}
+
+// Emits the status line and headers for a PUT ?cors request; no body content
+// is produced beyond what dump_start() writes.
+void RGWPutCORS_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, nullptr, "application/xml");
+  dump_start(s);
+}
+
+// Emits the response for DELETE ?cors. Both success and -ENOENT are reported
+// as STATUS_NO_CONTENT; any other op_ret is passed through unchanged.
+void RGWDeleteCORS_ObjStore_S3::send_response()
+{
+  const bool nothing_left = (op_ret == 0) || (op_ret == -ENOENT);
+  const int status = nothing_left ? STATUS_NO_CONTENT : op_ret;
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  end_header(s, nullptr);
+}
+
+// Emits the response to a CORS preflight (OPTIONS) request.
+//
+// -ENOENT is folded into -EACCES before the error is reported. On any
+// negative op_ret only the error status and headers are sent; otherwise the
+// access-control headers computed by get_response_params() are dumped.
+void RGWOptionsCORS_ObjStore_S3::send_response()
+{
+  if (op_ret == -ENOENT) {
+    op_ret = -EACCES;
+  }
+
+  if (op_ret < 0) {
+    set_req_state_err(s, op_ret);
+    dump_errno(s);
+    end_header(s, nullptr);
+    return;
+  }
+
+  string hdrs;
+  string exp_hdrs;
+  uint32_t max_age = CORS_MAX_AGE_INVALID;
+  get_response_params(hdrs, exp_hdrs, &max_age);
+
+  dump_errno(s);
+  dump_access_control(s, origin, req_meth, hdrs.c_str(), exp_hdrs.c_str(),
+                      max_age);
+  end_header(s, nullptr);
+}
+
+// Writes a RequestPaymentConfiguration document whose Payer element is
+// "Requester" when requester_pays is set and "BucketOwner" otherwise.
+void RGWGetRequestPayment_ObjStore_S3::send_response()
+{
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+  dump_start(s);
+
+  Formatter * const out = s->formatter;
+  out->open_object_section_in_ns("RequestPaymentConfiguration", XMLNS_AWS_S3);
+  if (requester_pays) {
+    out->dump_string("Payer", "Requester");
+  } else {
+    out->dump_string("Payer", "BucketOwner");
+  }
+  out->close_section();
+  rgw_flush_formatter_and_reset(s, out);
+}
+
+// XML parser for the body of a PUT ?requestPayment request.
+class RGWSetRequestPaymentParser : public RGWXMLParser
+{
+  // Every element is represented by a plain XMLObj.
+  XMLObj *alloc_obj(const char *el) override {
+    return new XMLObj;
+  }
+
+public:
+  RGWSetRequestPaymentParser() {}
+  ~RGWSetRequestPaymentParser() override {}
+
+  // Extracts the payer from the parsed document.
+  //
+  // *requester_pays is set to true only when the Payer element equals
+  // "Requester" (case-insensitively); a missing Payer element or
+  // "BucketOwner" leaves it false.
+  // Returns 0 on success, -EINVAL when the RequestPaymentConfiguration
+  // element is missing or Payer holds any other value.
+  int get_request_payment_payer(bool *requester_pays) {
+    XMLObj * const config = find_first("RequestPaymentConfiguration");
+    if (config == nullptr) {
+      return -EINVAL;
+    }
+
+    *requester_pays = false;
+
+    XMLObj * const payer_elem = config->find_first("Payer");
+    if (payer_elem == nullptr) {
+      return 0;
+    }
+
+    auto& payer = payer_elem->get_data();
+    if (stringcasecmp(payer, "Requester") == 0) {
+      *requester_pays = true;
+      return 0;
+    }
+    if (stringcasecmp(payer, "BucketOwner") == 0) {
+      return 0;
+    }
+    return -EINVAL;
+  }
+};
+
+// Reads the request body into in_data, parses it as a
+// RequestPaymentConfiguration document and sets requester_pays accordingly.
+//
+// Returns 0 on success, the negative error from reading the body, -EIO when
+// the XML parser cannot be initialised, and -EINVAL when the body is missing
+// or cannot be parsed (or when the parsed document is rejected by
+// get_request_payment_payer()).
+int RGWSetRequestPayment_ObjStore_S3::get_params()
+{
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+
+  int r = 0;
+  std::tie(r, in_data) = rgw_rest_read_all_input(s, max_size, false);
+
+  if (r < 0) {
+    return r;
+  }
+
+  RGWSetRequestPaymentParser parser;
+
+  if (!parser.init()) {
+    ldout(s->cct, 0) << "ERROR: failed to initialize parser" << dendl;
+    return -EIO;
+  }
+
+  char* buf = in_data.c_str();
+  // The sibling CORS handler treats a null body pointer as possible, so do
+  // not hand it to the parser and never stream it into the log: inserting a
+  // null char* into an ostream is undefined behaviour.
+  if (!buf || !parser.parse(buf, in_data.length(), 1)) {
+    ldout(s->cct, 10) << "failed to parse data: " << (buf ? buf : "") << dendl;
+    return -EINVAL;
+  }
+
+  return parser.get_request_payment_payer(&requester_pays);
+}
+
+// Emits only the status line and headers for PUT ?requestPayment.
+void RGWSetRequestPayment_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s);
+}
+
+// Builds the S3 access-control policy for the upload being initiated and
+// stores it in `policy`. Returns 0 on success or the negative error from
+// create_s3_policy() (also left in op_ret).
+int RGWInitMultipart_ObjStore_S3::get_params()
+{
+  RGWAccessControlPolicy_S3 s3policy(s->cct);
+
+  op_ret = create_s3_policy(s, store, s3policy, s->owner);
+  if (op_ret >= 0) {
+    policy = s3policy;
+    return 0;
+  }
+  return op_ret;
+}
+
+// Emits the InitiateMultipartUploadResult document. The encryption response
+// headers collected in crypt_http_responses are always sent; the XML body is
+// only written when the operation succeeded.
+void RGWInitMultipart_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  for (auto &hdr : crypt_http_responses) {
+    dump_header(s, hdr.first, hdr.second);
+  }
+  end_header(s, this, "application/xml");
+
+  if (op_ret != 0) {
+    return;
+  }
+
+  dump_start(s);
+  s->formatter->open_object_section_in_ns("InitiateMultipartUploadResult", XMLNS_AWS_S3);
+  if (!s->bucket_tenant.empty()) {
+    s->formatter->dump_string("Tenant", s->bucket_tenant);
+  }
+  s->formatter->dump_string("Bucket", s->bucket_name);
+  s->formatter->dump_string("Key", s->object.name);
+  s->formatter->dump_string("UploadId", upload_id);
+  s->formatter->close_section();
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Forwards to rgw_s3_prepare_encrypt() with no block-crypt or
+// filter outputs requested, collecting response headers in
+// crypt_http_responses. Returns that function's result unchanged.
+int RGWInitMultipart_ObjStore_S3::prepare_encryption(map<string, bufferlist>& attrs)
+{
+  return rgw_s3_prepare_encrypt(s, attrs, nullptr, nullptr, crypt_http_responses);
+}
+
+// Runs the generic parameter parsing, maps query-string metadata, then
+// finishes AWS v4 authentication. Returns the first negative error, or the
+// result of do_aws4_auth_completion().
+int RGWCompleteMultipart_ObjStore_S3::get_params()
+{
+  const int base_ret = RGWCompleteMultipart_ObjStore::get_params();
+  if (base_ret < 0) {
+    return base_ret;
+  }
+
+  map_qs_metadata(s);
+  return do_aws4_auth_completion();
+}
+
+// Emits the CompleteMultipartUploadResult document on success. The Location
+// element is "<base>/<tenant>:<bucket>/<key>" for tenanted buckets and
+// "<base>/<bucket>/<key>" otherwise.
+void RGWCompleteMultipart_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+  end_header(s, this, "application/xml");
+
+  if (op_ret != 0) {
+    return;
+  }
+
+  dump_start(s);
+  s->formatter->open_object_section_in_ns("CompleteMultipartUploadResult", XMLNS_AWS_S3);
+  std::string base_uri = compute_domain_uri(s);
+  const bool tenanted = !s->bucket_tenant.empty();
+  if (tenanted) {
+    s->formatter->dump_format("Location", "%s/%s:%s/%s",
+                              base_uri.c_str(),
+                              s->bucket_tenant.c_str(),
+                              s->bucket_name.c_str(),
+                              s->object.name.c_str());
+    s->formatter->dump_string("Tenant", s->bucket_tenant);
+  } else {
+    s->formatter->dump_format("Location", "%s/%s/%s",
+                              base_uri.c_str(),
+                              s->bucket_name.c_str(),
+                              s->object.name.c_str());
+  }
+  s->formatter->dump_string("Bucket", s->bucket_name);
+  s->formatter->dump_string("Key", s->object.name);
+  s->formatter->dump_string("ETag", etag);
+  s->formatter->close_section();
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Emits the response for an aborted multipart upload; success is reported
+// as STATUS_NO_CONTENT, any other op_ret is passed through.
+void RGWAbortMultipart_ObjStore_S3::send_response()
+{
+  const int status = (op_ret == 0) ? STATUS_NO_CONTENT : op_ret;
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Emits the ListPartsResult document for a multipart upload.
+//
+// NextPartNumberMarker is the highest part number in `parts`, or 0 when no
+// parts were listed. The body is only written when the operation succeeded.
+void RGWListMultipart_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  // Chunked transfer encoding lets the result be streamed without knowing
+  // its full length up front.
+  end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+
+  if (op_ret != 0) {
+    return;
+  }
+
+  dump_start(s);
+  s->formatter->open_object_section_in_ns("ListPartsResult", XMLNS_AWS_S3);
+
+  // `parts` is ordered by part number, so the last entry holds the maximum.
+  const int cur_max =
+      parts.empty() ? 0 : static_cast<int>(parts.rbegin()->first);
+
+  if (!s->bucket_tenant.empty()) {
+    s->formatter->dump_string("Tenant", s->bucket_tenant);
+  }
+  s->formatter->dump_string("Bucket", s->bucket_name);
+  s->formatter->dump_string("Key", s->object.name);
+  s->formatter->dump_string("UploadId", upload_id);
+  s->formatter->dump_string("StorageClass", "STANDARD");
+  s->formatter->dump_int("PartNumberMarker", marker);
+  s->formatter->dump_int("NextPartNumberMarker", cur_max);
+  s->formatter->dump_int("MaxParts", max_parts);
+  s->formatter->dump_string("IsTruncated", (truncated ? "true" : "false"));
+
+  ACLOwner& owner = policy.get_owner();
+  dump_owner(s, owner.get_id(), owner.get_display_name());
+
+  for (auto& entry : parts) {
+    RGWUploadPartInfo& part = entry.second;
+
+    s->formatter->open_object_section("Part");
+    dump_time(s, "LastModified", &part.modified);
+    s->formatter->dump_unsigned("PartNumber", part.num);
+    s->formatter->dump_format("ETag", "\"%s\"", part.etag.c_str());
+    s->formatter->dump_unsigned("Size", part.accounted_size);
+    s->formatter->close_section();
+  }
+
+  s->formatter->close_section();
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Emits the ListMultipartUploadsResult document for a bucket.
+//
+// On a negative op_ret only the error status, headers and document start are
+// written. Optional elements (prefix, markers, delimiter) are emitted only
+// when non-empty.
+// NOTE(review): both Initiator and Owner are filled from the requesting
+// user (s->user) rather than from per-upload data.
+void RGWListBucketMultiparts_ObjStore_S3::send_response()
+{
+  const bool failed = op_ret < 0;
+  if (failed) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+
+  // Chunked transfer encoding lets the result be streamed without knowing
+  // its full length up front.
+  end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+  dump_start(s);
+  if (failed) {
+    return;
+  }
+
+  Formatter * const out = s->formatter;
+  out->open_object_section_in_ns("ListMultipartUploadsResult", XMLNS_AWS_S3);
+  if (!s->bucket_tenant.empty()) {
+    out->dump_string("Tenant", s->bucket_tenant);
+  }
+  out->dump_string("Bucket", s->bucket_name);
+  if (!prefix.empty()) {
+    out->dump_string("ListMultipartUploadsResult.Prefix", prefix);
+  }
+
+  const string& key_marker = marker.get_key();
+  if (!key_marker.empty()) {
+    out->dump_string("KeyMarker", key_marker);
+  }
+  const string& upload_id_marker = marker.get_upload_id();
+  if (!upload_id_marker.empty()) {
+    out->dump_string("UploadIdMarker", upload_id_marker);
+  }
+
+  string next_key = next_marker.mp.get_key();
+  if (!next_key.empty()) {
+    out->dump_string("NextKeyMarker", next_key);
+  }
+  string next_upload_id = next_marker.mp.get_upload_id();
+  if (!next_upload_id.empty()) {
+    out->dump_string("NextUploadIdMarker", next_upload_id);
+  }
+
+  out->dump_int("MaxUploads", max_uploads);
+  if (!delimiter.empty()) {
+    out->dump_string("Delimiter", delimiter);
+  }
+  out->dump_string("IsTruncated", (is_truncated ? "true" : "false"));
+
+  // op_ret is known to be non-negative from here on.
+  for (auto& upload : uploads) {
+    RGWMPObj& mp = upload.mp;
+    out->open_array_section("Upload");
+    out->dump_string("Key", mp.get_key());
+    out->dump_string("UploadId", mp.get_upload_id());
+    dump_owner(s, s->user->user_id, s->user->display_name, "Initiator");
+    dump_owner(s, s->user->user_id, s->user->display_name);
+    out->dump_string("StorageClass", "STANDARD");
+    dump_time(s, "Initiated", &upload.obj.meta.mtime);
+    out->close_section();
+  }
+
+  if (!common_prefixes.empty()) {
+    out->open_array_section("CommonPrefixes");
+    for (auto& pref : common_prefixes) {
+      out->dump_string("CommonPrefixes.Prefix", pref.first);
+    }
+    out->close_section();
+  }
+
+  out->close_section();
+  rgw_flush_formatter_and_reset(s, out);
+}
+
+// Runs the generic parameter parsing and, if that succeeds, finishes AWS v4
+// authentication. Returns the first negative error or the result of
+// do_aws4_auth_completion().
+int RGWDeleteMultiObj_ObjStore_S3::get_params()
+{
+  const int base_ret = RGWDeleteMultiObj_ObjStore::get_params();
+  if (base_ret < 0) {
+    return base_ret;
+  }
+  return do_aws4_auth_completion();
+}
+
+// Dumps the HTTP status once; later calls are no-ops because status_dumped
+// is set after the first one.
+void RGWDeleteMultiObj_ObjStore_S3::send_status()
+{
+  if (status_dumped) {
+    return;
+  }
+
+  if (op_ret < 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  status_dumped = true;
+}
+
+// Starts the multi-object delete response: makes sure the status has been
+// sent, writes the headers and opens the DeleteResult element, which
+// end_response() closes.
+void RGWDeleteMultiObj_ObjStore_S3::begin_response()
+{
+  if (!status_dumped) {
+    send_status();
+  }
+
+  dump_start(s);
+  // Chunked transfer encoding lets the result be streamed without knowing
+  // its full length up front.
+  end_header(s, this, "application/xml", CHUNKED_TRANSFER_ENCODING);
+
+  Formatter * const out = s->formatter;
+  out->open_object_section_in_ns("DeleteResult", XMLNS_AWS_S3);
+  rgw_flush_formatter(s, out);
+}
+
+// Writes the per-object result of a multi-object delete.
+//
+// Nothing is written for an empty key. A successful delete produces a
+// Deleted element unless quiet mode is on; a negative `ret` produces an
+// Error element whose Code and Message both carry the S3 error code looked
+// up via rgw_get_errno_s3().
+void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(rgw_obj_key& key,
+                                                          bool delete_marker,
+                                                          const string& marker_version_id, int ret)
+{
+  if (key.empty()) {
+    return;
+  }
+
+  Formatter * const out = s->formatter;
+
+  if (ret == 0 && !quiet) {
+    out->open_object_section("Deleted");
+    out->dump_string("Key", key.name);
+    if (!key.instance.empty()) {
+      out->dump_string("VersionId", key.instance);
+    }
+    if (delete_marker) {
+      out->dump_bool("DeleteMarker", true);
+      out->dump_string("DeleteMarkerVersionId", marker_version_id);
+    }
+    out->close_section();
+  } else if (ret < 0) {
+    struct rgw_http_error http_err;
+
+    out->open_object_section("Error");
+
+    const int err_no = -ret;
+    rgw_get_errno_s3(&http_err, err_no);
+
+    out->dump_string("Key", key.name);
+    out->dump_string("VersionId", key.instance);
+    out->dump_string("Code", http_err.s3_code);
+    out->dump_string("Message", http_err.s3_code);
+    out->close_section();
+  }
+
+  rgw_flush_formatter(s, out);
+}
+
+// Closes the currently open formatter section and flushes/resets the
+// formatter, finishing the multi-object delete response.
+void RGWDeleteMultiObj_ObjStore_S3::end_response()
+{
+  Formatter * const out = s->formatter;
+  out->close_section();
+  rgw_flush_formatter_and_reset(s, out);
+}
+
+// Emits a JSON description of an object's layout: the head object, the
+// manifest and one "obj" entry per manifest stripe with its offset, raw
+// location and size. Nothing beyond the headers is written when op_ret is
+// negative.
+void RGWGetObjLayout_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this, "application/json");
+
+  if (op_ret < 0) {
+    return;
+  }
+
+  JSONFormatter f;
+
+  f.open_object_section("result");
+  ::encode_json("head", head_obj, &f);
+  ::encode_json("manifest", *manifest, &f);
+  f.open_array_section("data_location");
+  for (auto stripe = manifest->obj_begin(); stripe != manifest->obj_end(); ++stripe) {
+    f.open_object_section("obj");
+    rgw_raw_obj raw_loc = stripe.get_location().get_raw_obj(store);
+    const uint64_t ofs = stripe.get_ofs();
+    // Bytes of the object that remain from this stripe's offset onwards.
+    const uint64_t left = manifest->get_obj_size() - ofs;
+    ::encode_json("ofs", stripe.get_ofs(), &f);
+    ::encode_json("loc", raw_loc, &f);
+    ::encode_json("loc_ofs", stripe.location_ofs(), &f);
+    // The reported size never exceeds what is left of the object.
+    const uint64_t stripe_size = stripe.get_stripe_size();
+    uint64_t loc_size = (stripe_size > left) ? left : stripe_size;
+    ::encode_json("loc_size", loc_size, &f);
+    f.close_section();
+    rgw_flush_formatter(s, &f);
+  }
+  f.close_section();
+  f.close_section();
+  rgw_flush_formatter(s, &f);
+}
+
+// Parses the metadata-search configuration header into mdsearch_config.
+//
+// The header value is a comma-separated list of "key[;type]" expressions.
+// Each key must start with RGW_AMZ_META_PREFIX (the prefix is stripped
+// before storing); the optional type is one of str/string, int/integer or
+// date/datetime and defaults to string. Keys and types are lower-cased and
+// trimmed.
+// Returns 0 on success or -EINVAL (with s->err.message set) on a missing
+// header or malformed expression.
+int RGWConfigBucketMetaSearch_ObjStore_S3::get_params()
+{
+  const auto header = s->info.x_meta_map.find("x-amz-meta-search");
+  if (header == s->info.x_meta_map.end()) {
+    s->err.message = "X-Rgw-Meta-Search header not provided";
+    ldout(s->cct, 5) << s->err.message << dendl;
+    return -EINVAL;
+  }
+
+  list<string> expressions;
+  get_str_list(header->second, ",", expressions);
+
+  for (auto& expression : expressions) {
+    vector<string> parts;
+    get_str_vec(expression, ";", parts);
+
+    if (parts.empty()) {
+      s->err.message = "invalid empty expression";
+      ldout(s->cct, 5) << s->err.message << dendl;
+      return -EINVAL;
+    }
+    if (parts.size() > 2) {
+      s->err.message = string("invalid expression: ") + expression;
+      ldout(s->cct, 5) << s->err.message << dendl;
+      return -EINVAL;
+    }
+
+    string key = boost::algorithm::to_lower_copy(rgw_trim_whitespace(parts[0]));
+    string type_name;
+    if (parts.size() > 1) {
+      type_name = boost::algorithm::to_lower_copy(rgw_trim_whitespace(parts[1]));
+    }
+
+    if (!boost::algorithm::starts_with(key, RGW_AMZ_META_PREFIX)) {
+      s->err.message = string("invalid expression, key must start with '" RGW_AMZ_META_PREFIX "' : ") + expression;
+      ldout(s->cct, 5) << s->err.message << dendl;
+      return -EINVAL;
+    }
+
+    // Drop the prefix; sizeof counts the terminating NUL, hence the -1.
+    key = key.substr(sizeof(RGW_AMZ_META_PREFIX) - 1);
+
+    ESEntityTypeMap::EntityType entity_type;
+    const bool is_string =
+        type_name.empty() || type_name == "str" || type_name == "string";
+    const bool is_int = type_name == "int" || type_name == "integer";
+    const bool is_date = type_name == "date" || type_name == "datetime";
+
+    if (is_string) {
+      entity_type = ESEntityTypeMap::ES_ENTITY_STR;
+    } else if (is_int) {
+      entity_type = ESEntityTypeMap::ES_ENTITY_INT;
+    } else if (is_date) {
+      entity_type = ESEntityTypeMap::ES_ENTITY_DATE;
+    } else {
+      s->err.message = string("invalid entity type: ") + type_name;
+      ldout(s->cct, 5) << s->err.message << dendl;
+      return -EINVAL;
+    }
+
+    mdsearch_config[key] = entity_type;
+  }
+
+  return 0;
+}
+
+// Emits only the status line and headers for the mdsearch configuration
+// request.
+void RGWConfigBucketMetaSearch_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Emits the bucket's metadata-search configuration as a list of Entry
+// elements, each with a Key ("x-amz-meta-" + configured name) and a Type of
+// "int", "date" or "str".
+// NOTE(review): the body is written regardless of op_ret.
+void RGWGetBucketMetaSearch_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, nullptr, "application/xml");
+
+  Formatter * const f = s->formatter;
+  f->open_array_section("GetBucketMetaSearchResult");
+  for (auto& e : s->bucket_info.mdsearch_config) {
+    f->open_object_section("Entry");
+
+    string k = string("x-amz-meta-") + e.first;
+    f->dump_string("Key", k.c_str());
+
+    // Anything that is neither an integer nor a date is reported as "str".
+    const char *type = "str";
+    if (e.second == ESEntityTypeMap::ES_ENTITY_INT) {
+      type = "int";
+    } else if (e.second == ESEntityTypeMap::ES_ENTITY_DATE) {
+      type = "date";
+    }
+    f->dump_string("Type", type);
+
+    f->close_section();
+  }
+  f->close_section();
+  rgw_flush_formatter(s, f);
+}
+
+// Emits only the status line and headers for the mdsearch delete request.
+void RGWDelBucketMetaSearch_ObjStore_S3::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+  dump_errno(s);
+  end_header(s, this);
+}
+
+// Emits only the status line and headers for PUT ?object-lock.
+void RGWPutBucketObjectLock_ObjStore_S3::send_response()
+{
+  if (op_ret != 0)
+    set_req_state_err(s, op_ret);
+
+  dump_errno(s);
+  end_header(s);
+}
+
+void RGWGetBucketObjectLock_ObjStore_S3::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+ dump_start(s);
+
+ if (op_ret) {
+ return;
+ }
+ encode_xml("ObjectLockConfiguration", s->bucket_info.obj_lock, s->formatter);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+
+// Reads the governance-bypass header and the request body.
+//
+// bypass_governance_mode is set from the URL-decoded header value compared
+// case-insensitively with "true"; it is left untouched when the header is
+// absent. The body is read into `data`; the read result is stored in op_ret
+// and returned.
+int RGWPutObjRetention_ObjStore_S3::get_params()
+{
+  const char * const bypass_hdr =
+      s->info.env->get("HTTP_X_AMZ_BYPASS_GOVERNANCE_RETENTION");
+  if (bypass_hdr != nullptr) {
+    std::string decoded = url_decode(bypass_hdr);
+    bypass_governance_mode = boost::algorithm::iequals(decoded, "true");
+  }
+
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, max_size, false);
+  return op_ret;
+}
+
+// Emits only the status line and headers for PUT ?retention.
+void RGWPutObjRetention_ObjStore_S3::send_response()
+{
+  if (op_ret != 0)
+    set_req_state_err(s, op_ret);
+
+  dump_errno(s);
+  end_header(s);
+}
+
+void RGWGetObjRetention_ObjStore_S3::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+ dump_start(s);
+
+ if (op_ret) {
+ return;
+ }
+ encode_xml("Retention", obj_retention, s->formatter);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+// Emits only the status line and headers for PUT ?legal-hold.
+void RGWPutObjLegalHold_ObjStore_S3::send_response()
+{
+  if (op_ret != 0)
+    set_req_state_err(s, op_ret);
+
+  dump_errno(s);
+  end_header(s);
+}
+
+void RGWGetObjLegalHold_ObjStore_S3::send_response()
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+ dump_start(s);
+
+ if (op_ret) {
+ return;
+ }
+ encode_xml("LegalHold", obj_legal_hold, s->formatter);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+
+// GET on the service root: a usage query when is_usage_op() says so,
+// otherwise a bucket listing.
+RGWOp *RGWHandler_REST_Service_S3::op_get()
+{
+  if (!is_usage_op()) {
+    return new RGWListBuckets_ObjStore_S3;
+  }
+  return new RGWGetUsage_ObjStore_S3;
+}
+
+// HEAD on the service root is served by the bucket-listing operation.
+RGWOp *RGWHandler_REST_Service_S3::op_head()
+{
+  RGWOp * const op = new RGWListBuckets_ObjStore_S3;
+  return op;
+}
+
+// POST on the service root: reads the request body and offers it, in order,
+// to the STS, IAM and pubsub-topic handlers (each only when enabled). The
+// first handler that yields an operation wins.
+//
+// Returns nullptr when the body cannot be read or when no enabled handler
+// recognises the request.
+RGWOp *RGWHandler_REST_Service_S3::op_post()
+{
+  const auto max_size = s->cct->_conf->rgw_max_put_param_size;
+
+  int ret = 0;
+  bufferlist data;
+  std::tie(ret, data) = rgw_rest_read_all_input(s, max_size, false);
+  if (ret < 0) {
+    // Do not dispatch on a body that failed to arrive (e.g. too large);
+    // previously the error was silently ignored.
+    ldout(s->cct, 5) << __func__ << ": failed to read request body, ret="
+                     << ret << dendl;
+    return nullptr;
+  }
+  string post_body = data.to_str();
+
+  if (this->isSTSenabled) {
+    RGWHandler_REST_STS sts_handler(auth_registry, post_body);
+    sts_handler.init(store, s, s->cio);
+    auto op = sts_handler.get_op(store);
+    if (op) {
+      return op;
+    }
+  }
+
+  if (this->isIAMenabled) {
+    RGWHandler_REST_IAM iam_handler(auth_registry, post_body);
+    iam_handler.init(store, s, s->cio);
+    auto op = iam_handler.get_op(store);
+    if (op) {
+      return op;
+    }
+  }
+
+  if (isPSenabled) {
+    RGWHandler_REST_PSTopic_AWS topic_handler(auth_registry, post_body);
+    topic_handler.init(store, s, s->cio);
+    auto op = topic_handler.get_op(store);
+    if (op) {
+      return op;
+    }
+  }
+
+  return nullptr;
+}
+
+// Non-website mode. Without data (HEAD) the bucket is only stat'ed. With
+// data the "list-type" query argument selects the listing flavour: 2 picks
+// the v2 listing, 1 (the default) and any unsupported value pick v1, the
+// latter after logging the unsupported value.
+RGWOp *RGWHandler_REST_Bucket_S3::get_obj_op(bool get_data)
+{
+  if (!get_data) {
+    return new RGWStatBucket_ObjStore_S3;
+  }
+
+  int list_type = 1;
+  s->info.args.get_int("list-type", &list_type, 1);
+
+  if (list_type == 2) {
+    return new RGWListBucket_ObjStore_S3v2;
+  }
+  if (list_type != 1) {
+    ldpp_dout(s, 5) << __func__ << ": unsupported list-type " << list_type << dendl;
+  }
+  return new RGWListBucket_ObjStore_S3;
+}
+
+// Selects the operation for GET on a bucket from the sub-resource / query
+// argument present. The checks run in a fixed order and the first match
+// wins; with no match the bucket is listed. A "website" request yields
+// nullptr when static websites are disabled in the configuration.
+RGWOp *RGWHandler_REST_Bucket_S3::op_get()
+{
+  const auto& args = s->info.args;
+
+  if (args.sub_resource_exists("logging")) {
+    return new RGWGetBucketLogging_ObjStore_S3;
+  }
+  if (args.sub_resource_exists("location")) {
+    return new RGWGetBucketLocation_ObjStore_S3;
+  }
+  if (args.sub_resource_exists("versioning")) {
+    return new RGWGetBucketVersioning_ObjStore_S3;
+  }
+  if (args.sub_resource_exists("website")) {
+    if (s->cct->_conf->rgw_enable_static_website) {
+      return new RGWGetBucketWebsite_ObjStore_S3;
+    }
+    return nullptr;
+  }
+  if (args.exists("mdsearch")) {
+    return new RGWGetBucketMetaSearch_ObjStore_S3;
+  }
+
+  if (is_acl_op()) {
+    return new RGWGetACLs_ObjStore_S3;
+  }
+  if (is_cors_op()) {
+    return new RGWGetCORS_ObjStore_S3;
+  }
+  if (is_request_payment_op()) {
+    return new RGWGetRequestPayment_ObjStore_S3;
+  }
+  if (args.exists("uploads")) {
+    return new RGWListBucketMultiparts_ObjStore_S3;
+  }
+  if (is_lc_op()) {
+    return new RGWGetLC_ObjStore_S3;
+  }
+  if (is_policy_op()) {
+    return new RGWGetBucketPolicy;
+  }
+  if (is_object_lock_op()) {
+    return new RGWGetBucketObjectLock_ObjStore_S3;
+  }
+  if (is_notification_op()) {
+    return RGWHandler_REST_PSNotifs_S3::create_get_op();
+  }
+  return get_obj_op(true);
+}
+
+// HEAD on a bucket: ACL query, multipart-upload listing, or a plain bucket
+// stat, checked in that order.
+RGWOp *RGWHandler_REST_Bucket_S3::op_head()
+{
+  if (is_acl_op()) {
+    return new RGWGetACLs_ObjStore_S3;
+  }
+  if (s->info.args.exists("uploads")) {
+    return new RGWListBucketMultiparts_ObjStore_S3;
+  }
+  return get_obj_op(false);
+}
+
+// Selects the operation for PUT on a bucket. The checks run in a fixed
+// order and the first match wins; with no match the bucket is created.
+// PUT ?logging is not supported (nullptr), and PUT ?website yields nullptr
+// when static websites are disabled in the configuration.
+RGWOp *RGWHandler_REST_Bucket_S3::op_put()
+{
+  const auto& args = s->info.args;
+
+  if (args.sub_resource_exists("logging")) {
+    return nullptr;
+  }
+  if (args.sub_resource_exists("versioning")) {
+    return new RGWSetBucketVersioning_ObjStore_S3;
+  }
+  if (args.sub_resource_exists("website")) {
+    if (s->cct->_conf->rgw_enable_static_website) {
+      return new RGWSetBucketWebsite_ObjStore_S3;
+    }
+    return nullptr;
+  }
+
+  if (is_acl_op()) {
+    return new RGWPutACLs_ObjStore_S3;
+  }
+  if (is_cors_op()) {
+    return new RGWPutCORS_ObjStore_S3;
+  }
+  if (is_request_payment_op()) {
+    return new RGWSetRequestPayment_ObjStore_S3;
+  }
+  if (is_lc_op()) {
+    return new RGWPutLC_ObjStore_S3;
+  }
+  if (is_policy_op()) {
+    return new RGWPutBucketPolicy;
+  }
+  if (is_object_lock_op()) {
+    return new RGWPutBucketObjectLock_ObjStore_S3;
+  }
+  if (is_notification_op()) {
+    return RGWHandler_REST_PSNotifs_S3::create_put_op();
+  }
+  return new RGWCreateBucket_ObjStore_S3;
+}
+
+// Selects the operation for DELETE on a bucket. The checks run in a fixed
+// order and the first match wins; with no match the bucket itself is
+// deleted. DELETE ?website yields nullptr when static websites are disabled
+// in the configuration.
+RGWOp *RGWHandler_REST_Bucket_S3::op_delete()
+{
+  if (is_cors_op()) {
+    return new RGWDeleteCORS_ObjStore_S3;
+  }
+  if (is_lc_op()) {
+    return new RGWDeleteLC_ObjStore_S3;
+  }
+  if (is_policy_op()) {
+    return new RGWDeleteBucketPolicy;
+  }
+  if (is_notification_op()) {
+    return RGWHandler_REST_PSNotifs_S3::create_delete_op();
+  }
+
+  if (s->info.args.sub_resource_exists("website")) {
+    if (s->cct->_conf->rgw_enable_static_website) {
+      return new RGWDeleteBucketWebsite_ObjStore_S3;
+    }
+    return nullptr;
+  }
+
+  if (s->info.args.exists("mdsearch")) {
+    return new RGWDelBucketMetaSearch_ObjStore_S3;
+  }
+
+  return new RGWDeleteBucket_ObjStore_S3;
+}
+
+// POST on a bucket: multi-object delete ("delete"), metadata-search
+// configuration ("mdsearch"), or a browser-style object POST otherwise.
+RGWOp *RGWHandler_REST_Bucket_S3::op_post()
+{
+  const auto& args = s->info.args;
+
+  if (args.exists("delete")) {
+    return new RGWDeleteMultiObj_ObjStore_S3;
+  }
+  if (args.exists("mdsearch")) {
+    return new RGWConfigBucketMetaSearch_ObjStore_S3;
+  }
+  return new RGWPostObj_ObjStore_S3;
+}
+
+// OPTIONS on a bucket is always a CORS preflight.
+RGWOp *RGWHandler_REST_Bucket_S3::op_options()
+{
+  RGWOp * const op = new RGWOptionsCORS_ObjStore_S3;
+  return op;
+}
+
+// Creates the object GET operation; `get_data` is forwarded to
+// set_get_data() (true for GET, false for HEAD per the callers here).
+RGWOp *RGWHandler_REST_Obj_S3::get_obj_op(bool get_data)
+{
+  auto * const op = new RGWGetObj_ObjStore_S3;
+  op->set_get_data(get_data);
+  return op;
+}
+
+// Selects the operation for GET on an object. The checks run in a fixed
+// order and the first match wins; with no match the object is fetched.
+RGWOp *RGWHandler_REST_Obj_S3::op_get()
+{
+  if (is_acl_op()) {
+    return new RGWGetACLs_ObjStore_S3;
+  }
+  if (s->info.args.exists("uploadId")) {
+    return new RGWListMultipart_ObjStore_S3;
+  }
+  if (s->info.args.exists("layout")) {
+    return new RGWGetObjLayout_ObjStore_S3;
+  }
+  if (is_tagging_op()) {
+    return new RGWGetObjTags_ObjStore_S3;
+  }
+  if (is_obj_retention_op()) {
+    return new RGWGetObjRetention_ObjStore_S3;
+  }
+  if (is_obj_legal_hold_op()) {
+    return new RGWGetObjLegalHold_ObjStore_S3;
+  }
+  return get_obj_op(true);
+}
+
+// HEAD on an object: ACL query, multipart part listing, or an object GET
+// without data, checked in that order.
+RGWOp *RGWHandler_REST_Obj_S3::op_head()
+{
+  if (is_acl_op()) {
+    return new RGWGetACLs_ObjStore_S3;
+  }
+  if (s->info.args.exists("uploadId")) {
+    return new RGWListMultipart_ObjStore_S3;
+  }
+  return get_obj_op(false);
+}
+
+// Selects the operation for PUT on an object: ACL, tagging, retention or
+// legal-hold updates first; otherwise a copy when a source bucket was
+// recorded in init_state, or a plain upload when it was not.
+RGWOp *RGWHandler_REST_Obj_S3::op_put()
+{
+  if (is_acl_op()) {
+    return new RGWPutACLs_ObjStore_S3;
+  }
+  if (is_tagging_op()) {
+    return new RGWPutObjTags_ObjStore_S3;
+  }
+  if (is_obj_retention_op()) {
+    return new RGWPutObjRetention_ObjStore_S3;
+  }
+  if (is_obj_legal_hold_op()) {
+    return new RGWPutObjLegalHold_ObjStore_S3;
+  }
+
+  const bool is_copy = !s->init_state.src_bucket.empty();
+  if (is_copy) {
+    return new RGWCopyObj_ObjStore_S3;
+  }
+  return new RGWPutObj_ObjStore_S3;
+}
+
+// DELETE on an object: removes tags for a tagging request, aborts a
+// multipart upload when a non-empty "uploadId" argument is given, and
+// deletes the object otherwise.
+RGWOp *RGWHandler_REST_Obj_S3::op_delete()
+{
+  if (is_tagging_op()) {
+    return new RGWDeleteObjTags_ObjStore_S3;
+  }
+
+  string upload_id = s->info.args.get("uploadId");
+  if (!upload_id.empty()) {
+    return new RGWAbortMultipart_ObjStore_S3;
+  }
+  return new RGWDeleteObj_ObjStore_S3;
+}
+
+// POST on an object: completes a multipart upload ("uploadId"), initiates
+// one ("uploads"), or falls back to a browser-style object POST. "uploadId"
+// takes precedence when both arguments are present.
+RGWOp *RGWHandler_REST_Obj_S3::op_post()
+{
+  const auto& args = s->info.args;
+
+  if (args.exists("uploadId")) {
+    return new RGWCompleteMultipart_ObjStore_S3;
+  }
+  if (args.exists("uploads")) {
+    return new RGWInitMultipart_ObjStore_S3;
+  }
+  return new RGWPostObj_ObjStore_S3;
+}
+
+// OPTIONS on an object is always a CORS preflight.
+RGWOp *RGWHandler_REST_Obj_S3::op_options()
+{
+  RGWOp * const op = new RGWOptionsCORS_ObjStore_S3;
+  return op;
+}
+
+// Parses the query arguments, allocates the response formatter and splits
+// the relative URI into a bucket name (stored in init_state.url_bucket) and
+// an object key (stored in s->object).
+//
+// Returns 0 on success or the negative error from allocate_formatter().
+int RGWHandler_REST_S3::init_from_header(struct req_state* s,
+                                         int default_formatter,
+                                         bool configurable_format)
+{
+  const char *req_name = s->relative_uri.c_str();
+
+  // Arguments come from the URI itself when it starts with '?', otherwise
+  // from the separately stored request parameters.
+  const char *p = (*req_name == '?') ? req_name
+                                     : s->info.request_params.c_str();
+
+  s->info.args.set(p);
+  s->info.args.parse();
+
+  /* must be called after the args parsing */
+  const int ret = allocate_formatter(s, default_formatter, configurable_format);
+  if (ret < 0) {
+    return ret;
+  }
+
+  // Nothing to split unless the URI is "/<something>".
+  if (*req_name != '/') {
+    return 0;
+  }
+  ++req_name;
+  if (*req_name == '\0') {
+    return 0;
+  }
+
+  const string req = req_name;
+  const auto slash = req.find('/');
+  const bool has_slash = (slash != string::npos);
+  const string first = has_slash ? req.substr(0, slash) : req;
+
+  /*
+   * XXX The emptiness check below was apparently meant to let a bucket name
+   * taken from DNS be set ahead of time. However, the DNS bucket is
+   * currently re-inserted into the URL in rgw_rest.cc:RGWREST::preprocess(),
+   * which makes the check meaningless.
+   *
+   * Rather than dropping it, the code should be changed to put the bucket
+   * (and its tenant) from DNS and the Host: header (HTTP_HOST) into
+   * req_status.bucket_name directly.
+   */
+  if (!s->init_state.url_bucket.empty()) {
+    s->object = rgw_obj_key(req_name, s->info.args.get("versionId"));
+    return 0;
+  }
+
+  // Save bucket to tide us over until token is parsed.
+  s->init_state.url_bucket = first;
+  if (has_slash) {
+    string encoded_obj_str = req.substr(slash + 1);
+    s->object = rgw_obj_key(encoded_obj_str, s->info.args.get("versionId"));
+  }
+  return 0;
+}
+
+// Validates an MFA header value of the form "<serial> <pin>" for `user`.
+//
+// Returns 0 and sets *verified to true when the check passes; -EINVAL when
+// the string does not split into exactly two space-separated fields;
+// -EACCES when the user has no device with that serial or the check fails.
+// *verified is left untouched on failure.
+static int verify_mfa(RGWRados *store, RGWUserInfo *user, const string& mfa_str, bool *verified)
+{
+  vector<string> fields;
+  get_str_vec(mfa_str, " ", fields);
+
+  if (fields.size() != 2) {
+    ldout(store->ctx(), 5) << "NOTICE: invalid mfa string provided: " << mfa_str << dendl;
+    return -EINVAL;
+  }
+
+  string& serial = fields[0];
+  string& pin = fields[1];
+
+  if (user->mfa_ids.find(serial) == user->mfa_ids.end()) {
+    ldout(store->ctx(), 5) << "NOTICE: user does not have mfa device with serial=" << serial << dendl;
+    return -EACCES;
+  }
+
+  const int check_ret = store->check_mfa(user->user_id, serial, pin);
+  if (check_ret < 0) {
+    ldout(store->ctx(), 20) << "NOTICE: failed to check MFA, serial=" << serial << dendl;
+    return -EACCES;
+  }
+
+  *verified = true;
+  return 0;
+}
+
+// Post-authentication initialisation: resolves tenant and bucket names from
+// the URL bucket (and from the copy-source bucket, if any), validates them,
+// and evaluates the MFA header when present.
+//
+// Returns 0 on success or the first non-zero validation result.
+int RGWHandler_REST_S3::postauth_init()
+{
+  struct req_init_state * const init = &s->init_state;
+  const bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
+
+  rgw_parse_url_bucket(init->url_bucket, s->user->user_id.tenant,
+                       s->bucket_tenant, s->bucket_name);
+
+  dout(10) << "s->object=" << (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
+           << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
+
+  int ret = rgw_validate_tenant_name(s->bucket_tenant);
+  if (ret != 0) {
+    return ret;
+  }
+
+  if (!s->bucket_name.empty()) {
+    ret = valid_s3_bucket_name(s->bucket_name, relaxed_names);
+    if (ret != 0) {
+      return ret;
+    }
+    ret = validate_object_name(s->object.name);
+    if (ret != 0) {
+      return ret;
+    }
+  }
+
+  if (!init->src_bucket.empty()) {
+    rgw_parse_url_bucket(init->src_bucket, s->user->user_id.tenant,
+                         s->src_tenant_name, s->src_bucket_name);
+    ret = rgw_validate_tenant_name(s->src_tenant_name);
+    if (ret != 0) {
+      return ret;
+    }
+    ret = valid_s3_bucket_name(s->src_bucket_name, relaxed_names);
+    if (ret != 0) {
+      return ret;
+    }
+  }
+
+  const char * const mfa = s->info.env->get("HTTP_X_AMZ_MFA");
+  if (mfa != nullptr) {
+    // NOTE(review): the result is not propagated; only s->mfa_verified
+    // carries the outcome. Presumably deliberate -- confirm.
+    ret = verify_mfa(store, s->user, string(mfa), &s->mfa_verified);
+  }
+
+  return 0;
+}
+
+// Initialises the S3 handler for a request: validates tenant, bucket and
+// object names, records the canned ACL, grant-header presence, copy source
+// and storage class from the request headers, then delegates to
+// RGWHandler_REST::init().
+//
+// Returns the first non-zero validation result, -EINVAL when the copy
+// source cannot be parsed, or the result of the base-class init.
+int RGWHandler_REST_S3::init(RGWRados *store, struct req_state *s,
+                             rgw::io::BasicClient *cio)
+{
+  s->dialect = "s3";
+
+  int ret = rgw_validate_tenant_name(s->bucket_tenant);
+  if (ret != 0) {
+    return ret;
+  }
+
+  const bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
+  if (!s->bucket_name.empty()) {
+    ret = valid_s3_bucket_name(s->bucket_name, relaxed_names);
+    if (ret != 0) {
+      return ret;
+    }
+    ret = validate_object_name(s->object.name);
+    if (ret != 0) {
+      return ret;
+    }
+  }
+
+  const char * const cacl = s->info.env->get("HTTP_X_AMZ_ACL");
+  if (cacl != nullptr) {
+    s->canned_acl = cacl;
+  }
+
+  s->has_acl_header = s->info.env->exists_prefix("HTTP_X_AMZ_GRANT");
+
+  // The copy source is only parsed here when neither a source range header
+  // nor an "uploadId" argument accompanies it.
+  const char * const copy_source = s->info.env->get("HTTP_X_AMZ_COPY_SOURCE");
+  if (copy_source &&
+      (! s->info.env->get("HTTP_X_AMZ_COPY_SOURCE_RANGE")) &&
+      (! s->info.args.exists("uploadId"))) {
+    const bool parsed = RGWCopyObj::parse_copy_location(copy_source,
+                                                        s->init_state.src_bucket,
+                                                        s->src_object);
+    if (!parsed) {
+      ldout(s->cct, 0) << "failed to parse copy location" << dendl;
+      return -EINVAL; // XXX why not -ERR_INVALID_BUCKET_NAME or -ERR_BAD_URL?
+    }
+  }
+
+  const char * const sc = s->info.env->get("HTTP_X_AMZ_STORAGE_CLASS");
+  if (sc != nullptr) {
+    s->info.storage_class = sc;
+  }
+
+  return RGWHandler_REST::init(store, s, cio);
+}
+
+// Routes authorization: requests whose "Action" argument is
+// "AssumeRoleWithWebIdentity" go through the STS path, everything else
+// through the regular S3 path.
+int RGWHandler_REST_S3::authorize(const DoutPrefixProvider *dpp)
+{
+  const bool web_identity =
+      s->info.args.exists("Action") &&
+      s->info.args.get("Action") == "AssumeRoleWithWebIdentity";
+
+  if (!web_identity) {
+    return RGW_Auth_S3::authorize(dpp, store, auth_registry, s);
+  }
+  return RGW_Auth_STS::authorize(dpp, store, auth_registry, s);
+}
+
+// AWS signature version detected on a request.
+enum class AwsVersion {
+  UNKNOWN,  // no recognised signature information
+  V2,       // AWS signature version 2
+  V4        // AWS signature version 4
+};
+
+// Where a request carries its authentication data.
+enum class AwsRoute {
+  UNKNOWN,       // not determined
+  QUERY_STRING,  // in the query arguments
+  HEADERS        // in the Authorization header
+};
+
+// Determines the AWS signature version and where the credentials travel.
+//
+// A non-empty Authorization header selects the HEADERS route: a value
+// starting with AWS4_HMAC_SHA256_STR is V4, one starting with "AWS " is V2.
+// Otherwise the QUERY_STRING route is assumed: an "X-Amz-Algorithm" argument
+// equal to AWS4_HMAC_SHA256_STR is V4, a non-empty "AWSAccessKeyId" is V2.
+// The version is UNKNOWN when none of these match.
+static inline std::pair<AwsVersion, AwsRoute>
+discover_aws_flavour(const req_info& info)
+{
+  using rgw::auth::s3::AWS4_HMAC_SHA256_STR;
+
+  const char* const http_auth = info.env->get("HTTP_AUTHORIZATION");
+  const bool in_header = http_auth && http_auth[0];
+
+  if (in_header) {
+    AwsVersion version = AwsVersion::UNKNOWN;
+    if (strncmp(http_auth, AWS4_HMAC_SHA256_STR,
+                strlen(AWS4_HMAC_SHA256_STR)) == 0) {
+      version = AwsVersion::V4;
+    } else if (strncmp(http_auth, "AWS ", 4) == 0) {
+      version = AwsVersion::V2;
+    }
+    return std::make_pair(version, AwsRoute::HEADERS);
+  }
+
+  AwsVersion version = AwsVersion::UNKNOWN;
+  if (info.args.get("X-Amz-Algorithm") == AWS4_HMAC_SHA256_STR) {
+    version = AwsVersion::V4;
+  } else if (!info.args.get("AWSAccessKeyId").empty()) {
+    version = AwsVersion::V2;
+  }
+  return std::make_pair(version, AwsRoute::QUERY_STRING);
+}
+
+/*
+ * Authenticates a request by applying the main S3 authentication strategy
+ * from the registry.
+ *
+ * Returns -EPERM without attempting authentication when none of the rados,
+ * keystone or ldap backends is enabled; otherwise returns the strategy's
+ * result. On success the request owner is filled in from the authenticated
+ * user.
+ */
+int RGW_Auth_S3::authorize(const DoutPrefixProvider *dpp,
+                           RGWRados* const store,
+                           const rgw::auth::StrategyRegistry& auth_registry,
+                           struct req_state* const s)
+{
+  /* at least one backend must be enabled; warn and exit otherwise */
+  const bool any_backend =
+      store->ctx()->_conf->rgw_s3_auth_use_rados ||
+      store->ctx()->_conf->rgw_s3_auth_use_keystone ||
+      store->ctx()->_conf->rgw_s3_auth_use_ldap;
+  if (!any_backend) {
+    ldpp_dout(dpp, 0) << "WARNING: no authorization backend enabled! Users will never authenticate." << dendl;
+    return -EPERM;
+  }
+
+  const auto ret = rgw::auth::Strategy::apply(dpp, auth_registry.get_s3_main(), s);
+  if (ret != 0) {
+    return ret;
+  }
+
+  /* Populate the owner info. */
+  s->owner.set_id(s->user->user_id);
+  s->owner.set_name(s->user->display_name);
+  return ret;
+}
+
+/* Run the S3 header initialisation (passing RGW_FORMAT_JSON) and, if it
+ * succeeds, continue with the generic REST handler initialisation. */
+int RGWHandler_Auth_S3::init(RGWRados *store, struct req_state *state,
+                             rgw::io::BasicClient *cio)
+{
+  const int hdr_rc = RGWHandler_REST_S3::init_from_header(state,
+                                                          RGW_FORMAT_JSON,
+                                                          true);
+  if (hdr_rc < 0) {
+    return hdr_rc;
+  }
+  return RGWHandler_REST::init(store, state, cio);
+}
+
+/* Pick the REST handler for a request: website or plain S3 flavour, and
+ * within each the service / bucket / object variant depending on whether the
+ * URL names a bucket and an object. Returns NULL when header initialisation
+ * fails. The caller receives a freshly allocated handler. */
+RGWHandler_REST* RGWRESTMgr_S3::get_handler(struct req_state* const s,
+                                            const rgw::auth::StrategyRegistry& auth_registry,
+                                            const std::string& frontend_prefix)
+{
+  const bool website = enable_s3website && (s->prot_flags & RGW_REST_WEBSITE);
+
+  if (RGWHandler_REST_S3::init_from_header(
+        s, website ? RGW_FORMAT_HTML : RGW_FORMAT_XML, true) < 0) {
+    return NULL;
+  }
+
+  const bool no_bucket = s->init_state.url_bucket.empty();
+  const bool no_object = s->object.empty();
+
+  RGWHandler_REST* handler;
+  if (website && no_bucket) {
+    handler = new RGWHandler_REST_Service_S3Website(auth_registry);
+  } else if (website && no_object) {
+    handler = new RGWHandler_REST_Bucket_S3Website(auth_registry);
+  } else if (website) {
+    handler = new RGWHandler_REST_Obj_S3Website(auth_registry);
+  } else if (no_bucket) {
+    handler = new RGWHandler_REST_Service_S3(auth_registry, enable_sts, enable_iam, enable_pubsub);
+  } else if (no_object) {
+    handler = new RGWHandler_REST_Bucket_S3(auth_registry, enable_pubsub);
+  } else {
+    handler = new RGWHandler_REST_Obj_S3(auth_registry);
+  }
+
+  ldout(s->cct, 20) << __func__ << " handler=" << typeid(*handler).name()
+                    << dendl;
+  return handler;
+}
+
+/* Report whether the requested key, URL-decoded and with one trailing '/'
+ * dropped, exists as an object in the bucket. An empty key and a failed
+ * state lookup both yield false. */
+bool RGWHandler_REST_S3Website::web_dir() const {
+  std::string dir_key = url_decode(s->object.name);
+  if (dir_key.empty()) {
+    return false;
+  }
+  /* Keep a lone "/" untouched; strip the trailing slash otherwise. */
+  if (dir_key.size() > 1 && dir_key.back() == '/') {
+    dir_key.pop_back();
+  }
+
+  rgw_obj obj(s->bucket, dir_key);
+
+  RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+  obj_ctx.set_atomic(obj);
+  obj_ctx.set_prefetch_data(obj);
+
+  RGWObjState* state = nullptr;
+  const int rc = store->get_obj_state(&obj_ctx, s->bucket_info, obj, &state, false);
+  if (rc < 0) {
+    return false;
+  }
+  return state->exists;
+}
+
+/* Record the object name exactly as requested, then run the regular S3
+ * handler initialisation. */
+int RGWHandler_REST_S3Website::init(RGWRados *store, req_state *s,
+                                    rgw::io::BasicClient* cio)
+{
+  // retarget() later overwrites s->object with the result of
+  // get_effective_key(); error_handler() still needs the name the client
+  // asked for when it evaluates redirects, so keep a copy now.
+  original_object_name = s->object.name;
+
+  const int rc = RGWHandler_REST_S3::init(store, s, cio);
+  return rc;
+}
+
+/* Map a website request onto the object that should actually be served.
+ * Always hands back the same op through new_op. For website requests it
+ * loads the bucket info, resolves the effective key, and either signals a
+ * redirect (-ERR_WEBSITE_REDIRECT, with s->redirect filled in) or replaces
+ * s->object with the effective key and returns 0. */
+int RGWHandler_REST_S3Website::retarget(RGWOp* op, RGWOp** new_op) {
+  *new_op = op;
+  ldout(s->cct, 10) << __func__ << " Starting retarget" << dendl;
+
+  const bool website_request = (s->prot_flags & RGW_REST_WEBSITE) != 0;
+  if (!website_request) {
+    return 0;
+  }
+
+  const int info_rc = store->get_bucket_info(*s->sysobj_ctx, s->bucket_tenant,
+                                             s->bucket_name, s->bucket_info, NULL,
+                                             &s->bucket_attrs);
+  if (info_rc < 0) {
+    // TODO-FUTURE: if the bucket does not exist, maybe expose it here?
+    return -ERR_NO_SUCH_BUCKET;
+  }
+  if (!s->bucket_info.has_website) {
+    // TODO-FUTURE: if the bucket has no WebsiteConfig, expose it here
+    return -ERR_NO_SUCH_WEBSITE_CONFIGURATION;
+  }
+
+  rgw_obj_key effective_key;
+  if (!s->bucket_info.website_conf.get_effective_key(s->object.name,
+                                                     &effective_key.name,
+                                                     web_dir())) {
+    s->err.message = "The IndexDocument Suffix is not configurated or not well formed!";
+    ldout(s->cct, 5) << s->err.message << dendl;
+    return -EINVAL;
+  }
+
+  ldout(s->cct, 10) << "retarget get_effective_key " << s->object << " -> "
+                    << effective_key << dendl;
+
+  RGWBWRoutingRule rule;
+  if (s->bucket_info.website_conf.should_redirect(effective_key.name, 0, &rule)) {
+    const string hostname = s->info.env->get("HTTP_HOST", "");
+    const string protocol =
+      (s->info.env->get("SERVER_PORT_SECURE") ? "https" : "http");
+    int redirect_code = 0;
+    rule.apply_rule(protocol, hostname, s->object.name, &s->redirect,
+                    &redirect_code);
+    // Apply a custom HTTP response code when the rule supplies one
+    if (redirect_code > 0) {
+      s->err.http_ret = redirect_code;
+    }
+    ldout(s->cct, 10) << "retarget redirect code=" << redirect_code
+                      << " proto+host:" << protocol << "://" << hostname
+                      << " -> " << s->redirect << dendl;
+    return -ERR_WEBSITE_REDIRECT;
+  }
+
+  /*
+   * FIXME: if s->object != effective_key, drop op and create a new op to
+   * handle operation. Or remove this comment if it's not applicable anymore
+   */
+  s->object = effective_key;
+
+  return 0;
+}
+
+/* GET: build the object op with get_data set to true. */
+RGWOp* RGWHandler_REST_S3Website::op_get()
+{
+  const bool want_data = true;
+  return get_obj_op(want_data);
+}
+
+/* HEAD: build the object op with get_data set to false. */
+RGWOp* RGWHandler_REST_S3Website::op_head()
+{
+  const bool want_data = false;
+  return get_obj_op(want_data);
+}
+
+int RGWHandler_REST_S3Website::serve_errordoc(int http_ret, const string& errordoc_key) {
+ int ret = 0;
+ s->formatter->reset(); /* Try to throw it all away */
+
+ std::shared_ptr<RGWGetObj_ObjStore_S3Website> getop( static_cast<RGWGetObj_ObjStore_S3Website*>(op_get()));
+ if (getop.get() == NULL) {
+ return -1; // Trigger double error handler
+ }
+ getop->init(store, s, this);
+ getop->range_str = NULL;
+ getop->if_mod = NULL;
+ getop->if_unmod = NULL;
+ getop->if_match = NULL;
+ getop->if_nomatch = NULL;
+ s->object = errordoc_key;
+
+ ret = init_permissions(getop.get());
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, init_permissions ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ ret = read_permissions(getop.get());
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, read_permissions ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ if (http_ret) {
+ getop->set_custom_http_response(http_ret);
+ }
+
+ ret = getop->init_processing();
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, init_processing ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ ret = getop->verify_op_mask();
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, verify_op_mask ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ ret = getop->verify_permission();
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, verify_permission ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ ret = getop->verify_params();
+ if (ret < 0) {
+ ldout(s->cct, 20) << "serve_errordoc failed, verify_params ret=" << ret << dendl;
+ return -1; // Trigger double error handler
+ }
+
+ // No going back now
+ getop->pre_exec();
+ /*
+ * FIXME Missing headers:
+ * With a working errordoc, the s3 error fields are rendered as HTTP headers,
+ * x-amz-error-code: NoSuchKey
+ * x-amz-error-message: The specified key does not exist.
+ * x-amz-error-detail-Key: foo
+ */
+ getop->execute();
+ getop->complete();
+ return 0;
+
+}
+
+int RGWHandler_REST_S3Website::error_handler(int err_no,
+ string* error_content) {
+ int new_err_no = -1;
+ rgw_http_errors::const_iterator r = rgw_http_s3_errors.find(err_no > 0 ? err_no : -err_no);
+ int http_error_code = -1;
+
+ if (r != rgw_http_s3_errors.end()) {
+ http_error_code = r->second.first;
+ }
+ ldout(s->cct, 10) << "RGWHandler_REST_S3Website::error_handler err_no=" << err_no << " http_ret=" << http_error_code << dendl;
+
+ RGWBWRoutingRule rrule;
+ bool should_redirect =
+ s->bucket_info.website_conf.should_redirect(original_object_name,
+ http_error_code, &rrule);
+
+ if (should_redirect) {
+ const string& hostname = s->info.env->get("HTTP_HOST", "");
+ const string& protocol =
+ (s->info.env->get("SERVER_PORT_SECURE") ? "https" : "http");
+ int redirect_code = 0;
+ rrule.apply_rule(protocol, hostname, original_object_name,
+ &s->redirect, &redirect_code);
+ // Apply a custom HTTP response code
+ if (redirect_code > 0)
+ s->err.http_ret = redirect_code; // Apply a custom HTTP response code
+ ldout(s->cct, 10) << "error handler redirect code=" << redirect_code
+ << " proto+host:" << protocol << "://" << hostname
+ << " -> " << s->redirect << dendl;
+ return -ERR_WEBSITE_REDIRECT;
+ } else if (err_no == -ERR_WEBSITE_REDIRECT) {
+ // Do nothing here, this redirect will be handled in abort_early's ERR_WEBSITE_REDIRECT block
+ // Do NOT fire the ErrorDoc handler
+ } else if (!s->bucket_info.website_conf.error_doc.empty()) {
+ /* This serves an entire page!
+ On success, it will return zero, and no further content should be sent to the socket
+ On failure, we need the double-error handler
+ */
+ new_err_no = RGWHandler_REST_S3Website::serve_errordoc(http_error_code, s->bucket_info.website_conf.error_doc);
+ if (new_err_no != -1) {
+ err_no = new_err_no;
+ }
+ } else {
+ ldout(s->cct, 20) << "No special error handling today!" << dendl;
+ }
+
+ return err_no;
+}
+
+/* Allocate the website flavour of the GET-object op and forward get_data to
+ * it. The caller receives the newly allocated op. */
+RGWOp* RGWHandler_REST_Obj_S3Website::get_obj_op(bool get_data)
+{
+  /* In website mode GET/HEAD cannot run on the actual directory; the
+   * request must be converted to run on the suffix object instead. */
+  auto* const website_get = new RGWGetObj_ObjStore_S3Website;
+  website_get->set_get_data(get_data);
+  return website_get;
+}
+
+/* Allocate the website flavour of the GET-object op and forward get_data to
+ * it. The caller receives the newly allocated op. */
+RGWOp* RGWHandler_REST_Bucket_S3Website::get_obj_op(bool get_data)
+{
+  /* In website mode GET/HEAD cannot run on the actual directory; the
+   * request must be converted to run on the suffix object instead. */
+  auto* const website_get = new RGWGetObj_ObjStore_S3Website;
+  website_get->set_get_data(get_data);
+  return website_get;
+}
+
+/* Allocate the website flavour of the GET-object op and forward get_data to
+ * it. The caller receives the newly allocated op. */
+RGWOp* RGWHandler_REST_Service_S3Website::get_obj_op(bool get_data)
+{
+  /* In website mode GET/HEAD cannot run on the actual directory; the
+   * request must be converted to run on the suffix object instead. */
+  auto* const website_get = new RGWGetObj_ObjStore_S3Website;
+  website_get->set_get_data(get_data);
+  return website_get;
+}
+
+
+namespace rgw {
+namespace auth {
+namespace s3 {
+
+/* Completer factory for requests that need no completer: ignores the secret
+ * key and always yields a null completer. */
+static rgw::auth::Completer::cmplptr_t
+null_completer_factory(const boost::optional<std::string>& /* secret_key */)
+{
+  return nullptr;
+}
+
+
+/* Dispatch to the v2 or v4 credential extractor according to the flavour
+ * discovered for the request. Throws -EINVAL when the signature version is
+ * unknown. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSGeneralAbstractor::get_auth_data(const req_state* const s) const
+{
+  const std::pair<AwsVersion, AwsRoute> flavour = discover_aws_flavour(s->info);
+
+  switch (flavour.first) {
+  case AwsVersion::V2:
+    return get_auth_data_v2(s);
+  case AwsVersion::V4:
+    return get_auth_data_v4(s, flavour.second == AwsRoute::QUERY_STRING);
+  default:
+    /* FIXME(rzarzynski): handle anon user. */
+    throw -EINVAL;
+  }
+}
+
+/* Forward to the free rgw::auth::s3::get_v4_canonical_headers() with the
+ * trailing flag fixed to false (the Boto2 variant of this abstractor passes
+ * true instead). */
+boost::optional<std::string>
+AWSGeneralAbstractor::get_v4_canonical_headers(
+  const req_info& info,
+  const boost::string_view& signedheaders,
+  const bool using_qs) const
+{
+  const bool trailing_flag = false;
+  return rgw::auth::s3::get_v4_canonical_headers(info, signedheaders,
+                                                 using_qs, trailing_flag);
+}
+
+/* Extract everything needed to verify an AWS v4 signed request: parses the
+ * credentials, builds the canonical request hash and the string to sign, and
+ * chooses a completer factory according to how the payload is signed
+ * (none, single chunk, or streamed).
+ *
+ * using_qs tells whether the credentials came from the query string.
+ * Throws a negative error code (int) on failure: the parse_v4_credentials()
+ * result, -EPERM when canonical headers cannot be built, or
+ * -ERR_NOT_IMPLEMENTED for ops whose signed payload is not supported. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSGeneralAbstractor::get_auth_data_v4(const req_state* const s,
+ const bool using_qs) const
+{
+ /* All of these are filled in by parse_v4_credentials() below. */
+ boost::string_view access_key_id;
+ boost::string_view signed_hdrs;
+
+ boost::string_view date;
+ boost::string_view credential_scope;
+ boost::string_view client_signature;
+ boost::string_view session_token;
+
+ int ret = rgw::auth::s3::parse_v4_credentials(s->info,
+ access_key_id,
+ credential_scope,
+ signed_hdrs,
+ client_signature,
+ date,
+ session_token,
+ using_qs);
+ if (ret < 0) {
+ throw ret;
+ }
+
+ /* craft canonical headers */
+ boost::optional<std::string> canonical_headers = \
+ get_v4_canonical_headers(s->info, signed_hdrs, using_qs);
+ if (canonical_headers) {
+ using sanitize = rgw::crypt_sanitize::log_content;
+ ldout(s->cct, 10) << "canonical headers format = "
+ << sanitize{*canonical_headers} << dendl;
+ } else {
+ throw -EPERM;
+ }
+
+ /* STS and role / user-policy ops are handled differently below: their
+ * payload hash comes from a request argument and no completer is used. */
+ bool is_non_s3_op = false;
+ if (s->op_type == RGW_STS_GET_SESSION_TOKEN ||
+ s->op_type == RGW_STS_ASSUME_ROLE ||
+ s->op_type == RGW_STS_ASSUME_ROLE_WEB_IDENTITY ||
+ s->op_type == RGW_OP_CREATE_ROLE ||
+ s->op_type == RGW_OP_DELETE_ROLE ||
+ s->op_type == RGW_OP_GET_ROLE ||
+ s->op_type == RGW_OP_MODIFY_ROLE ||
+ s->op_type == RGW_OP_LIST_ROLES ||
+ s->op_type == RGW_OP_PUT_ROLE_POLICY ||
+ s->op_type == RGW_OP_GET_ROLE_POLICY ||
+ s->op_type == RGW_OP_LIST_ROLE_POLICIES ||
+ s->op_type == RGW_OP_DELETE_ROLE_POLICY ||
+ s->op_type == RGW_OP_PUT_USER_POLICY ||
+ s->op_type == RGW_OP_GET_USER_POLICY ||
+ s->op_type == RGW_OP_LIST_USER_POLICIES ||
+ s->op_type == RGW_OP_DELETE_USER_POLICY) {
+ is_non_s3_op = true;
+ }
+
+ const char* exp_payload_hash = nullptr;
+ /* Owns the storage exp_payload_hash points into for non-S3 ops, so it
+ * must stay alive for the rest of this function. */
+ string payload_hash;
+ if (is_non_s3_op) {
+ //For non s3 ops, the payload hash is taken from the "PayloadHash" argument
+ payload_hash = s->info.args.get("PayloadHash");
+ exp_payload_hash = payload_hash.c_str();
+ } else {
+ /* Get the expected hash. */
+ exp_payload_hash = rgw::auth::s3::get_v4_exp_payload_hash(s->info);
+ }
+
+ /* Craft canonical URI. Using std::move later so let it be non-const. */
+ auto canonical_uri = rgw::auth::s3::get_v4_canonical_uri(s->info);
+
+ /* Craft canonical query string. std::moving later so non-const here. */
+ auto canonical_qs = rgw::auth::s3::get_v4_canonical_qs(s->info, using_qs);
+
+ /* Craft canonical request. */
+ auto canonical_req_hash = \
+ rgw::auth::s3::get_v4_canon_req_hash(s->cct,
+ s->info.method,
+ std::move(canonical_uri),
+ std::move(canonical_qs),
+ std::move(*canonical_headers),
+ signed_hdrs,
+ exp_payload_hash);
+
+ auto string_to_sign = \
+ rgw::auth::s3::get_v4_string_to_sign(s->cct,
+ AWS4_HMAC_SHA256_STR,
+ date,
+ credential_scope,
+ std::move(canonical_req_hash));
+
+ /* Bind the credential scope now; the three remaining arguments are
+ * supplied by whoever invokes the factory. */
+ const auto sig_factory = std::bind(rgw::auth::s3::get_v4_signature,
+ credential_scope,
+ std::placeholders::_1,
+ std::placeholders::_2,
+ std::placeholders::_3);
+
+ /* Requests authenticated with the Query Parameters are treated as unsigned.
+ * From "Authenticating Requests: Using Query Parameters (AWS Signature
+ * Version 4)":
+ *
+ * You don't include a payload hash in the Canonical Request, because
+ * when you create a presigned URL, you don't know the payload content
+ * because the URL is used to upload an arbitrary payload. Instead, you
+ * use a constant string UNSIGNED-PAYLOAD.
+ *
+ * This means we have absolutely no business in spawning completer. Both
+ * aws4_auth_needs_complete and aws4_auth_streaming_mode are set to false
+ * by default. We don't need to change that. */
+ if (is_v4_payload_unsigned(exp_payload_hash) || is_v4_payload_empty(s) || is_non_s3_op) {
+ return {
+ access_key_id,
+ client_signature,
+ session_token,
+ std::move(string_to_sign),
+ sig_factory,
+ null_completer_factory
+ };
+ } else {
+ /* We're going to handle a signed payload. Be aware that even empty HTTP
+ * body (no payload) requires verification:
+ *
+ * The x-amz-content-sha256 header is required for all AWS Signature
+ * Version 4 requests. It provides a hash of the request payload. If
+ * there is no payload, you must provide the hash of an empty string. */
+ if (!is_v4_payload_streamed(exp_payload_hash)) {
+ ldout(s->cct, 10) << "delaying v4 auth" << dendl;
+
+ /* payload in a single chunk */
+ /* Whitelist of ops that may carry a signed single-chunk payload. The
+ * two RGW_STS_* cases cannot be reached here: those op types set
+ * is_non_s3_op and already returned above. */
+ switch (s->op_type)
+ {
+ case RGW_OP_CREATE_BUCKET:
+ case RGW_OP_PUT_OBJ:
+ case RGW_OP_PUT_ACLS:
+ case RGW_OP_PUT_CORS:
+ case RGW_OP_INIT_MULTIPART: // in case that Init Multipart uses CHUNK encoding
+ case RGW_OP_COMPLETE_MULTIPART:
+ case RGW_OP_SET_BUCKET_VERSIONING:
+ case RGW_OP_DELETE_MULTI_OBJ:
+ case RGW_OP_ADMIN_SET_METADATA:
+ case RGW_OP_SET_BUCKET_WEBSITE:
+ case RGW_OP_PUT_BUCKET_POLICY:
+ case RGW_OP_PUT_OBJ_TAGGING:
+ case RGW_OP_PUT_LC:
+ case RGW_OP_SET_REQUEST_PAYMENT:
+ case RGW_OP_PUBSUB_NOTIF_CREATE:
+ case RGW_OP_PUT_BUCKET_OBJ_LOCK:
+ case RGW_OP_PUT_OBJ_RETENTION:
+ case RGW_OP_PUT_OBJ_LEGAL_HOLD:
+ case RGW_STS_GET_SESSION_TOKEN:
+ case RGW_STS_ASSUME_ROLE:
+ break;
+ default:
+ dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl;
+ throw -ERR_NOT_IMPLEMENTED;
+ }
+
+ /* The completer is created later from the bound request state plus the
+ * single argument passed to the factory. */
+ const auto cmpl_factory = std::bind(AWSv4ComplSingle::create,
+ s,
+ std::placeholders::_1);
+ return {
+ access_key_id,
+ client_signature,
+ session_token,
+ std::move(string_to_sign),
+ sig_factory,
+ cmpl_factory
+ };
+ } else {
+ /* IMHO "streamed" doesn't fit too good here. I would prefer to call
+ * it "chunked" but let's be coherent with Amazon's terminology. */
+
+ dout(10) << "body content detected in multiple chunks" << dendl;
+
+ /* payload in multiple chunks */
+
+ /* Only object PUT is accepted with a streamed payload. */
+ switch(s->op_type)
+ {
+ case RGW_OP_PUT_OBJ:
+ break;
+ default:
+ dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED (streaming mode)" << dendl;
+ throw -ERR_NOT_IMPLEMENTED;
+ }
+
+ dout(10) << "aws4 seed signature ok... delaying v4 auth" << dendl;
+
+ /* In the case of streamed payload client sets the x-amz-content-sha256
+ * to "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" but uses "UNSIGNED-PAYLOAD"
+ * when constructing the Canonical Request. */
+
+ /* In the case of single-chunk upload client set the header's value is
+ * coherent with the one used for Canonical Request crafting. */
+
+ /* In the case of query string-based authentication there should be no
+ * x-amz-content-sha256 header and the value "UNSIGNED-PAYLOAD" is used
+ * for CanonReq. */
+ /* NOTE(review): date, credential_scope and client_signature are bound as
+ * string_views; presumably the request data they view outlives the
+ * factory -- confirm against parse_v4_credentials(). */
+ const auto cmpl_factory = std::bind(AWSv4ComplMulti::create,
+ s,
+ date,
+ credential_scope,
+ client_signature,
+ std::placeholders::_1);
+ return {
+ access_key_id,
+ client_signature,
+ session_token,
+ std::move(string_to_sign),
+ sig_factory,
+ cmpl_factory
+ };
+ }
+ }
+}
+
+
+/* Forward to the free rgw::auth::s3::get_v4_canonical_headers() with the
+ * trailing flag fixed to true (AWSGeneralAbstractor passes false instead). */
+boost::optional<std::string>
+AWSGeneralBoto2Abstractor::get_v4_canonical_headers(
+  const req_info& info,
+  const boost::string_view& signedheaders,
+  const bool using_qs) const
+{
+  const bool trailing_flag = true;
+  return rgw::auth::s3::get_v4_canonical_headers(info, signedheaders,
+                                                 using_qs, trailing_flag);
+}
+
+
+/* Extract the data needed to verify an AWS v2 signed request. Credentials
+ * come from the query string when there is no (or an empty) Authorization
+ * header, otherwise from that header.
+ *
+ * Throws a negative error code (int): -EPERM for a missing or elapsed
+ * "Expires", an empty security token, or a canonical header that cannot be
+ * built; -ERR_REQUEST_TIME_SKEWED when a header-signed request fails the
+ * time skew check. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSGeneralAbstractor::get_auth_data_v2(const req_state* const s) const
+{
+  boost::string_view access_key_id;
+  boost::string_view signature;
+  boost::string_view session_token;
+
+  const char* const http_auth = s->info.env->get("HTTP_AUTHORIZATION");
+  /* qsr: the request is authenticated through the query string. */
+  const bool qsr = (http_auth == nullptr || http_auth[0] == '\0');
+
+  if (qsr) {
+    /* Credentials are provided in query string. We also need to verify
+     * the "Expires" parameter now. */
+    access_key_id = s->info.args.get("AWSAccessKeyId");
+    signature = s->info.args.get("Signature");
+
+    const boost::string_view expires = s->info.args.get("Expires");
+    if (expires.empty()) {
+      throw -EPERM;
+    }
+
+    /* It looks we have the guarantee that expires is a null-terminated,
+     * and thus string_view::data() can be safely used. */
+    const time_t exp = atoll(expires.data());
+    time_t now;
+    time(&now);
+    if (now >= exp) {
+      throw -EPERM;
+    }
+
+    if (s->info.args.exists("X-Amz-Security-Token")) {
+      session_token = s->info.args.get("X-Amz-Security-Token");
+      if (session_token.empty()) {
+        throw -EPERM;
+      }
+    }
+  } else {
+    /* The "Authorization" HTTP header is being used: skip the "AWS "
+     * prefix and split "<access key>:<signature>" at the last colon. */
+    const boost::string_view auth_str(http_auth + strlen("AWS "));
+    const size_t colon = auth_str.rfind(':');
+    if (colon != boost::string_view::npos) {
+      access_key_id = auth_str.substr(0, colon);
+      signature = auth_str.substr(colon + 1);
+    }
+
+    if (s->info.env->exists("HTTP_X_AMZ_SECURITY_TOKEN")) {
+      session_token = s->info.env->get("HTTP_X_AMZ_SECURITY_TOKEN");
+      if (session_token.empty()) {
+        throw -EPERM;
+      }
+    }
+  }
+
+  /* Let's canonize the HTTP headers that are covered by the AWS auth v2. */
+  std::string string_to_sign;
+  utime_t header_time;
+  const bool canonized = rgw_create_s3_canonical_header(s->info, &header_time,
+                                                        string_to_sign, qsr);
+  if (!canonized) {
+    ldout(cct, 10) << "failed to create the canonized auth header\n"
+                   << rgw::crypt_sanitize::auth{s,string_to_sign} << dendl;
+    throw -EPERM;
+  }
+
+  ldout(cct, 10) << "string_to_sign:\n"
+                 << rgw::crypt_sanitize::auth{s,string_to_sign} << dendl;
+
+  /* The skew check applies only to header-signed requests. */
+  if (!qsr && !is_time_skew_ok(header_time)) {
+    throw -ERR_REQUEST_TIME_SKEWED;
+  }
+
+  return {
+    access_key_id,
+    signature,
+    session_token,
+    std::move(string_to_sign),
+    rgw::auth::s3::get_v2_signature,
+    null_completer_factory
+  };
+}
+
+
+/* Build v2 auth data for a browser upload from the credentials stored in
+ * s->auth.s3_postobj_creds; the encoded policy serves as the string to
+ * sign and no completer is used. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSBrowserUploadAbstractor::get_auth_data_v2(const req_state* const s) const
+{
+  const auto& post_creds = s->auth.s3_postobj_creds;
+
+  return {
+    post_creds.access_key,
+    post_creds.signature,
+    post_creds.x_amz_security_token,
+    post_creds.encoded_policy.to_str(),
+    rgw::auth::s3::get_v2_signature,
+    null_completer_factory
+  };
+}
+
+/* Build v4 auth data for a browser upload. The x-amz-credential value is
+ * split at its first '/': the part before it is the access key id, the rest
+ * is the credential scope. The encoded policy serves as the string to sign
+ * and no completer is used. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSBrowserUploadAbstractor::get_auth_data_v4(const req_state* const s) const
+{
+  const auto& post_creds = s->auth.s3_postobj_creds;
+  const boost::string_view credential = post_creds.x_amz_credential;
+
+  /* grab access key id */
+  const size_t slash = credential.find("/");
+  const boost::string_view access_key_id = credential.substr(0, slash);
+  dout(10) << "access key id = " << access_key_id << dendl;
+
+  /* grab credential scope */
+  const boost::string_view credential_scope = credential.substr(slash + 1);
+  dout(10) << "credential scope = " << credential_scope << dendl;
+
+  /* Bind the scope now; the remaining three arguments are supplied by
+   * whoever invokes the factory. */
+  const auto sig_factory = std::bind(rgw::auth::s3::get_v4_signature,
+                                     credential_scope,
+                                     std::placeholders::_1,
+                                     std::placeholders::_2,
+                                     std::placeholders::_3);
+
+  return {
+    access_key_id,
+    post_creds.signature,
+    post_creds.x_amz_security_token,
+    post_creds.encoded_policy.to_str(),
+    sig_factory,
+    null_completer_factory
+  };
+}
+
+/* Choose the v4 extractor when the posted x-amz-algorithm equals
+ * AWS4_HMAC_SHA256_STR, and the v2 extractor for anything else. */
+AWSEngine::VersionAbstractor::auth_data_t
+AWSBrowserUploadAbstractor::get_auth_data(const req_state* const s) const
+{
+  const bool use_v4 =
+    (s->auth.s3_postobj_creds.x_amz_algorithm == AWS4_HMAC_SHA256_STR);
+
+  if (!use_v4) {
+    ldout(s->cct, 0) << "Signature verification algorithm AWS v2" << dendl;
+    return get_auth_data_v2(s);
+  }
+
+  ldout(s->cct, 0) << "Signature verification algorithm AWS v4"
+                   << " (AWS4-HMAC-SHA256)" << dendl;
+  return get_auth_data_v4(s);
+}
+
+/* Obtain the auth data from the version abstractor and hand it to the
+ * detailed authenticate() overload. Denies with -EINVAL when the access key
+ * id or the client signature is empty. */
+AWSEngine::result_t
+AWSEngine::authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const
+{
+  /* Small reminder: an ver_abstractor is allowed to throw! */
+  const auto auth_data = ver_abstractor.get_auth_data(s);
+
+  const bool incomplete = auth_data.access_key_id.empty() ||
+                          auth_data.client_signature.empty();
+  if (incomplete) {
+    return result_t::deny(-EINVAL);
+  }
+
+  return authenticate(dpp,
+                      auth_data.access_key_id,
+                      auth_data.client_signature,
+                      auth_data.session_token,
+                      auth_data.string_to_sign,
+                      auth_data.signature_factory,
+                      auth_data.completer_factory,
+                      s);
+}
+
+} /* namespace s3 */
+} /* namespace auth */
+} /* namespace rgw */
+
+/* Static LDAP helper of LDAPEngine; created in LDAPEngine::init() and
+ * destroyed in LDAPEngine::shutdown(). */
+rgw::LDAPHelper* rgw::auth::s3::LDAPEngine::ldh = nullptr;
+/* Taken by init() and valid() around their accesses to ldh. */
+std::mutex rgw::auth::s3::LDAPEngine::mtx;
+
+/* Create the shared LDAP helper from the rgw_ldap_* settings, then init and
+ * bind it. Does nothing when LDAP auth is disabled, when no LDAP URI is
+ * configured, or when the helper already exists. */
+void rgw::auth::s3::LDAPEngine::init(CephContext* const cct)
+{
+  if (! cct->_conf->rgw_s3_auth_use_ldap ||
+      cct->_conf->rgw_ldap_uri.empty()) {
+    return;
+  }
+
+  /* ldh is a plain pointer, so it may only be inspected with mtx held: an
+   * unlocked pre-check would race with the assignment below. */
+  std::lock_guard<std::mutex> lck(mtx);
+  if (ldh) {
+    return;
+  }
+
+  const string& ldap_uri = cct->_conf->rgw_ldap_uri;
+  const string& ldap_binddn = cct->_conf->rgw_ldap_binddn;
+  const string& ldap_searchdn = cct->_conf->rgw_ldap_searchdn;
+  const string& ldap_searchfilter = cct->_conf->rgw_ldap_searchfilter;
+  const string& ldap_dnattr = cct->_conf->rgw_ldap_dnattr;
+  std::string ldap_bindpw = parse_rgw_ldap_bindpw(cct);
+
+  ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_bindpw,
+                            ldap_searchdn, ldap_searchfilter, ldap_dnattr);
+
+  ldh->init();
+  ldh->bind();
+}
+
+/* True once the shared LDAP helper has been created; checked under mtx. */
+bool rgw::auth::s3::LDAPEngine::valid() {
+  std::lock_guard<std::mutex> guard(mtx);
+  return ldh != nullptr;
+}
+
+/* Always returns a null ACL strategy for LDAP-authenticated users. */
+rgw::auth::RemoteApplier::acl_strategy_t
+rgw::auth::s3::LDAPEngine::get_acl_strategy() const
+{
+ // This relies on the assumption that the default ACL strategy in
+ // get_perms_from_aclspec will take care of it; no extra ACL spec is
+ // required.
+ return nullptr;
+}
+
+/* Describe an LDAP-authenticated user for the remote applier: the token id
+ * is used both as user id and as account name, with full-control
+ * permissions, plain-account privilege and type TYPE_LDAP. */
+rgw::auth::RemoteApplier::AuthInfo
+rgw::auth::s3::LDAPEngine::get_creds_info(const rgw::RGWToken& token) const noexcept
+{
+  using AuthInfo = rgw::auth::RemoteApplier::AuthInfo;
+
+  return AuthInfo {
+    rgw_user(token.id),
+    token.id,
+    RGW_PERM_FULL_CONTROL,
+    AuthInfo::acct_privilege_t::IS_PLAIN_ACCT,
+    TYPE_LDAP
+  };
+}
+
+/* Authenticate against LDAP. The access key id is expected to be a
+ * base64-encoded RGW token carrying the LDAP id and key. Denies when the
+ * token is invalid or LDAP rejects the credentials; otherwise grants with a
+ * remote applier. The signature, session token and string to sign are not
+ * used by this engine.
+ * NOTE(review): ldh is dereferenced without a null check -- presumably the
+ * engine is only used after a successful init(); confirm at the call site. */
+rgw::auth::Engine::result_t
+rgw::auth::s3::LDAPEngine::authenticate(
+  const DoutPrefixProvider* dpp,
+  const boost::string_view& access_key_id,
+  const boost::string_view& signature,
+  const boost::string_view& session_token,
+  const string_to_sign_t& string_to_sign,
+  const signature_factory_t&,
+  const completer_factory_t& completer_factory,
+  const req_state* const s) const
+{
+  /* boost filters and/or string_ref may throw on invalid input; fall back
+   * to an empty token, which fails the validity check below. */
+  rgw::RGWToken base64_token;
+  try {
+    base64_token = rgw::from_base64(access_key_id);
+  } catch (...) {
+    base64_token = std::string("");
+  }
+
+  const bool token_ok = base64_token.valid();
+  if (!token_ok) {
+    return result_t::deny();
+  }
+
+  //TODO: Uncomment, when we have a migration plan in place.
+  //Check if a user of type other than 'ldap' is already present, if yes, then
+  //return error.
+  /*RGWUserInfo user_info;
+  user_info.user_id = base64_token.id;
+  if (rgw_get_user_info_by_uid(store, user_info.user_id, user_info) >= 0) {
+    if (user_info.type != TYPE_LDAP) {
+      ldpp_dout(dpp, 10) << "ERROR: User id of type: " << user_info.type << " is already present" << dendl;
+      return nullptr;
+    }
+  }*/
+
+  const auto ldap_rc = ldh->auth(base64_token.id, base64_token.key);
+  if (ldap_rc != 0) {
+    return result_t::deny(-ERR_INVALID_ACCESS_KEY);
+  }
+
+  auto apl = apl_factory->create_apl_remote(cct, s, get_acl_strategy(),
+                                            get_creds_info(base64_token));
+  return result_t::grant(std::move(apl), completer_factory(boost::none));
+} /* rgw::auth::s3::LDAPEngine::authenticate */
+
+/* Destroy the shared LDAP helper, if any, and reset the pointer. */
+void rgw::auth::s3::LDAPEngine::shutdown() {
+  /* deleting a null pointer is a no-op, so no guard is needed */
+  delete ldh;
+  ldh = nullptr;
+}
+
+/* LocalEngine */
+/* Authenticate against locally stored users: look the user up by access
+ * key, recompute the signature with the stored secret and compare it with
+ * the client's. Denies on an unknown key or a mismatch; otherwise grants
+ * with a local applier. The session token is not used by this engine. */
+rgw::auth::Engine::result_t
+rgw::auth::s3::LocalEngine::authenticate(
+  const DoutPrefixProvider* dpp,
+  const boost::string_view& _access_key_id,
+  const boost::string_view& signature,
+  const boost::string_view& session_token,
+  const string_to_sign_t& string_to_sign,
+  const signature_factory_t& signature_factory,
+  const completer_factory_t& completer_factory,
+  const req_state* const s) const
+{
+  /* TODO(rzarzynski): we need to have string-view taking variant. */
+  const std::string access_key_id = _access_key_id.to_string();
+
+  /* get the user info */
+  RGWUserInfo user_info;
+  const int lookup_rc =
+    rgw_get_user_info_by_access_key(store, access_key_id, user_info);
+  if (lookup_rc < 0) {
+    ldpp_dout(dpp, 5) << "error reading user info, uid=" << access_key_id
+                      << " can't authenticate" << dendl;
+    return result_t::deny(-ERR_INVALID_ACCESS_KEY);
+  }
+  //TODO: Uncomment, when we have a migration plan in place.
+  /*else {
+    if (s->user->type != TYPE_RGW) {
+      ldpp_dout(dpp, 10) << "ERROR: User id of type: " << s->user->type
+                         << " is present" << dendl;
+      throw -EPERM;
+    }
+  }*/
+
+  const auto key_it = user_info.access_keys.find(access_key_id);
+  if (key_it == user_info.access_keys.end()) {
+    ldpp_dout(dpp, 0) << "ERROR: access key not encoded in user info" << dendl;
+    return result_t::deny(-EPERM);
+  }
+  const RGWAccessKey& k = key_it->second;
+
+  /* Recompute the signature with the stored secret and compare. */
+  const VersionAbstractor::server_signature_t server_signature =
+    signature_factory(cct, k.key, string_to_sign);
+  const auto compare = signature.compare(server_signature);
+
+  ldpp_dout(dpp, 15) << "string_to_sign="
+                     << rgw::crypt_sanitize::log_content{string_to_sign}
+                     << dendl;
+  ldpp_dout(dpp, 15) << "server signature=" << server_signature << dendl;
+  ldpp_dout(dpp, 15) << "client signature=" << signature << dendl;
+  ldpp_dout(dpp, 15) << "compare=" << compare << dendl;
+
+  if (compare != 0) {
+    return result_t::deny(-ERR_SIGNATURE_NO_MATCH);
+  }
+
+  auto apl = apl_factory->create_apl_local(cct, s, user_info, k.subuser, boost::none);
+  return result_t::grant(std::move(apl), completer_factory(k.key));
+}
+
+/* Describe the holder of an STS session token for the remote applier, using
+ * the user, account name, permission mask and account type stored in the
+ * token; is_admin selects admin vs. plain account privilege. */
+rgw::auth::RemoteApplier::AuthInfo
+rgw::auth::s3::STSEngine::get_creds_info(const STS::SessionToken& token) const noexcept
+{
+  using AuthInfo = rgw::auth::RemoteApplier::AuthInfo;
+  using acct_privilege_t = AuthInfo::acct_privilege_t;
+
+  const acct_privilege_t privilege = (token.is_admin)
+    ? acct_privilege_t::IS_ADMIN_ACCT
+    : acct_privilege_t::IS_PLAIN_ACCT;
+
+  return AuthInfo {
+    token.user,
+    token.acct_name,
+    token.perm_mask,
+    privilege,
+    token.acct_type
+  };
+}
+
+/* Decode an STS session token: base64-decode it, decrypt it with the AES
+ * key configured in rgw_sts_key, and decode the result into `token`.
+ *
+ * Returns 0 on success, -EPERM when decryption fails, and -EINVAL for every
+ * other failure (bad base64, no AES handler, invalid secret, no key handler,
+ * undecodable payload). */
+int
+rgw::auth::s3::STSEngine::get_session_token(const boost::string_view& session_token,
+                                            STS::SessionToken& token) const
+{
+  string decodedSessionToken;
+  try {
+    decodedSessionToken = rgw::from_base64(session_token);
+  } catch (...) {
+    ldout(cct, 0) << "ERROR: Invalid session token, not base64 encoded." << dendl;
+    return -EINVAL;
+  }
+
+  auto* cryptohandler = cct->get_crypto_handler(CEPH_CRYPTO_AES);
+  if (! cryptohandler) {
+    return -EINVAL;
+  }
+  string secret_s = cct->_conf->rgw_sts_key;
+  buffer::ptr secret(secret_s.c_str(), secret_s.length());
+  int ret = 0;
+  if (ret = cryptohandler->validate_secret(secret); ret < 0) {
+    ldout(cct, 0) << "ERROR: Invalid secret key" << dendl;
+    return -EINVAL;
+  }
+  string error;
+  /* get_key_handler() hands back a handler the caller must release; holding
+   * it in a unique_ptr frees it on every return path below. */
+  std::unique_ptr<CryptoKeyHandler> keyhandler(
+    cryptohandler->get_key_handler(secret, error));
+  if (! keyhandler) {
+    return -EINVAL;
+  }
+  error.clear();
+
+  buffer::list en_input, dec_output;
+  en_input = buffer::list::static_from_string(decodedSessionToken);
+
+  ret = keyhandler->decrypt(en_input, dec_output, &error);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: Decryption failed: " << error << dendl;
+    return -EPERM;
+  }
+
+  try {
+    dec_output.append('\0');
+    auto iter = dec_output.cbegin();
+    decode(token, iter);
+  } catch (const buffer::error& e) {
+    /* `error` is empty after a successful decrypt; report the exception. */
+    ldout(cct, 0) << "ERROR: decode SessionToken failed: " << e.what() << dendl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+/* Authenticate a request that carries an STS session token. Denies (so
+ * other engines may try) when no security token is present. Otherwise the
+ * token is decoded and checked in turn for: matching access key,
+ * expiration, and signature; any failure there rejects the request. On
+ * success the applier is chosen by the token's account type (remote for
+ * Keystone/LDAP, role, or local). */
+rgw::auth::Engine::result_t
+rgw::auth::s3::STSEngine::authenticate(
+  const DoutPrefixProvider* dpp,
+  const boost::string_view& _access_key_id,
+  const boost::string_view& signature,
+  const boost::string_view& session_token,
+  const string_to_sign_t& string_to_sign,
+  const signature_factory_t& signature_factory,
+  const completer_factory_t& completer_factory,
+  const req_state* const s) const
+{
+  const bool has_token = s->info.args.exists("X-Amz-Security-Token") ||
+                         s->info.env->exists("HTTP_X_AMZ_SECURITY_TOKEN");
+  if (!has_token) {
+    return result_t::deny();
+  }
+
+  STS::SessionToken token;
+  if (int ret = get_session_token(session_token, token); ret < 0) {
+    return result_t::reject(ret);
+  }
+
+  //Authentication
+  //The access key inside the token must be the one passed in by the client
+  if (token.access_key_id != _access_key_id) {
+    ldpp_dout(dpp, 0) << "Invalid access key" << dendl;
+    return result_t::reject(-EPERM);
+  }
+
+  //Check if the token has expired
+  if (! token.expiration.empty()) {
+    const std::string expiration = token.expiration;
+    const boost::optional<real_clock::time_point> exp =
+      ceph::from_iso_8601(expiration, false);
+    if (! exp) {
+      ldpp_dout(dpp, 0) << "ERROR: Invalid expiration: " << expiration << dendl;
+      return result_t::reject(-EPERM);
+    }
+    const real_clock::time_point now = real_clock::now();
+    if (now >= *exp) {
+      ldpp_dout(dpp, 0) << "ERROR: Token expired" << dendl;
+      return result_t::reject(-EPERM);
+    }
+  }
+
+  //Check for signature mismatch
+  const VersionAbstractor::server_signature_t server_signature =
+    signature_factory(cct, token.secret_access_key, string_to_sign);
+  const auto compare = signature.compare(server_signature);
+
+  ldpp_dout(dpp, 15) << "string_to_sign="
+                     << rgw::crypt_sanitize::log_content{string_to_sign}
+                     << dendl;
+  ldpp_dout(dpp, 15) << "server signature=" << server_signature << dendl;
+  ldpp_dout(dpp, 15) << "client signature=" << signature << dendl;
+  ldpp_dout(dpp, 15) << "compare=" << compare << dendl;
+
+  if (compare != 0) {
+    return result_t::reject(-ERR_SIGNATURE_NO_MATCH);
+  }
+
+  // Get all the authorization info
+  RGWUserInfo user_info;
+  rgw_user user_id;
+  vector<string> role_policies;
+  string role_name;
+  if (! token.roleId.empty()) {
+    RGWRole role(s->cct, store, token.roleId);
+    if (role.get_by_id() < 0) {
+      return result_t::deny(-EPERM);
+    }
+    /* Collect every role policy that can be read; unreadable ones are
+     * skipped. */
+    const vector<string> policy_names = role.get_role_policy_names();
+    for (const auto& name : policy_names) {
+      string perm_policy;
+      if (role.get_role_policy(name, perm_policy) == 0) {
+        role_policies.push_back(std::move(perm_policy));
+      }
+    }
+    if (! token.policy.empty()) {
+      role_policies.push_back(std::move(token.policy));
+    }
+    // This is mostly needed to assign the owner of a bucket during its creation
+    user_id = token.user;
+    role_name = role.get_name();
+  }
+
+  if (! token.user.empty() && token.acct_type != TYPE_ROLE) {
+    // get user info
+    const int ret = rgw_get_user_info_by_uid(store, token.user, user_info, NULL);
+    if (ret < 0) {
+      ldpp_dout(dpp, 5) << "ERROR: failed reading user info: uid=" << token.user << dendl;
+      return result_t::reject(-EPERM);
+    }
+  }
+
+  if (token.acct_type == TYPE_KEYSTONE || token.acct_type == TYPE_LDAP) {
+    auto apl = remote_apl_factory->create_apl_remote(cct, s, get_acl_strategy(),
+                                                     get_creds_info(token));
+    return result_t::grant(std::move(apl), completer_factory(boost::none));
+  }
+
+  if (token.acct_type == TYPE_ROLE) {
+    auto apl = role_apl_factory->create_apl_role(cct, s, role_name, user_id, role_policies);
+    return result_t::grant(std::move(apl), completer_factory(token.secret_access_key));
+  }
+
+  // This is for all local users of type TYPE_RGW or TYPE_NONE
+  string subuser;
+  auto apl = local_apl_factory->create_apl_local(cct, s, user_info, subuser, token.perm_mask);
+  return result_t::grant(std::move(apl), completer_factory(token.secret_access_key));
+}
+
+/* The anonymous engine applies to every OPTIONS request, and to any other
+ * request that has no usable Authorization header and no recognised signing
+ * parameters in its query string. */
+bool rgw::auth::s3::S3AnonymousEngine::is_applicable(
+  const req_state* s
+) const noexcept {
+  if (s->op == OP_OPTIONS) {
+    return true;
+  }
+
+  const std::pair<AwsVersion, AwsRoute> flavour = discover_aws_flavour(s->info);
+  return flavour.first == AwsVersion::UNKNOWN &&
+         flavour.second == AwsRoute::QUERY_STRING;
+}
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
new file mode 100644
index 00000000..5010c3be
--- /dev/null
+++ b/src/rgw/rgw_rest_s3.h
@@ -0,0 +1,1045 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_S3_H
+
+#define CEPH_RGW_REST_S3_H
+#define TIME_BUF_SIZE 128
+
+#include <mutex>
+
+#include <boost/utility/string_view.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "common/sstring.hh"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_http_errors.h"
+#include "rgw_acl_s3.h"
+#include "rgw_policy_s3.h"
+#include "rgw_lc_s3.h"
+#include "rgw_keystone.h"
+#include "rgw_rest_conn.h"
+#include "rgw_ldap.h"
+
+#include "rgw_token.h"
+#include "include/ceph_assert.h"
+
+#include "rgw_auth.h"
+#include "rgw_auth_filters.h"
+#include "rgw_sts.h"
+
+struct rgw_http_error {
+ int http_ret; // presumably the HTTP status to return -- TODO confirm
+ const char *s3_code; // presumably the matching S3 error code -- TODO confirm
+};
+
+void rgw_get_errno_s3(struct rgw_http_error *e, int err_no);
+
+class RGWGetObj_ObjStore_S3 : public RGWGetObj_ObjStore
+{
+protected:
+  // A custom error page served from an object is still a regular 200
+  // response; only the status line differs.
+  int custom_http_ret = 0;
+  std::map<std::string, std::string> crypt_http_responses;
+public:
+  RGWGetObj_ObjStore_S3() = default;
+  ~RGWGetObj_ObjStore_S3() override = default;
+
+  int get_params() override;
+  int send_response_data_error() override;
+  int send_response_data(bufferlist& bl, off_t ofs, off_t len) override;
+  void set_custom_http_response(int http_ret) { custom_http_ret = http_ret; }
+  int get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter>* filter,
+                         RGWGetObj_Filter* cb,
+                         bufferlist* manifest_bl) override;
+};
+
+class RGWGetObjTags_ObjStore_S3 : public RGWGetObjTags_ObjStore
+{
+  bufferlist tags_bl;
+public:
+  RGWGetObjTags_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWGetObjTags_ObjStore_S3() override {}
+  void send_response_data(bufferlist &bl) override;
+};
+
+class RGWPutObjTags_ObjStore_S3 : public RGWPutObjTags_ObjStore
+{
+public:
+  RGWPutObjTags_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWPutObjTags_ObjStore_S3() override {}
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWDeleteObjTags_ObjStore_S3 : public RGWDeleteObjTags
+{
+public:
+  ~RGWDeleteObjTags_ObjStore_S3() override = default;
+  void send_response() override;  // defined out of line
+};
+
+class RGWListBuckets_ObjStore_S3 : public RGWListBuckets_ObjStore {
+public:
+  RGWListBuckets_ObjStore_S3() = default;
+  ~RGWListBuckets_ObjStore_S3() override = default;
+  // Nothing is parsed here: the listing is simply made unlimited.
+  int get_params() override {
+    limit = -1;
+    return 0;
+  }
+  void send_response_begin(bool has_buckets) override;
+  void send_response_data(RGWUserBuckets& buckets) override;
+  void send_response_end() override;
+};
+
+class RGWGetUsage_ObjStore_S3 : public RGWGetUsage_ObjStore {
+public:
+  RGWGetUsage_ObjStore_S3() = default;
+  ~RGWGetUsage_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWListBucket_ObjStore_S3 : public RGWListBucket_ObjStore {
+protected:
+  bool objs_container {false};
+  bool encode_key {false};
+  int get_common_params();
+  void send_common_response();
+  void send_common_versioned_response();
+public:
+  // default_max is set to 1000 for this handler.
+  RGWListBucket_ObjStore_S3() {
+    default_max = 1000;
+  }
+  ~RGWListBucket_ObjStore_S3() override = default;
+  int get_params() override;
+  void send_response() override;
+  void send_versioned_response();
+};
+
+class RGWListBucket_ObjStore_S3v2 : public RGWListBucket_ObjStore_S3 {
+  bool fetchOwner {false};
+  // Given initialisers so that neither flag can be read while indeterminate.
+  bool start_after_exist {false};
+  bool continuation_token_exist {false};
+  string startAfter;
+  string continuation_token;
+public:
+  RGWListBucket_ObjStore_S3v2() = default;
+  ~RGWListBucket_ObjStore_S3v2() override = default;
+
+  int get_params() override;
+  void send_response() override;
+  void send_versioned_response();
+};
+
+class RGWGetBucketLogging_ObjStore_S3 : public RGWGetBucketLogging {
+public:
+  RGWGetBucketLogging_ObjStore_S3() = default;
+  ~RGWGetBucketLogging_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWGetBucketLocation_ObjStore_S3 : public RGWGetBucketLocation {
+public:
+  RGWGetBucketLocation_ObjStore_S3() = default;
+  ~RGWGetBucketLocation_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWGetBucketVersioning_ObjStore_S3 : public RGWGetBucketVersioning {
+public:
+  RGWGetBucketVersioning_ObjStore_S3() = default;
+  ~RGWGetBucketVersioning_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWSetBucketVersioning_ObjStore_S3 : public RGWSetBucketVersioning {
+public:
+  RGWSetBucketVersioning_ObjStore_S3() = default;
+  ~RGWSetBucketVersioning_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWGetBucketWebsite_ObjStore_S3 : public RGWGetBucketWebsite {
+public:
+  RGWGetBucketWebsite_ObjStore_S3() = default;
+  ~RGWGetBucketWebsite_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWSetBucketWebsite_ObjStore_S3 : public RGWSetBucketWebsite {
+public:
+  RGWSetBucketWebsite_ObjStore_S3() = default;
+  ~RGWSetBucketWebsite_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWDeleteBucketWebsite_ObjStore_S3 : public RGWDeleteBucketWebsite {
+public:
+  RGWDeleteBucketWebsite_ObjStore_S3() = default;
+  ~RGWDeleteBucketWebsite_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWStatBucket_ObjStore_S3 : public RGWStatBucket_ObjStore {
+public:
+  RGWStatBucket_ObjStore_S3() = default;
+  ~RGWStatBucket_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWCreateBucket_ObjStore_S3 : public RGWCreateBucket_ObjStore {
+public:
+  RGWCreateBucket_ObjStore_S3() = default;
+  ~RGWCreateBucket_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWDeleteBucket_ObjStore_S3 : public RGWDeleteBucket_ObjStore {
+public:
+  RGWDeleteBucket_ObjStore_S3() = default;
+  ~RGWDeleteBucket_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWPutObj_ObjStore_S3 : public RGWPutObj_ObjStore {
+  // NOTE(review): presumably encryption-related response headers -- confirm.
+  std::map<std::string, std::string> crypt_http_responses;
+
+public:
+  RGWPutObj_ObjStore_S3() = default;
+  ~RGWPutObj_ObjStore_S3() override = default;
+
+  int get_params() override;
+  int get_data(bufferlist& bl) override;
+  void send_response() override;
+
+  int get_encrypt_filter(std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+                         rgw::putobj::DataProcessor *cb) override;
+  int get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter>* filter,
+                         RGWGetObj_Filter* cb,
+                         map<string, bufferlist>& attrs,
+                         bufferlist* manifest_bl) override;
+};
+
+class RGWPostObj_ObjStore_S3 : public RGWPostObj_ObjStore {
+  parts_collection_t parts;
+  std::string filename;
+  std::string content_type;
+  RGWPolicyEnv env;
+  RGWPolicy post_policy;
+  map<string, string> crypt_http_responses;
+
+  // Remembered by verify_requester(); stays null until that call.
+  const rgw::auth::StrategyRegistry* auth_registry_ptr = nullptr;
+
+  int get_policy();
+  int get_tags();
+  void rebuild_key(string& key);
+
+  std::string get_current_filename() const override;
+  std::string get_current_content_type() const override;
+
+public:
+  RGWPostObj_ObjStore_S3() = default;
+  ~RGWPostObj_ObjStore_S3() override = default;
+
+  // Records the registry's address, then defers to the base implementation.
+  int verify_requester(const rgw::auth::StrategyRegistry& auth_registry) override {
+    auth_registry_ptr = &auth_registry;
+    return RGWPostObj_ObjStore::verify_requester(auth_registry);
+  }
+  int get_params() override;
+  int complete_get_params();
+  void send_response() override;
+  int get_data(ceph::bufferlist& bl, bool& again) override;
+  int get_encrypt_filter(std::unique_ptr<rgw::putobj::DataProcessor> *filter,
+                         rgw::putobj::DataProcessor *cb) override;
+};
+
+class RGWDeleteObj_ObjStore_S3 : public RGWDeleteObj_ObjStore {
+public:
+  RGWDeleteObj_ObjStore_S3() = default;
+  ~RGWDeleteObj_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWCopyObj_ObjStore_S3 : public RGWCopyObj_ObjStore {
+  bool sent_header {false};
+public:
+  RGWCopyObj_ObjStore_S3() = default;
+  ~RGWCopyObj_ObjStore_S3() override = default;
+
+  int init_dest_policy() override;
+  int get_params() override;
+  int check_storage_class(const rgw_placement_rule& src_placement);
+  void send_partial_response(off_t ofs) override;
+  void send_response() override;
+};
+
+class RGWGetACLs_ObjStore_S3 : public RGWGetACLs_ObjStore {
+public:
+  RGWGetACLs_ObjStore_S3() = default;
+  ~RGWGetACLs_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWPutACLs_ObjStore_S3 : public RGWPutACLs_ObjStore {
+public:
+  RGWPutACLs_ObjStore_S3() = default;
+  ~RGWPutACLs_ObjStore_S3() override = default;
+  // All three hooks below are defined out of line.
+  int get_policy_from_state(RGWRados *store, struct req_state *s, stringstream& ss) override;
+  void send_response() override;
+  int get_params() override;
+};
+
+class RGWGetLC_ObjStore_S3 : public RGWGetLC_ObjStore {
+protected:
+  RGWLifecycleConfiguration_S3 config;
+public:
+  RGWGetLC_ObjStore_S3() = default;
+  ~RGWGetLC_ObjStore_S3() override = default;
+  // Both the execution step and the response hook are specialised here.
+  void execute() override;
+  void send_response() override;
+};
+
+class RGWPutLC_ObjStore_S3 : public RGWPutLC_ObjStore {
+public:
+  RGWPutLC_ObjStore_S3() = default;
+  ~RGWPutLC_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWDeleteLC_ObjStore_S3 : public RGWDeleteLC_ObjStore {
+public:
+  RGWDeleteLC_ObjStore_S3() = default;
+  ~RGWDeleteLC_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWGetCORS_ObjStore_S3 : public RGWGetCORS_ObjStore {
+public:
+  RGWGetCORS_ObjStore_S3() = default;
+  ~RGWGetCORS_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWPutCORS_ObjStore_S3 : public RGWPutCORS_ObjStore {
+public:
+  RGWPutCORS_ObjStore_S3() = default;
+  ~RGWPutCORS_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWDeleteCORS_ObjStore_S3 : public RGWDeleteCORS_ObjStore {
+public:
+  RGWDeleteCORS_ObjStore_S3() = default;
+  ~RGWDeleteCORS_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWOptionsCORS_ObjStore_S3 : public RGWOptionsCORS_ObjStore {
+public:
+  RGWOptionsCORS_ObjStore_S3() = default;
+  ~RGWOptionsCORS_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWGetRequestPayment_ObjStore_S3 : public RGWGetRequestPayment {
+public:
+  RGWGetRequestPayment_ObjStore_S3() = default;
+  ~RGWGetRequestPayment_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWSetRequestPayment_ObjStore_S3 : public RGWSetRequestPayment {
+public:
+  RGWSetRequestPayment_ObjStore_S3() = default;
+  ~RGWSetRequestPayment_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWInitMultipart_ObjStore_S3 : public RGWInitMultipart_ObjStore {
+  // NOTE(review): presumably encryption-related response headers -- confirm.
+  std::map<std::string, std::string> crypt_http_responses;
+public:
+  RGWInitMultipart_ObjStore_S3() = default;
+  ~RGWInitMultipart_ObjStore_S3() override = default;
+
+  int get_params() override;
+  void send_response() override;
+  int prepare_encryption(map<string, bufferlist>& attrs) override;
+};
+
+class RGWCompleteMultipart_ObjStore_S3 : public RGWCompleteMultipart_ObjStore {
+public:
+  RGWCompleteMultipart_ObjStore_S3() = default;
+  ~RGWCompleteMultipart_ObjStore_S3() override = default;
+  // Parameter parsing and the response hook are specialised (out of line).
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWAbortMultipart_ObjStore_S3 : public RGWAbortMultipart_ObjStore {
+public:
+  RGWAbortMultipart_ObjStore_S3() = default;
+  ~RGWAbortMultipart_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWListMultipart_ObjStore_S3 : public RGWListMultipart_ObjStore {
+public:
+  RGWListMultipart_ObjStore_S3() = default;
+  ~RGWListMultipart_ObjStore_S3() override = default;
+  // Only the response hook is specialised here; it is defined out of line.
+  void send_response() override;
+};
+
+class RGWListBucketMultiparts_ObjStore_S3 : public RGWListBucketMultiparts_ObjStore {
+public:
+  // default_max is set to 1000 for this handler.
+  RGWListBucketMultiparts_ObjStore_S3() {
+    default_max = 1000;
+  }
+  ~RGWListBucketMultiparts_ObjStore_S3() override = default;
+  void send_response() override;
+};
+
+class RGWDeleteMultiObj_ObjStore_S3 : public RGWDeleteMultiObj_ObjStore {
+public:
+  RGWDeleteMultiObj_ObjStore_S3() = default;
+  ~RGWDeleteMultiObj_ObjStore_S3() override = default;
+  // All hooks below are defined out of line.
+  int get_params() override;
+  void send_status() override;
+  void begin_response() override;
+  void send_partial_response(rgw_obj_key& key, bool delete_marker,
+                             const string& marker_version_id, int ret) override;
+  void end_response() override;
+};
+
+class RGWPutBucketObjectLock_ObjStore_S3 : public RGWPutBucketObjectLock_ObjStore {
+public:
+  RGWPutBucketObjectLock_ObjStore_S3() = default;
+  ~RGWPutBucketObjectLock_ObjStore_S3() override = default;
+  void send_response() override;  // defined out of line
+};
+
+class RGWGetBucketObjectLock_ObjStore_S3 : public RGWGetBucketObjectLock_ObjStore {
+public:
+  RGWGetBucketObjectLock_ObjStore_S3() {}
+  ~RGWGetBucketObjectLock_ObjStore_S3() override {}  // override: checked against the base
+  void send_response() override;
+};
+
+class RGWPutObjRetention_ObjStore_S3 : public RGWPutObjRetention_ObjStore {
+public:
+  RGWPutObjRetention_ObjStore_S3() {}
+  ~RGWPutObjRetention_ObjStore_S3() override {}  // override: checked against the base
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWGetObjRetention_ObjStore_S3 : public RGWGetObjRetention_ObjStore {
+public:
+  RGWGetObjRetention_ObjStore_S3() {}
+  ~RGWGetObjRetention_ObjStore_S3() override {}  // override: checked against the base
+  void send_response() override;
+};
+
+class RGWPutObjLegalHold_ObjStore_S3 : public RGWPutObjLegalHold_ObjStore {
+public:
+  RGWPutObjLegalHold_ObjStore_S3() {}
+  ~RGWPutObjLegalHold_ObjStore_S3() override {}  // override: checked against the base
+  void send_response() override;
+};
+
+class RGWGetObjLegalHold_ObjStore_S3 : public RGWGetObjLegalHold_ObjStore {
+public:
+  RGWGetObjLegalHold_ObjStore_S3() {}
+  ~RGWGetObjLegalHold_ObjStore_S3() override {}  // override: checked against the base
+  void send_response() override;
+};
+
+class RGWGetObjLayout_ObjStore_S3 : public RGWGetObjLayout {
+public:
+  RGWGetObjLayout_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWGetObjLayout_ObjStore_S3() override {}
+  void send_response() override;
+};
+
+class RGWConfigBucketMetaSearch_ObjStore_S3 : public RGWConfigBucketMetaSearch {
+public:
+  RGWConfigBucketMetaSearch_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWConfigBucketMetaSearch_ObjStore_S3() override {}
+  int get_params() override;
+  void send_response() override;
+};
+
+class RGWGetBucketMetaSearch_ObjStore_S3 : public RGWGetBucketMetaSearch {
+public:
+  RGWGetBucketMetaSearch_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWGetBucketMetaSearch_ObjStore_S3() override {}
+  void send_response() override;
+};
+
+class RGWDelBucketMetaSearch_ObjStore_S3 : public RGWDelBucketMetaSearch {
+public:
+  RGWDelBucketMetaSearch_ObjStore_S3() {}
+  // override lets the compiler verify that the base destructor is virtual.
+  ~RGWDelBucketMetaSearch_ObjStore_S3() override {}
+  void send_response() override;
+};
+
+class RGW_Auth_S3 { // exposes a single static entry point
+public:
+ static int authorize(const DoutPrefixProvider *dpp,
+ RGWRados *store,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ struct req_state *s); // called by RGWHandler_Auth_S3::authorize()
+};
+
+class RGWHandler_Auth_S3 : public RGWHandler_REST {
+ friend class RGWRESTMgr_S3;
+ // Stored as a reference, so the registry has to outlive this handler.
+ const rgw::auth::StrategyRegistry& auth_registry;
+
+public:
+ explicit RGWHandler_Auth_S3(const rgw::auth::StrategyRegistry& auth_registry)
+ : RGWHandler_REST(),
+ auth_registry(auth_registry) {
+ }
+ ~RGWHandler_Auth_S3() override = default;
+ // NOTE(review): validate_object_name() also names its parameter "bucket".
+ static int validate_bucket_name(const string& bucket);
+ static int validate_object_name(const string& bucket);
+
+ int init(RGWRados *store,
+ struct req_state *s,
+ rgw::io::BasicClient *cio) override;
+ int authorize(const DoutPrefixProvider *dpp) override { // delegates to RGW_Auth_S3
+ return RGW_Auth_S3::authorize(dpp, store, auth_registry, s);
+ }
+ int postauth_init() override { return 0; } // always returns 0
+};
+
+class RGWHandler_REST_S3 : public RGWHandler_REST {
+ friend class RGWRESTMgr_S3;
+protected:
+ const rgw::auth::StrategyRegistry& auth_registry;
+public:
+ static int init_from_header(struct req_state *s, int default_formatter, bool configurable_format);
+ // Only a reference is stored; the registry must outlive the handler.
+ explicit RGWHandler_REST_S3(const rgw::auth::StrategyRegistry& auth_registry)
+ : RGWHandler_REST(),
+ auth_registry(auth_registry) {
+ }
+ ~RGWHandler_REST_S3() override = default;
+ // The three overrides below are defined out of line.
+ int init(RGWRados *store,
+ struct req_state *s,
+ rgw::io::BasicClient *cio) override;
+ int authorize(const DoutPrefixProvider *dpp) override;
+ int postauth_init() override;
+};
+
+class RGWHandler_REST_Service_S3 : public RGWHandler_REST_S3 {
+protected:
+  const bool isSTSenabled;
+  bool isIAMenabled;  // NOTE(review): the only non-const flag here -- intended?
+  const bool isPSenabled;
+  // True when the request's query arguments contain "usage"; read-only.
+  bool is_usage_op() const { return s->info.args.exists("usage"); }
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_post() override;
+public:
+  RGWHandler_REST_Service_S3(const rgw::auth::StrategyRegistry& auth_registry,
+                             bool _isSTSenabled, bool _isIAMenabled, bool _isPSenabled) :
+    RGWHandler_REST_S3(auth_registry), isSTSenabled(_isSTSenabled),
+    isIAMenabled(_isIAMenabled), isPSenabled(_isPSenabled) {}
+  ~RGWHandler_REST_Service_S3() override = default;
+};
+
+class RGWHandler_REST_Bucket_S3 : public RGWHandler_REST_S3 {
+  const bool enable_pubsub;
+protected:
+  // Sub-resource predicates: each one reports whether the request's query
+  // arguments contain the named key. None of them mutates the handler.
+  bool is_acl_op() const {
+    return s->info.args.exists("acl");
+  }
+  bool is_cors_op() const {
+    return s->info.args.exists("cors");
+  }
+  bool is_lc_op() const {
+    return s->info.args.exists("lifecycle");
+  }
+  bool is_obj_update_op() override {
+    return is_acl_op() || is_cors_op();
+  }
+  bool is_request_payment_op() const {
+    return s->info.args.exists("requestPayment");
+  }
+  bool is_policy_op() const {
+    return s->info.args.exists("policy");
+  }
+  bool is_object_lock_op() const {
+    return s->info.args.exists("object-lock");
+  }
+  // "notification" is only honoured when pubsub support is enabled.
+  bool is_notification_op() const {
+    return enable_pubsub && s->info.args.exists("notification");
+  }
+  RGWOp *get_obj_op(bool get_data);
+
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_put() override;
+  RGWOp *op_delete() override;
+  RGWOp *op_post() override;
+  RGWOp *op_options() override;
+public:
+  RGWHandler_REST_Bucket_S3(const rgw::auth::StrategyRegistry& auth_registry, bool _enable_pubsub) :
+    RGWHandler_REST_S3(auth_registry), enable_pubsub(_enable_pubsub) {}
+  ~RGWHandler_REST_Bucket_S3() override = default;
+};
+
+class RGWHandler_REST_Obj_S3 : public RGWHandler_REST_S3 {
+protected:
+  // Read-only predicates on the request's query arguments.
+  bool is_acl_op() const {
+    return s->info.args.exists("acl");
+  }
+  bool is_tagging_op() const {
+    return s->info.args.exists("tagging");
+  }
+  bool is_obj_retention_op() const {
+    return s->info.args.exists("retention");
+  }
+  bool is_obj_legal_hold_op() const {
+    return s->info.args.exists("legal-hold");
+  }
+  // An update op is any of the four sub-resources above.
+  bool is_obj_update_op() override {
+    return is_acl_op() || is_tagging_op() || is_obj_retention_op() || is_obj_legal_hold_op();
+  }
+  RGWOp *get_obj_op(bool get_data);
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_put() override;
+  RGWOp *op_delete() override;
+  RGWOp *op_post() override;
+  RGWOp *op_options() override;
+public:
+  using RGWHandler_REST_S3::RGWHandler_REST_S3;
+  ~RGWHandler_REST_Obj_S3() override = default;
+};
+
+class RGWRESTMgr_S3 : public RGWRESTMgr {
+  // Feature switches, initialised from the constructor (all default to false).
+  bool enable_s3website;
+  bool enable_sts;
+  bool enable_iam;
+  const bool enable_pubsub;
+public:
+  explicit RGWRESTMgr_S3(bool enable_s3website = false, bool enable_sts = false, bool enable_iam = false, bool _enable_pubsub = false)
+    : enable_s3website(enable_s3website),
+      enable_sts(enable_sts),
+      enable_iam(enable_iam),
+      enable_pubsub(_enable_pubsub) {
+  }
+
+  ~RGWRESTMgr_S3() override = default;
+
+  RGWHandler_REST *get_handler(struct req_state* s,
+                               const rgw::auth::StrategyRegistry& auth_registry,
+                               const std::string& frontend_prefix) override;
+};
+
+class RGWHandler_REST_Obj_S3Website;
+
+// Returns true when |bucket| parses as an IPv6 literal or has the shape of a
+// dotted quad: digits separated by exactly three periods, none leading or
+// doubled. Octet ranges are not checked, and a trailing period is accepted.
+static inline bool looks_like_ip_address(const char *bucket)
+{
+  struct in6_addr a;
+  if (inet_pton(AF_INET6, bucket, static_cast<void*>(&a)) == 1) {
+    return true;
+  }
+  int num_periods = 0;
+  bool expect_period = false;  // set once a digit has been seen in this group
+  for (const char *b = bucket; *b; ++b) {
+    if (*b == '.') {
+      if (!expect_period || ++num_periods > 3) {
+        return false;
+      }
+      expect_period = false;
+    } else if (isdigit(static_cast<unsigned char>(*b))) {
+      // cast: passing a negative char to isdigit() is undefined behaviour
+      expect_period = true;
+    } else {
+      return false;
+    }
+  }
+  return (num_periods == 3);
+}
+
+// An S3 object name is accepted (0 is returned) when it is at most 1024
+// bytes long and check_utf8() returns zero for it.
+static inline int valid_s3_object_name(const string& name) {
+  const bool too_long = name.size() > 1024;
+  if (too_long || check_utf8(name.c_str(), name.size())) {
+    return -ERR_INVALID_OBJECT_NAME;
+  }
+  return 0;
+}
+
+// Enforces Amazon's requirements (not the recommendations) for bucket names:
+// 3..255 characters drawn from letters, digits, '.', '-' and '_', not shaped
+// like an IP address. The first character must be a letter or digit; with
+// |relaxed| set it may also be '_', '.' or '-'.
+// Returns 0 when the name is acceptable, -ERR_INVALID_BUCKET_NAME otherwise.
+static inline int valid_s3_bucket_name(const string& name, bool relaxed=false)
+{
+  // <cctype> classifiers are undefined for negative char values, so every
+  // byte is widened through unsigned char before being classified.
+  const auto is_alnum = [](char c) {
+    const auto uc = static_cast<unsigned char>(c);
+    return isalpha(uc) || isdigit(uc);
+  };
+
+  const size_t len = name.size();
+  if (len < 3 || len > 255) {
+    // Name too short or too long
+    return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  const char first = name[0];
+  if (!is_alnum(first)) {
+    if (!relaxed || !(first == '_' || first == '.' || first == '-'))
+      return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  // Walk the whole string rather than stopping at the first NUL, so that an
+  // embedded '\0' is rejected instead of hiding the bytes that follow it.
+  for (const char c : name) {
+    if (!(is_alnum(c) || c == '.' || c == '-' || c == '_'))
+      return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  if (looks_like_ip_address(name.c_str()))
+    return -ERR_INVALID_BUCKET_NAME;
+
+  return 0;
+}
+
+
+namespace rgw {
+namespace auth {
+namespace s3 {
+
+class AWSEngine : public rgw::auth::Engine {
+public:
+ class VersionAbstractor {
+ static constexpr size_t DIGEST_SIZE_V2 = CEPH_CRYPTO_HMACSHA1_DIGESTSIZE;
+ static constexpr size_t DIGEST_SIZE_V4 = CEPH_CRYPTO_HMACSHA256_DIGESTSIZE;
+
+ /* Knowing the signature max size allows us to employ the sstring, and thus
+ * avoid dynamic allocations. The x2 multiplier covers the text-encoded
+ * digest (NOTE(review): x2 matches hex; base64 needs less -- confirm). */
+ static constexpr size_t SIGNATURE_MAX_SIZE = \
+ std::max(DIGEST_SIZE_V2, DIGEST_SIZE_V4) * 2 + sizeof('\0');
+
+ public:
+ virtual ~VersionAbstractor() {};
+
+ using access_key_id_t = boost::string_view;
+ using client_signature_t = boost::string_view;
+ using session_token_t = boost::string_view;
+ using server_signature_t = basic_sstring<char, uint16_t, SIGNATURE_MAX_SIZE>;
+ using string_to_sign_t = std::string;
+
+ /* Transformation for crafting the AWS signature at server side which is
+ * used later to compare with the user-provided one. The methodology for
+ * doing that depends on AWS auth version. */
+ using signature_factory_t = \
+ std::function<server_signature_t(CephContext* cct,
+ const std::string& secret_key,
+ const string_to_sign_t& string_to_sign)>;
+
+ /* Return an instance of Completer for verifying the payload's fingerprint
+ * if necessary. Otherwise caller gets nullptr. Caller may provide secret
+ * key. */
+ using completer_factory_t = \
+ std::function<rgw::auth::Completer::cmplptr_t(
+ const boost::optional<std::string>& secret_key)>;
+
+ struct auth_data_t {
+ access_key_id_t access_key_id;
+ client_signature_t client_signature;
+ session_token_t session_token;
+ string_to_sign_t string_to_sign;
+ signature_factory_t signature_factory;
+ completer_factory_t completer_factory;
+ };
+
+ virtual auth_data_t get_auth_data(const req_state* s) const = 0;
+ };
+
+protected:
+ CephContext* cct;
+ const VersionAbstractor& ver_abstractor;
+ /* ver_abstractor is kept as a reference, so it must outlive the engine. */
+ AWSEngine(CephContext* const cct, const VersionAbstractor& ver_abstractor)
+ : cct(cct),
+ ver_abstractor(ver_abstractor) {
+ }
+
+ using result_t = rgw::auth::Engine::result_t;
+ using string_to_sign_t = VersionAbstractor::string_to_sign_t;
+ using signature_factory_t = VersionAbstractor::signature_factory_t;
+ using completer_factory_t = VersionAbstractor::completer_factory_t;
+
+ /* TODO(rzarzynski): clean up. We have too many input parameters here. Also
+ * the signature of VersionAbstractor::get_auth_data() is too complicated.
+ * Replace these things with a simple, dedicated structure. */
+ virtual result_t authenticate(const DoutPrefixProvider* dpp,
+ const boost::string_view& access_key_id,
+ const boost::string_view& signature,
+ const boost::string_view& session_token,
+ const string_to_sign_t& string_to_sign,
+ const signature_factory_t& signature_factory,
+ const completer_factory_t& completer_factory,
+ const req_state* s) const = 0;
+ /* Public entry point; final, so engines implement only the overload above. */
+public:
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const final;
+};
+
+
+class AWSGeneralAbstractor : public AWSEngine::VersionAbstractor {
+ CephContext* const cct;
+ /* Private but virtual: AWSGeneralBoto2Abstractor supplies its own version. */
+ virtual boost::optional<std::string>
+ get_v4_canonical_headers(const req_info& info,
+ const boost::string_view& signedheaders,
+ const bool using_qs) const;
+ /* NOTE(review): presumably one helper per AWS signature version -- confirm. */
+ auth_data_t get_auth_data_v2(const req_state* s) const;
+ auth_data_t get_auth_data_v4(const req_state* s, const bool using_qs) const;
+
+public:
+ explicit AWSGeneralAbstractor(CephContext* const cct)
+ : cct(cct) {
+ }
+
+ auth_data_t get_auth_data(const req_state* s) const override;
+};
+
+class AWSGeneralBoto2Abstractor : public AWSGeneralAbstractor {
+ boost::optional<std::string>
+ get_v4_canonical_headers(const req_info& info,
+ const boost::string_view& signedheaders,
+ const bool using_qs) const override;
+ /* Constructors are inherited unchanged from AWSGeneralAbstractor. */
+public:
+ using AWSGeneralAbstractor::AWSGeneralAbstractor;
+};
+
+class AWSBrowserUploadAbstractor : public AWSEngine::VersionAbstractor {
+  /* Copies the first bl.length() bytes at bl.c_str() into a std::string. */
+  static std::string to_string(ceph::bufferlist bl) {
+    const auto len = static_cast<std::string::size_type>(bl.length());
+    return std::string(bl.c_str(), len);
+  }
+  auth_data_t get_auth_data_v2(const req_state* s) const;
+  auth_data_t get_auth_data_v4(const req_state* s) const;
+
+public:
+  /* The context argument is accepted but not used. */
+  explicit AWSBrowserUploadAbstractor(CephContext*) {
+  }
+  auth_data_t get_auth_data(const req_state* s) const override;
+};
+
+
+class LDAPEngine : public AWSEngine {
+ static rgw::LDAPHelper* ldh; // static: one pointer for the whole class
+ static std::mutex mtx; // static: one mutex for the whole class
+
+ static void init(CephContext* const cct);
+
+ using acl_strategy_t = rgw::auth::RemoteApplier::acl_strategy_t;
+ using auth_info_t = rgw::auth::RemoteApplier::AuthInfo;
+ using result_t = rgw::auth::Engine::result_t;
+
+protected:
+ RGWRados* const store;
+ const rgw::auth::RemoteApplier::Factory* const apl_factory;
+
+ acl_strategy_t get_acl_strategy() const;
+ auth_info_t get_creds_info(const rgw::RGWToken& token) const noexcept;
+ /* Override of the hook declared pure virtual in AWSEngine. */
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const boost::string_view& access_key_id,
+ const boost::string_view& signature,
+ const boost::string_view& session_token,
+ const string_to_sign_t& string_to_sign,
+ const signature_factory_t&,
+ const completer_factory_t& completer_factory,
+ const req_state* s) const override;
+public:
+ LDAPEngine(CephContext* const cct,
+ RGWRados* const store,
+ const VersionAbstractor& ver_abstractor,
+ const rgw::auth::RemoteApplier::Factory* const apl_factory)
+ : AWSEngine(cct, ver_abstractor),
+ store(store),
+ apl_factory(apl_factory) {
+ init(cct); // every construction calls the static init()
+ }
+ /* Keep the public AWSEngine::authenticate() overload reachable here. */
+ using AWSEngine::authenticate;
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::LDAPEngine";
+ }
+
+ static bool valid();
+ static void shutdown();
+};
+
+class LocalEngine : public AWSEngine {
+ RGWRados* const store;
+ const rgw::auth::LocalApplier::Factory* const apl_factory;
+ /* Private override of the hook declared pure virtual in AWSEngine. */
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const boost::string_view& access_key_id,
+ const boost::string_view& signature,
+ const boost::string_view& session_token,
+ const string_to_sign_t& string_to_sign,
+ const signature_factory_t& signature_factory,
+ const completer_factory_t& completer_factory,
+ const req_state* s) const override;
+public:
+ LocalEngine(CephContext* const cct,
+ RGWRados* const store,
+ const VersionAbstractor& ver_abstractor,
+ const rgw::auth::LocalApplier::Factory* const apl_factory)
+ : AWSEngine(cct, ver_abstractor),
+ store(store),
+ apl_factory(apl_factory) {
+ }
+ /* Keep the public AWSEngine::authenticate() overload reachable here. */
+ using AWSEngine::authenticate;
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::LocalEngine";
+ }
+};
+
+class STSEngine : public AWSEngine {
+  RGWRados* const store;
+  const rgw::auth::LocalApplier::Factory* const local_apl_factory;
+  const rgw::auth::RemoteApplier::Factory* const remote_apl_factory;
+  const rgw::auth::RoleApplier::Factory* const role_apl_factory;
+
+  using acl_strategy_t = rgw::auth::RemoteApplier::acl_strategy_t;
+  using auth_info_t = rgw::auth::RemoteApplier::AuthInfo;
+
+  /* This engine supplies no ACL strategy. */
+  acl_strategy_t get_acl_strategy() const { return nullptr; }
+  auth_info_t get_creds_info(const STS::SessionToken& token) const noexcept;
+  int get_session_token(const boost::string_view& session_token,
+                        STS::SessionToken& token) const;
+
+  result_t authenticate(const DoutPrefixProvider* dpp,
+                        const boost::string_view& access_key_id,
+                        const boost::string_view& signature,
+                        const boost::string_view& session_token,
+                        const string_to_sign_t& string_to_sign,
+                        const signature_factory_t& signature_factory,
+                        const completer_factory_t& completer_factory,
+                        const req_state* s) const override;
+public:
+  STSEngine(CephContext* const cct,
+            RGWRados* const store,
+            const VersionAbstractor& ver_abstractor,
+            const rgw::auth::LocalApplier::Factory* const local_apl_factory,
+            const rgw::auth::RemoteApplier::Factory* const remote_apl_factory,
+            const rgw::auth::RoleApplier::Factory* const role_apl_factory)
+    : AWSEngine(cct, ver_abstractor),
+      store(store),
+      local_apl_factory(local_apl_factory),
+      remote_apl_factory(remote_apl_factory),
+      role_apl_factory(role_apl_factory) {
+  }
+  /* Keep the public AWSEngine::authenticate() overload reachable here. */
+  using AWSEngine::authenticate;
+
+  const char* get_name() const noexcept override {
+    return "rgw::auth::s3::STSEngine";
+  }
+};
+
+class S3AnonymousEngine : public rgw::auth::AnonymousEngine {
+ bool is_applicable(const req_state* s) const noexcept override;
+ /* is_applicable() is a private override; it is defined out of line. */
+public:
+ /* Let's reuse the parent class' constructor. */
+ using rgw::auth::AnonymousEngine::AnonymousEngine;
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::s3::S3AnonymousEngine";
+ }
+};
+
+
+} /* namespace s3 */
+} /* namespace auth */
+} /* namespace rgw */
+
+
+#endif /* CEPH_RGW_REST_S3_H */
diff --git a/src/rgw/rgw_rest_s3website.h b/src/rgw/rgw_rest_s3website.h
new file mode 100644
index 00000000..209ef964
--- /dev/null
+++ b/src/rgw/rgw_rest_s3website.h
@@ -0,0 +1,103 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson@dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_REST_S3WEBSITE_H
+#define CEPH_RGW_REST_S3WEBSITE_H
+
+#include "rgw_rest_s3.h"
+
+class RGWHandler_REST_S3Website : public RGWHandler_REST_S3 {
+  std::string original_object_name; // object name before retarget()
+  bool web_dir() const;
+protected:
+  int retarget(RGWOp *op, RGWOp **new_op) override;
+  // Hook called by op_get()/op_head(), which are implemented in this class
+  // and need no further overriding. TODO (kept from the original): consider
+  // making sure that every subclass overrides this hook.
+  virtual RGWOp *get_obj_op(bool get_data) { return nullptr; }
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  // Only GET and HEAD are allowed; every other verb yields no op.
+  RGWOp *op_put() override { return nullptr; }
+  RGWOp *op_delete() override { return nullptr; }
+  RGWOp *op_post() override { return nullptr; }
+  RGWOp *op_copy() override { return nullptr; }
+  RGWOp *op_options() override { return nullptr; }
+
+  int serve_errordoc(int http_ret, const string &errordoc_key);
+public:
+  using RGWHandler_REST_S3::RGWHandler_REST_S3;
+  ~RGWHandler_REST_S3Website() override = default;
+
+  int init(RGWRados *store, req_state *s, rgw::io::BasicClient* cio) override;
+  int error_handler(int err_no, string *error_content) override;
+};
+
+class RGWHandler_REST_Service_S3Website : public RGWHandler_REST_S3Website {
+protected:
+ RGWOp *get_obj_op(bool get_data) override; // replaces the base hook that returns NULL
+public:
+ using RGWHandler_REST_S3Website::RGWHandler_REST_S3Website;
+ ~RGWHandler_REST_Service_S3Website() override = default;
+};
+
+class RGWHandler_REST_Obj_S3Website : public RGWHandler_REST_S3Website {
+protected:
+ RGWOp *get_obj_op(bool get_data) override; // replaces the base hook that returns NULL
+public:
+ using RGWHandler_REST_S3Website::RGWHandler_REST_S3Website;
+ ~RGWHandler_REST_Obj_S3Website() override = default;
+};
+
+/* The cross-inheritance from Obj to Bucket is deliberate!
+ * S3Websites do NOT support any bucket operations
+ */
+class RGWHandler_REST_Bucket_S3Website : public RGWHandler_REST_S3Website {
+protected:
+ RGWOp *get_obj_op(bool get_data) override; // replaces the base hook that returns NULL
+public:
+ using RGWHandler_REST_S3Website::RGWHandler_REST_S3Website;
+ ~RGWHandler_REST_Bucket_S3Website() override = default;
+};
+
+// TODO: do we actually need this?
+class RGWGetObj_ObjStore_S3Website : public RGWGetObj_ObjStore_S3
+{
+  friend class RGWHandler_REST_S3Website;
+  // When set, get_params() skips the regular request-parameter handling.
+  bool is_errordoc_request {false};
+public:
+  RGWGetObj_ObjStore_S3Website() = default;
+  explicit RGWGetObj_ObjStore_S3Website(bool is_errordoc_request)
+    : is_errordoc_request(is_errordoc_request) {}
+  ~RGWGetObj_ObjStore_S3Website() override = default;
+  int send_response_data_error() override;
+  int send_response_data(bufferlist& bl, off_t ofs, off_t len) override;
+  // Replaces RGWGetObj_ObjStore::get_params so that error pages ignore the
+  // range and every conditional request parameter.
+  int get_params() override {
+    if (!is_errordoc_request) {
+      return RGWGetObj_ObjStore_S3::get_params();
+    }
+    range_str = nullptr;
+    if_mod = nullptr;
+    if_unmod = nullptr;
+    if_match = nullptr;
+    if_nomatch = nullptr;
+    return 0;
+  }
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_sts.cc b/src/rgw/rgw_rest_sts.cc
new file mode 100644
index 00000000..f7424be9
--- /dev/null
+++ b/src/rgw/rgw_rest_sts.cc
@@ -0,0 +1,459 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/format.hpp>
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+#include <boost/tokenizer.hpp>
+
+#include "ceph_ver.h"
+
+#include "common/Formatter.h"
+#include "common/utf8.h"
+#include "common/ceph_json.h"
+
+#include "rgw_rest.h"
+#include "rgw_auth.h"
+#include "rgw_auth_registry.h"
+#include "rgw_rest_sts.h"
+
+#include "rgw_formats.h"
+#include "rgw_client_io.h"
+
+#include "rgw_request.h"
+#include "rgw_process.h"
+#include "rgw_iam_policy.h"
+#include "rgw_iam_policy_keywords.h"
+
+#include "rgw_sts.h"
+
+#include <array>
+#include <sstream>
+#include <memory>
+
+#include <boost/utility/string_ref.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+namespace rgw {
+namespace auth {
+namespace sts {
+
+// The engine applies to a request whenever a non-empty token was extracted.
+bool
+WebTokenEngine::is_applicable(const std::string& token) const noexcept
+{
+  const bool has_token = !token.empty();
+  return has_token;
+}
+
+/* Validate an OAuth 2.0 access token against the configured introspection
+ * endpoint and return the claims the endpoint reports.
+ *
+ * Returns boost::none when rgw_sts_token_introspection_url is not set.
+ * Throws an int: -EINVAL when the HTTP request fails or the reply is not
+ * valid JSON, -ERR_INVALID_IDENTITY_TOKEN when the reply does not mark the
+ * token as active.
+ */
+boost::optional<WebTokenEngine::token_t>
+WebTokenEngine::get_from_idp(const DoutPrefixProvider* dpp, const std::string& token) const
+{
+  //Access token conforming to OAuth2.0
+  if (cct->_conf->rgw_sts_token_introspection_url.empty()) {
+    return boost::none;
+  }
+
+  bufferlist introspect_resp;
+  RGWHTTPTransceiver introspect_req(cct, "POST", cct->_conf->rgw_sts_token_introspection_url, &introspect_resp);
+  //Headers
+  introspect_req.append_header("Content-Type", "application/x-www-form-urlencoded");
+  string base64_creds = "Basic " + rgw::to_base64(cct->_conf->rgw_sts_client_id + ":" + cct->_conf->rgw_sts_client_secret);
+  introspect_req.append_header("Authorization", base64_creds);
+  // POST data. The body is declared as form-urlencoded, so the token value
+  // must be percent-encoded; a raw '+', '&' or '=' inside it would otherwise
+  // be misread by the receiving side.
+  string post_data = "token=" + url_encode(token);
+  introspect_req.set_post_data(post_data);
+  introspect_req.set_send_length(post_data.length());
+
+  int res = introspect_req.process();
+  if (res < 0) {
+    ldpp_dout(dpp, 10) << "HTTP request res: " << res << dendl;
+    throw -EINVAL;
+  }
+  //Debug only
+  ldpp_dout(dpp, 20) << "HTTP status: " << introspect_req.get_http_status() << dendl;
+  ldpp_dout(dpp, 20) << "JSON Response is: " << introspect_resp.c_str() << dendl;
+
+  JSONParser parser;
+  if (!parser.parse(introspect_resp.c_str(), introspect_resp.length())) {
+    ldpp_dout(dpp, 2) << "Malformed json" << dendl;
+    throw -EINVAL;
+  }
+
+  // Start from "inactive" so the flag has a defined value no matter what the
+  // decoder does with a reply that lacks the "active" field.
+  bool is_active = false;
+  JSONDecoder::decode_json("active", is_active, &parser);
+  if (! is_active) {
+    ldpp_dout(dpp, 0) << "Active state is false" << dendl;
+    throw -ERR_INVALID_IDENTITY_TOKEN;
+  }
+
+  // Decoded claims; kept distinct from the `token` parameter, which is the
+  // raw token string.
+  WebTokenEngine::token_t claims;
+  JSONDecoder::decode_json("iss", claims.iss, &parser);
+  JSONDecoder::decode_json("aud", claims.aud, &parser);
+  JSONDecoder::decode_json("sub", claims.sub, &parser);
+  JSONDecoder::decode_json("user_name", claims.user_name, &parser);
+  return claims;
+}
+
+/* Authenticate a request from its web identity token. Denies when no token
+ * is present, when the IdP lookup throws, or when it yields no claims;
+ * otherwise grants with an applier built from the returned claims. */
+WebTokenEngine::result_t
+WebTokenEngine::authenticate( const DoutPrefixProvider* dpp,
+                              const std::string& token,
+                              const req_state* const s) const
+{
+  if (! is_applicable(token)) {
+    return result_t::deny();
+  }
+
+  boost::optional<WebTokenEngine::token_t> claims;
+  try {
+    claims = get_from_idp(dpp, token);
+  } catch(...) {
+    // get_from_idp() reports failures by throwing; all map to EACCES here.
+    return result_t::deny(-EACCES);
+  }
+
+  if (! claims) {
+    return result_t::deny(-EACCES);
+  }
+
+  auto apl = apl_factory->create_apl_web_identity(cct, s, *claims);
+  return result_t::grant(std::move(apl));
+}
+
+}; /* namespace sts */
+}; /* namespace auth */
+}; /* namespace rgw */
+
+/* Check the caller against the assume-role policy of the role named by the
+ * RoleArn argument. Returns the lookup error when the role cannot be
+ * fetched, -EPERM when the policy fails to parse or evaluates to Deny for
+ * the principal or the conditions, and 0 otherwise. Also (re)initializes the
+ * `sts` member for the current request. */
+int RGWREST_STS::verify_permission()
+{
+  STS::STSService _sts(s->cct, store, s->user->user_id, s->auth.identity.get());
+  sts = std::move(_sts);
+
+  string role_arn = s->info.args.get("RoleArn");
+  const auto& [ret, role] = sts.getRoleInfo(role_arn);
+  if (ret < 0) {
+    return ret;
+  }
+
+  string trust_policy = role.get_assume_role_policy();
+  buffer::list policy_bl = buffer::list::static_from_string(trust_policy);
+
+  //Parse the policy
+  //TODO - This step should be part of Role Creation
+  try {
+    const rgw::IAM::Policy p(s->cct, s->user->user_id.tenant, policy_bl);
+    //Check if the input role arn is there as one of the Principals in the policy,
+    // If yes, then return 0, else -EPERM
+    if (p.eval_principal(s->env, *s->auth.identity) == rgw::IAM::Effect::Deny) {
+      return -EPERM;
+    }
+    if (p.eval_conditions(s->env) == rgw::IAM::Effect::Deny) {
+      return -EPERM;
+    }
+  } catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << dendl;
+    return -EPERM;
+  }
+
+  return 0;
+}
+
+// Emit the status line and headers, recording op_ret as the request error
+// first when it is non-zero.
+void RGWREST_STS::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s);
+}
+
+/* Require the stsGetSessionToken user permission on an ARN built from the
+ * aws partition, the s3 service and the caller's tenant. Returns 0 when
+ * allowed and -EACCES otherwise. */
+int RGWSTSGetSessionToken::verify_permission()
+{
+  const rgw::ARN resource(rgw::Partition::aws, rgw::Service::s3,
+                          "", s->user->user_id.tenant, "");
+
+  const bool allowed =
+    verify_user_permission(this, s, resource, rgw::IAM::stsGetSessionToken);
+  return allowed ? 0 : -EACCES;
+}
+
+/* Read DurationSeconds, SerialNumber and TokenCode from the request. When a
+ * duration is given it must parse as a base-10 integer and lie between the
+ * request type's minimum and rgw_sts_max_session_duration; otherwise -EINVAL
+ * is returned. Returns 0 on success. */
+int RGWSTSGetSessionToken::get_params()
+{
+  duration = s->info.args.get("DurationSeconds");
+  serialNumber = s->info.args.get("SerialNumber");
+  tokenCode = s->info.args.get("TokenCode");
+
+  if (duration.empty()) {
+    return 0;
+  }
+
+  string parse_err;
+  uint64_t duration_in_secs = strict_strtoll(duration.c_str(), 10, &parse_err);
+  if (!parse_err.empty()) {
+    return -EINVAL;
+  }
+
+  const bool too_short =
+    duration_in_secs < STS::GetSessionTokenRequest::getMinDuration();
+  const bool too_long =
+    duration_in_secs > s->cct->_conf->rgw_sts_max_session_duration;
+  if (too_short || too_long) {
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+/* Run GetSessionToken: validate the parameters, ask the STS service for
+ * credentials and, on success, write them into the response formatter. */
+void RGWSTSGetSessionToken::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  STS::STSService sts(s->cct, store, s->user->user_id, s->auth.identity.get());
+
+  STS::GetSessionTokenRequest req(duration, serialNumber, tokenCode);
+  const auto& [ret, creds] = sts.getSessionToken(req);
+  op_ret = ret;
+  if (op_ret != 0) {
+    return;
+  }
+
+  //Dump the output
+  auto* const f = s->formatter;
+  f->open_object_section("GetSessionTokenResponse");
+  f->open_object_section("GetSessionTokenResult");
+  f->open_object_section("Credentials");
+  creds.dump(f);
+  f->close_section();
+  f->close_section();
+  f->close_section();
+}
+
+/* Read the AssumeRoleWithWebIdentity arguments into the op's members.
+ * Returns -EINVAL when RoleArn, RoleSessionName, sub or aud is empty,
+ * -ERR_MALFORMED_DOC when a supplied Policy does not parse, 0 otherwise. */
+int RGWSTSAssumeRoleWithWebIdentity::get_params()
+{
+  const auto& args = s->info.args;
+  duration = args.get("DurationSeconds");
+  providerId = args.get("ProviderId");
+  policy = args.get("Policy");
+  roleArn = args.get("RoleArn");
+  roleSessionName = args.get("RoleSessionName");
+  iss = args.get("provider_id");
+  sub = args.get("sub");
+  aud = args.get("aud");
+
+  const bool missing_required =
+    roleArn.empty() || roleSessionName.empty() || sub.empty() || aud.empty();
+  if (missing_required) {
+    ldout(s->cct, 20) << "ERROR: one of role arn or role session name or token is empty" << dendl;
+    return -EINVAL;
+  }
+
+  if (policy.empty()) {
+    return 0;
+  }
+
+  bufferlist bl = bufferlist::static_from_string(policy);
+  try {
+    // Constructed only to validate the document; the object is discarded.
+    const rgw::IAM::Policy p(s->cct, s->user->user_id.tenant, bl);
+  }
+  catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << "policy" << policy << dendl;
+    return -ERR_MALFORMED_DOC;
+  }
+
+  return 0;
+}
+
+/* Run AssumeRoleWithWebIdentity: validate the parameters, call the STS
+ * service and, when it succeeds, write the result into the formatter. */
+void RGWSTSAssumeRoleWithWebIdentity::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  STS::AssumeRoleWithWebIdentityRequest req(duration, providerId, policy, roleArn,
+                                            roleSessionName, iss, sub, aud);
+  STS::AssumeRoleWithWebIdentityResponse response = sts.assumeRoleWithWebIdentity(req);
+  op_ret = response.assumeRoleResp.retCode;
+  if (op_ret != 0) {
+    return;
+  }
+
+  //Dump the output
+  auto* const f = s->formatter;
+  f->open_object_section("AssumeRoleWithWebIdentityResponse");
+  f->open_object_section("AssumeRoleWithWebIdentityResult");
+  encode_json("SubjectFromWebIdentityToken", response.sub , f);
+  encode_json("Audience", response.aud , f);
+
+  f->open_object_section("AssumedRoleUser");
+  response.assumeRoleResp.user.dump(f);
+  f->close_section();
+
+  f->open_object_section("Credentials");
+  response.assumeRoleResp.creds.dump(f);
+  f->close_section();
+
+  encode_json("Provider", response.providerId , f);
+  encode_json("PackedPolicySize", response.assumeRoleResp.packedPolicySize , f);
+  f->close_section();
+  f->close_section();
+}
+
+/* Read the AssumeRole arguments into the op's members. Returns -EINVAL when
+ * RoleArn or RoleSessionName is empty, -ERR_MALFORMED_DOC when a supplied
+ * Policy does not parse, 0 otherwise. */
+int RGWSTSAssumeRole::get_params()
+{
+  const auto& args = s->info.args;
+  duration = args.get("DurationSeconds");
+  externalId = args.get("ExternalId");
+  policy = args.get("Policy");
+  roleArn = args.get("RoleArn");
+  roleSessionName = args.get("RoleSessionName");
+  serialNumber = args.get("SerialNumber");
+  tokenCode = args.get("TokenCode");
+
+  const bool missing_required = roleArn.empty() || roleSessionName.empty();
+  if (missing_required) {
+    ldout(s->cct, 20) << "ERROR: one of role arn or role session name is empty" << dendl;
+    return -EINVAL;
+  }
+
+  if (policy.empty()) {
+    return 0;
+  }
+
+  bufferlist bl = bufferlist::static_from_string(policy);
+  try {
+    // Constructed only to validate the document; the object is discarded.
+    const rgw::IAM::Policy p(s->cct, s->user->user_id.tenant, bl);
+  }
+  catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << "policy" << policy << dendl;
+    return -ERR_MALFORMED_DOC;
+  }
+
+  return 0;
+}
+
+/* Run AssumeRole: validate the parameters, call the STS service and, when it
+ * succeeds, write credentials, assumed-role user and packed policy size into
+ * the formatter. */
+void RGWSTSAssumeRole::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  STS::AssumeRoleRequest req(duration, externalId, policy, roleArn,
+                             roleSessionName, serialNumber, tokenCode);
+  STS::AssumeRoleResponse response = sts.assumeRole(req);
+  op_ret = response.retCode;
+  if (op_ret != 0) {
+    return;
+  }
+
+  //Dump the output
+  auto* const f = s->formatter;
+  f->open_object_section("AssumeRoleResponse");
+  f->open_object_section("AssumeRoleResult");
+
+  f->open_object_section("Credentials");
+  response.creds.dump(f);
+  f->close_section();
+
+  f->open_object_section("AssumedRoleUser");
+  response.user.dump(f);
+  f->close_section();
+
+  encode_json("PackedPolicySize", response.packedPolicySize , f);
+  f->close_section();
+  f->close_section();
+}
+
+// Authenticate the request with the STS strategy taken from the registry.
+// The `store` argument is not used by this implementation.
+int RGW_Auth_STS::authorize(const DoutPrefixProvider *dpp,
+                            RGWRados *store,
+                            const rgw::auth::StrategyRegistry& auth_registry,
+                            struct req_state *s)
+{
+  const auto& sts_strategy = auth_registry.get_sts();
+  return rgw::auth::Strategy::apply(dpp, sts_strategy, s);
+}
+
+/* Fold the POST body into the request arguments. When the body mentions
+ * "Action" it is split on '&' and every "key=value" pair is appended (value
+ * url-decoded, key as-is); pairs without '=' are ignored. The v4 payload hash
+ * of the body is always appended as "PayloadHash". */
+void RGWHandler_REST_STS::rgw_sts_parse_input()
+{
+  if (!post_body.empty()) {
+    ldout(s->cct, 10) << "Content of POST: " << post_body << dendl;
+
+    if (post_body.find("Action") != string::npos) {
+      boost::char_separator<char> sep("&");
+      boost::tokenizer<boost::char_separator<char>> tokens(post_body, sep);
+      for (const auto& kv : tokens) {
+        const auto eq = kv.find('=');
+        if (eq == string::npos) {
+          continue;
+        }
+        // Everything after the first '=' is the value.
+        s->info.args.append(kv.substr(0, eq), url_decode(kv.substr(eq + 1)));
+      }
+    }
+  }
+
+  auto payload_hash = rgw::auth::s3::calc_v4_payload_hash(post_body);
+  s->info.args.append("PayloadHash", payload_hash);
+}
+
+/* Parse the POST body, then create the op named by the "Action" argument.
+ * Returns nullptr when the argument is absent or names an unknown action. */
+RGWOp *RGWHandler_REST_STS::op_post()
+{
+  rgw_sts_parse_input();
+
+  if (!s->info.args.exists("Action")) {
+    return nullptr;
+  }
+
+  const string action = s->info.args.get("Action");
+  if (action == "AssumeRole") {
+    return new RGWSTSAssumeRole;
+  }
+  if (action == "GetSessionToken") {
+    return new RGWSTSGetSessionToken;
+  }
+  if (action == "AssumeRoleWithWebIdentity") {
+    return new RGWSTSAssumeRoleWithWebIdentity;
+  }
+
+  return nullptr;
+}
+
+/* Mark the request as using the "sts" dialect, parse its arguments with an
+ * XML default formatter, then run the generic REST handler initialization.
+ * Returns the first negative error encountered. */
+int RGWHandler_REST_STS::init(RGWRados *store,
+                              struct req_state *s,
+                              rgw::io::BasicClient *cio)
+{
+  s->dialect = "sts";
+
+  const int ret = RGWHandler_REST_STS::init_from_header(s, RGW_FORMAT_XML, true);
+  if (ret < 0) {
+    ldout(s->cct, 10) << "init_from_header returned err=" << ret << dendl;
+    return ret;
+  }
+
+  return RGWHandler_REST::init(store, s, cio);
+}
+
+// AssumeRoleWithWebIdentity requests are authenticated by the STS strategy;
+// every other request goes through the S3 authorization path.
+int RGWHandler_REST_STS::authorize(const DoutPrefixProvider* dpp)
+{
+  const bool is_web_identity =
+    s->info.args.exists("Action") &&
+    s->info.args.get("Action") == "AssumeRoleWithWebIdentity";
+
+  if (is_web_identity) {
+    return RGW_Auth_STS::authorize(dpp, store, auth_registry, s);
+  }
+  return RGW_Auth_S3::authorize(dpp, store, auth_registry, s);
+}
+
+/* Prepare req_state for an STS request: set the STS protocol flag, parse the
+ * query arguments and allocate the response formatter.
+ *
+ * The arguments come from the relative URI when it begins with '?', and from
+ * info.request_params otherwise.
+ *
+ * Returns the negative error from allocate_formatter(), or 0 on success.
+ */
+int RGWHandler_REST_STS::init_from_header(struct req_state* s,
+                                          int default_formatter,
+                                          bool configurable_format)
+{
+  s->prot_flags = RGW_REST_STS;
+
+  const char* const req_name = s->relative_uri.c_str();
+  const char* const p = (*req_name == '?') ? req_name
+                                           : s->info.request_params.c_str();
+
+  s->info.args.set(p);
+  s->info.args.parse();
+
+  /* must be called after the args parsing */
+  if (int ret = allocate_formatter(s, default_formatter, configurable_format); ret < 0)
+    return ret;
+
+  // STS has no resource path to dispatch on, so nothing beyond the query
+  // arguments needs to be extracted from the URI.
+  return 0;
+}
+
+/* Create the STS handler for a request routed to this manager.
+ *
+ * No POST body is available at this point, so an empty one is supplied. It
+ * is a function-local static because the handler's constructor takes the
+ * body by const reference: passing an object with static lifetime guarantees
+ * the handler can never end up referring to a destroyed temporary.
+ *
+ * The caller owns the returned handler.
+ */
+RGWHandler_REST*
+RGWRESTMgr_STS::get_handler(struct req_state* const s,
+                            const rgw::auth::StrategyRegistry& auth_registry,
+                            const std::string& frontend_prefix)
+{
+  static const std::string empty_post_body;
+  return new RGWHandler_REST_STS(auth_registry, empty_post_body);
+}
diff --git a/src/rgw/rgw_rest_sts.h b/src/rgw/rgw_rest_sts.h
new file mode 100644
index 00000000..d9baa2c3
--- /dev/null
+++ b/src/rgw/rgw_rest_sts.h
@@ -0,0 +1,202 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_STS_H
+#define CEPH_RGW_REST_STS_H
+
+#include "rgw_auth.h"
+#include "rgw_auth_filters.h"
+#include "rgw_sts.h"
+#include "rgw_web_idp.h"
+
+namespace rgw {
+namespace auth {
+namespace sts {
+
+/* Authentication engine that validates a web identity token and, on success,
+ * grants access through an applier created by the supplied factory. The
+ * extractor and factory are borrowed pointers; this class never deletes them. */
+class WebTokenEngine : public rgw::auth::Engine {
+ CephContext* const cct;
+
+ using result_t = rgw::auth::Engine::result_t;
+ using token_t = rgw::web_idp::WebTokenClaims;
+
+ const rgw::auth::TokenExtractor* const extractor;
+ const rgw::auth::WebIdentityApplier::Factory* const apl_factory;
+
+ // True when the extracted token is non-empty.
+ bool is_applicable(const std::string& token) const noexcept;
+
+ // Looks the token up at the identity provider; reports failures by
+ // throwing (see the definition in rgw_rest_sts.cc).
+ boost::optional<token_t>
+ get_from_idp(const DoutPrefixProvider* dpp, const std::string& token) const;
+
+ // Token-based worker behind the public authenticate() override.
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const std::string& token,
+ const req_state* s) const;
+
+public:
+ WebTokenEngine(CephContext* const cct,
+ const rgw::auth::TokenExtractor* const extractor,
+ const rgw::auth::WebIdentityApplier::Factory* const apl_factory)
+ : cct(cct),
+ extractor(extractor),
+ apl_factory(apl_factory) {
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::sts::WebTokenEngine";
+ }
+
+ // Extracts the token from the request and delegates to the private overload.
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const override {
+ return authenticate(dpp, extractor->get_token(s), s);
+ }
+}; /* class WebTokenEngine */
+
+/* STS authentication strategy with a single engine, WebTokenEngine. The
+ * strategy itself serves as the engine's token extractor and applier
+ * factory, which is why `this` is handed to the engine's constructor. */
+class DefaultStrategy : public rgw::auth::Strategy,
+ public rgw::auth::TokenExtractor,
+ public rgw::auth::WebIdentityApplier::Factory {
+ RGWRados* const store;
+
+ /* The engine. */
+ const WebTokenEngine web_token_engine;
+
+ using aplptr_t = rgw::auth::IdentityApplier::aplptr_t;
+
+ /* The method implements TokenExtractor for Web Token in req_state. */
+ std::string get_token(const req_state* const s) const override {
+ return s->info.args.get("WebIdentityToken");
+ }
+
+ /* Implements WebIdentityApplier::Factory: wraps a WebIdentityApplier with
+ * add_sysreq() and returns it as a heap-allocated applier. */
+ aplptr_t create_apl_web_identity( CephContext* cct,
+ const req_state* s,
+ const rgw::web_idp::WebTokenClaims& token) const override {
+ auto apl = rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::WebIdentityApplier(cct, store, token));
+ return aplptr_t(new decltype(apl)(std::move(apl)));
+ }
+
+public:
+ DefaultStrategy(CephContext* const cct,
+ RGWRados* const store)
+ : store(store),
+ web_token_engine(cct,
+ static_cast<rgw::auth::TokenExtractor*>(this),
+ static_cast<rgw::auth::WebIdentityApplier::Factory*>(this)) {
+ /* When the constructor's body is being executed, all member engines
+ * should be initialized. Thus, we can safely add them. */
+ using Control = rgw::auth::Strategy::Control;
+ add_engine(Control::SUFFICIENT, web_token_engine);
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::sts::DefaultStrategy";
+ }
+};
+
+}; /* namespace sts */
+}; /* namespace auth */
+};
+
+/* Common base of the STS REST ops. Holds the STSService that
+ * verify_permission() sets up for the current request and that derived ops
+ * use in execute(). */
+class RGWREST_STS : public RGWRESTOp {
+protected:
+ STS::STSService sts;
+public:
+ RGWREST_STS() = default;
+ // Evaluates the assume-role policy of the role named by "RoleArn".
+ int verify_permission() override;
+ void send_response() override;
+};
+
+/* Op for the AssumeRoleWithWebIdentity action. The string members hold the
+ * raw request arguments read by get_params(). */
+class RGWSTSAssumeRoleWithWebIdentity : public RGWREST_STS {
+protected:
+ string duration;
+ string providerId;
+ string policy;
+ string roleArn;
+ string roleSessionName;
+ string sub;
+ string aud;
+ string iss;
+public:
+ RGWSTSAssumeRoleWithWebIdentity() = default;
+ void execute() override;
+ // Reads and validates the request arguments; returns 0 or a negative error.
+ int get_params();
+ const char* name() const override { return "assume_role_web_identity"; }
+ RGWOpType get_type() override { return RGW_STS_ASSUME_ROLE_WEB_IDENTITY; }
+};
+
+/* Op for the AssumeRole action. The string members hold the raw request
+ * arguments read by get_params(). */
+class RGWSTSAssumeRole : public RGWREST_STS {
+protected:
+ string duration;
+ string externalId;
+ string policy;
+ string roleArn;
+ string roleSessionName;
+ string serialNumber;
+ string tokenCode;
+public:
+ RGWSTSAssumeRole() = default;
+ void execute() override;
+ // Reads and validates the request arguments; returns 0 or a negative error.
+ int get_params();
+ const char* name() const override { return "assume_role"; }
+ RGWOpType get_type() override { return RGW_STS_ASSUME_ROLE; }
+};
+
+/* Op for the GetSessionToken action. Unlike the other STS ops it replaces
+ * the base-class verify_permission() with a user-permission check. */
+class RGWSTSGetSessionToken : public RGWREST_STS {
+protected:
+ string duration;
+ string serialNumber;
+ string tokenCode;
+public:
+ RGWSTSGetSessionToken() = default;
+ void execute() override;
+ int verify_permission() override;
+ // Reads and validates the request arguments; returns 0 or a negative error.
+ int get_params();
+ const char* name() const override { return "get_session_token"; }
+ RGWOpType get_type() override { return RGW_STS_GET_SESSION_TOKEN; }
+};
+
+/* Static entry point that authenticates a request with the STS strategy
+ * obtained from the strategy registry. */
+class RGW_Auth_STS {
+public:
+ static int authorize(const DoutPrefixProvider *dpp,
+ RGWRados *store,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ struct req_state *s);
+};
+
+/* REST handler for STS requests.
+ *
+ * The POST body is stored by value. The constructor's default argument is a
+ * temporary string that only lives until the end of the constructing
+ * expression, so a reference member bound to it would dangle by the time
+ * rgw_sts_parse_input() reads it.
+ */
+class RGWHandler_REST_STS : public RGWHandler_REST {
+  const rgw::auth::StrategyRegistry& auth_registry;
+  const string post_body;
+  RGWOp *op_post() override;
+  // Appends the key/value pairs found in post_body to the request arguments.
+  void rgw_sts_parse_input();
+public:
+
+  static int init_from_header(struct req_state *s, int default_formatter, bool configurable_format);
+
+  RGWHandler_REST_STS(const rgw::auth::StrategyRegistry& auth_registry, const string& post_body="")
+    : RGWHandler_REST(),
+      auth_registry(auth_registry),
+      post_body(post_body) {}
+  ~RGWHandler_REST_STS() override = default;
+
+  int init(RGWRados *store,
+           struct req_state *s,
+           rgw::io::BasicClient *cio) override;
+  int authorize(const DoutPrefixProvider* dpp) override;
+  int postauth_init() override { return 0; }
+};
+
+/* REST manager for the STS endpoint. It is its own resource manager for
+ * every URI and creates RGWHandler_REST_STS handlers. */
+class RGWRESTMgr_STS : public RGWRESTMgr {
+public:
+ RGWRESTMgr_STS() = default;
+ ~RGWRESTMgr_STS() override = default;
+
+ // Always resolves to this manager; `uri` and `out_uri` are left untouched.
+ RGWRESTMgr *get_resource_mgr(struct req_state* const s,
+ const std::string& uri,
+ std::string* const out_uri) override {
+ return this;
+ }
+
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry&,
+ const std::string&) override;
+};
+
+#endif /* CEPH_RGW_REST_STS_H */
+
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
new file mode 100644
index 00000000..e1d8095e
--- /dev/null
+++ b/src/rgw/rgw_rest_swift.cc
@@ -0,0 +1,3093 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/format.hpp>
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+
+#include "include/ceph_assert.h"
+#include "ceph_ver.h"
+
+#include "common/Formatter.h"
+#include "common/utf8.h"
+#include "common/ceph_json.h"
+
+#include "rgw_rest_swift.h"
+#include "rgw_acl_swift.h"
+#include "rgw_cors_swift.h"
+#include "rgw_formats.h"
+#include "rgw_client_io.h"
+
+#include "rgw_auth.h"
+#include "rgw_swift_auth.h"
+
+#include "rgw_request.h"
+#include "rgw_process.h"
+
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include <array>
+#include <sstream>
+#include <memory>
+
+#include <boost/utility/string_ref.hpp>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+/* Read the account-listing arguments: prefix, marker, end_marker, reverse,
+ * limit and (when rgw_swift_need_stats is enabled) stats.
+ *
+ * Returns -EINVAL for an unparsable limit, -ERR_PRECONDITION_FAILED for a
+ * limit that is negative or above limit_max, the error from get_bool() for
+ * a bad "stats" value, and 0 otherwise. */
+int RGWListBuckets_ObjStore_SWIFT::get_params()
+{
+  prefix = s->info.args.get("prefix");
+  marker = s->info.args.get("marker");
+  end_marker = s->info.args.get("end_marker");
+  wants_reversed = s->info.args.exists("reverse");
+
+  // A reversed listing walks the range the other way round.
+  if (wants_reversed) {
+    std::swap(marker, end_marker);
+  }
+
+  const std::string limit_str = s->info.args.get("limit");
+  if (!limit_str.empty()) {
+    std::string parse_err;
+    const long requested = strict_strtol(limit_str.c_str(), 10, &parse_err);
+    if (!parse_err.empty()) {
+      return -EINVAL;
+    }
+
+    if (requested < 0 || requested > static_cast<long>(limit_max)) {
+      return -ERR_PRECONDITION_FAILED;
+    }
+
+    limit = static_cast<uint64_t>(requested);
+  }
+
+  if (!s->cct->_conf->rgw_swift_need_stats) {
+    need_stats = false;
+    return 0;
+  }
+
+  bool stats, exists;
+  const int r = s->info.args.get_bool("stats", &stats, &exists);
+  if (r < 0) {
+    return r;
+  }
+
+  // Only an explicit "stats" argument changes the current setting.
+  if (exists) {
+    need_stats = stats;
+  }
+
+  return 0;
+}
+
+/* Emit the Swift account headers for a response: usage counters (global and
+ * per storage policy), TempURL keys, quota limits, stored attributes and the
+ * account ACL.
+ *
+ * `attrs` is taken by non-const reference only because the loop below walks
+ * it with a non-const iterator; nothing in this function modifies it. */
+static void dump_account_metadata(struct req_state * const s,
+ const RGWUsageStats& global_stats,
+ const std::map<std::string, RGWUsageStats> &policies_stats,
+ /* const */map<string, bufferlist>& attrs,
+ const RGWQuotaInfo& quota,
+ const RGWAccessControlPolicy_SWIFTAcct &policy)
+{
+ /* Adding X-Timestamp to keep align with Swift API */
+ dump_header(s, "X-Timestamp", ceph_clock_now());
+
+ dump_header(s, "X-Account-Container-Count", global_stats.buckets_count);
+ dump_header(s, "X-Account-Object-Count", global_stats.objects_count);
+ dump_header(s, "X-Account-Bytes-Used", global_stats.bytes_used);
+ dump_header(s, "X-Account-Bytes-Used-Actual", global_stats.bytes_used_rounded);
+
+ /* Same four counters again, once per storage policy, with the policy name
+ * infixed into the header name. */
+ for (const auto& kv : policies_stats) {
+ const auto& policy_name = camelcase_dash_http_attr(kv.first);
+ const auto& policy_stats = kv.second;
+
+ dump_header_infixed(s, "X-Account-Storage-Policy-", policy_name,
+ "-Container-Count", policy_stats.buckets_count);
+ dump_header_infixed(s, "X-Account-Storage-Policy-", policy_name,
+ "-Object-Count", policy_stats.objects_count);
+ dump_header_infixed(s, "X-Account-Storage-Policy-", policy_name,
+ "-Bytes-Used", policy_stats.bytes_used);
+ dump_header_infixed(s, "X-Account-Storage-Policy-", policy_name,
+ "-Bytes-Used-Actual", policy_stats.bytes_used_rounded);
+ }
+
+ /* Dump TempURL-related stuff */
+ /* Keys are only revealed when the permission mask is exactly full control;
+ * slots 0 and 1 map to the primary and secondary key headers. */
+ if (s->perm_mask == RGW_PERM_FULL_CONTROL) {
+ auto iter = s->user->temp_url_keys.find(0);
+ if (iter != std::end(s->user->temp_url_keys) && ! iter->second.empty()) {
+ dump_header(s, "X-Account-Meta-Temp-Url-Key", iter->second);
+ }
+
+ iter = s->user->temp_url_keys.find(1);
+ if (iter != std::end(s->user->temp_url_keys) && ! iter->second.empty()) {
+ dump_header(s, "X-Account-Meta-Temp-Url-Key-2", iter->second);
+ }
+ }
+
+ /* Dump quota headers. */
+ /* A negative limit is not reported. */
+ if (quota.enabled) {
+ if (quota.max_size >= 0) {
+ dump_header(s, "X-Account-Meta-Quota-Bytes", quota.max_size);
+ }
+
+ /* Limit on the number of objects in a given account is a RadosGW's
+ * extension. Swift's account quota WSGI filter doesn't support it. */
+ if (quota.max_objects >= 0) {
+ dump_header(s, "X-Account-Meta-Quota-Count", quota.max_objects);
+ }
+ }
+
+ /* Dump user-defined metadata items and generic attrs. */
+ /* Attributes listed in rgw_to_http_attrs go out under their mapped header
+ * name; the rest are emitted only when they carry the user-metadata prefix,
+ * which is stripped and replaced by "X-Account-Meta-". */
+ const size_t PREFIX_LEN = sizeof(RGW_ATTR_META_PREFIX) - 1;
+ map<string, bufferlist>::iterator iter;
+ for (iter = attrs.lower_bound(RGW_ATTR_PREFIX); iter != attrs.end(); ++iter) {
+ const char *name = iter->first.c_str();
+ map<string, string>::const_iterator geniter = rgw_to_http_attrs.find(name);
+
+ if (geniter != rgw_to_http_attrs.end()) {
+ dump_header(s, geniter->second, iter->second);
+ } else if (strncmp(name, RGW_ATTR_META_PREFIX, PREFIX_LEN) == 0) {
+ dump_header_prefixed(s, "X-Account-Meta-",
+ camelcase_dash_http_attr(name + PREFIX_LEN),
+ iter->second);
+ }
+ }
+
+ /* Dump account ACLs */
+ /* to_str() yields an optional-like value; the header is skipped when empty. */
+ auto account_acls = policy.to_str();
+ if (account_acls) {
+ dump_header(s, "X-Account-Access-Control", std::move(*account_acls));
+ }
+}
+
+/* Start the account-listing response. Sets the request status (turning an
+ * empty plain-text listing into STATUS_NO_CONTENT), sends the headers right
+ * away unless rgw_swift_enforce_content_length is set, and opens the
+ * "account" array when no error has been recorded. */
+void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets)
+{
+ if (op_ret) {
+ set_req_state_err(s, op_ret);
+ } else if (!has_buckets && s->format == RGW_FORMAT_PLAIN) {
+ op_ret = STATUS_NO_CONTENT;
+ set_req_state_err(s, op_ret);
+ }
+
+ /* Without content-length enforcement the headers go out first and the body
+ * is streamed; with it, send_response_end() emits them once the body length
+ * is known. */
+ if (! s->cct->_conf->rgw_swift_enforce_content_length) {
+ /* Adding account stats in the header to keep align with Swift API */
+ dump_account_metadata(s,
+ global_stats,
+ policies_stats,
+ attrs,
+ user_quota,
+ static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl));
+ dump_errno(s);
+ dump_header(s, "Accept-Ranges", "bytes");
+ end_header(s, NULL, NULL, NO_CONTENT_LENGTH, true);
+ }
+
+ if (! op_ret) {
+ dump_start(s);
+ s->formatter->open_array_section_with_attrs("account",
+ FormatterAttrs("name", s->user->display_name.c_str(), NULL));
+
+ /* Tells the later stages that the array section is open. */
+ sent_data = true;
+ }
+}
+
+/* Handle one chunk of the bucket listing: emit it immediately for a forward
+ * listing, or park it at the front of the reversal buffer so that
+ * send_response_end() can replay the chunks in reverse order. */
+void RGWListBuckets_ObjStore_SWIFT::handle_listing_chunk(RGWUserBuckets&& buckets)
+{
+  if (!wants_reversed) {
+    send_response_data(buckets);
+    return;
+  }
+
+  /* Just store in the reversal buffer. Its content will be handled later,
+   * in send_response_end(). */
+  reverse_buffer.emplace(std::begin(reverse_buffer), std::move(buckets));
+}
+
+/* Dump, in ascending order, every bucket of the chunk whose name starts with
+ * the requested prefix. Does nothing unless the response body was opened. */
+void RGWListBuckets_ObjStore_SWIFT::send_response_data(RGWUserBuckets& buckets)
+{
+  if (! sent_data) {
+    return;
+  }
+
+  /* Take care of the prefix parameter of Swift API. There is no business
+   * in applying the filter earlier as we really need to go through all
+   * entries regardless of it (the headers like X-Account-Container-Count
+   * aren't affected by specifying prefix). */
+  const std::map<std::string, RGWBucketEnt>& bucket_map = buckets.get_buckets();
+
+  // Matching names form one contiguous run starting at lower_bound(prefix).
+  auto it = bucket_map.lower_bound(prefix);
+  while (it != bucket_map.end() &&
+         boost::algorithm::starts_with(it->first, prefix)) {
+    dump_bucket_entry(it->second);
+    ++it;
+  }
+}
+
+/* Write one "container" entry (name, plus count and bytes when stats were
+ * requested) and flush it right away unless the content length has to be
+ * known before the headers are sent. */
+void RGWListBuckets_ObjStore_SWIFT::dump_bucket_entry(const RGWBucketEnt& obj)
+{
+  auto* const f = s->formatter;
+
+  f->open_object_section("container");
+  f->dump_string("name", obj.bucket.name);
+  if (need_stats) {
+    f->dump_int("count", obj.count);
+    f->dump_int("bytes", obj.size);
+  }
+  f->close_section();
+
+  const bool stream_now = !s->cct->_conf->rgw_swift_enforce_content_length;
+  if (stream_now) {
+    rgw_flush_formatter(s, s->formatter);
+  }
+}
+
+/* Dump, in descending order, the buckets of the chunk whose names start with
+ * the requested prefix. Does nothing unless the response body was opened. */
+void RGWListBuckets_ObjStore_SWIFT::send_response_data_reversed(RGWUserBuckets& buckets)
+{
+  if (! sent_data) {
+    return;
+  }
+
+  /* Take care of the prefix parameter of Swift API. There is no business
+   * in applying the filter earlier as we really need to go through all
+   * entries regardless of it (the headers like X-Account-Container-Count
+   * aren't affected by specifying prefix). */
+  std::map<std::string, RGWBucketEnt>& bucket_map = buckets.get_buckets();
+  const auto rend = bucket_map.rend();
+  auto rit = bucket_map.rbegin();
+
+  // Skip the trailing entries that do not carry the prefix...
+  while (rit != rend && !boost::algorithm::starts_with(rit->first, prefix)) {
+    ++rit;
+  }
+
+  // ...then dump the run of matching entries, stopping at the first miss.
+  while (rit != rend && boost::algorithm::starts_with(rit->first, prefix)) {
+    dump_bucket_entry(rit->second);
+    ++rit;
+  }
+}
+
+/* Finish the account-listing response: replay buffered chunks for a reversed
+ * listing, close the "account" array if it was opened, send the deferred
+ * headers when content-length enforcement is on, and flush the formatter. */
+void RGWListBuckets_ObjStore_SWIFT::send_response_end()
+{
+ /* handle_listing_chunk() inserted each chunk at the front, so iterating
+ * the buffer front to back visits the chunks in reverse arrival order. */
+ if (wants_reversed) {
+ for (auto& buckets : reverse_buffer) {
+ send_response_data_reversed(buckets);
+ }
+ }
+
+ if (sent_data) {
+ s->formatter->close_section();
+ }
+
+ /* With enforcement on, nothing has been sent yet: the formatter holds the
+ * whole body, so its length can be announced in the headers. */
+ if (s->cct->_conf->rgw_swift_enforce_content_length) {
+ /* Adding account stats in the header to keep align with Swift API */
+ dump_account_metadata(s,
+ global_stats,
+ policies_stats,
+ attrs,
+ user_quota,
+ static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl));
+ dump_errno(s);
+ end_header(s, nullptr, nullptr, s->formatter->get_len(), true);
+ }
+
+ if (sent_data || s->cct->_conf->rgw_swift_enforce_content_length) {
+ rgw_flush_formatter_and_reset(s, s->formatter);
+ }
+}
+
+/* Read the container-listing arguments (prefix, marker, end_marker, limit,
+ * allow_unordered, delimiter, path).
+ *
+ * Returns the error from parse_max_keys(), -ERR_PRECONDITION_FAILED when the
+ * limit exceeds default_max, -EINVAL when "path" is combined with a
+ * delimiter or prefix, and 0 otherwise. */
+int RGWListBucket_ObjStore_SWIFT::get_params()
+{
+  prefix = s->info.args.get("prefix");
+  marker = s->info.args.get("marker");
+  end_marker = s->info.args.get("end_marker");
+  max_keys = s->info.args.get("limit");
+
+  // non-standard
+  s->info.args.get_bool("allow_unordered", &allow_unordered, false);
+
+  delimiter = s->info.args.get("delimiter");
+
+  op_ret = parse_max_keys();
+  if (op_ret < 0) {
+    return op_ret;
+  }
+  // S3 behavior is to silently cap the max-keys.
+  // Swift behavior is to abort.
+  if (max > default_max) {
+    return -ERR_PRECONDITION_FAILED;
+  }
+
+  if (!s->info.args.exists("path")) {
+    return 0;
+  }
+
+  // "path" (possibly empty) cannot be combined with delimiter or prefix; it
+  // is translated into a prefix plus a "/" delimiter.
+  const string path_args = s->info.args.get("path");
+  if (!delimiter.empty() || !prefix.empty()) {
+    return -EINVAL;
+  }
+  prefix = path_args;
+  delimiter = "/";
+
+  path = prefix;
+  if (!path.empty() && path.back() != '/') {
+    path.append("/");
+  }
+
+  // Make a non-empty prefix end with the delimiter as well.
+  const int len = prefix.size();
+  const int delim_size = delimiter.size();
+  if (len >= delim_size &&
+      prefix.substr(len - delim_size).compare(delimiter) != 0) {
+    prefix.append(delimiter);
+  }
+
+  return 0;
+}
+
+static void dump_container_metadata(struct req_state *,
+ const RGWBucketEnt&,
+ const RGWQuotaInfo&,
+ const RGWBucketWebsiteConf&);
+
+/* Send the container-listing response.
+ *
+ * The object entries (objs) and the common prefixes are walked in parallel
+ * and merged by name into one "container" array. The status and headers are
+ * sent after the body has been formatted, so that the content length is
+ * known; the body is flushed only when no error was recorded. */
+void RGWListBucket_ObjStore_SWIFT::send_response()
+{
+ vector<rgw_bucket_dir_entry>::iterator iter = objs.begin();
+ map<string, bool>::iterator pref_iter = common_prefixes.begin();
+
+ dump_start(s);
+ dump_container_metadata(s, bucket, bucket_quota,
+ s->bucket_info.website_conf);
+
+ s->formatter->open_array_section_with_attrs("container",
+ FormatterAttrs("name",
+ s->bucket.name.c_str(),
+ NULL));
+
+ /* Each pass handles exactly one element: either the current object
+ * (do_objs) or the current prefix (do_pref). */
+ while (iter != objs.end() || pref_iter != common_prefixes.end()) {
+ bool do_pref = false;
+ bool do_objs = false;
+ rgw_obj_key key;
+ if (iter != objs.end()) {
+ key = iter->key;
+ }
+ if (pref_iter == common_prefixes.end())
+ do_objs = true;
+ else if (iter == objs.end())
+ do_pref = true;
+ else if (!key.empty() && key.name.compare(pref_iter->first) == 0) {
+ /* Object and prefix share a name: emit the object, drop the prefix. */
+ do_objs = true;
+ ++pref_iter;
+ } else if (!key.empty() && key.name.compare(pref_iter->first) <= 0)
+ do_objs = true;
+ else
+ do_pref = true;
+
+ if (do_objs && (allow_unordered || marker.empty() || marker < key)) {
+ /* The entry named exactly like the requested path is not listed. */
+ if (key.name.compare(path) == 0)
+ goto next;
+
+ s->formatter->open_object_section("object");
+ s->formatter->dump_string("name", key.name);
+ s->formatter->dump_string("hash", iter->meta.etag);
+ s->formatter->dump_int("bytes", iter->meta.accounted_size);
+ if (!iter->meta.user_data.empty())
+ s->formatter->dump_string("user_custom_data", iter->meta.user_data);
+ string single_content_type = iter->meta.content_type;
+ if (iter->meta.content_type.size()) {
+ // content type might hold multiple values, just dump the last one
+ // rfind() yields npos when there is no comma; stored in a signed type
+ // that is negative, so the branch below is skipped. A comma at index 0
+ // is skipped as well because of the strict "> 0".
+ ssize_t pos = iter->meta.content_type.rfind(',');
+ if (pos > 0) {
+ ++pos;
+ while (single_content_type[pos] == ' ')
+ ++pos;
+ single_content_type = single_content_type.substr(pos);
+ }
+ s->formatter->dump_string("content_type", single_content_type);
+ }
+ dump_time(s, "last_modified", &iter->meta.mtime);
+ s->formatter->close_section();
+ }
+
+ if (do_pref && (marker.empty() || pref_iter->first.compare(marker.name) > 0)) {
+ const string& name = pref_iter->first;
+ /* A prefix that is just the delimiter itself is not listed. */
+ if (name.compare(delimiter) == 0)
+ goto next;
+
+ s->formatter->open_object_section_with_attrs("subdir", FormatterAttrs("name", name.c_str(), NULL));
+
+ /* swift is a bit inconsistent here */
+ switch (s->format) {
+ case RGW_FORMAT_XML:
+ s->formatter->dump_string("name", name);
+ break;
+ default:
+ s->formatter->dump_string("subdir", name);
+ }
+ s->formatter->close_section();
+ }
+next:
+ /* Advance whichever sequence supplied the element just handled. */
+ if (do_objs)
+ ++iter;
+ else
+ ++pref_iter;
+ }
+
+ s->formatter->close_section();
+
+ /* On success an empty body becomes STATUS_NO_CONTENT; a positive op_ret is
+ * reset to 0 before the status is set. */
+ int64_t content_len = 0;
+ if (! op_ret) {
+ content_len = s->formatter->get_len();
+ if (content_len == 0) {
+ op_ret = STATUS_NO_CONTENT;
+ }
+ } else if (op_ret > 0) {
+ op_ret = 0;
+ }
+
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ end_header(s, this, NULL, content_len);
+ /* On error only the headers are sent; the formatted body is not flushed. */
+ if (op_ret < 0) {
+ return;
+ }
+
+ rgw_flush_formatter_and_reset(s, s->formatter);
+} // RGWListBucket_ObjStore_SWIFT::send_response
+
+/* Emit the Swift container response headers for the bucket bound to the
+ * request: timestamp, usage counters, ACLs, placement, stored attributes,
+ * versioning location, quota, static-website settings and Last-Modified.
+ *
+ * @param s        request state the headers are written to
+ * @param bucket   usage counters (object count, bytes, rounded bytes)
+ * @param quota    container quota; dumped only when enabled
+ * @param ws_conf  static-website configuration of the container */
+static void dump_container_metadata(struct req_state *s,
+                                    const RGWBucketEnt& bucket,
+                                    const RGWQuotaInfo& quota,
+                                    const RGWBucketWebsiteConf& ws_conf)
+{
+  auto& binfo = s->bucket_info;
+
+  /* X-Timestamp keeps us aligned with the Swift API. */
+  dump_header(s, "X-Timestamp", utime_t(binfo.creation_time));
+
+  dump_header(s, "X-Container-Object-Count", bucket.count);
+  dump_header(s, "X-Container-Bytes-Used", bucket.size);
+  dump_header(s, "X-Container-Bytes-Used-Actual", bucket.size_rounded);
+
+  /* The block below is emitted only when the request names no object. */
+  if (s->object.empty()) {
+    std::string read_acl, write_acl;
+    auto swift_policy =
+      static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get());
+    swift_policy->to_str(read_acl, write_acl);
+
+    if (!read_acl.empty()) {
+      dump_header(s, "X-Container-Read", read_acl);
+    }
+    if (!write_acl.empty()) {
+      dump_header(s, "X-Container-Write", write_acl);
+    }
+    if (!binfo.placement_rule.name.empty()) {
+      dump_header(s, "X-Storage-Policy", binfo.placement_rule.name);
+    }
+    dump_header(s, "X-Storage-Class", binfo.placement_rule.get_storage_class());
+
+    /* User-defined metadata items and generic attrs. */
+    const size_t meta_prefix_len = sizeof(RGW_ATTR_META_PREFIX) - 1;
+    for (auto attr = s->bucket_attrs.lower_bound(RGW_ATTR_PREFIX);
+         attr != s->bucket_attrs.end();
+         ++attr) {
+      const char * const attr_name = attr->first.c_str();
+      const auto http_name = rgw_to_http_attrs.find(attr_name);
+
+      if (http_name != rgw_to_http_attrs.end()) {
+        /* Attribute with a well-known HTTP header name. */
+        dump_header(s, http_name->second, attr->second);
+        continue;
+      }
+      if (strncmp(attr_name, RGW_ATTR_META_PREFIX, meta_prefix_len) == 0) {
+        dump_header_prefixed(s, "X-Container-Meta-",
+                             camelcase_dash_http_attr(attr_name + meta_prefix_len),
+                             attr->second);
+      }
+    }
+  }
+
+  /* Container versioning info. */
+  if (!binfo.swift_ver_location.empty()) {
+    dump_header(s, "X-Versions-Location",
+                url_encode(binfo.swift_ver_location));
+  }
+
+  /* Quota headers; negative limits are not dumped. */
+  if (quota.enabled) {
+    if (quota.max_size >= 0) {
+      dump_header(s, "X-Container-Meta-Quota-Bytes", quota.max_size);
+    }
+    if (quota.max_objects >= 0) {
+      dump_header(s, "X-Container-Meta-Quota-Count", quota.max_objects);
+    }
+  }
+
+  /* Static Website headers. */
+  if (!ws_conf.index_doc_suffix.empty()) {
+    dump_header(s, "X-Container-Meta-Web-Index", ws_conf.index_doc_suffix);
+  }
+  if (!ws_conf.error_doc.empty()) {
+    dump_header(s, "X-Container-Meta-Web-Error", ws_conf.error_doc);
+  }
+  if (!ws_conf.subdir_marker.empty()) {
+    dump_header(s, "X-Container-Meta-Web-Directory-Type",
+                ws_conf.subdir_marker);
+  }
+  if (!ws_conf.listing_css_doc.empty()) {
+    dump_header(s, "X-Container-Meta-Web-Listings-CSS",
+                ws_conf.listing_css_doc);
+  }
+  if (ws_conf.listing_enabled) {
+    dump_header(s, "X-Container-Meta-Web-Listings", "true");
+  }
+
+  /* Bucket's modification time. Compliance with the Swift API really
+   * needs that. */
+  dump_last_modified(s, s->bucket_mtime);
+}
+
+/* Run the generic account-stat logic, then load the user's attributes into
+ * `attrs`. The outcome is reported through `op_ret`. */
+void RGWStatAccount_ObjStore_SWIFT::execute()
+{
+  RGWStatAccount_ObjStore::execute();
+  if (op_ret < 0) {
+    /* Keep the failure of the generic step; fetching the attributes next
+     * would overwrite op_ret and could mask the error with a success.
+     * NOTE(review): assumes the base execute() reports through op_ret. */
+    return;
+  }
+
+  op_ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, attrs);
+}
+
+/* Answer an account stat: on success the status becomes 204 No Content and
+ * the account metadata headers are dumped; the header section is then
+ * finished with a zero content length. */
+void RGWStatAccount_ObjStore_SWIFT::send_response()
+{
+  const bool stat_ok = (op_ret >= 0);
+  if (stat_ok) {
+    op_ret = STATUS_NO_CONTENT;
+
+    auto& acct_acl =
+      static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl);
+    dump_account_metadata(s, global_stats, policies_stats, attrs,
+                          user_quota, acct_acl);
+  }
+
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+
+  end_header(s, nullptr, nullptr, 0, true);
+
+  dump_start(s);
+}
+
+/* Answer a container stat: on success the status becomes 204 No Content and
+ * the container metadata headers are dumped before the headers are ended. */
+void RGWStatBucket_ObjStore_SWIFT::send_response()
+{
+  const bool stat_ok = (op_ret >= 0);
+  if (stat_ok) {
+    op_ret = STATUS_NO_CONTENT;
+    dump_container_metadata(s, bucket, bucket_quota,
+                            s->bucket_info.website_conf);
+  }
+
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+
+  end_header(s, this, nullptr, 0, true);
+  dump_start(s);
+}
+
+/* Parse container-level settings supplied through Swift request headers.
+ *
+ * ACLs: when X-Container-Read and/or X-Container-Write is present, a Swift
+ * policy is built and copied into *policy and *has_policy is set to true.
+ * CORS: when X-Container-Meta-Access-Control-Allow-Origin is present, a CORS
+ * configuration is built and copied into *cors_config and *has_cors is set
+ * to true.
+ *
+ * @param rw_mask  passed through to the policy creation
+ * @return 0 on success, or the negative error returned while building the
+ *         policy or the CORS configuration. */
+static int get_swift_container_settings(req_state * const s,
+                                        RGWRados * const store,
+                                        RGWAccessControlPolicy * const policy,
+                                        bool * const has_policy,
+                                        uint32_t * rw_mask,
+                                        RGWCORSConfiguration * const cors_config,
+                                        bool * const has_cors)
+{
+  const char * const read_list = s->info.env->get("HTTP_X_CONTAINER_READ");
+  const char * const write_list = s->info.env->get("HTTP_X_CONTAINER_WRITE");
+
+  *has_policy = false;
+
+  if (read_list || write_list) {
+    RGWAccessControlPolicy_SWIFT swift_policy(s->cct);
+    const auto r = swift_policy.create(store,
+                                       s->user->user_id,
+                                       s->user->display_name,
+                                       read_list,
+                                       write_list,
+                                       *rw_mask);
+    if (r < 0) {
+      return r;
+    }
+
+    *policy = swift_policy;
+    *has_policy = true;
+  }
+
+  *has_cors = false;
+
+  /* Check and update CORS configuration. */
+  const char *allow_origins = s->info.env->get("HTTP_X_CONTAINER_META_ACCESS_CONTROL_ALLOW_ORIGIN");
+  const char *allow_headers = s->info.env->get("HTTP_X_CONTAINER_META_ACCESS_CONTROL_ALLOW_HEADERS");
+  const char *expose_headers = s->info.env->get("HTTP_X_CONTAINER_META_ACCESS_CONTROL_EXPOSE_HEADERS");
+  const char *max_age = s->info.env->get("HTTP_X_CONTAINER_META_ACCESS_CONTROL_MAX_AGE");
+  if (allow_origins) {
+    /* A scoped object instead of new/delete: nothing to release by hand on
+     * the error path and no leak if anything below throws. */
+    RGWCORSConfiguration_SWIFT swift_cors;
+    const int r = swift_cors.create_update(allow_origins, allow_headers,
+                                           expose_headers, max_age);
+    if (r < 0) {
+      dout(0) << "Error creating/updating the cors configuration" << dendl;
+      return r;
+    }
+    *has_cors = true;
+    *cors_config = swift_cors;
+    cors_config->dump();
+  }
+
+  return 0;
+}
+
+/* Prefixes of the request-environment keys carrying Swift account and
+ * container metadata. They are handed to get_rmattrs_from_headers() as the
+ * "put" prefix (an empty value requests removal) and the "remove" prefix
+ * (explicit removal). */
+#define ACCT_REMOVE_ATTR_PREFIX "HTTP_X_REMOVE_ACCOUNT_META_"
+#define ACCT_PUT_ATTR_PREFIX "HTTP_X_ACCOUNT_META_"
+#define CONT_REMOVE_ATTR_PREFIX "HTTP_X_REMOVE_CONTAINER_META_"
+#define CONT_PUT_ATTR_PREFIX "HTTP_X_CONTAINER_META_"
+
+/* Collect the names of metadata attributes the client asked to remove.
+ *
+ * A header requests removal either explicitly (its key starts with
+ * del_prefix) or by carrying an empty value under put_prefix. Prefix
+ * comparison is case-insensitive. Each match is inserted into rmattr_names
+ * as RGW_ATTR_META_PREFIX followed by the lower-cased, dashed remainder of
+ * the key. */
+static void get_rmattrs_from_headers(const req_state * const s,
+                                     const char * const put_prefix,
+                                     const char * const del_prefix,
+                                     set<string>& rmattr_names)
+{
+  const size_t put_prefix_len = strlen(put_prefix);
+  const size_t del_prefix_len = strlen(del_prefix);
+
+  for (const auto& header : s->info.env->get_map()) {
+    const char * const key = header.first.c_str();
+
+    /* Length of the matched prefix; stays 0 when nothing is to be removed. */
+    size_t skip = 0;
+    if (strncasecmp(key, del_prefix, del_prefix_len) == 0) {
+      /* Explicitly requested removal. */
+      skip = del_prefix_len;
+    } else if (strncasecmp(key, put_prefix, put_prefix_len) == 0 &&
+               header.second.empty()) {
+      /* Removal requested by putting an empty value. */
+      skip = put_prefix_len;
+    }
+
+    if (skip == 0) {
+      continue;
+    }
+
+    string attr_name(RGW_ATTR_META_PREFIX);
+    attr_name += lowercase_dash_http_attr(key + skip);
+    rmattr_names.insert(attr_name);
+  }
+}
+
+/* Read the Swift object-versioning headers of the request.
+ *
+ * A non-empty X-Remove-Versions-Location resets swift_ver_location to an
+ * empty string; X-Versions-Location, when present, then overrides it with
+ * the new location.
+ *
+ * @return 0, or -ERR_PRECONDITION_FAILED when a location is supplied while
+ *         rgw_swift_versioning_enabled is off. */
+static int get_swift_versioning_settings(
+  req_state * const s,
+  boost::optional<std::string>& swift_ver_location)
+{
+  /* Removal has lower priority than setting a new location, which is why
+   * it is handled first and may be overwritten below. */
+  const std::string removal_request =
+    s->info.env->get("HTTP_X_REMOVE_VERSIONS_LOCATION", "");
+  if (!removal_request.empty()) {
+    swift_ver_location = boost::in_place(std::string());
+  }
+
+  if (!s->info.env->exists("HTTP_X_VERSIONS_LOCATION")) {
+    return 0;
+  }
+
+  /* If the Swift's versioning is globally disabled but someone wants to
+   * enable it for a given container, new version of Swift will generate
+   * the precondition failed error. */
+  if (!s->cct->_conf->rgw_swift_versioning_enabled) {
+    return -ERR_PRECONDITION_FAILED;
+  }
+
+  swift_ver_location = s->info.env->get("HTTP_X_VERSIONS_LOCATION", "");
+  return 0;
+}
+
+/* Gather the parameters of a Swift container creation: ACL and CORS
+ * settings, location constraint, metadata removals, placement rule and
+ * versioning location. Returns 0 or a negative error code. */
+int RGWCreateBucket_ObjStore_SWIFT::get_params()
+{
+  bool has_policy;
+  uint32_t policy_rw_mask = 0;
+
+  const int ret = get_swift_container_settings(s, store, &policy, &has_policy,
+                                               &policy_rw_mask, &cors_config,
+                                               &has_cors);
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* No ACL headers were supplied: fall back to the default policy. */
+  if (!has_policy) {
+    policy.create_default(s->user->user_id, s->user->display_name);
+  }
+
+  location_constraint = store->svc.zone->get_zonegroup().api_name;
+  get_rmattrs_from_headers(s, CONT_PUT_ATTR_PREFIX,
+                           CONT_REMOVE_ATTR_PREFIX, rmattr_names);
+  placement_rule.init(s->info.env->get("HTTP_X_STORAGE_POLICY", ""),
+                      s->info.storage_class);
+
+  return get_swift_versioning_settings(s, swift_ver_location);
+}
+
+static inline int handle_metadata_errors(req_state* const s, const int op_ret)
+{
+ if (op_ret == -EFBIG) {
+ /* Handle the custom error message of exceeding maximum custom attribute
+ * (stored as xattr) size. */
+ const auto error_message = boost::str(
+ boost::format("Metadata value longer than %lld")
+ % s->cct->_conf.get_val<Option::size_t>("rgw_max_attr_size"));
+ set_req_state_err(s, EINVAL, error_message);
+ return -EINVAL;
+ } else if (op_ret == -E2BIG) {
+ const auto error_message = boost::str(
+ boost::format("Too many metadata items; max %lld")
+ % s->cct->_conf.get_val<uint64_t>("rgw_max_attrs_num_in_req"));
+ set_req_state_err(s, EINVAL, error_message);
+ return -EINVAL;
+ }
+
+ return op_ret;
+}
+
+/* Send the container-creation response. Metadata-limit errors keep the
+ * state set by handle_metadata_errors(); otherwise success maps to 201
+ * Created and an already existing bucket to 202 Accepted. */
+void RGWCreateBucket_ObjStore_SWIFT::send_response()
+{
+  const auto meta_ret = handle_metadata_errors(s, op_ret);
+  const bool metadata_error = (meta_ret != op_ret);
+
+  if (metadata_error) {
+    op_ret = meta_ret;
+  } else {
+    if (op_ret == 0) {
+      op_ret = STATUS_CREATED;
+    } else if (op_ret == -ERR_BUCKET_EXISTS) {
+      op_ret = STATUS_ACCEPTED;
+    }
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  /* Propose ending HTTP header with 0 Content-Length header. */
+  end_header(s, nullptr, nullptr, 0);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Send the container-deletion response; success is reported as 204 No
+ * Content, any other op_ret value is passed on unchanged. */
+void RGWDeleteBucket_ObjStore_SWIFT::send_response()
+{
+  const int status = op_ret ? op_ret : STATUS_NO_CONTENT;
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+  end_header(s, this, nullptr, 0);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Work out the Swift object expiration requested by the client.
+ *
+ * X-Delete-After (seconds relative to now) takes precedence over
+ * X-Delete-At (absolute seconds). When neither is given, delete_at becomes
+ * boost::none, or a zero real_time when X-Remove-Delete-At is present.
+ *
+ * @return 0 on success; -EINVAL when the value is not a valid base-10
+ *         integer or the resulting time lies in the past. */
+static int get_delete_at_param(req_state *s, boost::optional<real_time> &delete_at)
+{
+  /* Handle Swift object expiration. */
+  real_time delat_proposal;
+  string x_delete = s->info.env->get("HTTP_X_DELETE_AFTER", "");
+
+  if (x_delete.empty()) {
+    x_delete = s->info.env->get("HTTP_X_DELETE_AT", "");
+  } else {
+    /* X-Delete-After HTTP is present. It means we need add its value
+     * to the current time. */
+    delat_proposal = real_clock::now();
+  }
+
+  if (x_delete.empty()) {
+    delete_at = boost::none;
+    if (s->info.env->exists("HTTP_X_REMOVE_DELETE_AT")) {
+      delete_at = boost::in_place(real_time());
+    }
+    return 0;
+  }
+
+  string err;
+  /* Hold the parsed value in a long long: a plain long would narrow the
+   * result of strict_strtoll() where long is 32 bits wide. */
+  const long long ts = strict_strtoll(x_delete.c_str(), 10, &err);
+
+  if (!err.empty()) {
+    return -EINVAL;
+  }
+
+  delat_proposal += make_timespan(ts);
+  if (delat_proposal < real_clock::now()) {
+    return -EINVAL;
+  }
+
+  delete_at = delat_proposal;
+
+  return 0;
+}
+
+/* Run the generic permission check, turning -EACCES into -EPERM for
+ * anonymous users. */
+int RGWPutObj_ObjStore_SWIFT::verify_permission()
+{
+  op_ret = RGWPutObj_ObjStore::verify_permission();
+
+  /* We have to differentiate error codes depending on whether user is
+   * anonymous (401 Unauthorized) or he doesn't have necessary permissions
+   * (403 Forbidden). */
+  const bool anonymous = s->auth.identity->is_anonymous();
+  return (anonymous && op_ret == -EACCES) ? -EPERM : op_ret;
+}
+
+/* Validate one SLO manifest entry against the stored segment and fill in
+ * its size.
+ *
+ * entry.path is expected as "[/...]<container>/<object>". The segment's
+ * head is read to obtain its size (the original size when the object is
+ * compressed). A non-zero entry.size_bytes must match that size; on success
+ * entry.size_bytes holds the real size.
+ *
+ * @return 0 on success; -EINVAL for a malformed path or a size mismatch;
+ *         -EIO when the compression info cannot be read; otherwise the
+ *         error from the bucket lookup or from preparing the read. */
+int RGWPutObj_ObjStore_SWIFT::update_slo_segment_size(rgw_slo_entry& entry) {
+
+  int r = 0;
+  const string& path = entry.path;
+
+  /* If the path starts with slashes, strip them all. */
+  const size_t pos_init = path.find_first_not_of('/');
+
+  if (pos_init == string::npos) {
+    return -EINVAL;
+  }
+
+  const size_t pos_sep = path.find('/', pos_init);
+  if (pos_sep == string::npos) {
+    return -EINVAL;
+  }
+
+  string bucket_name = path.substr(pos_init, pos_sep - pos_init);
+  string obj_name = path.substr(pos_sep + 1);
+
+  rgw_bucket bucket;
+
+  /* Info of the container that really holds the segment. It defaults to the
+   * request's own bucket and is switched to the looked-up one when the
+   * segment lives elsewhere, so the read below is not set up with the
+   * manifest's bucket info by mistake. */
+  RGWBucketInfo seg_bucket_info;
+  const RGWBucketInfo *target_bucket_info = &s->bucket_info;
+
+  if (bucket_name.compare(s->bucket.name) != 0) {
+    map<string, bufferlist> bucket_attrs;
+    auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+    r = store->get_bucket_info(sysobj_ctx, s->user->user_id.tenant,
+                               bucket_name, seg_bucket_info, nullptr,
+                               &bucket_attrs);
+    if (r < 0) {
+      ldpp_dout(this, 0) << "could not get bucket info for bucket="
+                         << bucket_name << dendl;
+      return r;
+    }
+    bucket = seg_bucket_info.bucket;
+    target_bucket_info = &seg_bucket_info;
+  } else {
+    bucket = s->bucket;
+  }
+
+  /* fetch the stored size of the seg (or error if not valid) */
+  rgw_obj_key slo_key(obj_name);
+  rgw_obj slo_seg(bucket, slo_key);
+
+  /* no prefetch */
+  RGWObjectCtx obj_ctx(store);
+  obj_ctx.set_atomic(slo_seg);
+
+  RGWRados::Object op_target(store, *target_bucket_info, obj_ctx, slo_seg);
+  RGWRados::Object::Read read_op(&op_target);
+
+  bool compressed;
+  RGWCompressionInfo cs_info;
+  map<std::string, buffer::list> attrs;
+  uint64_t size_bytes{0};
+
+  read_op.params.attrs = &attrs;
+  read_op.params.obj_size = &size_bytes;
+
+  r = read_op.prepare();
+  if (r < 0) {
+    return r;
+  }
+
+  r = rgw_compression_info_from_attrset(attrs, compressed, cs_info);
+  if (r < 0) {
+    return -EIO;
+  }
+
+  if (compressed) {
+    size_bytes = cs_info.orig_size;
+  }
+
+  /* "When the PUT operation sees the multipart-manifest=put query
+   * parameter, it reads the request body and verifies that each
+   * segment object exists and that the sizes and ETags match. If
+   * there is a mismatch, the PUT operation fails."
+   */
+  if (entry.size_bytes &&
+      (entry.size_bytes != size_bytes)) {
+    return -EINVAL;
+  }
+
+  entry.size_bytes = size_bytes;
+
+  return 0;
+} /* RGWPutObj_ObjStore_SWIFT::update_slo_segment_size */
+
+/* Collect the parameters of a Swift object PUT: upload framing, supplied
+ * etag, content type, default ACL, expiration, optional custom user data
+ * and DLO/SLO manifest information, then defer to the generic
+ * RGWPutObj_ObjStore::get_params().
+ *
+ * Returns 0 / the generic result on success, or a negative error code:
+ * -EINVAL for bad metadata, an invalid multipart-manifest value or too many
+ * SLO entries; -ERR_LENGTH_REQUIRED when neither a length nor chunked
+ * encoding is given; or the error of a failing helper. */
+int RGWPutObj_ObjStore_SWIFT::get_params()
+{
+ if (s->has_bad_meta) {
+ return -EINVAL;
+ }
+
+ /* Without a declared length the upload must use chunked encoding. */
+ if (!s->length) {
+ const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+ if (!encoding || strcmp(encoding, "chunked") != 0) {
+ ldout(s->cct, 20) << "neither length nor chunked encoding" << dendl;
+ return -ERR_LENGTH_REQUIRED;
+ }
+
+ chunked_upload = true;
+ }
+
+ supplied_etag = s->info.env->get("HTTP_ETAG");
+
+ /* No content type recorded: try to derive one from the text following
+ * the last '.' of the object name. */
+ if (!s->generic_attrs.count(RGW_ATTR_CONTENT_TYPE)) {
+ ldout(s->cct, 5) << "content type wasn't provided, trying to guess" << dendl;
+ const char *suffix = strrchr(s->object.name.c_str(), '.');
+ if (suffix) {
+ suffix++;
+ if (*suffix) {
+ string suffix_str(suffix);
+ const char *mime = rgw_find_mime_by_ext(suffix_str);
+ if (mime) {
+ s->generic_attrs[RGW_ATTR_CONTENT_TYPE] = mime;
+ }
+ }
+ }
+ }
+
+ policy.create_default(s->user->user_id, s->user->display_name);
+
+ int r = get_delete_at_param(s, delete_at);
+ if (r < 0) {
+ ldout(s->cct, 5) << "ERROR: failed to get Delete-At param" << dendl;
+ return r;
+ }
+
+ /* Optional operator-configured header whose value is kept as user_data. */
+ if (!s->cct->_conf->rgw_swift_custom_header.empty()) {
+ string custom_header = s->cct->_conf->rgw_swift_custom_header;
+ if (s->info.env->exists(custom_header.c_str())) {
+ user_data = s->info.env->get(custom_header.c_str());
+ }
+ }
+
+ dlo_manifest = s->info.env->get("HTTP_X_OBJECT_MANIFEST");
+ bool exists;
+ string multipart_manifest = s->info.args.get("multipart-manifest", &exists);
+ /* Static Large Object upload: the request body is the JSON manifest. */
+ if (exists) {
+ if (multipart_manifest != "put") {
+ ldout(s->cct, 5) << "invalid multipart-manifest http param: " << multipart_manifest << dendl;
+ return -EINVAL;
+ }
+
+#define MAX_SLO_ENTRY_SIZE (1024 + 128) // 1024 - max obj name, 128 - enough extra for other info
+ uint64_t max_len = s->cct->_conf->rgw_max_slo_entries * MAX_SLO_ENTRY_SIZE;
+
+ slo_info = new RGWSLOInfo;
+
+ /* NOTE(review): this r shadows the outer r declared above. */
+ int r = 0;
+ std::tie(r, slo_info->raw_data) = rgw_rest_get_json_input_keep_data(s->cct, s, slo_info->entries, max_len);
+ if (r < 0) {
+ ldout(s->cct, 5) << "failed to read input for slo r=" << r << dendl;
+ return r;
+ }
+
+ if ((int64_t)slo_info->entries.size() > s->cct->_conf->rgw_max_slo_entries) {
+ ldout(s->cct, 5) << "too many entries in slo request: " << slo_info->entries.size() << dendl;
+ return -EINVAL;
+ }
+
+ /* The large-object etag is the MD5 over the segments' etag strings, in
+ * manifest order. */
+ MD5 etag_sum;
+ uint64_t total_size = 0;
+ for (auto& entry : slo_info->entries) {
+ etag_sum.Update((const unsigned char *)entry.etag.c_str(),
+ entry.etag.length());
+
+ /* if size_bytes == 0, it should be replaced with the
+ * real segment size (which could be 0); this follows from the
+ * fact that Swift requires all segments to exist, but permits
+ * the size_bytes element to be omitted from the SLO manifest, see
+ * https://docs.openstack.org/swift/latest/api/large_objects.html
+ */
+ r = update_slo_segment_size(entry);
+ if (r < 0) {
+ return r;
+ }
+
+ total_size += entry.size_bytes;
+
+ ldout(s->cct, 20) << "slo_part: " << entry.path
+ << " size=" << entry.size_bytes
+ << " etag=" << entry.etag
+ << dendl;
+ }
+ complete_etag(etag_sum, &lo_etag);
+ slo_info->total_size = total_size;
+
+ /* The manifest body has already been read; record its length in ofs. */
+ ofs = slo_info->raw_data.length();
+ }
+
+ return RGWPutObj_ObjStore::get_params();
+}
+
+/* Send the response of a Swift object PUT: status, ETag and Last-Modified.
+ *
+ * Metadata-limit errors keep the state and custom message installed by
+ * handle_metadata_errors(); every other outcome is recorded exactly once,
+ * with success mapped to 201 Created. This mirrors the sibling
+ * send_response() handlers in this file. */
+void RGWPutObj_ObjStore_SWIFT::send_response()
+{
+  const auto meta_ret = handle_metadata_errors(s, op_ret);
+  if (meta_ret != op_ret) {
+    /* The error state was already set together with its message; setting
+     * it again from the bare code would only re-derive it from the code. */
+    op_ret = meta_ret;
+  } else {
+    if (!op_ret) {
+      op_ret = STATUS_CREATED;
+    }
+    set_req_state_err(s, op_ret);
+  }
+
+  if (! lo_etag.empty()) {
+    /* Static Large Object of Swift API has two etags represented by
+     * following members:
+     *  - etag - for the manifest itself (it will be stored in xattrs),
+     *  - lo_etag - for the content composited from SLO's segments.
+     *    The value is calculated basing on segments' etags.
+     * In response for PUT request we have to expose the second one.
+     * The first one may be obtained by GET with "multipart-manifest=get"
+     * in query string on a given SLO. */
+    dump_etag(s, lo_etag, true /* quoted */);
+  } else {
+    dump_etag(s, etag);
+  }
+
+  dump_last_modified(s, mtime);
+  dump_errno(s);
+  end_header(s, this);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Parse the account ACL supplied in X-Account-Access-Control, if any.
+ *
+ * When the header is present, a Swift account policy is built from it and
+ * copied into *policy, and *has_policy becomes true; otherwise *has_policy
+ * is false and *policy is untouched.
+ *
+ * @return 0 on success, -EINVAL when the policy cannot be created. */
+static int get_swift_account_settings(req_state * const s,
+                                      RGWRados * const store,
+                                      RGWAccessControlPolicy_SWIFTAcct * const policy,
+                                      bool * const has_policy)
+{
+  *has_policy = false;
+
+  const char * const acl_attr = s->info.env->get("HTTP_X_ACCOUNT_ACCESS_CONTROL");
+  if (!acl_attr) {
+    return 0;
+  }
+
+  RGWAccessControlPolicy_SWIFTAcct swift_acct_policy(s->cct);
+  const bool created = swift_acct_policy.create(store,
+                                                s->user->user_id,
+                                                s->user->display_name,
+                                                string(acl_attr));
+  if (!created) {
+    return -EINVAL;
+  }
+
+  *policy = swift_acct_policy;
+  *has_policy = true;
+  return 0;
+}
+
+/* Gather the parameters of an account metadata update: the account ACL and
+ * the set of metadata items to remove. Returns 0, -EINVAL on bad metadata,
+ * or the error from parsing the account settings. */
+int RGWPutMetadataAccount_ObjStore_SWIFT::get_params()
+{
+  if (s->has_bad_meta) {
+    return -EINVAL;
+  }
+
+  // FIXME: we need to carry unique_ptr in generic class
+  // and allocate appropriate ACL class in the ctor
+  auto * const acct_policy =
+    static_cast<RGWAccessControlPolicy_SWIFTAcct *>(&policy);
+
+  const int ret = get_swift_account_settings(s, store, acct_policy,
+                                             &has_policy);
+  if (ret < 0) {
+    return ret;
+  }
+
+  get_rmattrs_from_headers(s, ACCT_PUT_ATTR_PREFIX, ACCT_REMOVE_ATTR_PREFIX,
+                           rmattr_names);
+  return 0;
+}
+
+/* Send the response of an account metadata update. Metadata-limit errors
+ * keep the state set by handle_metadata_errors(); otherwise success is
+ * reported as 204 No Content. */
+void RGWPutMetadataAccount_ObjStore_SWIFT::send_response()
+{
+  const auto meta_ret = handle_metadata_errors(s, op_ret);
+  const bool metadata_error = (meta_ret != op_ret);
+
+  if (metadata_error) {
+    op_ret = meta_ret;
+  } else {
+    if (op_ret == 0) {
+      op_ret = STATUS_NO_CONTENT;
+    }
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s, this);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Gather the parameters of a container metadata update: ACL and CORS
+ * settings, metadata removals, placement rule and versioning location.
+ * Returns 0 or a negative error code. */
+int RGWPutMetadataBucket_ObjStore_SWIFT::get_params()
+{
+  if (s->has_bad_meta) {
+    return -EINVAL;
+  }
+
+  const int ret = get_swift_container_settings(s, store, &policy, &has_policy,
+                                               &policy_rw_mask, &cors_config,
+                                               &has_cors);
+  if (ret < 0) {
+    return ret;
+  }
+
+  get_rmattrs_from_headers(s, CONT_PUT_ATTR_PREFIX, CONT_REMOVE_ATTR_PREFIX,
+                           rmattr_names);
+  placement_rule.init(s->info.env->get("HTTP_X_STORAGE_POLICY", ""),
+                      s->info.storage_class);
+
+  return get_swift_versioning_settings(s, swift_ver_location);
+}
+
+/* Send the response of a container metadata update. Metadata-limit errors
+ * keep the state set by handle_metadata_errors(); otherwise success is
+ * reported as 204 No Content. */
+void RGWPutMetadataBucket_ObjStore_SWIFT::send_response()
+{
+  const auto meta_ret = handle_metadata_errors(s, op_ret);
+  if (meta_ret != op_ret) {
+    op_ret = meta_ret;
+  } else {
+    /* The former extra "op_ret != -EINVAL" test was dead: it is always
+     * true once op_ret is known to be zero. */
+    if (!op_ret) {
+      op_ret = STATUS_NO_CONTENT;
+    }
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s, this);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Gather the parameters of an object metadata update: the expiration
+ * request and the DLO manifest header. Returns 0, -EINVAL on bad metadata,
+ * or the error from parsing the expiration. */
+int RGWPutMetadataObject_ObjStore_SWIFT::get_params()
+{
+  if (s->has_bad_meta) {
+    return -EINVAL;
+  }
+
+  /* Handle Swift object expiration. */
+  const int ret = get_delete_at_param(s, delete_at);
+  if (ret < 0) {
+    ldout(s->cct, 5) << "ERROR: failed to get Delete-At param" << dendl;
+    return ret;
+  }
+
+  dlo_manifest = s->info.env->get("HTTP_X_OBJECT_MANIFEST");
+  return 0;
+}
+
+/* Send the response of an object metadata update. Metadata-limit errors
+ * keep the state set by handle_metadata_errors(); otherwise success is
+ * reported as 202 Accepted. A zero Content-Length is dumped unless the
+ * request ended in an error. */
+void RGWPutMetadataObject_ObjStore_SWIFT::send_response()
+{
+  const auto meta_ret = handle_metadata_errors(s, op_ret);
+  const bool metadata_error = (meta_ret != op_ret);
+
+  if (metadata_error) {
+    op_ret = meta_ret;
+  } else {
+    if (op_ret == 0) {
+      op_ret = STATUS_ACCEPTED;
+    }
+    set_req_state_err(s, op_ret);
+  }
+
+  const bool failed = s->is_err();
+  if (!failed) {
+    dump_content_length(s, 0);
+  }
+
+  dump_errno(s);
+  end_header(s, this);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Write the body of a Swift bulk-delete response into the formatter.
+ *
+ * The overall "Response Status" is derived as follows:
+ *  - with failures: from the last failure whose error is neither -ENOENT
+ *    nor -EACCES, or from ERR_INVALID_REQUEST when there is none;
+ *  - nothing deleted and nothing missing: 400 with an explanatory body;
+ *  - otherwise: 200.
+ * Every failure is then listed with its path and its own status.
+ *
+ * @param num_deleted  number of successfully removed entries
+ * @param num_unfound  number of entries that did not exist
+ * @param failures     per-entry failure descriptions
+ * @param prot_flags   protocol flags used for error translation (in)
+ * @param formatter    destination of the response body (out) */
+static void bulkdelete_respond(const unsigned num_deleted,
+                               const unsigned int num_unfound,
+                               const std::list<RGWBulkDelete::fail_desc_t>& failures,
+                               const int prot_flags, /* in */
+                               ceph::Formatter& formatter) /* out */
+{
+  formatter.open_object_section("delete");
+
+  string resp_status;
+  string resp_body;
+
+  if (!failures.empty()) {
+    int reason = ERR_INVALID_REQUEST;
+    /* Iterate by reference: the descriptors are only read here. */
+    for (const auto& fail_desc : failures) {
+      if (-ENOENT != fail_desc.err && -EACCES != fail_desc.err) {
+        reason = fail_desc.err;
+      }
+    }
+    rgw_err err;
+    set_req_state_err(err, reason, prot_flags);
+    dump_errno(err, resp_status);
+  } else if (0 == num_deleted && 0 == num_unfound) {
+    /* 400 Bad Request */
+    dump_errno(400, resp_status);
+    resp_body = "Invalid bulk delete.";
+  } else {
+    /* 200 OK */
+    dump_errno(200, resp_status);
+  }
+
+  encode_json("Number Deleted", num_deleted, &formatter);
+  encode_json("Number Not Found", num_unfound, &formatter);
+  encode_json("Response Body", resp_body, &formatter);
+  encode_json("Response Status", resp_status, &formatter);
+
+  formatter.open_array_section("Errors");
+  for (const auto& fail_desc : failures) {
+    formatter.open_array_section("object");
+
+    stringstream ss_name;
+    ss_name << fail_desc.path;
+    encode_json("Name", ss_name.str(), &formatter);
+
+    rgw_err err;
+    set_req_state_err(err, fail_desc.err, prot_flags);
+    string status;
+    dump_errno(err, status);
+    encode_json("Status", status, &formatter);
+    formatter.close_section();
+  }
+  formatter.close_section();
+
+  formatter.close_section();
+}
+
+/* Run the generic permission check, turning -EACCES into -EPERM for
+ * anonymous users. */
+int RGWDeleteObj_ObjStore_SWIFT::verify_permission()
+{
+  op_ret = RGWDeleteObj_ObjStore::verify_permission();
+
+  /* We have to differentiate error codes depending on whether user is
+   * anonymous (401 Unauthorized) or he doesn't have necessary permissions
+   * (403 Forbidden). */
+  const bool anonymous = s->auth.identity->is_anonymous();
+  return (anonymous && op_ret == -EACCES) ? -EPERM : op_ret;
+}
+
+/* Record whether the request carries "multipart-manifest=delete", then
+ * defer to the generic parameter parsing. */
+int RGWDeleteObj_ObjStore_SWIFT::get_params()
+{
+  const string& manifest_mode = s->info.args.get("multipart-manifest");
+  multipart_delete = (manifest_mode == "delete");
+
+  return RGWDeleteObj_ObjStore::get_params();
+}
+
+/* Send the response of an object DELETE.
+ *
+ * A plain delete reports 204 No Content on success. A multipart-manifest
+ * delete always sets status 0 and streams a bulk-delete style body
+ * describing the outcome. */
+void RGWDeleteObj_ObjStore_SWIFT::send_response()
+{
+  int status = op_ret;
+  if (multipart_delete) {
+    status = 0;
+  } else if (status == 0) {
+    status = STATUS_NO_CONTENT;
+  }
+
+  set_req_state_err(s, status);
+  dump_errno(s);
+
+  if (!multipart_delete) {
+    end_header(s, this);
+  } else {
+    end_header(s, this /* RGWOp */, nullptr /* contype */,
+               CHUNKED_TRANSFER_ENCODING);
+
+    if (deleter) {
+      /* The deleter ran: report what it counted. */
+      bulkdelete_respond(deleter->get_num_deleted(),
+                         deleter->get_num_unfound(),
+                         deleter->get_failures(),
+                         s->prot_flags,
+                         *s->formatter);
+    } else if (-ENOENT == op_ret) {
+      /* The object itself was not found. */
+      bulkdelete_respond(0, 1, {}, s->prot_flags, *s->formatter);
+    } else {
+      /* Any other failure is reported against the requested path. */
+      RGWBulkDelete::acct_path_t path;
+      path.bucket_name = s->bucket_name;
+      path.obj_key = s->object;
+
+      RGWBulkDelete::fail_desc_t fail_desc;
+      fail_desc.err = op_ret;
+      fail_desc.path = path;
+
+      bulkdelete_respond(0, 0, { fail_desc }, s->prot_flags, *s->formatter);
+    }
+  }
+
+  rgw_flush_formatter_and_reset(s, s->formatter);
+
+}
+
+/* Copy the stored content type, if any, from attrs into content_type;
+ * content_type is left untouched when the attribute is absent. */
+static void get_contype_from_attrs(map<string, bufferlist>& attrs,
+                                   string& content_type)
+{
+  const auto found = attrs.find(RGW_ATTR_CONTENT_TYPE);
+  if (found == attrs.end()) {
+    return;
+  }
+
+  content_type = rgw_bl_str(found->second);
+}
+
+/* Dump the object-level response headers derived from stored attributes.
+ *
+ * Attributes with a well-known HTTP name are collected first and emitted
+ * after the Content-Disposition override/fallback has been applied; the SLO
+ * indicator and user metadata (X-Object-Meta-*) are emitted while scanning.
+ * Finally X-Delete-At is dumped when a non-zero expiration is stored. */
+static void dump_object_metadata(struct req_state * const s,
+                                 const map<string, bufferlist>& attrs)
+{
+  map<string, string> response_attrs;
+
+  /* NOTE(review): each entry is copied on purpose here. attrs is const and
+   * the helpers below may require a mutable bufferlist -- confirm before
+   * switching this loop to a const reference. */
+  for (auto kv : attrs) {
+    const char * name = kv.first.c_str();
+    const auto aiter = rgw_to_http_attrs.find(name);
+
+    if (aiter != std::end(rgw_to_http_attrs)) {
+      response_attrs[aiter->second] = rgw_bl_str(kv.second);
+    } else if (strcmp(name, RGW_ATTR_SLO_UINDICATOR) == 0) {
+      // this attr has an extra length prefix from encode() in prior versions
+      dump_header(s, "X-Object-Meta-Static-Large-Object", "True");
+    } else if (strncmp(name, RGW_ATTR_META_PREFIX,
+                       sizeof(RGW_ATTR_META_PREFIX)-1) == 0) {
+      name += sizeof(RGW_ATTR_META_PREFIX) - 1;
+      dump_header_prefixed(s, "X-Object-Meta-",
+                           camelcase_dash_http_attr(name), kv.second);
+    }
+  }
+
+  /* Handle override and fallback for Content-Disposition HTTP header.
+   * At the moment this will be used only by TempURL of the Swift API. */
+  const auto cditer = rgw_to_http_attrs.find(RGW_ATTR_CONTENT_DISP);
+  if (cditer != std::end(rgw_to_http_attrs)) {
+    const auto& name = cditer->second;
+
+    if (!s->content_disp.override.empty()) {
+      response_attrs[name] = s->content_disp.override;
+    } else if (!s->content_disp.fallback.empty()
+               && response_attrs.find(name) == std::end(response_attrs)) {
+      response_attrs[name] = s->content_disp.fallback;
+    }
+  }
+
+  /* Plain string pairs that are only read: iterate by reference instead of
+   * copying both strings on every step. */
+  for (const auto& kv : response_attrs) {
+    dump_header(s, kv.first, kv.second);
+  }
+
+  const auto iter = attrs.find(RGW_ATTR_DELETE_AT);
+  if (iter != std::end(attrs)) {
+    utime_t delete_at;
+    try {
+      decode(delete_at, iter->second);
+      if (!delete_at.is_zero()) {
+        dump_header(s, "X-Delete-At", delete_at.sec());
+      }
+    } catch (buffer::error& err) {
+      ldout(s->cct, 0) << "ERROR: cannot decode object's " RGW_ATTR_DELETE_AT
+                          " attr, ignoring"
+                       << dendl;
+    }
+  }
+}
+
+/* Give the copy destination the default policy of the requesting user.
+ * Always returns 0. */
+int RGWCopyObj_ObjStore_SWIFT::init_dest_policy()
+{
+  const auto& requester = *s->user;
+  dest_policy.create_default(requester.user_id, requester.display_name);
+  return 0;
+}
+
+/* Gather the parameters of a Swift object copy: conditional headers, source
+ * and destination names, the attribute merge mode and the expiration.
+ * Returns 0 or the error from parsing the expiration. */
+int RGWCopyObj_ObjStore_SWIFT::get_params()
+{
+  const auto * const env = s->info.env;
+
+  if_mod = env->get("HTTP_IF_MODIFIED_SINCE");
+  if_unmod = env->get("HTTP_IF_UNMODIFIED_SINCE");
+  if_match = env->get("HTTP_COPY_IF_MATCH");
+  if_nomatch = env->get("HTTP_COPY_IF_NONE_MATCH");
+
+  src_tenant_name = s->src_tenant_name;
+  src_bucket_name = s->src_bucket_name;
+  src_object = s->src_object;
+  dest_tenant_name = s->bucket_tenant;
+  dest_bucket_name = s->bucket_name;
+  dest_object = s->object.name;
+
+  /* X-Fresh-Metadata: TRUE (any case) replaces the attributes instead of
+   * merging them. */
+  const char * const fresh_meta = env->get("HTTP_X_FRESH_METADATA");
+  const bool replace_attrs =
+    fresh_meta && strcasecmp(fresh_meta, "TRUE") == 0;
+  attrs_mod = replace_attrs ? RGWRados::ATTRSMOD_REPLACE
+                            : RGWRados::ATTRSMOD_MERGE;
+
+  const int ret = get_delete_at_param(s, delete_at);
+  if (ret < 0) {
+    ldout(s->cct, 5) << "ERROR: failed to get Delete-At param" << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/* Report copy progress. The first call sends the status line and headers;
+ * later calls dump the current offset. The formatter is flushed each time.
+ *
+ * NOTE(review): a zero op_ret is replaced by STATUS_CREATED before the
+ * "op_ret == 0" test below, so the "progress" section looks unreachable on
+ * success -- confirm whether that is intended. */
+void RGWCopyObj_ObjStore_SWIFT::send_partial_response(off_t ofs)
+{
+  if (sent_header) {
+    s->formatter->dump_int("ofs", static_cast<uint64_t>(ofs));
+  } else {
+    if (op_ret == 0) {
+      op_ret = STATUS_CREATED;
+    }
+    set_req_state_err(s, op_ret);
+    dump_errno(s);
+    end_header(s, this);
+
+    /* Send progress information. Note that this diverge from the original swift
+     * spec. We do this in order to keep connection alive.
+     */
+    if (op_ret == 0) {
+      s->formatter->open_array_section("progress");
+    }
+    sent_header = true;
+  }
+
+  rgw_flush_formatter(s, s->formatter);
+}
+
+/* Dump the headers describing where a copied object came from:
+ * X-Copied-From, X-Copied-From-Account and X-Copied-From-Last-Modified. */
+void RGWCopyObj_ObjStore_SWIFT::dump_copy_info()
+{
+  /* X-Copied-From: "<container>/<object>", both URL-encoded. */
+  const auto copied_from =
+    url_encode(src_bucket.name) + "/" + url_encode(src_object.name);
+  dump_header(s, "X-Copied-From", copied_from);
+
+  /* X-Copied-From-Account. */
+  /* XXX tenant */
+  const auto copied_from_account = url_encode(s->user->user_id.id);
+  dump_header(s, "X-Copied-From-Account", copied_from_account);
+
+  /* X-Copied-From-Last-Modified. */
+  dump_time_header(s, "X-Copied-From-Last-Modified", src_mtime);
+}
+
+/* Finish a Swift object copy. If headers were already sent by the partial
+ * responses, the open formatter section is closed and flushed; otherwise
+ * the full header set (status, ETag, Last-Modified, copy info and object
+ * metadata) is sent. */
+void RGWCopyObj_ObjStore_SWIFT::send_response()
+{
+  if (sent_header) {
+    s->formatter->close_section();
+    rgw_flush_formatter(s, s->formatter);
+    return;
+  }
+
+  string content_type;
+  if (op_ret == 0) {
+    op_ret = STATUS_CREATED;
+  }
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  dump_etag(s, etag);
+  dump_last_modified(s, mtime);
+  dump_copy_info();
+  get_contype_from_attrs(attrs, content_type);
+  dump_object_metadata(s, attrs);
+
+  /* Fall back to a generic type when none is stored with the object. */
+  const char * const contype =
+    content_type.empty() ? "binary/octet-stream" : content_type.c_str();
+  end_header(s, this, contype);
+}
+
+/* Run the generic permission check, turning -EACCES into -EPERM for
+ * anonymous users. */
+int RGWGetObj_ObjStore_SWIFT::verify_permission()
+{
+  op_ret = RGWGetObj_ObjStore::verify_permission();
+
+  /* We have to differentiate error codes depending on whether user is
+   * anonymous (401 Unauthorized) or he doesn't have necessary permissions
+   * (403 Forbidden). */
+  const bool anonymous = s->auth.identity->is_anonymous();
+  return (anonymous && op_ret == -EACCES) ? -EPERM : op_ret;
+}
+
+/* Record whether the request carries "multipart-manifest=get", then defer
+ * to the generic parameter parsing. */
+int RGWGetObj_ObjStore_SWIFT::get_params()
+{
+  const string& manifest_mode = s->info.args.get("multipart-manifest");
+  skip_manifest = (manifest_mode == "get");
+
+  return RGWGetObj_ObjStore::get_params();
+}
+
+int RGWGetObj_ObjStore_SWIFT::send_response_data_error()
+{
+ std::string error_content;
+ op_ret = error_handler(op_ret, &error_content);
+ if (! op_ret) {
+ /* The error handler has taken care of the error. */
+ return 0;
+ }
+
+ bufferlist error_bl;
+ error_bl.append(error_content);
+ return send_response_data(error_bl, 0, error_bl.length());
+}
+
+/* Send (a piece of) the object GET response.
+ *
+ * The first call emits the status line and headers; every call then sends
+ * bl_len bytes of bl starting at bl_ofs when data is wanted and no error
+ * occurred. Returns 0, or the negative result of dump_body(). */
+int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl,
+ const off_t bl_ofs,
+ const off_t bl_len)
+{
+ string content_type;
+
+ /* Headers are sent only once. */
+ if (sent_header) {
+ goto send_data;
+ }
+
+ if (custom_http_ret) {
+ /* A custom HTTP status replaces the one derived from op_ret. */
+ set_req_state_err(s, 0);
+ dump_errno(s, custom_http_ret);
+ } else {
+ set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT
+ : op_ret);
+ dump_errno(s);
+
+ if (s->is_err()) {
+ end_header(s, NULL);
+ return 0;
+ }
+ }
+
+ if (range_str) {
+ dump_range(s, ofs, end, s->obj_size);
+ }
+
+ /* Checked on both paths, i.e. also after a custom status was dumped. */
+ if (s->is_err()) {
+ end_header(s, NULL);
+ return 0;
+ }
+
+ dump_content_length(s, total_len);
+ dump_last_modified(s, lastmod);
+ dump_header(s, "X-Timestamp", utime_t(lastmod));
+ if (is_slo) {
+ dump_header(s, "X-Static-Large-Object", "True");
+ }
+
+ if (! op_ret) {
+ /* The large-object etag, when set, is dumped instead of the stored one. */
+ if (! lo_etag.empty()) {
+ dump_etag(s, lo_etag, true /* quoted */);
+ } else {
+ auto iter = attrs.find(RGW_ATTR_ETAG);
+ if (iter != attrs.end()) {
+ dump_etag(s, iter->second.to_str());
+ }
+ }
+
+ get_contype_from_attrs(attrs, content_type);
+ dump_object_metadata(s, attrs);
+ }
+
+ /* Fall back to a generic type when none is stored with the object. */
+ end_header(s, this, !content_type.empty() ? content_type.c_str()
+ : "binary/octet-stream");
+
+ sent_header = true;
+
+send_data:
+ if (get_data && !op_ret) {
+ const auto r = dump_body(s, bl.c_str() + bl_ofs, bl_len);
+ if (r < 0) {
+ return r;
+ }
+ }
+ rgw_flush_formatter_and_reset(s, s->formatter);
+
+ return 0;
+}
+
+/* Send the response of a CORS preflight (OPTIONS) request: either the
+ * error, or the access-control headers computed for the matched rule. */
+void RGWOptionsCORS_ObjStore_SWIFT::send_response()
+{
+  string hdrs, exp_hdrs;
+  uint32_t max_age = CORS_MAX_AGE_INVALID;
+
+  /*EACCES means, there is no CORS registered yet for the bucket
+   *ENOENT means, there is no match of the Origin in the list of CORSRule
+   */
+  if (op_ret == -ENOENT) {
+    op_ret = -EACCES;
+  }
+
+  const bool failed = (op_ret < 0);
+  if (failed) {
+    set_req_state_err(s, op_ret);
+    dump_errno(s);
+    end_header(s, nullptr);
+    return;
+  }
+
+  get_response_params(hdrs, exp_hdrs, &max_age);
+  dump_errno(s);
+  dump_access_control(s, origin, req_meth, hdrs.c_str(), exp_hdrs.c_str(),
+                      max_age);
+  end_header(s, nullptr);
+}
+
+/* Read the next batch of bulk-delete entries from the request body.
+ *
+ * Each body line is a path: leading slashes are skipped, the text up to the
+ * next slash is the URL-decoded container name and the remainder, if any,
+ * the URL-decoded object key. Lines made only of slashes are ignored.
+ * Reading stops once items holds MAX_CHUNK_ENTRIES entries, in which case
+ * *is_truncated is set to true; it is false when the input ran out.
+ * Always returns 0. */
+int RGWBulkDelete_ObjStore_SWIFT::get_data(
+  list<RGWBulkDelete::acct_path_t>& items, bool * const is_truncated)
+{
+  constexpr size_t MAX_LINE_SIZE = 2048;
+
+  RGWClientIOStreamBuf ciosb(static_cast<RGWRestfulIO&>(*(s->cio)),
+                             size_t(s->cct->_conf->rgw_max_chunk_size));
+  istream cioin(&ciosb);
+
+  char line[MAX_LINE_SIZE];
+  while (cioin.getline(line, sizeof(line))) {
+    const string path_str(line);
+
+    ldout(s->cct, 20) << "extracted Bulk Delete entry: " << path_str << dendl;
+
+    RGWBulkDelete::acct_path_t path;
+
+    /* We need to skip all slashes at the beginning in order to preserve
+     * compliance with Swift. */
+    const size_t name_pos = path_str.find_first_not_of('/');
+
+    if (name_pos != string::npos) {
+      /* Separator is the first slash after the leading ones. */
+      const size_t sep_pos = path_str.find('/', name_pos);
+
+      if (sep_pos == string::npos) {
+        /* It's guaranteed here that bucket name is at least one character
+         * long and is different than slash. */
+        path.bucket_name = url_decode(path_str.substr(name_pos));
+      } else {
+        path.bucket_name = url_decode(path_str.substr(name_pos,
+                                                      sep_pos - name_pos));
+        path.obj_key = url_decode(path_str.substr(sep_pos + 1));
+      }
+
+      items.push_back(path);
+    }
+
+    if (items.size() == MAX_CHUNK_ENTRIES) {
+      *is_truncated = true;
+      return 0;
+    }
+  }
+
+  *is_truncated = false;
+  return 0;
+}
+
+/* Emits the status line and headers (chunked transfer encoding), then the
+ * bulk-delete report built from the deleter's counters and failure list. */
+void RGWBulkDelete_ObjStore_SWIFT::send_response()
+{
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this /* RGWOp */, nullptr /* contype */,
+             CHUNKED_TRANSFER_ENCODING);
+
+  auto& fmt = *s->formatter;
+  bulkdelete_respond(deleter->get_num_deleted(),
+                     deleter->get_num_unfound(),
+                     deleter->get_failures(),
+                     s->prot_flags,
+                     fmt);
+
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+
+/* Creates the stream getter used by the bulk-upload operation to pull the
+ * request body. Returns nullptr and sets op_ret to -EINVAL when the request
+ * carries no length (s->length is null). */
+std::unique_ptr<RGWBulkUploadOp::StreamGetter>
+RGWBulkUploadOp_ObjStore_SWIFT::create_stream()
+{
+  class SwiftStreamGetter : public StreamGetter {
+    const size_t conlen;   /* total body length announced by the client */
+    size_t curpos;         /* number of body bytes consumed so far */
+    req_state* const s;
+
+  public:
+    SwiftStreamGetter(req_state* const s, const size_t conlen)
+      : conlen(conlen),
+        curpos(0),
+        s(s) {
+    }
+
+    /* Reads up to `want` bytes of the body and appends them to dst.
+     * Returns the number of bytes read, a negative error from recv_body(),
+     * or -ERR_TOO_LARGE once more than rgw_max_put_size bytes were read. */
+    ssize_t get_at_most(size_t want, ceph::bufferlist& dst) override {
+      /* The amount to read is bounded by:
+       *  - the maximum requested by a caller,
+       *  - the data still to be provided by the client,
+       *  - RadosGW's chunk size limit. */
+      const size_t max_chunk_size = \
+        static_cast<size_t>(s->cct->_conf->rgw_max_chunk_size);
+      const size_t max_to_read = std::min({ want, conlen - curpos, max_chunk_size });
+
+      ldout(s->cct, 20) << "bulk_upload: get_at_most max_to_read="
+                        << max_to_read
+                        << ", dst.c_str()=" << reinterpret_cast<intptr_t>(dst.c_str()) << dendl;
+
+      bufferptr bp(max_to_read);
+      const auto read_len = recv_body(s, bp.c_str(), max_to_read);
+      /* Bail out BEFORE touching dst: appending with a negative length
+       * would be converted to a huge unsigned length. */
+      if (read_len < 0) {
+        return read_len;
+      }
+      dst.append(bp, 0, read_len);
+
+      curpos += read_len;
+      return curpos > s->cct->_conf->rgw_max_put_size ? -ERR_TOO_LARGE
+                                                      : read_len;
+    }
+
+    /* Reads exactly `want` bytes. A short read is reported as -EINVAL. */
+    ssize_t get_exactly(size_t want, ceph::bufferlist& dst) override {
+      ldout(s->cct, 20) << "bulk_upload: get_exactly want=" << want << dendl;
+
+      /* FIXME: do this in a loop. */
+      const auto ret = get_at_most(want, dst);
+      ldout(s->cct, 20) << "bulk_upload: get_exactly ret=" << ret << dendl;
+      if (ret < 0) {
+        return ret;
+      } else if (static_cast<size_t>(ret) != want) {
+        return -EINVAL;
+      } else {
+        return want;
+      }
+    }
+  };
+
+  if (! s->length) {
+    op_ret = -EINVAL;
+    return nullptr;
+  } else {
+    ldout(s->cct, 20) << "bulk upload: create_stream for length="
+                      << s->length << dendl;
+
+    /* NOTE(review): atoll() does no validation; a negative or malformed
+     * length ends up as a (possibly huge) size_t here. */
+    const size_t conlen = atoll(s->length);
+    return std::unique_ptr<SwiftStreamGetter>(new SwiftStreamGetter(s, conlen));
+  }
+}
+
+/* Sends the bulk-upload report. The HTTP headers are sent first (chunked
+ * transfer encoding); the outcome of the upload itself is reported inside
+ * the body through the "Response Status" / "Response Body" fields followed
+ * by one entry per failed path. */
+void RGWBulkUploadOp_ObjStore_SWIFT::send_response()
+{
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this /* RGWOp */, nullptr /* contype */,
+             CHUNKED_TRANSFER_ENCODING);
+  rgw_flush_formatter_and_reset(s, s->formatter);
+
+  s->formatter->open_object_section("delete");
+
+  std::string resp_status;
+  std::string resp_body;
+
+  if (! failures.empty()) {
+    rgw_err err;
+
+    /* Is the most recent failure one of the terminal errors? The list of
+     * terminal errors is the range being searched and the single-element
+     * range holding the last error is the needle. */
+    const auto last_err = { failures.back().err };
+    if (boost::algorithm::contains(terminal_errors, last_err)) {
+      /* The terminal errors are affecting the status of the whole upload. */
+      set_req_state_err(err, failures.back().err, s->prot_flags);
+    } else {
+      set_req_state_err(err, ERR_INVALID_REQUEST, s->prot_flags);
+    }
+
+    dump_errno(err, resp_status);
+  } else if (0 == num_created) {
+    /* Nothing created, nothing failed. This means the archive contained no
+     * entity we could understand (regular file or directory). We need to
+     * send 400 Bad Request to an HTTP client in the internal status field. */
+    dump_errno(400, resp_status);
+    resp_body = "Invalid Tar File: No Valid Files";
+  } else {
+    /* 201 Created */
+    dump_errno(201, resp_status);
+  }
+
+  encode_json("Number Files Created", num_created, s->formatter);
+  encode_json("Response Body", resp_body, s->formatter);
+  encode_json("Response Status", resp_status, s->formatter);
+
+  s->formatter->open_array_section("Errors");
+  for (const auto& fail_desc : failures) {
+    s->formatter->open_array_section("object");
+
+    encode_json("Name", fail_desc.path, s->formatter);
+
+    /* Translate the internal error number into its HTTP status text. */
+    rgw_err err;
+    set_req_state_err(err, fail_desc.err, s->prot_flags);
+    std::string status;
+    dump_errno(err, status);
+    encode_json("Status", status, s->formatter);
+
+    s->formatter->close_section();
+  }
+  s->formatter->close_section();
+
+  s->formatter->close_section();
+  rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+
+/* Sends the cross-domain policy document: a fixed XML envelope wrapped
+ * around the configured rgw_cross_domain_policy value. */
+void RGWGetCrossDomainPolicy_ObjStore_SWIFT::send_response()
+{
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+
+  std::stringstream doc;
+
+  /* Prologue. */
+  doc << R"(<?xml version="1.0"?>)" << "\n";
+  doc << R"(<!DOCTYPE cross-domain-policy SYSTEM )"
+      << R"("http://www.adobe.com/xml/dtds/cross-domain-policy.dtd" >)" << "\n";
+
+  /* Policy element with the configured content. */
+  doc << R"(<cross-domain-policy>)" << "\n";
+  doc << g_conf()->rgw_cross_domain_policy << "\n";
+  doc << R"(</cross-domain-policy>)";
+
+  dump_body(s, doc.str());
+}
+
+/* Sends the health-check response. A body ("DISABLED BY FILE") is only
+ * written when op_ret is non-zero. */
+void RGWGetHealthCheck_ObjStore_SWIFT::send_response()
+{
+  set_req_state_err(s, op_ret);
+  dump_errno(s);
+  end_header(s, this, "application/xml");
+
+  if (! op_ret) {
+    return;
+  }
+
+  static constexpr char DISABLED[] = "DISABLED BY FILE";
+  /* sizeof() counts the terminating NUL, which must not be sent. */
+  dump_body(s, DISABLED, sizeof(DISABLED) - 1);
+}
+
+/* Table of the sections reported by the Swift info operation, in output
+ * order. Each entry pairs a section name with an info record; execute()
+ * reads its is_admin_info flag (entries flagged as admin-only are skipped
+ * unless admin info is enabled) and its list_data callback. A null callback
+ * makes execute() emit an empty object section named after the entry. */
+const vector<pair<string, RGWInfo_ObjStore_SWIFT::info>> RGWInfo_ObjStore_SWIFT::swift_info =
+{
+ {"bulk_delete", {false, nullptr}},
+ {"container_quotas", {false, nullptr}},
+ {"swift", {false, RGWInfo_ObjStore_SWIFT::list_swift_data}},
+ {"tempurl", { false, RGWInfo_ObjStore_SWIFT::list_tempurl_data}},
+ {"slo", {false, RGWInfo_ObjStore_SWIFT::list_slo_data}},
+ {"account_quotas", {false, nullptr}},
+ {"staticweb", {false, nullptr}},
+ {"tempauth", {false, RGWInfo_ObjStore_SWIFT::list_tempauth_data}},
+};
+
+/* Builds the "info" document by walking swift_info. Admin-only entries are
+ * included only when both swiftinfo_sig and swiftinfo_expires were supplied
+ * and the expiry has not passed.
+ * NOTE(review): the value of swiftinfo_sig is only checked for being
+ * non-empty here; it is not verified in this function. */
+void RGWInfo_ObjStore_SWIFT::execute()
+{
+  const string& swiftinfo_sig = s->info.args.get("swiftinfo_sig");
+  const string& swiftinfo_expires = s->info.args.get("swiftinfo_expires");
+
+  const bool is_admin_info_enabled =
+    !swiftinfo_sig.empty() &&
+    !swiftinfo_expires.empty() &&
+    !is_expired(swiftinfo_expires, s->cct);
+
+  s->formatter->open_object_section("info");
+
+  for (const auto& entry : swift_info) {
+    const auto& section_name = entry.first;
+    const auto& section_info = entry.second;
+
+    if (section_info.is_admin_info && !is_admin_info_enabled) {
+      continue;
+    }
+
+    if (section_info.list_data) {
+      /* The callback opens and closes its own section. */
+      section_info.list_data(*(s->formatter), s->cct->_conf, *store);
+    } else {
+      /* No callback: advertise the capability as an empty section. */
+      s->formatter->open_object_section(section_name.c_str());
+      s->formatter->close_section();
+    }
+  }
+
+  s->formatter->close_section();
+}
+
+/* Sends the status line and headers and flushes the document built by
+ * execute(). Any negative op_ret is replaced with STATUS_NO_CONTENT before
+ * the status is set. */
+void RGWInfo_ObjStore_SWIFT::send_response()
+{
+ if (op_ret < 0) {
+ op_ret = STATUS_NO_CONTENT;
+ }
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ end_header(s, this);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+}
+
+/* Dumps the "swift" info section: size and count limits taken from the
+ * configuration, the version string, and the placement targets of the
+ * zonegroup as "policies". Limits whose configured value is zero are
+ * omitted. */
+void RGWInfo_ObjStore_SWIFT::list_swift_data(Formatter& formatter,
+                                             const ConfigProxy& config,
+                                             RGWRados& store)
+{
+  formatter.open_object_section("swift");
+  formatter.dump_int("max_file_size", config->rgw_max_put_size);
+  formatter.dump_int("container_listing_limit", RGW_LIST_BUCKETS_LIMIT_MAX);
+
+  const string ceph_version(CEPH_GIT_NICE_VER);
+  formatter.dump_string("version", ceph_version);
+
+  /* Metadata name length: the configured attribute name limit minus the
+   * internal prefix prepended to user metadata names. */
+  const size_t max_attr_name_len =
+    g_conf().get_val<Option::size_t>("rgw_max_attr_name_len");
+  if (max_attr_name_len != 0) {
+    const size_t prefix_len = strlen(RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX);
+    const size_t meta_name_limit = max_attr_name_len - prefix_len;
+    formatter.dump_int("max_meta_name_length", meta_name_limit);
+  }
+
+  const size_t meta_value_limit =
+    g_conf().get_val<Option::size_t>("rgw_max_attr_size");
+  if (meta_value_limit != 0) {
+    formatter.dump_int("max_meta_value_length", meta_value_limit);
+  }
+
+  const size_t meta_num_limit =
+    g_conf().get_val<uint64_t>("rgw_max_attrs_num_in_req");
+  if (meta_num_limit != 0) {
+    formatter.dump_int("max_meta_count", meta_num_limit);
+  }
+
+  formatter.open_array_section("policies");
+  const RGWZoneGroup& zonegroup = store.svc.zone->get_zonegroup();
+  const auto& default_policy_name = zonegroup.default_placement.name;
+
+  for (const auto& target_entry : zonegroup.placement_targets) {
+    const auto& target = target_entry.second;
+
+    formatter.open_object_section("policy");
+    if (target.name == default_policy_name) {
+      formatter.dump_bool("default", true);
+    }
+    formatter.dump_string("name", target.name.c_str());
+    formatter.close_section();
+  }
+  formatter.close_section();
+
+  formatter.dump_int("max_object_name_size", RGWHandler_REST::MAX_OBJ_NAME_LEN);
+  formatter.dump_bool("strict_cors_mode", true);
+  formatter.dump_int("max_container_name_length", RGWHandler_REST::MAX_BUCKET_NAME_LEN);
+  formatter.close_section();
+}
+
+/* Dumps the "tempauth" info section, which advertises account_acls = true.
+ * The config and store parameters are unused. */
+void RGWInfo_ObjStore_SWIFT::list_tempauth_data(Formatter& formatter,
+ const ConfigProxy& config,
+ RGWRados& store)
+{
+ formatter.open_object_section("tempauth");
+ formatter.dump_bool("account_acls", true);
+ formatter.close_section();
+}
+/* Dumps the "tempurl" info section with the list of HTTP methods reported
+ * as supported. The config and store parameters are unused. */
+void RGWInfo_ObjStore_SWIFT::list_tempurl_data(Formatter& formatter,
+                                               const ConfigProxy& config,
+                                               RGWRados& store)
+{
+  static const char* const methods[] = {
+    "GET", "HEAD", "PUT", "POST", "DELETE"
+  };
+
+  formatter.open_object_section("tempurl");
+  formatter.open_array_section("methods");
+  for (const char* const method : methods) {
+    formatter.dump_string("methodname", method);
+  }
+  formatter.close_section();
+  formatter.close_section();
+}
+
+/* Dumps the "slo" info section: max_manifest_segments is taken from the
+ * rgw_max_slo_entries configuration value. The store parameter is unused. */
+void RGWInfo_ObjStore_SWIFT::list_slo_data(Formatter& formatter,
+ const ConfigProxy& config,
+ RGWRados& store)
+{
+ formatter.open_object_section("slo");
+ formatter.dump_int("max_manifest_segments", config->rgw_max_slo_entries);
+ formatter.close_section();
+}
+
+/* Tells whether the given expiry timestamp (decimal seconds) has passed.
+ * Returns true when the value cannot be parsed, is negative, or is not
+ * later than the current time; false otherwise. */
+bool RGWInfo_ObjStore_SWIFT::is_expired(const std::string& expires, CephContext* cct)
+{
+  string err;
+  const utime_t now = ceph_clock_now();
+  const long long expiration = strict_strtoll(expires.c_str(), 10, &err);
+  if (!err.empty()) {
+    ldout(cct, 5) << "failed to parse siginfo_expires: " << err << dendl;
+    return true;
+  }
+
+  /* A negative timestamp must not be reinterpreted as a huge unsigned
+   * value (i.e. "never expires"); it lies in the past by definition. */
+  if (expiration < 0 ||
+      static_cast<uint64_t>(expiration) <= static_cast<uint64_t>(now.sec())) {
+    ldout(cct, 5) << "siginfo expired: " << expiration << " <= " << now.sec() << dendl;
+    return true;
+  }
+
+  return false;
+}
+
+
+/* Initializes the operation. The object name from the request is taken
+ * over as the prefix of uploaded file names and the request's object key
+ * is reset before the parent class is initialized. */
+void RGWFormPost::init(RGWRados* const store,
+                       req_state* const s,
+                       RGWHandler* const dialect_handler)
+{
+  prefix = std::move(s->object.name);
+  s->object = rgw_obj_key();
+
+  RGWPostObj_ObjStore::init(store, s, dialect_handler);
+}
+
+/* Returns the value of the "max_file_size" control part (default "0").
+ * An unparsable or negative value is logged and reported as 0. */
+std::size_t RGWFormPost::get_max_file_size() /*const*/
+{
+  std::string max_str = get_part_str(ctrl_parts, "max_file_size", "0");
+
+  std::string err;
+  const long long parsed = strict_strtoll(max_str.c_str(), 10, &err);
+
+  if (! err.empty()) {
+    ldout(s->cct, 5) << "failed to parse FormPost's max_file_size: " << err
+                     << dendl;
+    return 0;
+  }
+
+  /* A negative size must not wrap around into a huge unsigned limit. */
+  if (parsed < 0) {
+    ldout(s->cct, 5) << "failed to parse FormPost's max_file_size: "
+                     << "negative value " << parsed << dendl;
+    return 0;
+  }
+
+  return static_cast<std::size_t>(parsed);
+}
+
+/* Tells whether the form is still valid according to its "expires" control
+ * part (decimal seconds, default "0"). Returns false when the value cannot
+ * be parsed, is negative, or is not later than the current time. */
+bool RGWFormPost::is_non_expired()
+{
+  std::string expires = get_part_str(ctrl_parts, "expires", "0");
+
+  std::string err;
+  const long long expires_timestamp =
+    strict_strtoll(expires.c_str(), 10, &err);
+
+  if (! err.empty()) {
+    dout(5) << "failed to parse FormPost's expires: " << err << dendl;
+    return false;
+  }
+
+  /* A negative timestamp must not be reinterpreted as a huge unsigned
+   * value (i.e. "never expires"); it lies in the past by definition. */
+  const utime_t now = ceph_clock_now();
+  if (expires_timestamp < 0 ||
+      static_cast<uint64_t>(expires_timestamp) <= static_cast<uint64_t>(now.sec())) {
+    dout(5) << "FormPost form expired: "
+            << expires_timestamp << " <= " << now.sec() << dendl;
+    return false;
+  }
+
+  return true;
+}
+
+/* Verifies the form's "signature" control part. The owner's user info is
+ * loaded into *s->user (and s->auth.identity is derived from it); then a
+ * signature is calculated with each non-empty temp URL key of that user.
+ * Returns true as soon as one of them equals the supplied signature, false
+ * if none does or if the owner info could not be obtained. */
+bool RGWFormPost::is_integral()
+{
+ const std::string form_signature = get_part_str(ctrl_parts, "signature");
+
+ try {
+ get_owner_info(s, *s->user);
+ s->auth.identity = rgw::auth::transform_old_authinfo(s);
+ } catch (...) {
+ /* get_owner_info() reports failures by throwing; any exception is
+ * treated as a failed verification. */
+ ldout(s->cct, 5) << "cannot get user_info of account's owner" << dendl;
+ return false;
+ }
+
+ for (const auto& kv : s->user->temp_url_keys) {
+ const int temp_url_key_num = kv.first;
+ const string& temp_url_key = kv.second;
+
+ /* Skip unset key slots. */
+ if (temp_url_key.empty()) {
+ continue;
+ }
+
+ /* The signature covers the request URI and the redirect, max_file_size,
+ * max_file_count and expires control parts. */
+ SignatureHelper sig_helper;
+ sig_helper.calc(temp_url_key,
+ s->info.request_uri,
+ get_part_str(ctrl_parts, "redirect"),
+ get_part_str(ctrl_parts, "max_file_size", "0"),
+ get_part_str(ctrl_parts, "max_file_count", "0"),
+ get_part_str(ctrl_parts, "expires", "0"));
+
+ const auto local_sig = sig_helper.get_signature();
+
+ ldout(s->cct, 20) << "FormPost signature [" << temp_url_key_num << "]"
+ << " (calculated): " << local_sig << dendl;
+
+ if (sig_helper.is_equal_to(form_signature)) {
+ return true;
+ } else {
+ /* NOTE(review): this writes the locally calculated signature to the
+ * log at level 5 - confirm that is acceptable. */
+ ldout(s->cct, 5) << "FormPost's signature mismatch: "
+ << local_sig << " != " << form_signature << dendl;
+ }
+ }
+
+ return false;
+}
+
+/* Loads into owner_info the user info of the owner of the bucket named in
+ * the request URL. Failures are reported by throwing an int error code:
+ * -EPERM when no bucket name was given or a user lookup fails, or the
+ * return value of get_bucket_info() when the bucket lookup fails. */
+void RGWFormPost::get_owner_info(const req_state* const s,
+ RGWUserInfo& owner_info) const
+{
+ /* We cannot use req_state::bucket_name because it isn't available
+ * now. It will be initialized in RGWHandler_REST_SWIFT::postauth_init(). */
+ const string& bucket_name = s->init_state.url_bucket;
+
+ /* TempURL in Formpost only requires that bucket name is specified. */
+ if (bucket_name.empty()) {
+ throw -EPERM;
+ }
+
+ /* Determine the tenant of the bucket from the account name, if any. */
+ string bucket_tenant;
+ if (!s->account_name.empty()) {
+ RGWUserInfo uinfo;
+ bool found = false;
+
+ const rgw_user uid(s->account_name);
+ if (uid.tenant.empty()) {
+ /* No tenant in the account name: first try the user whose tenant is
+ * the same as its id. */
+ const rgw_user tenanted_uid(uid.id, uid.id);
+
+ if (rgw_get_user_info_by_uid(store, tenanted_uid, uinfo) >= 0) {
+ /* Succeeded. */
+ bucket_tenant = uinfo.user_id.tenant;
+ found = true;
+ }
+ }
+
+ /* Otherwise look the account up exactly as given. */
+ if (!found && rgw_get_user_info_by_uid(store, uid, uinfo) < 0) {
+ throw -EPERM;
+ } else {
+ bucket_tenant = uinfo.user_id.tenant;
+ }
+ }
+
+ /* Need to get user info of bucket owner. */
+ RGWBucketInfo bucket_info;
+ int ret = store->get_bucket_info(*s->sysobj_ctx,
+ bucket_tenant, bucket_name,
+ bucket_info, nullptr);
+ if (ret < 0) {
+ throw ret;
+ }
+
+ ldout(s->cct, 20) << "temp url user (bucket owner): " << bucket_info.owner
+ << dendl;
+
+ if (rgw_get_user_info_by_uid(store, bucket_info.owner, owner_info) < 0) {
+ throw -EPERM;
+ }
+}
+
+/* Parses the leading part of a FormPost body. Control parts are read and
+ * stored in ctrl_parts until the first part carrying a "filename" parameter
+ * is met; that part becomes current_data_part. Afterwards the size limits
+ * are set and the form's expiry and signature are checked.
+ * Returns 0 on success or a negative error code; err_msg is set for the
+ * failures detected here. */
+int RGWFormPost::get_params()
+{
+ /* The parent class extracts boundary info from the Content-Type. */
+ int ret = RGWPostObj_ObjStore::get_params();
+ if (ret < 0) {
+ return ret;
+ }
+
+ policy.create_default(s->user->user_id, s->user->display_name);
+
+ /* Let's start parsing the HTTP body by parsing each form part step-
+ * by-step till encountering the first part with file data. */
+ do {
+ struct post_form_part part;
+ ret = read_form_part_header(&part, stream_done);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Dump the parsed header only when level-20 logging is gathered. */
+ if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+ ldout(s->cct, 20) << "read part header -- part.name="
+ << part.name << dendl;
+
+ for (const auto& pair : part.fields) {
+ ldout(s->cct, 20) << "field.name=" << pair.first << dendl;
+ ldout(s->cct, 20) << "field.val=" << pair.second.val << dendl;
+ ldout(s->cct, 20) << "field.params:" << dendl;
+
+ for (const auto& param_pair : pair.second.params) {
+ ldout(s->cct, 20) << " " << param_pair.first
+ << " -> " << param_pair.second << dendl;
+ }
+ }
+ }
+
+ if (stream_done) {
+ /* Unexpected here. */
+ err_msg = "Malformed request";
+ return -EINVAL;
+ }
+
+ /* A part is a data part when its Content-Disposition field has a
+ * "filename" parameter. */
+ const auto field_iter = part.fields.find("Content-Disposition");
+ if (std::end(part.fields) != field_iter &&
+ std::end(field_iter->second.params) != field_iter->second.params.find("filename")) {
+ /* First data part ahead. */
+ current_data_part = std::move(part);
+
+ /* Stop the iteration. We can assume that all control parts have been
+ * already parsed. The rest of HTTP body should contain data parts
+ * only. They will be picked up by ::get_data(). */
+ break;
+ } else {
+ /* Control part ahead. Receive, parse and store for later usage. */
+ bool boundary;
+ ret = read_data(part.data, s->cct->_conf->rgw_max_chunk_size,
+ boundary, stream_done);
+ if (ret < 0) {
+ return ret;
+ } else if (! boundary) {
+ err_msg = "Couldn't find boundary";
+ return -EINVAL;
+ }
+
+ ctrl_parts[part.name] = std::move(part);
+ }
+ } while (! stream_done);
+
+ min_len = 0;
+ max_len = get_max_file_size();
+
+ if (! current_data_part) {
+ err_msg = "FormPost: no files to process";
+ return -EINVAL;
+ }
+
+ if (! is_non_expired()) {
+ err_msg = "FormPost: Form Expired";
+ return -EPERM;
+ }
+
+ if (! is_integral()) {
+ err_msg = "FormPost: Invalid Signature";
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/* Returns the object name for the current data part: the prefix followed
+ * by the "filename" parameter of its Content-Disposition field, or just
+ * the prefix when the field or the parameter is missing. */
+std::string RGWFormPost::get_current_filename() const
+{
+  const auto& fields = current_data_part->fields;
+
+  const auto disposition = fields.find("Content-Disposition");
+  if (std::end(fields) != disposition) {
+    const auto& params = disposition->second.params;
+    const auto filename = params.find("filename");
+
+    if (std::end(params) != filename) {
+      return prefix + filename->second;
+    }
+  }
+
+  return prefix;
+}
+
+/* Returns the value of the Content-Type field of the current data part,
+ * or an empty string when the part has no such field. */
+std::string RGWFormPost::get_current_content_type() const
+{
+  const auto& fields = current_data_part->fields;
+
+  const auto content_type = fields.find("Content-Type");
+  if (std::end(fields) == content_type) {
+    return std::string();
+  }
+
+  return content_type->second.val;
+}
+
+/* Reads the header of the next form part, if any. Returns true and makes
+ * it the current data part when it carries a non-empty "filename"
+ * parameter in its Content-Disposition field; returns false otherwise,
+ * including when the stream is finished or the header cannot be read. */
+bool RGWFormPost::is_next_file_to_upload()
+{
+  if (stream_done) {
+    return false;
+  }
+
+  /* We have at least one additional part in the body. */
+  struct post_form_part part;
+  if (read_form_part_header(&part, stream_done) < 0) {
+    return false;
+  }
+
+  const auto disposition = part.fields.find("Content-Disposition");
+  if (std::end(part.fields) == disposition) {
+    return false;
+  }
+
+  const auto& params = disposition->second.params;
+  const auto filename = params.find("filename");
+  if (std::end(params) == filename || filename->second.empty()) {
+    return false;
+  }
+
+  current_data_part = std::move(part);
+  return true;
+}
+
+/* Reads the next chunk (at most rgw_max_chunk_size) of the current data
+ * part into bl. `again` is set to true while the part's closing boundary
+ * has not been reached. Returns the length of bl or a negative error. */
+int RGWFormPost::get_data(ceph::bufferlist& bl, bool& again)
+{
+  bool reached_boundary;
+
+  const int ret = read_data(bl, s->cct->_conf->rgw_max_chunk_size,
+                            reached_boundary, stream_done);
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* Tell RGWPostObj::execute() that it has some data to put. */
+  again = ! reached_boundary;
+
+  return bl.length();
+}
+
+/* Sends the FormPost response. When the form carries a non-empty
+ * "redirect" control part the status becomes STATUS_REDIRECT and the
+ * redirect location is dumped; err_msg is reported as the error code. */
+void RGWFormPost::send_response()
+{
+  const std::string redirect = get_part_str(ctrl_parts, "redirect");
+  const bool wants_redirect = ! redirect.empty();
+
+  if (wants_redirect) {
+    op_ret = STATUS_REDIRECT;
+  }
+
+  set_req_state_err(s, op_ret);
+  s->err.err_code = err_msg;
+  dump_errno(s);
+
+  if (wants_redirect) {
+    dump_redirect(s, redirect);
+  }
+
+  end_header(s, this);
+}
+
+/* Tells whether the request looks like a FormPost upload: its content type
+ * is multipart/form-data (case-insensitively) and a boundary parameter is
+ * present. */
+bool RGWFormPost::is_formpost_req(req_state* const s)
+{
+  std::string content_type;
+  std::map<std::string, std::string> params;
+
+  parse_boundary_params(s->info.env->get("CONTENT_TYPE", ""),
+                        content_type, params);
+
+  if (! boost::algorithm::iequals(content_type, "multipart/form-data")) {
+    return false;
+  }
+
+  return params.find("boundary") != params.end();
+}
+
+
+/* GET on the service (account) level: list the buckets. */
+RGWOp *RGWHandler_REST_Service_SWIFT::op_get()
+{
+ return new RGWListBuckets_ObjStore_SWIFT;
+}
+
+/* HEAD on the service (account) level: stat the account. */
+RGWOp *RGWHandler_REST_Service_SWIFT::op_head()
+{
+ return new RGWStatAccount_ObjStore_SWIFT;
+}
+
+/* PUT on the service level is only supported for bulk upload, i.e. when
+ * the "extract-archive" argument is present; otherwise no op is returned. */
+RGWOp *RGWHandler_REST_Service_SWIFT::op_put()
+{
+  const bool is_bulk_upload = s->info.args.exists("extract-archive");
+  return is_bulk_upload ? new RGWBulkUploadOp_ObjStore_SWIFT : nullptr;
+}
+
+/* POST on the service level: bulk delete when the "bulk-delete" argument
+ * is present, otherwise an update of the account metadata. */
+RGWOp *RGWHandler_REST_Service_SWIFT::op_post()
+{
+  if (! s->info.args.exists("bulk-delete")) {
+    return new RGWPutMetadataAccount_ObjStore_SWIFT;
+  }
+
+  return new RGWBulkDelete_ObjStore_SWIFT;
+}
+
+/* DELETE on the service level is only supported for bulk delete, i.e.
+ * when the "bulk-delete" argument is present; otherwise no op is returned. */
+RGWOp *RGWHandler_REST_Service_SWIFT::op_delete()
+{
+  const bool is_bulk_delete = s->info.args.exists("bulk-delete");
+  return is_bulk_delete ? new RGWBulkDelete_ObjStore_SWIFT : nullptr;
+}
+
+/* Serves a custom error page instead of the default error body. The page
+ * is the object named "<http_ret><error_doc>" (status code immediately
+ * followed by the error_doc string); it is fetched with a GET-object op
+ * whose HTTP status is forced to http_ret. Returns the result of
+ * rgw_process_authenticated(). */
+int RGWSwiftWebsiteHandler::serve_errordoc(const int http_ret,
+ const std::string error_doc)
+{
+ /* Try to throw it all away. */
+ s->formatter->reset();
+
+ class RGWGetErrorPage : public RGWGetObj_ObjStore_SWIFT {
+ public:
+ RGWGetErrorPage(RGWRados* const store,
+ RGWHandler_REST* const handler,
+ req_state* const s,
+ const int http_ret) {
+ /* Calling a virtual from the base class is safe as the subobject should
+ * be properly initialized and we haven't overridden the init method. */
+ init(store, s, handler);
+ set_get_data(true);
+ set_custom_http_response(http_ret);
+ }
+
+ int error_handler(const int err_no,
+ std::string* const error_content) override {
+ /* Enforce that any error generated while getting the error page will
+ * not be send to a client. This allows us to recover from the double
+ * fault situation by sending the original message. */
+ return 0;
+ }
+ } get_errpage_op(store, handler, s, http_ret);
+
+ /* Retarget the request to the error page object. */
+ s->object = std::to_string(http_ret) + error_doc;
+
+ /* The op lives on the stack; it is run synchronously right here. */
+ RGWOp* newop = &get_errpage_op;
+ RGWRequest req(0);
+ return rgw_process_authenticated(handler, newop, &req, s, true);
+}
+
+int RGWSwiftWebsiteHandler::error_handler(const int err_no,
+ std::string* const error_content)
+{
+ const auto& ws_conf = s->bucket_info.website_conf;
+
+ if (can_be_website_req() && ! ws_conf.error_doc.empty()) {
+ set_req_state_err(s, err_no);
+ return serve_errordoc(s->err.http_ret, ws_conf.error_doc);
+ }
+
+ /* Let's go to the default, no-op handler. */
+ return err_no;
+}
+
+/* Tells whether the client sent the X-Web-Mode header set to "true"
+ * (compared case-insensitively). */
+bool RGWSwiftWebsiteHandler::is_web_mode() const
+{
+  const boost::string_ref header_val(s->info.env->get("HTTP_X_WEB_MODE", ""));
+  const bool enabled = boost::algorithm::iequals(header_val, "true");
+  return enabled;
+}
+
+/* Tells whether the request is eligible for static-website handling. */
+bool RGWSwiftWebsiteHandler::can_be_website_req() const
+{
+  /* Static website works only with the GET or HEAD method. Nothing more. */
+  static const std::set<boost::string_ref> ws_methods = { "GET", "HEAD" };
+  if (ws_methods.find(s->info.method) == ws_methods.end()) {
+    return false;
+  }
+
+  /* We also need to handle early failures from the auth system. In such
+   * cases req_state::auth.identity may be empty. Let's treat that the same
+   * way as the anonymous access. */
+  if (! s->auth.identity) {
+    return true;
+  }
+
+  /* Swift serves websites only for anonymous requests unless client
+   * explicitly requested this behaviour by supplying X-Web-Mode HTTP
+   * header set to true. */
+  return s->auth.identity->is_anonymous() || is_web_mode();
+}
+
+/* Creates an op that redirects the client to the request URI with a
+ * trailing slash appended. The op performs no permission check, fails with
+ * -ERR_PERMANENT_REDIRECT and answers with an empty body plus the redirect
+ * location. */
+RGWOp* RGWSwiftWebsiteHandler::get_ws_redirect_op()
+{
+ class RGWMovedPermanently: public RGWOp {
+ const std::string location;
+ public:
+ explicit RGWMovedPermanently(const std::string& location)
+ : location(location) {
+ }
+
+ int verify_permission() override {
+ return 0;
+ }
+
+ void execute() override {
+ op_ret = -ERR_PERMANENT_REDIRECT;
+ return;
+ }
+
+ void send_response() override {
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ dump_content_length(s, 0);
+ dump_redirect(s, location);
+ end_header(s, this);
+ }
+
+ const char* name() const override {
+ return "RGWMovedPermanently";
+ }
+ };
+
+ return new RGWMovedPermanently(s->info.request_uri + '/');
+}
+
+/* Creates a GET-object op retargeted at the configured index document,
+ * appended to the current object name if there is one. The body is only
+ * fetched for the GET method. */
+RGWOp* RGWSwiftWebsiteHandler::get_ws_index_op()
+{
+  /* Retarget to get obj on requested index file. */
+  if (s->object.empty()) {
+    s->object = s->bucket_info.website_conf.get_index_doc();
+  } else {
+    s->object = s->object.name +
+                s->bucket_info.website_conf.get_index_doc();
+  }
+
+  const bool is_get = boost::algorithm::equals("GET", s->info.method);
+
+  auto index_op = new RGWGetObj_ObjStore_SWIFT;
+  index_op->set_get_data(is_get);
+
+  return index_op;
+}
+
+/* Creates an op that renders an HTML listing of the bucket content under
+ * the current object name, which is used as the listing prefix. The
+ * request's object key is reset as part of the retargeting. */
+RGWOp* RGWSwiftWebsiteHandler::get_ws_listing_op()
+{
+ class RGWWebsiteListing : public RGWListBucket_ObjStore_SWIFT {
+ const std::string prefix_override;
+
+ /* Fixed listing parameters: the overridden prefix, the default maximum
+ * and "/" as the delimiter. */
+ int get_params() override {
+ prefix = prefix_override;
+ max = default_max;
+ delimiter = "/";
+ return 0;
+ }
+
+ void send_response() override {
+ /* Generate the header now. */
+ set_req_state_err(s, op_ret);
+ dump_errno(s);
+ dump_container_metadata(s, bucket, bucket_quota,
+ s->bucket_info.website_conf);
+ end_header(s, this, "text/html");
+ if (op_ret < 0) {
+ return;
+ }
+
+ /* Now it's the time to start generating HTML bucket listing.
+ * All the crazy stuff with crafting tags will be delegated to
+ * RGWSwiftWebsiteListingFormatter. */
+ std::stringstream ss;
+ RGWSwiftWebsiteListingFormatter htmler(ss, prefix);
+
+ const auto& ws_conf = s->bucket_info.website_conf;
+ htmler.generate_header(s->decoded_uri,
+ ws_conf.listing_css_doc);
+
+ for (const auto& pair : common_prefixes) {
+ std::string subdir_name = pair.first;
+ if (! subdir_name.empty()) {
+ /* To be compliant with Swift we need to remove the trailing
+ * slash. */
+ subdir_name.pop_back();
+ }
+
+ htmler.dump_subdir(subdir_name);
+ }
+
+ /* Skip objects whose name plus "/" is already listed as a common
+ * prefix (i.e. as a subdirectory). */
+ for (const rgw_bucket_dir_entry& obj : objs) {
+ if (! common_prefixes.count(obj.key.name + '/')) {
+ htmler.dump_object(obj);
+ }
+ }
+
+ htmler.generate_footer();
+ dump_body(s, ss.str());
+ }
+ public:
+ /* Taking prefix_override by value to leverage std::string r-value ref
+ * ctor and thus avoid extra memory copying/increasing ref counter. */
+ explicit RGWWebsiteListing(std::string prefix_override)
+ : prefix_override(std::move(prefix_override)) {
+ }
+ };
+
+ std::string prefix = std::move(s->object.name);
+ s->object = rgw_obj_key();
+
+ return new RGWWebsiteListing(std::move(prefix));
+}
+
+/* Tells whether the requested object is a subdirectory marker: an existing
+ * object of size <= 1 whose content type equals the configured
+ * subdir_marker (or "application/directory" when none is configured). A
+ * single trailing slash of the name is ignored for the lookup. */
+bool RGWSwiftWebsiteHandler::is_web_dir() const
+{
+ std::string subdir_name = url_decode(s->object.name);
+
+ /* Remove character from the subdir name if it is "/". */
+ if (subdir_name.empty()) {
+ return false;
+ } else if (subdir_name.back() == '/') {
+ subdir_name.pop_back();
+ if (subdir_name.empty()) {
+ return false;
+ }
+ }
+
+ rgw_obj obj(s->bucket, std::move(subdir_name));
+
+ /* First, get attrset of the object we'll try to retrieve. */
+ RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+ obj_ctx.set_atomic(obj);
+ obj_ctx.set_prefetch_data(obj);
+
+ RGWObjState* state = nullptr;
+ if (store->get_obj_state(&obj_ctx, s->bucket_info, obj, &state, false) < 0) {
+ return false;
+ }
+
+ /* A nonexistent object cannot be a considered as a marker representing
+ * the emulation of catalog in FS hierarchy. */
+ if (! state->exists) {
+ return false;
+ }
+
+ /* Decode the content type. */
+ std::string content_type;
+ get_contype_from_attrs(state->attrset, content_type);
+
+ const auto& ws_conf = s->bucket_info.website_conf;
+ const std::string subdir_marker = ws_conf.subdir_marker.empty()
+ ? "application/directory"
+ : ws_conf.subdir_marker;
+ return subdir_marker == content_type && state->size <= 1;
+}
+
+/* Tells whether the object named `index` exists in the request's bucket.
+ * A failure to obtain the object state is reported as "not present". */
+bool RGWSwiftWebsiteHandler::is_index_present(const std::string& index)
+{
+  rgw_obj index_obj(s->bucket, index);
+
+  RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx);
+  obj_ctx.set_atomic(index_obj);
+  obj_ctx.set_prefetch_data(index_obj);
+
+  RGWObjState* state = nullptr;
+  const int ret = store->get_obj_state(&obj_ctx, s->bucket_info, index_obj,
+                                       &state, false);
+
+  /* A nonexistent object cannot be a considered as a viable index. We will
+   * try to list the bucket or - if this is impossible - return an error. */
+  return ret >= 0 && state->exists;
+}
+
+/* Possibly replaces a bucket-level op with a static-website op: a redirect
+ * (URI lacks a trailing slash), the index document (if configured and
+ * present) or an HTML listing (if enabled). *new_op receives either the
+ * replacement or the original op. Returns -ENOENT when web mode was
+ * requested but no replacement could be chosen, 0 otherwise. */
+int RGWSwiftWebsiteHandler::retarget_bucket(RGWOp* op, RGWOp** new_op)
+{
+ ldout(s->cct, 10) << "Starting retarget" << dendl;
+ RGWOp* op_override = nullptr;
+
+ /* In Swift static web content is served if the request is anonymous or
+ * has X-Web-Mode HTTP header specified to true. */
+ if (can_be_website_req()) {
+ const auto& ws_conf = s->bucket_info.website_conf;
+ const auto& index = s->bucket_info.website_conf.get_index_doc();
+
+ /* NOTE(review): back() assumes decoded_uri is never empty here. */
+ if (s->decoded_uri.back() != '/') {
+ op_override = get_ws_redirect_op();
+ } else if (! index.empty() && is_index_present(index)) {
+ op_override = get_ws_index_op();
+ } else if (ws_conf.listing_enabled) {
+ op_override = get_ws_listing_op();
+ }
+ }
+
+ if (op_override) {
+ /* Hand the original op back to the handler and initialize the
+ * replacement in its place. */
+ handler->put_op(op);
+ op_override->init(store, s, handler);
+
+ *new_op = op_override;
+ } else {
+ *new_op = op;
+ }
+
+ /* Return 404 Not Found if the request has web mode enforced but static web
+ * wasn't able to serve it accordingly. */
+ return ! op_override && is_web_mode() ? -ENOENT : 0;
+}
+
+/* Possibly replaces an object-level op with a static-website op when the
+ * request is eligible and the object is a subdirectory marker: a redirect
+ * (URI lacks a trailing slash), the index document (if configured and
+ * present) or an HTML listing (if enabled). Returns 0 without touching
+ * *new_op when no re-targeting applies, -ENOENT when the object is a
+ * subdirectory marker but no replacement could be chosen, 0 otherwise. */
+int RGWSwiftWebsiteHandler::retarget_object(RGWOp* op, RGWOp** new_op)
+{
+ ldout(s->cct, 10) << "Starting object retarget" << dendl;
+ RGWOp* op_override = nullptr;
+
+ /* In Swift static web content is served if the request is anonymous or
+ * has X-Web-Mode HTTP header specified to true. */
+ if (can_be_website_req() && is_web_dir()) {
+ const auto& ws_conf = s->bucket_info.website_conf;
+ const auto& index = s->bucket_info.website_conf.get_index_doc();
+
+ /* NOTE(review): back() assumes decoded_uri is never empty here. */
+ if (s->decoded_uri.back() != '/') {
+ op_override = get_ws_redirect_op();
+ } else if (! index.empty() && is_index_present(index)) {
+ op_override = get_ws_index_op();
+ } else if (ws_conf.listing_enabled) {
+ op_override = get_ws_listing_op();
+ }
+ } else {
+ /* A regular request or the specified object isn't a subdirectory marker.
+ * We don't need any re-targeting. Error handling (like sending a custom
+ * error page) will be performed by error_handler of the actual RGWOp. */
+ return 0;
+ }
+
+ if (op_override) {
+ /* Hand the original op back to the handler and initialize the
+ * replacement in its place. */
+ handler->put_op(op);
+ op_override->init(store, s, handler);
+
+ *new_op = op_override;
+ } else {
+ *new_op = op;
+ }
+
+ /* Return 404 Not Found if we aren't able to re-target for subdir marker. */
+ return ! op_override ? -ENOENT : 0;
+}
+
+
+/* Picks the op for GET/HEAD on a bucket: the ACL getter for ACL requests,
+ * otherwise a bucket listing (get_data) or a bucket stat (no data). */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::get_obj_op(bool get_data)
+{
+  RGWOp* op = nullptr;
+
+  if (is_acl_op()) {
+    op = new RGWGetACLs_ObjStore_SWIFT;
+  } else if (get_data) {
+    op = new RGWListBucket_ObjStore_SWIFT;
+  } else {
+    op = new RGWStatBucket_ObjStore_SWIFT;
+  }
+
+  return op;
+}
+
+/* GET on a bucket: same as get_obj_op() with data requested. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_get()
+{
+ return get_obj_op(true);
+}
+
+/* HEAD on a bucket: same as get_obj_op() without data. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_head()
+{
+ return get_obj_op(false);
+}
+
+/* PUT on a bucket: set ACLs for ACL requests, bulk upload when the
+ * "extract-archive" argument is present, otherwise create the bucket. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_put()
+{
+  RGWOp* op = nullptr;
+
+  if (is_acl_op()) {
+    op = new RGWPutACLs_ObjStore_SWIFT;
+  } else if (s->info.args.exists("extract-archive")) {
+    op = new RGWBulkUploadOp_ObjStore_SWIFT;
+  } else {
+    op = new RGWCreateBucket_ObjStore_SWIFT;
+  }
+
+  return op;
+}
+
+/* DELETE on a bucket: delete the bucket. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_delete()
+{
+ return new RGWDeleteBucket_ObjStore_SWIFT;
+}
+
+/* POST on a bucket: a FormPost upload when the request is a form post,
+ * otherwise an update of the bucket metadata. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_post()
+{
+  if (! RGWFormPost::is_formpost_req(s)) {
+    return new RGWPutMetadataBucket_ObjStore_SWIFT;
+  }
+
+  return new RGWFormPost;
+}
+
+/* OPTIONS on a bucket: CORS handling. */
+RGWOp *RGWHandler_REST_Bucket_SWIFT::op_options()
+{
+ return new RGWOptionsCORS_ObjStore_SWIFT;
+}
+
+
+/* Picks the op for GET/HEAD on an object: the ACL getter for ACL requests,
+ * otherwise a GET-object op that fetches the body only when get_data is
+ * set. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::get_obj_op(bool get_data)
+{
+  if (! is_acl_op()) {
+    auto* const getter = new RGWGetObj_ObjStore_SWIFT;
+    getter->set_get_data(get_data);
+    return getter;
+  }
+
+  return new RGWGetACLs_ObjStore_SWIFT;
+}
+
+/* GET on an object: same as get_obj_op() with data requested. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_get()
+{
+ return get_obj_op(true);
+}
+
+/* HEAD on an object: same as get_obj_op() without data. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_head()
+{
+ return get_obj_op(false);
+}
+
+/* PUT on an object: set ACLs for ACL requests, bulk upload when the
+ * "extract-archive" argument is present, a copy when a source bucket was
+ * given, otherwise a plain object upload. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_put()
+{
+  RGWOp* op = nullptr;
+
+  if (is_acl_op()) {
+    op = new RGWPutACLs_ObjStore_SWIFT;
+  } else if (s->info.args.exists("extract-archive")) {
+    op = new RGWBulkUploadOp_ObjStore_SWIFT;
+  } else if (! s->init_state.src_bucket.empty()) {
+    op = new RGWCopyObj_ObjStore_SWIFT;
+  } else {
+    op = new RGWPutObj_ObjStore_SWIFT;
+  }
+
+  return op;
+}
+
+/* DELETE on an object: delete the object. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_delete()
+{
+ return new RGWDeleteObj_ObjStore_SWIFT;
+}
+
+/* POST on an object: a FormPost upload when the request is a form post,
+ * otherwise an update of the object metadata. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_post()
+{
+  if (! RGWFormPost::is_formpost_req(s)) {
+    return new RGWPutMetadataObject_ObjStore_SWIFT;
+  }
+
+  return new RGWFormPost;
+}
+
+/* COPY on an object: copy the object. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_copy()
+{
+ return new RGWCopyObj_ObjStore_SWIFT;
+}
+
+/* OPTIONS on an object: CORS handling. */
+RGWOp *RGWHandler_REST_Obj_SWIFT::op_options()
+{
+ return new RGWOptionsCORS_ObjStore_SWIFT;
+}
+
+
+/* Authorizes the request by applying the handler's auth strategy to the
+ * request state; returns the result of rgw::auth::Strategy::apply(). */
+int RGWHandler_REST_SWIFT::authorize(const DoutPrefixProvider *dpp)
+{
+ return rgw::auth::Strategy::apply(dpp, auth_strategy, s);
+}
+
+/* Finishes the request setup once the user is known: the bucket tenant is
+ * taken from the authenticated user and the bucket name from the URL, then
+ * tenant, bucket and object names are validated. When a source bucket was
+ * given (copy), the source names are set up and validated the same way.
+ * Returns 0 on success or the first validation error. */
+int RGWHandler_REST_SWIFT::postauth_init()
+{
+ struct req_init_state* t = &s->init_state;
+
+ /* XXX Stub this until Swift Auth sets account into URL. */
+ s->bucket_tenant = s->user->user_id.tenant;
+ s->bucket_name = t->url_bucket;
+
+ dout(10) << "s->object=" <<
+ (!s->object.empty() ? s->object : rgw_obj_key("<NULL>"))
+ << " s->bucket="
+ << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name)
+ << dendl;
+
+ int ret;
+ ret = rgw_validate_tenant_name(s->bucket_tenant);
+ if (ret)
+ return ret;
+ ret = validate_bucket_name(s->bucket_name);
+ if (ret)
+ return ret;
+ ret = validate_object_name(s->object.name);
+ if (ret)
+ return ret;
+
+ if (!t->src_bucket.empty()) {
+ /*
+ * We don't allow cross-tenant copy at present. It requires account
+ * names in the URL for Swift.
+ */
+ s->src_tenant_name = s->user->user_id.tenant;
+ s->src_bucket_name = t->src_bucket;
+
+ ret = validate_bucket_name(s->src_bucket_name);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = validate_object_name(s->src_object.name);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Swift-specific container name validation layered on top of
+ * RGWHandler_REST::validate_bucket_name().
+ *
+ * Rejects names longer than MAX_BUCKET_NAME_LEN (also recording a custom
+ * error message on the request), names starting with '.', names failing
+ * check_utf8(), and names containing a 0xff byte or '/'. An empty name that
+ * passes the base check is accepted.
+ *
+ * Returns 0 when the name is acceptable, a negative error code otherwise. */
+int RGWHandler_REST_SWIFT::validate_bucket_name(const string& bucket)
+{
+  const size_t name_len = bucket.size();
+
+  if (name_len > MAX_BUCKET_NAME_LEN) {
+    /* Bucket name too long: attach a custom error message to the request. */
+    const auto err_text = boost::str(
+      boost::format("Container name length of %lld longer than %lld")
+        % name_len % int(MAX_BUCKET_NAME_LEN));
+    set_req_state_err(s, ERR_INVALID_BUCKET_NAME, err_text);
+    return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  const auto base_ret = RGWHandler_REST::validate_bucket_name(bucket);
+  if (base_ret < 0) {
+    return base_ret;
+  }
+
+  if (bucket.empty()) {
+    return 0;
+  }
+
+  if (bucket.front() == '.') {
+    return -ERR_INVALID_BUCKET_NAME;
+  }
+
+  if (check_utf8(bucket.c_str(), name_len)) {
+    return -ERR_INVALID_UTF8;
+  }
+
+  /* Byte-wise scan for characters a container name must not contain. */
+  for (const char ch : bucket) {
+    const unsigned char byte = static_cast<unsigned char>(ch);
+    if (byte == 0xff || byte == '/') {
+      return -ERR_INVALID_BUCKET_NAME;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Split the leading token off @p str.
+ *
+ * @param str   in/out: text to split; on return holds whatever followed the
+ *              first delimiter, or is empty when no split took place.
+ * @param tok   out: the extracted token.
+ * @param delim separator character.
+ *
+ * A split only happens when the first delimiter is found at an index greater
+ * than zero. When the delimiter is absent, or sits at index 0, the whole
+ * input becomes the token and @p str is emptied. This pre-existing behaviour
+ * is kept on purpose because callers may depend on it.
+ */
+static void next_tok(string& str, string& tok, char delim)
+{
+  if (str.empty()) {
+    tok.clear();
+    return;
+  }
+
+  /* Keep the result in the string's own size type: storing it in an int
+   * relied on npos wrapping to a negative value and would truncate offsets
+   * beyond INT_MAX. */
+  const string::size_type pos = str.find(delim);
+  if (pos == string::npos || pos == 0) {
+    tok = str;
+    str.clear();
+    return;
+  }
+
+  tok = str.substr(0, pos);
+  str = str.substr(pos + 1);
+}
+
+/**
+ * Parse a Swift request path into the request state.
+ *
+ * The path examined is @p frontend_prefix followed by s->decoded_uri. On
+ * success the function has:
+ *  - flagged the request as Swift (RGW_REST_SWIFT) and parsed its arguments,
+ *  - allocated the response formatter,
+ *  - stored the account name (when a tenant path is configured or
+ *    rgw_swift_account_in_url is set), with a leading "AUTH_" or "KEY_"
+ *    removed,
+ *  - stored the container name in s->init_state.url_bucket and, if anything
+ *    follows it, the object key in s->object,
+ *  - set s->info.effective_uri to "/<container>[/<object>]".
+ *
+ * @return 0 on success (including paths that carry no container),
+ *         -ERR_BAD_URL for a bare "/" or bare URL prefix that cannot be
+ *         served, -ENOENT when the path does not begin with the expected
+ *         "[/<prefix>]/v1[/AUTH_<tenant>]" root, -ERR_PRECONDITION_FAILED
+ *         when an account is required but empty, or the error returned by
+ *         allocate_formatter().
+ */
+int RGWHandler_REST_SWIFT::init_from_header(struct req_state* const s,
+                                            const std::string& frontend_prefix)
+{
+  s->prot_flags |= RGW_REST_SWIFT;
+
+  /* The path length is controlled by the client, so it is kept in a heap
+   * backed std::string rather than a variable-length stack array. Both parts
+   * go through c_str() so that, as with the former sprintf("%s%s"), anything
+   * behind an embedded NUL byte is ignored. */
+  std::string full_path(frontend_prefix.c_str());
+  full_path.append(s->decoded_uri.c_str());
+
+  const bool starts_with_query = !full_path.empty() && full_path.front() == '?';
+  const char* const params = starts_with_query
+    ? full_path.c_str()
+    : s->info.request_params.c_str();
+
+  s->info.args.set(params);
+  s->info.args.parse();
+
+  /* Skip the leading slash of URL hierarchy. */
+  if (full_path.empty() || full_path.front() != '/') {
+    return 0;
+  }
+
+  /* Snapshot of the configured prefix, used for every check below. */
+  const std::string url_prefix = g_conf()->rgw_swift_url_prefix;
+
+  std::string req = full_path.substr(1);
+  if (req.empty()) {
+    return url_prefix == "/" ? -ERR_BAD_URL : 0;
+  }
+
+  std::string first;
+  const size_t pos = req.find('/');
+  if (std::string::npos != pos && url_prefix != "/") {
+    first = req.substr(0, pos);
+
+    /* Rewind to the "v1/..." part, but only when a non-empty prefix is
+     * configured and the first path component is that prefix. */
+    if (!url_prefix.empty() && first == url_prefix) {
+      next_tok(req, first, '/');
+    }
+  } else if (req == url_prefix) {
+    /* The path is the bare prefix with nothing after it. */
+    s->formatter = new RGWFormatter_Plain;
+    return -ERR_BAD_URL;
+  }
+  /* The former "first = req" fallback is gone: `first` is unconditionally
+   * overwritten by next_tok() before its next use. */
+
+  std::string tenant_path;
+  if (!g_conf()->rgw_swift_tenant_name.empty()) {
+    tenant_path = "/AUTH_";
+    tenant_path.append(g_conf()->rgw_swift_tenant_name);
+  }
+
+  /* verify that the request_uri conforms with what's expected */
+  std::string expected_root;
+  if (url_prefix != "/") {
+    expected_root = "/" + url_prefix;
+  }
+  expected_root += "/v1";
+  expected_root += tenant_path;
+
+  if (full_path.compare(0, expected_root.length(), expected_root) != 0) {
+    return -ENOENT;
+  }
+
+  const int ret = allocate_formatter(s, RGW_FORMAT_PLAIN, true);
+  if (ret < 0) {
+    return ret;
+  }
+
+  std::string ver;
+  next_tok(req, ver, '/');
+
+  if (!tenant_path.empty() || g_conf()->rgw_swift_account_in_url) {
+    std::string account_name;
+    next_tok(req, account_name, '/');
+
+    /* Erase all pre-defined prefixes like "AUTH_" or "KEY_". */
+    const std::vector<std::string> skipped_prefixes = { "AUTH_", "KEY_" };
+
+    for (const auto& pfx : skipped_prefixes) {
+      /* compare() clamps the length to the account name's size, so a name
+       * shorter than the prefix simply does not match. */
+      if (account_name.compare(0, pfx.length(), pfx) == 0) {
+        /* Prefix is present. Drop it. */
+        account_name.erase(0, pfx.length());
+        break;
+      }
+    }
+
+    if (account_name.empty()) {
+      return -ERR_PRECONDITION_FAILED;
+    }
+    s->account_name = account_name;
+  }
+
+  next_tok(req, first, '/');
+
+  dout(10) << "ver=" << ver << " first=" << first << " req=" << req << dendl;
+  if (first.empty()) {
+    return 0;
+  }
+
+  s->info.effective_uri = "/" + first;
+
+  // Save bucket to tide us over until token is parsed.
+  s->init_state.url_bucket = first;
+
+  if (!req.empty()) {
+    s->object =
+      rgw_obj_key(req, s->info.env->get("HTTP_X_OBJECT_VERSION_ID", "")); /* rgw swift extension */
+    s->info.effective_uri.append("/" + s->object.name);
+  }
+
+  return 0;
+}
+
+/* Per-request initialisation for the Swift dialect.
+ *
+ * Records an X-Copy-From source if one is present, rewrites a COPY request
+ * into the equivalent PUT (the URL target becomes the copy source and the
+ * Destination header the new target), picks up the requested storage class
+ * and then defers to RGWHandler_REST::init().
+ *
+ * Returns -ERR_BAD_URL when a copy location cannot be parsed or a COPY lacks
+ * a Destination header; otherwise the result of the base class init. */
+int RGWHandler_REST_SWIFT::init(RGWRados* store, struct req_state* s,
+                                rgw::io::BasicClient *cio)
+{
+  auto& init_state = s->init_state;
+
+  s->dialect = "swift";
+
+  const std::string copy_source = s->info.env->get("HTTP_X_COPY_FROM", "");
+  if (!copy_source.empty() &&
+      !RGWCopyObj::parse_copy_location(copy_source, init_state.src_bucket,
+                                       s->src_object)) {
+    return -ERR_BAD_URL;
+  }
+
+  if (s->op == OP_COPY) {
+    const std::string req_dest = s->info.env->get("HTTP_DESTINATION", "");
+    if (req_dest.empty()) {
+      return -ERR_BAD_URL;
+    }
+
+    std::string dest_bucket_name;
+    rgw_obj_key dest_obj_key;
+    if (!RGWCopyObj::parse_copy_location(req_dest, dest_bucket_name,
+                                         dest_obj_key)) {
+      return -ERR_BAD_URL;
+    }
+
+    /* convert COPY operation into PUT */
+    init_state.src_bucket = init_state.url_bucket;
+    s->src_object = s->object;
+    init_state.url_bucket = dest_bucket_name;
+    s->object = rgw_obj_key(dest_obj_key.name);
+    s->op = OP_PUT;
+  }
+
+  s->info.storage_class = s->info.env->get("HTTP_X_OBJECT_STORAGE_CLASS", "");
+
+  return RGWHandler_REST::init(store, s, cio);
+}
+
+/* Parse the request path and create the matching Swift handler: service
+ * level when no container was found, container level when no object was
+ * found, object level otherwise. Returns nullptr when init_from_header()
+ * reports an error. */
+RGWHandler_REST*
+RGWRESTMgr_SWIFT::get_handler(struct req_state* const s,
+                              const rgw::auth::StrategyRegistry& auth_registry,
+                              const std::string& frontend_prefix)
+{
+  const int parse_rc = RGWHandler_REST_SWIFT::init_from_header(s, frontend_prefix);
+  if (parse_rc < 0) {
+    ldout(s->cct, 10) << "init_from_header returned err=" << parse_rc << dendl;
+    return nullptr;
+  }
+
+  const auto& auth_strategy = auth_registry.get_swift();
+
+  const bool has_bucket = !s->init_state.url_bucket.empty();
+  if (!has_bucket) {
+    return new RGWHandler_REST_Service_SWIFT(auth_strategy);
+  }
+
+  const bool has_object = !s->object.empty();
+  if (!has_object) {
+    return new RGWHandler_REST_Bucket_SWIFT(auth_strategy);
+  }
+
+  return new RGWHandler_REST_Obj_SWIFT(auth_strategy);
+}
+
+/* Flag the request as Swift and hand out the info handler, wired to the
+ * registry's Swift auth strategy. The frontend prefix is not used here. */
+RGWHandler_REST* RGWRESTMgr_SWIFT_Info::get_handler(
+  struct req_state* const s,
+  const rgw::auth::StrategyRegistry& auth_registry,
+  const std::string& frontend_prefix)
+{
+  s->prot_flags |= RGW_REST_SWIFT;
+
+  const auto& swift_strategy = auth_registry.get_swift();
+  RGWHandler_REST* const info_handler =
+    new RGWHandler_REST_SWIFT_Info(swift_strategy);
+  return info_handler;
+}
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
new file mode 100644
index 00000000..2f902c46
--- /dev/null
+++ b/src/rgw/rgw_rest_swift.h
@@ -0,0 +1,681 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_SWIFT_H
+#define CEPH_RGW_REST_SWIFT_H
+#define TIME_BUF_SIZE 128
+
+#include <boost/optional.hpp>
+#include <boost/utility/typed_in_place_factory.hpp>
+
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_swift_auth.h"
+#include "rgw_http_errors.h"
+
+#include <boost/utility/string_ref.hpp>
+
+/* Swift flavour of RGWGetObj_ObjStore. */
+class RGWGetObj_ObjStore_SWIFT : public RGWGetObj_ObjStore {
+  /* Value handed to set_custom_http_response(); 0 until then. */
+  int custom_http_ret = 0;
+
+public:
+  RGWGetObj_ObjStore_SWIFT() = default;
+  ~RGWGetObj_ObjStore_SWIFT() override = default;
+
+  int verify_permission() override;
+  int get_params() override;
+  int send_response_data_error() override;
+  int send_response_data(bufferlist& bl, off_t ofs, off_t len) override;
+
+  bool need_object_expiration() override { return true; }
+
+  void set_custom_http_response(const int http_ret) {
+    custom_http_ret = http_ret;
+  }
+};
+
+/* Swift flavour of RGWListBuckets_ObjStore. */
+class RGWListBuckets_ObjStore_SWIFT : public RGWListBuckets_ObjStore {
+  bool need_stats = true;        /* reported by should_get_stats() */
+  bool wants_reversed = false;
+  std::string prefix;
+  std::vector<RGWUserBuckets> reverse_buffer;
+
+  uint64_t get_default_max() const override { return 0; }
+
+public:
+  RGWListBuckets_ObjStore_SWIFT() = default;
+  ~RGWListBuckets_ObjStore_SWIFT() override = default;
+
+  int get_params() override;
+  void handle_listing_chunk(RGWUserBuckets&& buckets) override;
+
+  void send_response_begin(bool has_buckets) override;
+  void send_response_data(RGWUserBuckets& buckets) override;
+  void send_response_data_reversed(RGWUserBuckets& buckets);
+  void send_response_end() override;
+
+  void dump_bucket_entry(const RGWBucketEnt& obj);
+
+  bool should_get_stats() override { return need_stats; }
+  bool supports_account_metadata() override { return true; }
+};
+
+/* Swift flavour of RGWListBucket_ObjStore; default_max is set to 10000. */
+class RGWListBucket_ObjStore_SWIFT : public RGWListBucket_ObjStore {
+  string path;
+
+public:
+  RGWListBucket_ObjStore_SWIFT() { default_max = 10000; }
+  ~RGWListBucket_ObjStore_SWIFT() override = default;
+
+  int get_params() override;
+  void send_response() override;
+
+  bool need_container_stats() override { return true; }
+};
+
+/* Swift flavour of RGWStatAccount_ObjStore. */
+class RGWStatAccount_ObjStore_SWIFT : public RGWStatAccount_ObjStore {
+  map<string, bufferlist> attrs;
+
+public:
+  RGWStatAccount_ObjStore_SWIFT() = default;
+  ~RGWStatAccount_ObjStore_SWIFT() override = default;
+
+  void execute() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWStatBucket_ObjStore; only the response differs. */
+class RGWStatBucket_ObjStore_SWIFT : public RGWStatBucket_ObjStore {
+public:
+  RGWStatBucket_ObjStore_SWIFT() = default;
+  ~RGWStatBucket_ObjStore_SWIFT() override = default;
+
+  void send_response() override;
+};
+
+/* Swift flavour of RGWCreateBucket_ObjStore; reports that metadata upload
+ * is needed. */
+class RGWCreateBucket_ObjStore_SWIFT : public RGWCreateBucket_ObjStore {
+protected:
+  bool need_metadata_upload() const override { return true; }
+
+public:
+  RGWCreateBucket_ObjStore_SWIFT() = default;
+  ~RGWCreateBucket_ObjStore_SWIFT() override = default;
+
+  int get_params() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWDeleteBucket_ObjStore; only the response differs. */
+class RGWDeleteBucket_ObjStore_SWIFT : public RGWDeleteBucket_ObjStore {
+public:
+  RGWDeleteBucket_ObjStore_SWIFT() = default;
+  ~RGWDeleteBucket_ObjStore_SWIFT() override = default;
+
+  void send_response() override;
+};
+
+/* Swift flavour of RGWPutObj_ObjStore. */
+class RGWPutObj_ObjStore_SWIFT : public RGWPutObj_ObjStore {
+  string lo_etag;
+
+public:
+  RGWPutObj_ObjStore_SWIFT() = default;
+  ~RGWPutObj_ObjStore_SWIFT() override = default;
+
+  int verify_permission() override;
+  int get_params() override;
+  void send_response() override;
+
+  int update_slo_segment_size(rgw_slo_entry& entry);
+};
+
+/* Swift flavour of RGWPutMetadataAccount_ObjStore. */
+class RGWPutMetadataAccount_ObjStore_SWIFT : public RGWPutMetadataAccount_ObjStore {
+public:
+  RGWPutMetadataAccount_ObjStore_SWIFT() = default;
+  ~RGWPutMetadataAccount_ObjStore_SWIFT() override = default;
+
+  int get_params() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWPutMetadataBucket_ObjStore. */
+class RGWPutMetadataBucket_ObjStore_SWIFT : public RGWPutMetadataBucket_ObjStore {
+public:
+  RGWPutMetadataBucket_ObjStore_SWIFT() = default;
+  ~RGWPutMetadataBucket_ObjStore_SWIFT() override = default;
+
+  int get_params() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWPutMetadataObject_ObjStore; asks for object
+ * expiration handling. */
+class RGWPutMetadataObject_ObjStore_SWIFT : public RGWPutMetadataObject_ObjStore {
+public:
+  RGWPutMetadataObject_ObjStore_SWIFT() = default;
+  ~RGWPutMetadataObject_ObjStore_SWIFT() override = default;
+
+  bool need_object_expiration() override { return true; }
+
+  int get_params() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWDeleteObj_ObjStore; asks for object expiration
+ * handling. */
+class RGWDeleteObj_ObjStore_SWIFT : public RGWDeleteObj_ObjStore {
+public:
+  RGWDeleteObj_ObjStore_SWIFT() = default;
+  ~RGWDeleteObj_ObjStore_SWIFT() override = default;
+
+  bool need_object_expiration() override { return true; }
+
+  int verify_permission() override;
+  int get_params() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWCopyObj_ObjStore. */
+class RGWCopyObj_ObjStore_SWIFT : public RGWCopyObj_ObjStore {
+  bool sent_header = false;
+
+protected:
+  void dump_copy_info();
+
+public:
+  RGWCopyObj_ObjStore_SWIFT() = default;
+  ~RGWCopyObj_ObjStore_SWIFT() override = default;
+
+  int init_dest_policy() override;
+  int get_params() override;
+  void send_partial_response(off_t ofs) override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWGetACLs_ObjStore; send_response() does nothing. */
+class RGWGetACLs_ObjStore_SWIFT : public RGWGetACLs_ObjStore {
+public:
+  RGWGetACLs_ObjStore_SWIFT() = default;
+  ~RGWGetACLs_ObjStore_SWIFT() override = default;
+
+  void send_response() override {
+    /* intentionally empty */
+  }
+};
+
+/* Swift flavour of RGWPutACLs_ObjStore; send_response() does nothing. */
+class RGWPutACLs_ObjStore_SWIFT : public RGWPutACLs_ObjStore {
+public:
+  RGWPutACLs_ObjStore_SWIFT()
+    : RGWPutACLs_ObjStore() {
+  }
+  ~RGWPutACLs_ObjStore_SWIFT() override = default;
+
+  void send_response() override {
+    /* intentionally empty */
+  }
+};
+
+/* Swift flavour of RGWOptionsCORS_ObjStore; only the response differs. */
+class RGWOptionsCORS_ObjStore_SWIFT : public RGWOptionsCORS_ObjStore {
+public:
+  RGWOptionsCORS_ObjStore_SWIFT() = default;
+  ~RGWOptionsCORS_ObjStore_SWIFT() override = default;
+
+  void send_response() override;
+};
+
+/* Swift flavour of RGWBulkDelete_ObjStore. */
+class RGWBulkDelete_ObjStore_SWIFT : public RGWBulkDelete_ObjStore {
+public:
+  RGWBulkDelete_ObjStore_SWIFT() = default;
+  ~RGWBulkDelete_ObjStore_SWIFT() override = default;
+
+  /* Fills `items` with the paths to delete and reports through
+   * `is_truncated` (see the .cc file for the exact semantics). */
+  int get_data(std::list<RGWBulkDelete::acct_path_t>& items,
+               bool * is_truncated) override;
+
+  void send_response() override;
+};
+
+/* Swift flavour of RGWBulkUploadOp_ObjStore. */
+class RGWBulkUploadOp_ObjStore_SWIFT : public RGWBulkUploadOp_ObjStore {
+  /* Both counters start at zero. */
+  size_t conlen = 0;
+  size_t curpos = 0;
+
+public:
+  RGWBulkUploadOp_ObjStore_SWIFT() = default;
+  /* Marked `override` like the destructors of the sibling op classes, so the
+   * compiler verifies that the base destructor is virtual. */
+  ~RGWBulkUploadOp_ObjStore_SWIFT() override = default;
+
+  std::unique_ptr<StreamGetter> create_stream() override;
+  void send_response() override;
+};
+
+/* Swift flavour of RGWInfo_ObjStore. The static swift_info table pairs a
+ * section name with a callback that dumps that section to a Formatter. */
+class RGWInfo_ObjStore_SWIFT : public RGWInfo_ObjStore {
+protected:
+  struct info
+  {
+    bool is_admin_info;
+    function<void (Formatter&, const ConfigProxy&, RGWRados&)> list_data;
+  };
+
+  static const vector<pair<string, struct info>> swift_info;
+
+public:
+  RGWInfo_ObjStore_SWIFT() = default;
+  ~RGWInfo_ObjStore_SWIFT() override = default;
+
+  void execute() override;
+  void send_response() override;
+
+  /* Callbacks matching the `list_data` signature of struct info. */
+  static void list_swift_data(Formatter& formatter, const ConfigProxy& config, RGWRados& store);
+  static void list_tempauth_data(Formatter& formatter, const ConfigProxy& config, RGWRados& store);
+  static void list_tempurl_data(Formatter& formatter, const ConfigProxy& config, RGWRados& store);
+  static void list_slo_data(Formatter& formatter, const ConfigProxy& config, RGWRados& store);
+
+  static bool is_expired(const std::string& expires, CephContext* cct);
+};
+
+
+/* Swift FormPost upload operation, built on RGWPostObj_ObjStore.
+ * is_formpost_req() is how handlers decide whether a POST should be served
+ * by this op. */
+class RGWFormPost : public RGWPostObj_ObjStore {
+  std::string get_current_filename() const override;
+  std::string get_current_content_type() const override;
+  std::size_t get_max_file_size() /*const*/;
+  bool is_next_file_to_upload() override;
+  bool is_integral();
+  bool is_non_expired();
+  void get_owner_info(const req_state* s,
+                      RGWUserInfo& owner_info) const;
+
+  parts_collection_t ctrl_parts;
+  boost::optional<post_form_part> current_data_part;
+  std::string prefix;
+  bool stream_done = false;
+
+  /* Computes and compares the form signature; defined after this class. */
+  class SignatureHelper;
+public:
+  RGWFormPost() = default;
+  /* Marked `override` like the destructors of the other op classes in this
+   * header, so the compiler verifies that the base destructor is virtual. */
+  ~RGWFormPost() override = default;
+
+  void init(RGWRados* store,
+            req_state* s,
+            RGWHandler* dialect_handler) override;
+
+  int get_params() override;
+  int get_data(ceph::bufferlist& bl, bool& again) override;
+  void send_response() override;
+
+  static bool is_formpost_req(req_state* const s);
+};
+
+/* Computes the HMAC-SHA1 signature of a FormPost request and compares it
+ * with the signature supplied by the client.
+ *
+ * The signed message is the five fields path_info, redirect, max_file_size,
+ * max_file_count and expires, joined with '\n'. The digest is kept as a
+ * NUL-terminated hex string. */
+class RGWFormPost::SignatureHelper
+{
+private:
+  /* Two hex characters per digest byte plus the terminating NUL. */
+  static constexpr uint32_t output_size =
+    CEPH_CRYPTO_HMACSHA1_DIGESTSIZE * 2 + 1;
+
+  /* Zero-initialised so that get_signature() and is_equal_to() never read
+   * indeterminate memory when called before calc(). */
+  unsigned char dest[CEPH_CRYPTO_HMACSHA1_DIGESTSIZE] = {}; // 20
+  char dest_str[output_size] = {};
+
+public:
+  SignatureHelper() = default;
+
+  /* Compute the signature with `key` over the five form fields and return
+   * it as a hex string. The pointer refers to storage owned by this object
+   * and is overwritten by the next calc(). */
+  const char* calc(const std::string& key,
+                   const boost::string_ref& path_info,
+                   const boost::string_ref& redirect,
+                   const boost::string_ref& max_file_size,
+                   const boost::string_ref& max_file_count,
+                   const boost::string_ref& expires) {
+    using ceph::crypto::HMACSHA1;
+    using UCHARPTR = const unsigned char*;
+
+    HMACSHA1 hmac((UCHARPTR) key.data(), key.size());
+
+    hmac.Update((UCHARPTR) path_info.data(), path_info.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) redirect.data(), redirect.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) max_file_size.data(), max_file_size.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) max_file_count.data(), max_file_count.size());
+    hmac.Update((UCHARPTR) "\n", 1);
+
+    hmac.Update((UCHARPTR) expires.data(), expires.size());
+
+    hmac.Final(dest);
+
+    buf_to_hex((UCHARPTR) dest, sizeof(dest), dest_str);
+
+    return dest_str;
+  }
+
+  /* Hex signature of the last calc(); an empty string before the first. */
+  const char* get_signature() const {
+    return dest_str;
+  }
+
+  /* True iff `rhs` is exactly the computed hex signature.
+   *
+   * `rhs` comes from the client, so the bytes are compared without an early
+   * exit: the running time must not reveal how long the matching prefix is.
+   * Nothing matches before calc() has produced a signature. */
+  bool is_equal_to(const std::string& rhs) const {
+    constexpr uint32_t sig_len = output_size - 1;
+
+    if (dest_str[0] == '\0' || rhs.size() != sig_len) {
+      return false;
+    }
+
+    unsigned char diff = 0;
+    for (uint32_t i = 0; i < sig_len; ++i) {
+      const unsigned char lhs_byte = static_cast<unsigned char>(dest_str[i]);
+      const unsigned char rhs_byte = static_cast<unsigned char>(rhs[i]);
+      diff = static_cast<unsigned char>(diff | (lhs_byte ^ rhs_byte));
+    }
+    return diff == 0;
+  }
+
+}; /* RGWFormPost::SignatureHelper */
+
+
+class RGWSwiftWebsiteHandler {
+ RGWRados* const store;
+ req_state* const s;
+ RGWHandler_REST* const handler;
+
+ bool is_web_mode() const;
+ bool can_be_website_req() const;
+ bool is_web_dir() const;
+ bool is_index_present(const std::string& index);
+
+ int serve_errordoc(int http_ret, std::string error_doc);
+
+ RGWOp* get_ws_redirect_op();
+ RGWOp* get_ws_index_op();
+ RGWOp* get_ws_listing_op();
+public:
+ RGWSwiftWebsiteHandler(RGWRados* const store,
+ req_state* const s,
+ RGWHandler_REST* const handler)
+ : store(store),
+ s(s),
+ handler(handler) {
+ }
+
+ int error_handler(const int err_no,
+ std::string* const error_content);
+ int retarget_bucket(RGWOp* op, RGWOp** new_op);
+ int retarget_object(RGWOp* op, RGWOp** new_op);
+};
+
+
+/* Common base of the Swift REST handlers: holds the auth strategy and
+ * provides URL parsing, name validation and the init/authorize hooks. */
+class RGWHandler_REST_SWIFT : public RGWHandler_REST {
+  friend class RGWRESTMgr_SWIFT;
+  friend class RGWRESTMgr_SWIFT_Info;
+
+protected:
+  const rgw::auth::Strategy& auth_strategy;
+
+  /* Subclasses may override this; the default says "not an ACL op". */
+  virtual bool is_acl_op() { return false; }
+
+  static int init_from_header(struct req_state* s,
+                              const std::string& frontend_prefix);
+
+public:
+  explicit RGWHandler_REST_SWIFT(const rgw::auth::Strategy& auth_strategy)
+    : auth_strategy(auth_strategy) {
+  }
+  ~RGWHandler_REST_SWIFT() override = default;
+
+  int validate_bucket_name(const string& bucket);
+
+  int init(RGWRados *store, struct req_state *s, rgw::io::BasicClient *cio) override;
+  int authorize(const DoutPrefixProvider *dpp) override;
+  int postauth_init() override;
+
+  /* No policy object is ever allocated here. */
+  RGWAccessControlPolicy *alloc_policy() {
+    return nullptr; /* return new RGWAccessControlPolicy_SWIFT; */
+  }
+  void free_policy(RGWAccessControlPolicy *policy) {
+    delete policy;
+  }
+};
+
+/* Swift handler created when the request URL names no container. */
+class RGWHandler_REST_Service_SWIFT : public RGWHandler_REST_SWIFT {
+public:
+  using RGWHandler_REST_SWIFT::RGWHandler_REST_SWIFT;
+  ~RGWHandler_REST_Service_SWIFT() override = default;
+
+protected:
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_put() override;
+  RGWOp *op_post() override;
+  RGWOp *op_delete() override;
+};
+
+/* Swift handler created when the request URL names a container but no
+ * object. */
+class RGWHandler_REST_Bucket_SWIFT : public RGWHandler_REST_SWIFT {
+  /* Constructed in init(), hence the boost::optional. error_handler() and
+   * retarget() dereference it without checking, so init() must have run
+   * before either is called. */
+  boost::optional<RGWSwiftWebsiteHandler> website_handler;
+
+protected:
+  bool is_obj_update_op() override { return s->op == OP_POST; }
+
+  RGWOp *get_obj_op(bool get_data);
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_put() override;
+  RGWOp *op_delete() override;
+  RGWOp *op_post() override;
+  RGWOp *op_options() override;
+
+public:
+  using RGWHandler_REST_SWIFT::RGWHandler_REST_SWIFT;
+  ~RGWHandler_REST_Bucket_SWIFT() override = default;
+
+  /* Create the website helper first, then run the common Swift init. */
+  int init(RGWRados* const store,
+           struct req_state* const s,
+           rgw::io::BasicClient* const cio) override {
+    website_handler = boost::in_place<RGWSwiftWebsiteHandler>(store, s, this);
+    return RGWHandler_REST_SWIFT::init(store, s, cio);
+  }
+
+  int error_handler(int err_no, std::string *error_content) override {
+    return website_handler->error_handler(err_no, error_content);
+  }
+
+  int retarget(RGWOp* op, RGWOp** new_op) override {
+    return website_handler->retarget_bucket(op, new_op);
+  }
+};
+
+/* Swift handler created when the request URL names both a container and an
+ * object. */
+class RGWHandler_REST_Obj_SWIFT : public RGWHandler_REST_SWIFT {
+  /* Constructed in init(), hence the boost::optional. error_handler() and
+   * retarget() dereference it without checking, so init() must have run
+   * before either is called. */
+  boost::optional<RGWSwiftWebsiteHandler> website_handler;
+
+protected:
+  bool is_obj_update_op() override { return s->op == OP_POST; }
+
+  RGWOp *get_obj_op(bool get_data);
+  RGWOp *op_get() override;
+  RGWOp *op_head() override;
+  RGWOp *op_put() override;
+  RGWOp *op_delete() override;
+  RGWOp *op_post() override;
+  RGWOp *op_copy() override;
+  RGWOp *op_options() override;
+
+public:
+  using RGWHandler_REST_SWIFT::RGWHandler_REST_SWIFT;
+  ~RGWHandler_REST_Obj_SWIFT() override = default;
+
+  /* Create the website helper first, then run the common Swift init. */
+  int init(RGWRados* const store,
+           struct req_state* const s,
+           rgw::io::BasicClient* const cio) override {
+    website_handler = boost::in_place<RGWSwiftWebsiteHandler>(store, s, this);
+    return RGWHandler_REST_SWIFT::init(store, s, cio);
+  }
+
+  int error_handler(int err_no, std::string *error_content) override {
+    return website_handler->error_handler(err_no, error_content);
+  }
+
+  int retarget(RGWOp* op, RGWOp** new_op) override {
+    return website_handler->retarget_object(op, new_op);
+  }
+};
+
+/* REST manager for the Swift API. When asked to act as the default manager
+ * it resolves the URI exactly like a regular get_resource_mgr() lookup. */
+class RGWRESTMgr_SWIFT : public RGWRESTMgr {
+public:
+  RGWRESTMgr_SWIFT() = default;
+  ~RGWRESTMgr_SWIFT() override = default;
+
+  RGWHandler_REST *get_handler(struct req_state *s,
+                               const rgw::auth::StrategyRegistry& auth_registry,
+                               const std::string& frontend_prefix) override;
+
+protected:
+  RGWRESTMgr* get_resource_mgr_as_default(struct req_state* const s,
+                                          const std::string& uri,
+                                          std::string* const out_uri) override {
+    RGWRESTMgr* const resolved = get_resource_mgr(s, uri, out_uri);
+    return resolved;
+  }
+};
+
+
+/* Swift flavour of RGWGetCrossDomainPolicy_ObjStore; only the response
+ * differs. */
+class RGWGetCrossDomainPolicy_ObjStore_SWIFT
+  : public RGWGetCrossDomainPolicy_ObjStore
+{
+public:
+  RGWGetCrossDomainPolicy_ObjStore_SWIFT() = default;
+  ~RGWGetCrossDomainPolicy_ObjStore_SWIFT() override = default;
+
+  void send_response() override;
+};
+
+/* Swift flavour of RGWGetHealthCheck_ObjStore; only the response differs. */
+class RGWGetHealthCheck_ObjStore_SWIFT
+  : public RGWGetHealthCheck_ObjStore
+{
+public:
+  RGWGetHealthCheck_ObjStore_SWIFT() = default;
+  ~RGWGetHealthCheck_ObjStore_SWIFT() override = default;
+
+  void send_response() override;
+};
+
+/* Handler serving the cross-domain policy. Only GET is mapped to an
+ * operation; authorize(), postauth_init() and read_permissions() all
+ * succeed unconditionally. */
+class RGWHandler_SWIFT_CrossDomain : public RGWHandler_REST {
+public:
+  RGWHandler_SWIFT_CrossDomain() = default;
+  ~RGWHandler_SWIFT_CrossDomain() override = default;
+
+  RGWOp *op_get() override {
+    return new RGWGetCrossDomainPolicy_ObjStore_SWIFT();
+  }
+
+  /* Marks the request as Swift with JSON output, then runs the generic
+   * RGWHandler initialisation. */
+  int init(RGWRados* const store,
+           struct req_state* const state,
+           rgw::io::BasicClient* const cio) override {
+    state->dialect = "swift";
+    state->format = RGW_FORMAT_JSON;
+    state->formatter = new JSONFormatter;
+
+    return RGWHandler::init(store, state, cio);
+  }
+
+  int authorize(const DoutPrefixProvider *dpp) override { return 0; }
+  int postauth_init() override { return 0; }
+  int read_permissions(RGWOp *) override { return 0; }
+
+  virtual RGWAccessControlPolicy *alloc_policy() { return nullptr; }
+  virtual void free_policy(RGWAccessControlPolicy *policy) {}
+};
+
+/* REST manager for the cross-domain policy: it resolves every URI to itself
+ * and always creates a RGWHandler_SWIFT_CrossDomain. */
+class RGWRESTMgr_SWIFT_CrossDomain : public RGWRESTMgr {
+public:
+  RGWRESTMgr_SWIFT_CrossDomain() = default;
+  ~RGWRESTMgr_SWIFT_CrossDomain() override = default;
+
+  RGWHandler_REST* get_handler(struct req_state* const s,
+                               const rgw::auth::StrategyRegistry&,
+                               const std::string&) override {
+    s->prot_flags |= RGW_REST_SWIFT;
+    return new RGWHandler_SWIFT_CrossDomain;
+  }
+
+protected:
+  RGWRESTMgr *get_resource_mgr(struct req_state* const s,
+                               const std::string& uri,
+                               std::string* const out_uri) override {
+    return this;
+  }
+};
+
+
+/* Handler serving the health check. Only GET is mapped to an operation;
+ * authorize(), postauth_init() and read_permissions() all succeed
+ * unconditionally. */
+class RGWHandler_SWIFT_HealthCheck : public RGWHandler_REST {
+public:
+  RGWHandler_SWIFT_HealthCheck() = default;
+  ~RGWHandler_SWIFT_HealthCheck() override = default;
+
+  RGWOp *op_get() override {
+    return new RGWGetHealthCheck_ObjStore_SWIFT();
+  }
+
+  /* Marks the request as Swift with JSON output, then runs the generic
+   * RGWHandler initialisation. */
+  int init(RGWRados* const store,
+           struct req_state* const state,
+           rgw::io::BasicClient* const cio) override {
+    state->dialect = "swift";
+    state->format = RGW_FORMAT_JSON;
+    state->formatter = new JSONFormatter;
+
+    return RGWHandler::init(store, state, cio);
+  }
+
+  int authorize(const DoutPrefixProvider *dpp) override { return 0; }
+  int postauth_init() override { return 0; }
+  int read_permissions(RGWOp *) override { return 0; }
+
+  virtual RGWAccessControlPolicy *alloc_policy() { return nullptr; }
+  virtual void free_policy(RGWAccessControlPolicy *policy) {}
+};
+
+/* REST manager for the health check: it resolves every URI to itself and
+ * always creates a RGWHandler_SWIFT_HealthCheck. */
+class RGWRESTMgr_SWIFT_HealthCheck : public RGWRESTMgr {
+public:
+  RGWRESTMgr_SWIFT_HealthCheck() = default;
+  ~RGWRESTMgr_SWIFT_HealthCheck() override = default;
+
+  RGWHandler_REST* get_handler(struct req_state* const s,
+                               const rgw::auth::StrategyRegistry&,
+                               const std::string&) override {
+    s->prot_flags |= RGW_REST_SWIFT;
+    return new RGWHandler_SWIFT_HealthCheck;
+  }
+
+protected:
+  RGWRESTMgr *get_resource_mgr(struct req_state* const s,
+                               const std::string& uri,
+                               std::string* const out_uri) override {
+    return this;
+  }
+};
+
+
+/* Handler serving the Swift info document. Only GET is mapped to an
+ * operation; authorize(), postauth_init() and read_permissions() all
+ * succeed unconditionally. */
+class RGWHandler_REST_SWIFT_Info : public RGWHandler_REST_SWIFT {
+public:
+  using RGWHandler_REST_SWIFT::RGWHandler_REST_SWIFT;
+  ~RGWHandler_REST_SWIFT_Info() override = default;
+
+  RGWOp *op_get() override {
+    return new RGWInfo_ObjStore_SWIFT();
+  }
+
+  /* Marks the request as Swift with JSON output, then runs the generic
+   * RGWHandler initialisation (not RGWHandler_REST_SWIFT::init). */
+  int init(RGWRados* const store,
+           struct req_state* const state,
+           rgw::io::BasicClient* const cio) override {
+    state->dialect = "swift";
+    state->format = RGW_FORMAT_JSON;
+    state->formatter = new JSONFormatter;
+
+    return RGWHandler::init(store, state, cio);
+  }
+
+  int authorize(const DoutPrefixProvider *dpp) override { return 0; }
+  int postauth_init() override { return 0; }
+  int read_permissions(RGWOp *) override { return 0; }
+};
+
+/* REST manager handing out RGWHandler_REST_SWIFT_Info handlers. */
+class RGWRESTMgr_SWIFT_Info : public RGWRESTMgr {
+public:
+  RGWRESTMgr_SWIFT_Info() = default;
+  ~RGWRESTMgr_SWIFT_Info() override = default;
+
+  RGWHandler_REST *get_handler(
+    struct req_state* s,
+    const rgw::auth::StrategyRegistry& auth_registry,
+    const std::string& frontend_prefix) override;
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_usage.cc b/src/rgw/rgw_rest_usage.cc
new file mode 100644
index 00000000..23b7a971
--- /dev/null
+++ b/src/rgw/rgw_rest_usage.cc
@@ -0,0 +1,108 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_op.h"
+#include "rgw_usage.h"
+#include "rgw_rest_usage.h"
+
+#include "include/str_list.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/* REST op "get_usage"; requires the read cap on "usage". */
+class RGWOp_Usage_Get : public RGWRESTOp {
+public:
+  RGWOp_Usage_Get() = default;
+
+  const char* name() const override { return "get_usage"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("usage", RGW_CAP_READ);
+  }
+
+  void execute() override;
+};
+
+/* Collect the query arguments (uid, bucket, start, end, show-entries,
+ * show-summary, categories) and store the result of RGWUsage::show() in
+ * http_ret. */
+void RGWOp_Usage_Get::execute() {
+  map<std::string, bool> categories;
+
+  string uid_str;
+  string bucket_name;
+
+  /* Every out-variable starts with the same default that is passed to the
+   * RESTArgs helper. The helpers' results are ignored here, and this file
+   * does not show whether they write the output when parsing fails.
+   * NOTE(review): confirm; rejecting malformed input may be preferable. */
+  const uint64_t no_end = (uint64_t)-1;
+  uint64_t start = 0;
+  uint64_t end = no_end;
+  bool show_entries = true;
+  bool show_summary = true;
+
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "bucket", bucket_name, &bucket_name);
+  rgw_user uid(uid_str);
+
+  RESTArgs::get_epoch(s, "start", 0, &start);
+  RESTArgs::get_epoch(s, "end", no_end, &end);
+  RESTArgs::get_bool(s, "show-entries", true, &show_entries);
+  RESTArgs::get_bool(s, "show-summary", true, &show_summary);
+
+  string cat_str;
+  RESTArgs::get_string(s, "categories", cat_str, &cat_str);
+
+  if (!cat_str.empty()) {
+    list<string> cat_list;
+    get_str_list(cat_str, cat_list);
+    for (const auto& category : cat_list) {
+      categories[category] = true;
+    }
+  }
+
+  http_ret = RGWUsage::show(store, uid, bucket_name, start, end, show_entries, show_summary, &categories, flusher);
+}
+
+/* REST op "trim_usage"; requires the write cap on "usage". */
+class RGWOp_Usage_Delete : public RGWRESTOp {
+public:
+  RGWOp_Usage_Delete() = default;
+
+  const char* name() const override { return "trim_usage"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("usage", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+/* Collect the query arguments (uid, bucket, start, end) and store the
+ * result of RGWUsage::trim() in http_ret.
+ *
+ * A request with no uid, a bucket, and the full default time range must
+ * carry an explicit remove-all=true; otherwise it fails with -EINVAL. */
+void RGWOp_Usage_Delete::execute() {
+  string uid_str;
+  string bucket_name;
+
+  /* Every out-variable starts with the same default that is passed to the
+   * RESTArgs helper. The helpers' results are ignored here, and this file
+   * does not show whether they write the output when parsing fails. Since
+   * the values select what gets trimmed, they must never be indeterminate.
+   * NOTE(review): confirm; rejecting malformed input may be preferable. */
+  const uint64_t no_end = (uint64_t)-1;
+  uint64_t start = 0;
+  uint64_t end = no_end;
+
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "bucket", bucket_name, &bucket_name);
+  rgw_user uid(uid_str);
+
+  RESTArgs::get_epoch(s, "start", 0, &start);
+  RESTArgs::get_epoch(s, "end", no_end, &end);
+
+  if (uid.empty() &&
+      !bucket_name.empty() &&
+      !start &&
+      end == no_end) {
+    bool remove_all = false;
+    RESTArgs::get_bool(s, "remove-all", false, &remove_all);
+    if (!remove_all) {
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  http_ret = RGWUsage::trim(store, uid, bucket_name, start, end);
+}
+
+/* GET maps to the usage query op. */
+RGWOp *RGWHandler_Usage::op_get()
+{
+  RGWOp* const usage_get = new RGWOp_Usage_Get;
+  return usage_get;
+}
+
+/* DELETE maps to the usage trim op. */
+RGWOp *RGWHandler_Usage::op_delete()
+{
+  RGWOp* const usage_trim = new RGWOp_Usage_Delete;
+  return usage_trim;
+}
+
+
diff --git a/src/rgw/rgw_rest_usage.h b/src/rgw/rgw_rest_usage.h
new file mode 100644
index 00000000..a09f32d0
--- /dev/null
+++ b/src/rgw/rgw_rest_usage.h
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_USAGE_H
+#define CEPH_RGW_REST_USAGE_H
+
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+
+
+/* Handler of the usage REST resource, built on RGWHandler_Auth_S3.
+ * read_permissions() always succeeds. */
+class RGWHandler_Usage : public RGWHandler_Auth_S3 {
+public:
+  using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+  ~RGWHandler_Usage() override = default;
+
+  int read_permissions(RGWOp*) override { return 0; }
+
+protected:
+  RGWOp *op_get() override;
+  RGWOp *op_delete() override;
+};
+
+/* REST manager that always creates a RGWHandler_Usage bound to the given
+ * auth registry. */
+class RGWRESTMgr_Usage : public RGWRESTMgr {
+public:
+  RGWRESTMgr_Usage() = default;
+  ~RGWRESTMgr_Usage() override = default;
+
+  RGWHandler_REST* get_handler(
+    struct req_state*,
+    const rgw::auth::StrategyRegistry& auth_registry,
+    const std::string&) override
+  {
+    RGWHandler_REST* const usage_handler = new RGWHandler_Usage(auth_registry);
+    return usage_handler;
+  }
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_user.cc b/src/rgw/rgw_rest_user.cc
new file mode 100644
index 00000000..d27105e0
--- /dev/null
+++ b/src/rgw/rgw_rest_user.cc
@@ -0,0 +1,999 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_json.h"
+
+#include "rgw_op.h"
+#include "rgw_user.h"
+#include "rgw_rest_user.h"
+
+#include "include/str_list.h"
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// REST op "list_user". check_caps() asks for RGW_CAP_READ on the "users"
+// capability.
+class RGWOp_User_List : public RGWRESTOp {
+public:
+  RGWOp_User_List() = default;
+
+  const char* name() const override { return "list_user"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_READ);
+  }
+
+  void execute() override;
+};
+
+// Read the paging arguments ("max-entries", default 1000, and "marker") and
+// delegate the listing to RGWUserAdminOp_User::list().
+void RGWOp_User_List::execute()
+{
+  uint32_t limit;
+  RESTArgs::get_uint32(s, "max-entries", 1000, &limit);
+
+  std::string start_after;
+  RESTArgs::get_string(s, "marker", start_after, &start_after);
+
+  RGWUserAdminOpState op_state;
+  op_state.max_entries = limit;
+  op_state.marker = start_after;
+
+  http_ret = RGWUserAdminOp_User::list(store, op_state, flusher);
+}
+
+// REST op "get_user_info". check_caps() asks for RGW_CAP_READ on the "users"
+// capability.
+class RGWOp_User_Info : public RGWRESTOp {
+public:
+  RGWOp_User_Info() = default;
+
+  const char* name() const override { return "get_user_info"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_READ);
+  }
+
+  void execute() override;
+};
+
+// Look a user up by "uid" and/or "access-key" and emit its info through
+// RGWUserAdminOp_User::info(). "stats" and "sync" (both default false) are
+// passed on as the fetch-stats / sync-stats flags.
+void RGWOp_User_Info::execute()
+{
+  std::string uid_str;
+  std::string access_key_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "access-key", access_key_str, &access_key_str);
+
+  // If neither identifier was supplied, error out now; otherwise we would
+  // end up initializing the anonymous user, for which keys.init will
+  // eventually return -EACCES.
+  const bool have_identifier = !uid_str.empty() || !access_key_str.empty();
+  if (!have_identifier) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  bool want_stats;
+  bool want_sync;
+  RESTArgs::get_bool(s, "stats", false, &want_stats);
+  RESTArgs::get_bool(s, "sync", false, &want_sync);
+
+  rgw_user uid(uid_str);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_access_key(access_key_str);
+  op_state.set_fetch_stats(want_stats);
+  op_state.set_sync_stats(want_sync);
+
+  http_ret = RGWUserAdminOp_User::info(store, op_state, flusher);
+}
+
+// REST op "create_user". check_caps() asks for RGW_CAP_WRITE on the "users"
+// capability.
+class RGWOp_User_Create : public RGWRESTOp {
+public:
+  RGWOp_User_Create() = default;
+
+  const char* name() const override { return "create_user"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the user-creation arguments from the request, copy them into an
+// RGWUserAdminOpState and call RGWUserAdminOp_User::create().
+//
+// Fails with -EINVAL when a non-system caller asks for the system flag or
+// when "op-mask" cannot be parsed. The op_state setters are invoked in a
+// fixed order on purpose; do not reshuffle them.
+void RGWOp_User_Create::execute()
+{
+  // Configured bucket limit; doubles as the default for "max-buckets".
+  const int32_t default_max_buckets =
+    s->cct->_conf.get_val<int64_t>("rgw_user_max_buckets");
+
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string display_name;
+  std::string email;
+  std::string access_key;
+  std::string secret_key;
+  std::string key_type_str;
+  std::string caps;
+  std::string tenant_name;
+  std::string op_mask_str;
+  bool gen_key;
+  bool suspended;
+  bool system_flag;
+  bool exclusive;
+  int32_t max_buckets;
+
+  RESTArgs::get_string(s, "display-name", display_name, &display_name);
+  RESTArgs::get_string(s, "email", email, &email);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+  RESTArgs::get_string(s, "user-caps", caps, &caps);
+  RESTArgs::get_string(s, "tenant", tenant_name, &tenant_name);
+  RESTArgs::get_bool(s, "generate-key", true, &gen_key);
+  RESTArgs::get_bool(s, "suspended", false, &suspended);
+  RESTArgs::get_int32(s, "max-buckets", default_max_buckets, &max_buckets);
+  RESTArgs::get_bool(s, "system", false, &system_flag);
+  RESTArgs::get_bool(s, "exclusive", false, &exclusive);
+  RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str);
+
+  // Only a system user may hand out the system flag.
+  if (system_flag && !s->user->system) {
+    ldout(s->cct, 0) << "cannot set system flag by non-system user" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // An explicit "tenant" argument overrides the uid's tenant field.
+  if (!tenant_name.empty()) {
+    uid.tenant = tenant_name;
+  }
+
+  RGWUserAdminOpState op_state;
+
+  // TODO: validate required args are passed in. (for eg. uid and display_name here)
+  op_state.set_user_id(uid);
+  op_state.set_display_name(display_name);
+  op_state.set_user_email(email);
+  op_state.set_caps(caps);
+  op_state.set_access_key(access_key);
+  op_state.set_secret_key(secret_key);
+
+  if (!op_mask_str.empty()) {
+    uint32_t op_mask;
+    const int ret = rgw_parse_op_type_list(op_mask_str, &op_mask);
+    if (ret < 0) {
+      ldout(s->cct, 0) << "failed to parse op_mask: " << ret << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+    op_state.set_op_mask(op_mask);
+  }
+
+  // Any value other than "swift" or "s3" is passed on as KEY_TYPE_UNDEFINED.
+  if (!key_type_str.empty()) {
+    int32_t key_type = KEY_TYPE_UNDEFINED;
+    if (key_type_str == "swift") {
+      key_type = KEY_TYPE_SWIFT;
+    } else if (key_type_str == "s3") {
+      key_type = KEY_TYPE_S3;
+    }
+    op_state.set_key_type(key_type);
+  }
+
+  if (max_buckets != default_max_buckets) {
+    op_state.set_max_buckets(max_buckets);
+  }
+
+  // These flags are forwarded only when the argument is present at all.
+  if (s->info.args.exists("suspended")) {
+    op_state.set_suspension(suspended);
+  }
+  if (s->info.args.exists("system")) {
+    op_state.set_system(system_flag);
+  }
+  if (s->info.args.exists("exclusive")) {
+    op_state.set_exclusive(exclusive);
+  }
+
+  if (gen_key) {
+    op_state.set_generate_key();
+  }
+
+  http_ret = RGWUserAdminOp_User::create(store, op_state, flusher);
+}
+
+// REST op "modify_user". check_caps() asks for RGW_CAP_WRITE on the "users"
+// capability.
+class RGWOp_User_Modify : public RGWRESTOp {
+public:
+  RGWOp_User_Modify() = default;
+
+  const char* name() const override { return "modify_user"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the user-modification arguments from the request, copy them into
+// an RGWUserAdminOpState and call RGWUserAdminOp_User::modify().
+//
+// Fails with -EINVAL when a non-system caller asks for the system flag or
+// when "op-mask" cannot be parsed. The op_state setters are invoked in a
+// fixed order on purpose; do not reshuffle them.
+void RGWOp_User_Modify::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string display_name;
+  std::string email;
+  std::string access_key;
+  std::string secret_key;
+  std::string key_type_str;
+  std::string caps;
+  std::string op_mask_str;
+  bool gen_key;
+  bool suspended;
+  bool system_flag;
+  bool email_given;        // set by get_string() for "email"
+  bool max_buckets_given;  // set by get_int32() for "max-buckets"
+  int32_t max_buckets;
+
+  RESTArgs::get_string(s, "display-name", display_name, &display_name);
+  RESTArgs::get_string(s, "email", email, &email, &email_given);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_string(s, "user-caps", caps, &caps);
+  RESTArgs::get_bool(s, "generate-key", false, &gen_key);
+  RESTArgs::get_bool(s, "suspended", false, &suspended);
+  RESTArgs::get_int32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets, &max_buckets_given);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+
+  RESTArgs::get_bool(s, "system", false, &system_flag);
+  RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str);
+
+  // Only a system user may hand out the system flag.
+  if (system_flag && !s->user->system) {
+    ldout(s->cct, 0) << "cannot set system flag by non-system user" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  RGWUserAdminOpState op_state;
+
+  op_state.set_user_id(uid);
+  op_state.set_display_name(display_name);
+
+  if (email_given) {
+    op_state.set_user_email(email);
+  }
+
+  op_state.set_caps(caps);
+  op_state.set_access_key(access_key);
+  op_state.set_secret_key(secret_key);
+
+  if (max_buckets_given) {
+    op_state.set_max_buckets(max_buckets);
+  }
+
+  if (gen_key) {
+    op_state.set_generate_key();
+  }
+
+  // Any value other than "swift" or "s3" is passed on as KEY_TYPE_UNDEFINED.
+  if (!key_type_str.empty()) {
+    int32_t key_type = KEY_TYPE_UNDEFINED;
+    if (key_type_str == "swift") {
+      key_type = KEY_TYPE_SWIFT;
+    } else if (key_type_str == "s3") {
+      key_type = KEY_TYPE_S3;
+    }
+    op_state.set_key_type(key_type);
+  }
+
+  // These flags are forwarded only when the argument is present at all.
+  if (s->info.args.exists("suspended")) {
+    op_state.set_suspension(suspended);
+  }
+  if (s->info.args.exists("system")) {
+    op_state.set_system(system_flag);
+  }
+
+  if (!op_mask_str.empty()) {
+    uint32_t op_mask;
+    const int ret = rgw_parse_op_type_list(op_mask_str, &op_mask);
+    if (ret < 0) {
+      ldout(s->cct, 0) << "failed to parse op_mask: " << ret << dendl;
+      http_ret = -EINVAL;
+      return;
+    }
+    op_state.set_op_mask(op_mask);
+  }
+
+  http_ret = RGWUserAdminOp_User::modify(store, op_state, flusher);
+}
+
+// REST op "remove_user". check_caps() asks for RGW_CAP_WRITE on the "users"
+// capability.
+class RGWOp_User_Remove : public RGWRESTOp {
+public:
+  RGWOp_User_Remove() = default;
+
+  const char* name() const override { return "remove_user"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Remove the user named by "uid" via RGWUserAdminOp_User::remove().
+// "purge-data" (default false) is forwarded as the purge-data flag.
+void RGWOp_User_Remove::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  bool purge_data;
+  RESTArgs::get_bool(s, "purge-data", false, &purge_data);
+
+  RGWUserAdminOpState op_state;
+
+  // FIXME: no double checking
+  if (!uid.empty()) {
+    op_state.set_user_id(uid);
+  }
+  op_state.set_purge_data(purge_data);
+
+  http_ret = RGWUserAdminOp_User::remove(store, op_state, flusher);
+}
+
+// REST op "create_subuser". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Subuser_Create : public RGWRESTOp {
+public:
+  RGWOp_Subuser_Create() = default;
+
+  const char* name() const override { return "create_subuser"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the subuser-creation arguments, copy them into an
+// RGWUserAdminOpState and call RGWUserAdminOp_Subuser::create().
+//
+// The key type defaults to KEY_TYPE_SWIFT and is always forwarded; only the
+// values "swift" and "s3" of "key-type" change it.
+void RGWOp_Subuser_Create::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string subuser;
+  std::string access_key;
+  std::string secret_key;
+  std::string perm_str;
+  std::string key_type_str;
+  bool gen_secret;
+  bool gen_access;
+  // FIXME placeholder: the "generate-subuser" argument is not read, so this
+  // flag is always false.
+  bool gen_subuser = false;
+
+  RESTArgs::get_string(s, "subuser", subuser, &subuser);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_string(s, "access", perm_str, &perm_str);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+  RESTArgs::get_bool(s, "generate-secret", false, &gen_secret);
+  RESTArgs::get_bool(s, "gen-access-key", false, &gen_access);
+
+  RGWUserAdminOpState op_state;
+
+  uint32_t perm_mask = rgw_str_to_perm(perm_str.c_str());
+  op_state.set_perm(perm_mask);
+
+  op_state.set_user_id(uid);
+  op_state.set_subuser(subuser);
+  op_state.set_access_key(access_key);
+  op_state.set_secret_key(secret_key);
+  op_state.set_generate_subuser(gen_subuser);
+
+  if (gen_access) {
+    op_state.set_gen_access();
+  }
+  if (gen_secret) {
+    op_state.set_gen_secret();
+  }
+
+  int32_t key_type = KEY_TYPE_SWIFT;
+  if (key_type_str == "swift") {
+    key_type = KEY_TYPE_SWIFT;
+  } else if (key_type_str == "s3") {
+    key_type = KEY_TYPE_S3;
+  }
+  op_state.set_key_type(key_type);
+
+  http_ret = RGWUserAdminOp_Subuser::create(store, op_state, flusher);
+}
+
+// REST op "modify_subuser". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Subuser_Modify : public RGWRESTOp {
+public:
+  RGWOp_Subuser_Modify() = default;
+
+  const char* name() const override { return "modify_subuser"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the subuser-modification arguments, copy them into an
+// RGWUserAdminOpState and call RGWUserAdminOp_Subuser::modify().
+//
+// The permission mask derived from "access" is always forwarded, and so is
+// the key type (default KEY_TYPE_SWIFT); the secret key only when non-empty.
+void RGWOp_Subuser_Modify::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string subuser;
+  std::string secret_key;
+  std::string perm_str;
+  std::string key_type_str;
+  bool gen_secret;
+
+  RESTArgs::get_string(s, "subuser", subuser, &subuser);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_string(s, "access", perm_str, &perm_str);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+  RESTArgs::get_bool(s, "generate-secret", false, &gen_secret);
+
+  RGWUserAdminOpState op_state;
+
+  uint32_t perm_mask = rgw_str_to_perm(perm_str.c_str());
+  op_state.set_perm(perm_mask);
+
+  op_state.set_user_id(uid);
+  op_state.set_subuser(subuser);
+
+  if (!secret_key.empty()) {
+    op_state.set_secret_key(secret_key);
+  }
+  if (gen_secret) {
+    op_state.set_gen_secret();
+  }
+
+  int32_t key_type = KEY_TYPE_SWIFT;
+  if (key_type_str == "swift") {
+    key_type = KEY_TYPE_SWIFT;
+  } else if (key_type_str == "s3") {
+    key_type = KEY_TYPE_S3;
+  }
+  op_state.set_key_type(key_type);
+
+  http_ret = RGWUserAdminOp_Subuser::modify(store, op_state, flusher);
+}
+
+// REST op "remove_subuser". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Subuser_Remove : public RGWRESTOp {
+public:
+  RGWOp_Subuser_Remove() = default;
+
+  const char* name() const override { return "remove_subuser"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Remove the subuser named by "uid"/"subuser" through
+// RGWUserAdminOp_Subuser::remove(). "purge-keys" defaults to true.
+void RGWOp_Subuser_Remove::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string subuser;
+  bool purge_keys;
+  RESTArgs::get_string(s, "subuser", subuser, &subuser);
+  RESTArgs::get_bool(s, "purge-keys", true, &purge_keys);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_subuser(subuser);
+  if (purge_keys) {
+    op_state.set_purge_keys();
+  }
+
+  http_ret = RGWUserAdminOp_Subuser::remove(store, op_state, flusher);
+}
+
+// REST op "create_access_key". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Key_Create : public RGWRESTOp {
+public:
+  RGWOp_Key_Create() = default;
+
+  const char* name() const override { return "create_access_key"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the key-creation arguments, copy them into an RGWUserAdminOpState
+// and call RGWUserAdminOp_Key::create(). "generate-key" defaults to true.
+void RGWOp_Key_Create::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string subuser;
+  std::string access_key;
+  std::string secret_key;
+  std::string key_type_str;
+  bool gen_key;
+
+  RESTArgs::get_string(s, "subuser", subuser, &subuser);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+  RESTArgs::get_bool(s, "generate-key", true, &gen_key);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_subuser(subuser);
+  op_state.set_access_key(access_key);
+  op_state.set_secret_key(secret_key);
+
+  if (gen_key) {
+    op_state.set_generate_key();
+  }
+
+  // Any value other than "swift" or "s3" is passed on as KEY_TYPE_UNDEFINED.
+  if (!key_type_str.empty()) {
+    int32_t key_type = KEY_TYPE_UNDEFINED;
+    if (key_type_str == "swift") {
+      key_type = KEY_TYPE_SWIFT;
+    } else if (key_type_str == "s3") {
+      key_type = KEY_TYPE_S3;
+    }
+    op_state.set_key_type(key_type);
+  }
+
+  http_ret = RGWUserAdminOp_Key::create(store, op_state, flusher);
+}
+
+// REST op "remove_access_key". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Key_Remove : public RGWRESTOp {
+public:
+  RGWOp_Key_Remove() = default;
+
+  const char* name() const override { return "remove_access_key"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Collect the key-removal arguments, copy them into an RGWUserAdminOpState
+// and call RGWUserAdminOp_Key::remove().
+void RGWOp_Key_Remove::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string subuser;
+  std::string access_key;
+  std::string key_type_str;
+
+  RESTArgs::get_string(s, "subuser", subuser, &subuser);
+  RESTArgs::get_string(s, "access-key", access_key, &access_key);
+  RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_subuser(subuser);
+  op_state.set_access_key(access_key);
+
+  // Any value other than "swift" or "s3" is passed on as KEY_TYPE_UNDEFINED.
+  if (!key_type_str.empty()) {
+    int32_t key_type = KEY_TYPE_UNDEFINED;
+    if (key_type_str == "swift") {
+      key_type = KEY_TYPE_SWIFT;
+    } else if (key_type_str == "s3") {
+      key_type = KEY_TYPE_S3;
+    }
+    op_state.set_key_type(key_type);
+  }
+
+  http_ret = RGWUserAdminOp_Key::remove(store, op_state, flusher);
+}
+
+// REST op "add_user_caps". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Caps_Add : public RGWRESTOp {
+public:
+  RGWOp_Caps_Add() = default;
+
+  const char* name() const override { return "add_user_caps"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Forward "uid" and "user-caps" to RGWUserAdminOp_Caps::add().
+void RGWOp_Caps_Add::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string caps;
+  RESTArgs::get_string(s, "user-caps", caps, &caps);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_caps(caps);
+
+  http_ret = RGWUserAdminOp_Caps::add(store, op_state, flusher);
+}
+
+// REST op "remove_user_caps". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Caps_Remove : public RGWRESTOp {
+public:
+  RGWOp_Caps_Remove() = default;
+
+  const char* name() const override { return "remove_user_caps"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+// Forward "uid" and "user-caps" to RGWUserAdminOp_Caps::remove().
+void RGWOp_Caps_Remove::execute()
+{
+  std::string uid_str;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  rgw_user uid(uid_str);
+
+  std::string caps;
+  RESTArgs::get_string(s, "user-caps", caps, &caps);
+
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+  op_state.set_caps(caps);
+
+  http_ret = RGWUserAdminOp_Caps::remove(store, op_state, flusher);
+}
+
+// Pair of quotas (bucket and user) that is encoded to / decoded from JSON
+// under the keys "bucket_quota" and "user_quota".
+struct UserQuotas {
+  RGWQuotaInfo bucket_quota;
+  RGWQuotaInfo user_quota;
+
+  UserQuotas() = default;
+
+  // Copy both quotas out of an existing user record.
+  explicit UserQuotas(RGWUserInfo& info)
+    : bucket_quota(info.bucket_quota),
+      user_quota(info.user_quota)
+  {}
+
+  void dump(Formatter *f) const
+  {
+    encode_json("bucket_quota", bucket_quota, f);
+    encode_json("user_quota", user_quota, f);
+  }
+
+  void decode_json(JSONObj *obj)
+  {
+    JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+    JSONDecoder::decode_json("user_quota", user_quota, obj);
+  }
+};
+
+// REST op "get_quota_info". check_caps() asks for RGW_CAP_READ on the
+// "users" capability.
+class RGWOp_Quota_Info : public RGWRESTOp {
+public:
+  RGWOp_Quota_Info() = default;
+
+  const char* name() const override { return "get_quota_info"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_READ);
+  }
+
+  void execute() override;
+};
+
+
+// Emit the quota settings of the user named by "uid".
+//
+// "quota-type" selects what is written: empty -> both quotas under "quota",
+// "user" -> "user_quota", "bucket" -> "bucket_quota". Any other value, or a
+// missing uid, yields -EINVAL; an unknown user yields -ERR_NO_SUCH_USER.
+void RGWOp_Quota_Info::execute()
+{
+  std::string uid_str;
+  std::string quota_type;
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "quota-type", quota_type, &quota_type);
+
+  if (uid_str.empty()) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  const bool show_all = quota_type.empty();
+  const bool show_bucket = show_all || quota_type == "bucket";
+  const bool show_user = show_all || quota_type == "user";
+
+  // show_all already implies both of the other flags.
+  if (!show_bucket && !show_user) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  rgw_user uid(uid_str);
+  RGWUserAdminOpState op_state;
+  op_state.set_user_id(uid);
+
+  RGWUser user;
+  http_ret = user.init(store, op_state);
+  if (http_ret < 0) {
+    return;
+  }
+
+  if (!op_state.has_existing_user()) {
+    http_ret = -ERR_NO_SUCH_USER;
+    return;
+  }
+
+  RGWUserInfo info;
+  string err_msg;
+  http_ret = user.info(info, &err_msg);
+  if (http_ret < 0) {
+    return;
+  }
+
+  flusher.start(0);
+  if (show_all) {
+    UserQuotas quotas(info);
+    encode_json("quota", quotas, s->formatter);
+  } else if (show_user) {
+    encode_json("user_quota", info.user_quota, s->formatter);
+  } else {
+    encode_json("bucket_quota", info.bucket_quota, s->formatter);
+  }
+  flusher.flush();
+}
+
+// REST op "set_quota_info". check_caps() asks for RGW_CAP_WRITE on the
+// "users" capability.
+class RGWOp_Quota_Set : public RGWRESTOp {
+public:
+  RGWOp_Quota_Set() = default;
+
+  const char* name() const override { return "set_quota_info"; }
+
+  int check_caps(RGWUserCaps& caps) override {
+    return caps.check_cap("users", RGW_CAP_WRITE);
+  }
+
+  void execute() override;
+};
+
+/**
+ * set quota
+ *
+ * two different ways to set the quota info: as json struct in the message body or via http params.
+ *
+ * as json:
+ *
+ * PUT /admin/user?quota&uid=<uid>[&quota-type=<type>]
+ *
+ * where quota-type is optional and is either user or bucket
+ *
+ * if quota-type is not specified then we expect to get a structure that contains both quotas,
+ * otherwise we'll only get the relevant configuration.
+ *
+ * E.g., if quota type not specified:
+ * {
+ * "user_quota" : {
+ * "max_size_kb" : 4096,
+ * "max_objects" : -1,
+ * "enabled" : false
+ * },
+ * "bucket_quota" : {
+ * "max_size_kb" : 1024,
+ * "max_objects" : -1,
+ * "enabled" : true
+ * }
+ * }
+ *
+ *
+ * or if quota type is specified:
+ * {
+ * "max_size_kb" : 4096,
+ * "max_objects" : -1,
+ * "enabled" : false
+ * }
+ *
+ * Another option is not to pass any body and set the following http params:
+ *
+ *
+ * max-size=<size>
+ * max-size-kb=<size> (takes precedence over max-size when both are given)
+ * max-objects=<max objects>
+ * enabled[={true,false}]
+ *
+ * All params are optional and default to the current settings. With this type of configuration the
+ * quota-type param is mandatory.
+ *
+ */
+
+// Update the user quota, the bucket quota, or both, of an existing user.
+//
+// The new settings come either from a JSON request body or, when there is
+// no body, from the "max-objects", "max-size", "max-size-kb" and "enabled"
+// http params (which default to the user's current values). With an empty
+// "quota-type" both quotas are set, and that is only possible via a body.
+//
+// http_ret ends up as -EINVAL for a missing uid, an unknown quota type,
+// params without a quota type or an out-of-range "max-size-kb";
+// -ERR_NO_SUCH_USER for an unknown user; otherwise the result of the
+// failing helper or of user.modify().
+void RGWOp_Quota_Set::execute()
+{
+  // Limit passed to rgw_rest_get_json_input() for the JSON body.
+  constexpr int quota_input_max_len = 1024;
+
+  RGWUserAdminOpState op_state;
+
+  std::string uid_str;
+  std::string quota_type;
+
+  RESTArgs::get_string(s, "uid", uid_str, &uid_str);
+  RESTArgs::get_string(s, "quota-type", quota_type, &quota_type);
+
+  if (uid_str.empty()) {
+    http_ret = -EINVAL;
+    return;
+  }
+
+  rgw_user uid(uid_str);
+
+  bool set_all = quota_type.empty();
+  bool set_bucket = set_all || (quota_type == "bucket");
+  bool set_user = set_all || (quota_type == "user");
+
+  if (!(set_all || set_bucket || set_user)) {
+    ldout(store->ctx(), 20) << "invalid quota type" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  // A body is expected when a content length was sent or the transfer is
+  // chunked; otherwise fall back to the http params.
+  bool use_http_params;
+
+  if (s->content_length > 0) {
+    use_http_params = false;
+  } else {
+    const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+    use_http_params = (!encoding || strcmp(encoding, "chunked") != 0);
+  }
+
+  if (use_http_params && set_all) {
+    ldout(store->ctx(), 20) << "quota type was not specified, can't set all quotas via http headers" << dendl;
+    http_ret = -EINVAL;
+    return;
+  }
+
+  op_state.set_user_id(uid);
+
+  RGWUser user;
+  http_ret = user.init(store, op_state);
+  if (http_ret < 0) {
+    ldout(store->ctx(), 20) << "failed initializing user info: " << http_ret << dendl;
+    return;
+  }
+
+  if (!op_state.has_existing_user()) {
+    http_ret = -ERR_NO_SUCH_USER;
+    return;
+  }
+
+  if (set_all) {
+    UserQuotas quotas;
+
+    if ((http_ret = rgw_rest_get_json_input(store->ctx(), s, quotas, quota_input_max_len, NULL)) < 0) {
+      ldout(store->ctx(), 20) << "failed to retrieve input" << dendl;
+      return;
+    }
+
+    op_state.set_user_quota(quotas.user_quota);
+    op_state.set_bucket_quota(quotas.bucket_quota);
+  } else {
+    RGWQuotaInfo quota;
+
+    if (!use_http_params) {
+      // Initialised so the check below never reads an indeterminate value
+      // should the helper fail before writing to it.
+      bool empty = false;
+      http_ret = rgw_rest_get_json_input(store->ctx(), s, quota, quota_input_max_len, &empty);
+      if (http_ret < 0) {
+        ldout(store->ctx(), 20) << "failed to retrieve input" << dendl;
+        if (!empty)
+          return;
+
+        /* was probably chunked input, but no content provided, configure via http params */
+        use_http_params = true;
+      }
+    }
+
+    if (use_http_params) {
+      RGWUserInfo info;
+      string err_msg;
+      http_ret = user.info(info, &err_msg);
+      if (http_ret < 0) {
+        ldout(store->ctx(), 20) << "failed to get user info: " << http_ret << dendl;
+        return;
+      }
+      RGWQuotaInfo *old_quota;
+      if (set_user) {
+        old_quota = &info.user_quota;
+      } else {
+        old_quota = &info.bucket_quota;
+      }
+
+      RESTArgs::get_int64(s, "max-objects", old_quota->max_objects, &quota.max_objects);
+      RESTArgs::get_int64(s, "max-size", old_quota->max_size, &quota.max_size);
+      int64_t max_size_kb;
+      bool has_max_size_kb = false;
+      RESTArgs::get_int64(s, "max-size-kb", 0, &max_size_kb, &has_max_size_kb);
+      if (has_max_size_kb) {
+        // The value comes straight from the request; refuse anything whose
+        // byte count does not fit in int64_t (signed overflow is undefined).
+        if (max_size_kb > INT64_MAX / 1024 || max_size_kb < INT64_MIN / 1024) {
+          ldout(store->ctx(), 20) << "max-size-kb out of range" << dendl;
+          http_ret = -EINVAL;
+          return;
+        }
+        quota.max_size = max_size_kb * 1024;
+      }
+      RESTArgs::get_bool(s, "enabled", old_quota->enabled, &quota.enabled);
+    }
+
+    if (set_user) {
+      op_state.set_user_quota(quota);
+    } else {
+      op_state.set_bucket_quota(quota);
+    }
+  }
+
+  string err;
+  http_ret = user.modify(op_state, &err);
+  if (http_ret < 0) {
+    ldout(store->ctx(), 20) << "failed updating user info: " << http_ret << ": " << err << dendl;
+    return;
+  }
+}
+
+// Pick the op for a GET: the "quota" sub-resource selects quota info,
+// "list" selects the user listing, anything else selects user info.
+RGWOp *RGWHandler_User::op_get()
+{
+  auto& args = s->info.args;
+
+  if (args.sub_resource_exists("quota")) {
+    return new RGWOp_Quota_Info;
+  }
+  if (args.sub_resource_exists("list")) {
+    return new RGWOp_User_List;
+  }
+  return new RGWOp_User_Info;
+}
+
+// Pick the op for a PUT. Sub-resources are tested in the order "subuser",
+// "key", "caps", "quota"; without any of them a user is created.
+RGWOp *RGWHandler_User::op_put()
+{
+  auto& args = s->info.args;
+
+  if (args.sub_resource_exists("subuser")) {
+    return new RGWOp_Subuser_Create;
+  }
+  if (args.sub_resource_exists("key")) {
+    return new RGWOp_Key_Create;
+  }
+  if (args.sub_resource_exists("caps")) {
+    return new RGWOp_Caps_Add;
+  }
+  if (args.sub_resource_exists("quota")) {
+    return new RGWOp_Quota_Set;
+  }
+  return new RGWOp_User_Create;
+}
+
+// Pick the op for a POST: the "subuser" sub-resource modifies a subuser,
+// anything else modifies the user itself.
+RGWOp *RGWHandler_User::op_post()
+{
+  const bool on_subuser = s->info.args.sub_resource_exists("subuser");
+  if (on_subuser) {
+    return new RGWOp_Subuser_Modify;
+  }
+  return new RGWOp_User_Modify;
+}
+
+// Pick the op for a DELETE. Sub-resources are tested in the order
+// "subuser", "key", "caps"; without any of them the user is removed.
+RGWOp *RGWHandler_User::op_delete()
+{
+  auto& args = s->info.args;
+
+  if (args.sub_resource_exists("subuser")) {
+    return new RGWOp_Subuser_Remove;
+  }
+  if (args.sub_resource_exists("key")) {
+    return new RGWOp_Key_Remove;
+  }
+  if (args.sub_resource_exists("caps")) {
+    return new RGWOp_Caps_Remove;
+  }
+  return new RGWOp_User_Remove;
+}
+
diff --git a/src/rgw/rgw_rest_user.h b/src/rgw/rgw_rest_user.h
new file mode 100644
index 00000000..047fe5ff
--- /dev/null
+++ b/src/rgw/rgw_rest_user.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_USER_H
+#define CEPH_RGW_REST_USER_H
+
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+
+
+// Handler for user requests. It inherits its constructors from
+// RGWHandler_Auth_S3 and supplies the ops for GET, PUT, POST and DELETE.
+class RGWHandler_User : public RGWHandler_Auth_S3 {
+public:
+  using RGWHandler_Auth_S3::RGWHandler_Auth_S3;
+  ~RGWHandler_User() override = default;
+
+  // Nothing is read here; always reports success (0).
+  int read_permissions(RGWOp*) override { return 0; }
+
+protected:
+  RGWOp *op_get() override;
+  RGWOp *op_put() override;
+  RGWOp *op_post() override;
+  RGWOp *op_delete() override;
+};
+
+// REST manager whose get_handler() always hands out a fresh RGWHandler_User.
+class RGWRESTMgr_User : public RGWRESTMgr {
+public:
+  RGWRESTMgr_User() = default;
+  ~RGWRESTMgr_User() override = default;
+
+  // The request state and the trailing string argument are ignored; only the
+  // auth registry is forwarded to the new handler.
+  RGWHandler_REST *get_handler(struct req_state*,
+                               const rgw::auth::StrategyRegistry& auth_registry,
+                               const std::string&) override {
+    RGWHandler_REST *handler = new RGWHandler_User(auth_registry);
+    return handler;
+  }
+};
+
+#endif
diff --git a/src/rgw/rgw_rest_user_policy.cc b/src/rgw/rgw_rest_user_policy.cc
new file mode 100644
index 00000000..d93f69ae
--- /dev/null
+++ b/src/rgw/rgw_rest_user_policy.cc
@@ -0,0 +1,363 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <regex>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+
+#include "include/types.h"
+#include "rgw_string.h"
+
+#include "rgw_common.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_user_policy.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using rgw::IAM::Policy;
+
+// Write the three fields of this op to the formatter, in the order
+// "Policyname", "Username", "Policydocument".
+void RGWRestUserPolicy::dump(Formatter *f) const
+{
+  encode_json("Policyname", policy_name, f);
+  encode_json("Username", user_name, f);
+  encode_json("Policydocument", policy, f);
+}
+
+// Record a non-zero op_ret in the request state, then dump the status and
+// finish the header.
+void RGWRestUserPolicy::send_response()
+{
+  if (op_ret != 0) {
+    set_req_state_err(s, op_ret);
+  }
+
+  dump_errno(s);
+  end_header(s);
+}
+
+// Decide whether the caller may run this op.
+//
+// Anonymous callers get -EACCES. A caller whose caps pass check_caps() is
+// allowed right away. Everyone else needs verify_user_permission() to allow
+// get_op() on the ARN built from the "UserName" argument.
+int RGWRestUserPolicy::verify_permission()
+{
+  if (s->auth.identity->is_anonymous()) {
+    return -EACCES;
+  }
+
+  const int caps_ret = check_caps(s->user->caps);
+  if (caps_ret == 0) {
+    return caps_ret;
+  }
+
+  uint64_t op = get_op();
+  string target_name = s->info.args.get("UserName");
+  rgw_user user_id(target_name);
+
+  const bool allowed =
+    verify_user_permission(this, s,
+                           rgw::ARN(user_id.id, "user", user_id.tenant),
+                           op);
+  if (!allowed) {
+    return -EACCES;
+  }
+  return 0;
+}
+
+// Check policy_name: it must be no longer than MAX_POLICY_NAME_LEN and
+// consist solely of the characters A-Z a-z 0-9 : = , . @ - (at least one).
+// Returns true when the name is acceptable; failures are logged at level 0.
+bool RGWRestUserPolicy::validate_input()
+{
+  if (policy_name.length() > MAX_POLICY_NAME_LEN) {
+    ldout(s->cct, 0) << "ERROR: Invalid policy name length " << dendl;
+    return false;
+  }
+
+  // Compiling a std::regex is costly, so build the pattern once instead of
+  // on every call. Initialisation of a function-local static is thread-safe
+  // and matching against a const regex does not modify it.
+  static const std::regex regex_policy_name("[A-Za-z0-9:=,.@-]+");
+  if (! std::regex_match(policy_name, regex_policy_name)) {
+    ldout(s->cct, 0) << "ERROR: Invalid chars in policy name " << dendl;
+    return false;
+  }
+
+  return true;
+}
+
+// Read-type policy ops ask for RGW_CAP_READ on the "user-policy" capability.
+int RGWUserPolicyRead::check_caps(RGWUserCaps& caps)
+{
+  const int ret = caps.check_cap("user-policy", RGW_CAP_READ);
+  return ret;
+}
+
+// Write-type policy ops ask for RGW_CAP_WRITE on the "user-policy"
+// capability.
+int RGWUserPolicyWrite::check_caps(RGWUserCaps& caps)
+{
+  const int ret = caps.check_cap("user-policy", RGW_CAP_WRITE);
+  return ret;
+}
+
+// IAM action identifier checked by verify_permission() for this op.
+uint64_t RGWPutUserPolicy::get_op()
+{
+  const uint64_t action = rgw::IAM::iamPutUserPolicy;
+  return action;
+}
+
+// Load the url-decoded "PolicyName", "UserName" and "PolicyDocument"
+// arguments into the members. Returns -EINVAL when any of them is empty or
+// validate_input() rejects the policy name, 0 otherwise.
+int RGWPutUserPolicy::get_params()
+{
+  policy_name = url_decode(s->info.args.get("PolicyName"), true);
+  user_name = url_decode(s->info.args.get("UserName"), true);
+  policy = url_decode(s->info.args.get("PolicyDocument"), true);
+
+  const bool missing_arg =
+    policy_name.empty() || user_name.empty() || policy.empty();
+  if (missing_arg) {
+    ldout(s->cct, 20) << "ERROR: one of policy name, user name or policy document is empty"
+                      << dendl;
+    return -EINVAL;
+  }
+
+  return validate_input() ? 0 : -EINVAL;
+}
+
+// Attach (or replace) the inline policy `policy_name` on user `user_name`.
+//
+// The policy document is parsed first so that malformed input is rejected
+// with -ERR_MALFORMED_DOC. The user's existing policy map is then decoded
+// from the RGW_ATTR_USER_POLICY attr, updated and stored back with the user
+// info. On success a PutUserPolicyResponse is written to the formatter.
+//
+// op_ret: -ERR_NO_SUCH_ENTITY when the user or its attrs are missing, -EIO
+// when the stored policy map cannot be decoded, -ERR_INTERNAL_ERROR when
+// storing fails.
+void RGWPutUserPolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  bufferlist bl = bufferlist::static_from_string(policy);
+
+  RGWUserInfo info;
+  rgw_user user_id(user_name);
+  op_ret = rgw_get_user_info_by_uid(store, user_id, info);
+  if (op_ret < 0) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  map<string, bufferlist> uattrs;
+  op_ret = rgw_get_user_attrs_by_uid(store, user_id, uattrs);
+  // NOTE(review): errors other than -ENOENT fall through here and are later
+  // overwritten by the store result - confirm that is intended.
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  try {
+    // Constructed only for validation; throws PolicyParseException.
+    const Policy p(s->cct, s->user->user_id.tenant, bl);
+    map<string, string> policies;
+    if (auto it = uattrs.find(RGW_ATTR_USER_POLICY); it != uattrs.end()) {
+      bufferlist out_bl = it->second;
+      decode(policies, out_bl);
+    }
+    bufferlist in_bl;
+    policies[policy_name] = policy;
+    encode(policies, in_bl);
+    uattrs[RGW_ATTR_USER_POLICY] = in_bl;
+
+    RGWObjVersionTracker objv_tracker;
+    op_ret = rgw_store_user_info(store, info, &info, &objv_tracker, real_time(), false, &uattrs);
+    if (op_ret < 0) {
+      op_ret = -ERR_INTERNAL_ERROR;
+    }
+  } catch (rgw::IAM::PolicyParseException& e) {
+    ldout(s->cct, 20) << "failed to parse policy: " << e.what() << dendl;
+    op_ret = -ERR_MALFORMED_DOC;
+  } catch (const buffer::error& e) {
+    // A corrupt stored attr must fail this request, not escape as an
+    // unhandled exception.
+    ldout(s->cct, 0) << "ERROR: failed to decode user policies" << dendl;
+    op_ret = -EIO;
+  }
+
+  if (op_ret == 0) {
+    s->formatter->open_object_section("PutUserPolicyResponse");
+    s->formatter->open_object_section("ResponseMetadata");
+    s->formatter->dump_string("RequestId", s->trans_id);
+    s->formatter->close_section();
+    s->formatter->close_section();
+  }
+}
+
+// IAM action identifier checked by verify_permission() for this op.
+uint64_t RGWGetUserPolicy::get_op()
+{
+  const uint64_t action = rgw::IAM::iamGetUserPolicy;
+  return action;
+}
+
+// Load the "PolicyName" and "UserName" arguments into the members.
+// Returns -EINVAL when either is empty, 0 otherwise.
+int RGWGetUserPolicy::get_params()
+{
+  policy_name = s->info.args.get("PolicyName");
+  user_name = s->info.args.get("UserName");
+
+  const bool missing_arg = policy_name.empty() || user_name.empty();
+  if (missing_arg) {
+    ldout(s->cct, 20) << "ERROR: one of policy name or user name is empty"
+                      << dendl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+// Look up the inline policy `policy_name` of user `user_name` and write it
+// out as a GetUserPolicyResponse.
+//
+// All lookups and the decode happen before anything is written to the
+// formatter, so a failure never leaves half-open response sections behind.
+//
+// op_ret: -ERR_NO_SUCH_ENTITY when the user attrs, the policy attr or the
+// named policy is missing, -EIO when the stored policy map cannot be
+// decoded, -ERR_INTERNAL_ERROR for any other attr read failure.
+void RGWGetUserPolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  rgw_user user_id(user_name);
+  map<string, bufferlist> uattrs;
+  op_ret = rgw_get_user_attrs_by_uid(store, user_id, uattrs);
+  if (op_ret == -ENOENT) {
+    ldout(s->cct, 0) << "ERROR: attrs not found for user" << user_name << dendl;
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+  if (op_ret != 0) {
+    if (op_ret < 0) {
+      op_ret = -ERR_INTERNAL_ERROR;
+    }
+    return;
+  }
+
+  auto attr = uattrs.find(RGW_ATTR_USER_POLICY);
+  if (attr == uattrs.end()) {
+    ldout(s->cct, 0) << "ERROR: RGW_ATTR_USER_POLICY not found" << dendl;
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  map<string, string> policies;
+  try {
+    bufferlist bl = attr->second;
+    decode(policies, bl);
+  } catch (const buffer::error& e) {
+    // A corrupt stored attr must fail this request, not escape as an
+    // unhandled exception.
+    ldout(s->cct, 0) << "ERROR: failed to decode user policies" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+
+  auto found = policies.find(policy_name);
+  if (found == policies.end()) {
+    // Log the name that was asked for; `policy` is still empty here.
+    ldout(s->cct, 0) << "ERROR: policy not found: " << policy_name << dendl;
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+  policy = found->second;
+
+  s->formatter->open_object_section("GetUserPolicyResponse");
+  s->formatter->open_object_section("ResponseMetadata");
+  s->formatter->dump_string("RequestId", s->trans_id);
+  s->formatter->close_section();
+  s->formatter->open_object_section("GetUserPolicyResult");
+  dump(s->formatter);
+  s->formatter->close_section();
+  s->formatter->close_section();
+}
+
+// IAM action identifier checked by verify_permission() for this op.
+uint64_t RGWListUserPolicies::get_op()
+{
+  const uint64_t action = rgw::IAM::iamListUserPolicies;
+  return action;
+}
+
+// Load the "UserName" argument into user_name.
+// Returns -EINVAL when it is empty, 0 otherwise.
+int RGWListUserPolicies::get_params()
+{
+  user_name = s->info.args.get("UserName");
+
+  if (!user_name.empty()) {
+    return 0;
+  }
+
+  ldout(s->cct, 20) << "ERROR: user name is empty" << dendl;
+  return -EINVAL;
+}
+
+// Write the names of all inline policies of user `user_name` as a
+// ListUserPoliciesResponse.
+//
+// The stored policy map is decoded before anything is written to the
+// formatter, so a decode failure never leaves half-open response sections.
+//
+// op_ret: -ERR_NO_SUCH_ENTITY when the user attrs or the policy attr is
+// missing, -EIO when the stored policy map cannot be decoded,
+// -ERR_INTERNAL_ERROR for any other attr read failure.
+void RGWListUserPolicies::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  rgw_user user_id(user_name);
+  map<string, bufferlist> uattrs;
+  op_ret = rgw_get_user_attrs_by_uid(store, user_id, uattrs);
+  if (op_ret == -ENOENT) {
+    ldout(s->cct, 0) << "ERROR: attrs not found for user" << user_name << dendl;
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+  if (op_ret != 0) {
+    if (op_ret < 0) {
+      op_ret = -ERR_INTERNAL_ERROR;
+    }
+    return;
+  }
+
+  auto attr = uattrs.find(RGW_ATTR_USER_POLICY);
+  if (attr == uattrs.end()) {
+    ldout(s->cct, 0) << "ERROR: RGW_ATTR_USER_POLICY not found" << dendl;
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  map<string, string> policies;
+  try {
+    bufferlist bl = attr->second;
+    decode(policies, bl);
+  } catch (const buffer::error& e) {
+    // A corrupt stored attr must fail this request, not escape as an
+    // unhandled exception.
+    ldout(s->cct, 0) << "ERROR: failed to decode user policies" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+
+  s->formatter->open_object_section("ListUserPoliciesResponse");
+  s->formatter->open_object_section("ResponseMetadata");
+  s->formatter->dump_string("RequestId", s->trans_id);
+  s->formatter->close_section();
+  s->formatter->open_object_section("ListUserPoliciesResult");
+  for (const auto& p : policies) {
+    s->formatter->open_object_section("PolicyNames");
+    s->formatter->dump_string("member", p.first);
+    s->formatter->close_section();
+  }
+  s->formatter->close_section();
+  s->formatter->close_section();
+}
+
+// IAM action identifier checked by verify_permission() for this op.
+uint64_t RGWDeleteUserPolicy::get_op()
+{
+  const uint64_t action = rgw::IAM::iamDeleteUserPolicy;
+  return action;
+}
+
+// Load the "PolicyName" and "UserName" arguments into the members.
+// Returns -EINVAL when either is empty, 0 otherwise.
+int RGWDeleteUserPolicy::get_params()
+{
+  policy_name = s->info.args.get("PolicyName");
+  user_name = s->info.args.get("UserName");
+
+  const bool missing_arg = policy_name.empty() || user_name.empty();
+  if (missing_arg) {
+    ldout(s->cct, 20) << "ERROR: One of policy name or user name is empty"<< dendl;
+    return -EINVAL;
+  }
+  return 0;
+}
+
+// Remove the inline policy `policy_name` from user `user_name`.
+//
+// The user's policy map is decoded from the RGW_ATTR_USER_POLICY attr, the
+// entry is erased and the map is stored back with the user info. On success
+// a response document is written to the formatter.
+//
+// op_ret: -ERR_NO_SUCH_ENTITY when the user, its attrs, the policy attr or
+// the named policy is missing, -EIO when the stored policy map cannot be
+// decoded, -ERR_INTERNAL_ERROR when storing fails.
+void RGWDeleteUserPolicy::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  RGWUserInfo info;
+  rgw_user user_id(user_name);
+  op_ret = rgw_get_user_info_by_uid(store, user_id, info);
+  if (op_ret < 0) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  map<string, bufferlist> uattrs;
+  op_ret = rgw_get_user_attrs_by_uid(store, user_id, uattrs);
+  if (op_ret == -ENOENT) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  auto attr = uattrs.find(RGW_ATTR_USER_POLICY);
+  if (attr == uattrs.end()) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  map<string, string> policies;
+  try {
+    bufferlist out_bl = attr->second;
+    decode(policies, out_bl);
+  } catch (const buffer::error& e) {
+    // A corrupt stored attr must fail this request, not escape as an
+    // unhandled exception.
+    ldout(s->cct, 0) << "ERROR: failed to decode user policies" << dendl;
+    op_ret = -EIO;
+    return;
+  }
+
+  auto p = policies.find(policy_name);
+  if (p == policies.end()) {
+    op_ret = -ERR_NO_SUCH_ENTITY;
+    return;
+  }
+
+  bufferlist in_bl;
+  policies.erase(p);
+  encode(policies, in_bl);
+  uattrs[RGW_ATTR_USER_POLICY] = in_bl;
+
+  RGWObjVersionTracker objv_tracker;
+  op_ret = rgw_store_user_info(store, info, &info, &objv_tracker, real_time(), false, &uattrs);
+  if (op_ret < 0) {
+    op_ret = -ERR_INTERNAL_ERROR;
+  }
+  if (op_ret == 0) {
+    // NOTE(review): the section is named "DeleteUserPoliciesResponse" while
+    // the sibling ops use "<Action>Response" - confirm the plural is wanted.
+    s->formatter->open_object_section("DeleteUserPoliciesResponse");
+    s->formatter->open_object_section("ResponseMetadata");
+    s->formatter->dump_string("RequestId", s->trans_id);
+    s->formatter->close_section();
+    s->formatter->close_section();
+  }
+}
diff --git a/src/rgw/rgw_rest_user_policy.h b/src/rgw/rgw_rest_user_policy.h
new file mode 100644
index 00000000..895f4e61
--- /dev/null
+++ b/src/rgw/rgw_rest_user_policy.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_REST_USER_POLICY_H
+#define CEPH_RGW_REST_USER_POLICY_H
+
+// Common base class of the user-policy REST operations declared in this
+// header. It holds the request parameters shared by those operations and
+// declares the hooks they have in common.
+class RGWRestUserPolicy : public RGWRESTOp {
+protected:
+ // Upper bound for a policy name; presumably enforced by validate_input()
+ // (defined outside this header) -- TODO confirm.
+ static constexpr int MAX_POLICY_NAME_LEN = 128;
+ string policy_name;
+ string user_name;
+ string policy;
+
+ bool validate_input();
+
+public:
+ int verify_permission() override;
+ // Each concrete operation supplies its own operation identifier.
+ virtual uint64_t get_op() = 0;
+ void send_response() override;
+ void dump(Formatter *f) const;
+};
+
+// Intermediate base for the user-policy operations that derive from it in
+// this header (RGWGetUserPolicy, RGWListUserPolicies); it overrides
+// check_caps() for them.
+class RGWUserPolicyRead : public RGWRestUserPolicy {
+public:
+ RGWUserPolicyRead() = default;
+ int check_caps(RGWUserCaps& caps) override;
+};
+
+// Intermediate base for the user-policy operations that derive from it in
+// this header (RGWPutUserPolicy, RGWDeleteUserPolicy); it overrides
+// check_caps() for them.
+class RGWUserPolicyWrite : public RGWRestUserPolicy {
+public:
+ RGWUserPolicyWrite() = default;
+ int check_caps(RGWUserCaps& caps) override;
+};
+
+// REST operation of type RGW_OP_PUT_USER_POLICY.
+class RGWPutUserPolicy : public RGWUserPolicyWrite {
+public:
+  RGWPutUserPolicy() = default;
+  void execute() override;
+  int get_params();
+  // Operation name; spelled with underscores like the sibling operations
+  // ("get_user_policy", "list_user_policies", "delete_user_policy").
+  const char* name() const override { return "put_user_policy"; }
+  uint64_t get_op() override;
+  RGWOpType get_type() override { return RGW_OP_PUT_USER_POLICY; }
+};
+
+// REST operation of type RGW_OP_GET_USER_POLICY, named "get_user_policy".
+class RGWGetUserPolicy : public RGWUserPolicyRead {
+public:
+ RGWGetUserPolicy() = default;
+ void execute() override;
+ int get_params();
+ const char* name() const override { return "get_user_policy"; }
+ uint64_t get_op() override;
+ RGWOpType get_type() override { return RGW_OP_GET_USER_POLICY; }
+};
+
+// REST operation of type RGW_OP_LIST_USER_POLICIES, named
+// "list_user_policies".
+class RGWListUserPolicies : public RGWUserPolicyRead {
+public:
+ RGWListUserPolicies() = default;
+ void execute() override;
+ int get_params();
+ const char* name() const override { return "list_user_policies"; }
+ uint64_t get_op() override;
+ RGWOpType get_type() override { return RGW_OP_LIST_USER_POLICIES; }
+};
+
+// REST operation of type RGW_OP_DELETE_USER_POLICY, named
+// "delete_user_policy".
+class RGWDeleteUserPolicy : public RGWUserPolicyWrite {
+public:
+ RGWDeleteUserPolicy() = default;
+ void execute() override;
+ int get_params();
+ const char* name() const override { return "delete_user_policy"; }
+ uint64_t get_op() override;
+ RGWOpType get_type() override { return RGW_OP_DELETE_USER_POLICY; }
+};
+
+#endif /* CEPH_RGW_REST_USER_POLICY_H */
+
diff --git a/src/rgw/rgw_role.cc b/src/rgw/rgw_role.cc
new file mode 100644
index 00000000..6e6b137a
--- /dev/null
+++ b/src/rgw/rgw_role.cc
@@ -0,0 +1,502 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <ctime>
+#include <regex>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "common/ceph_time.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+
+#include "include/types.h"
+#include "rgw_string.h"
+
+#include "rgw_common.h"
+#include "rgw_tools.h"
+#include "rgw_role.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+// Object-id prefixes used to build the oids of the role objects kept in the
+// roles pool, plus the fixed prefix of a role ARN.
+//   name object: tenant + role_name_oid_prefix + name
+//   info object: role_oid_prefix + id
+//   path object: tenant + role_path_oid_prefix + path + role_oid_prefix + id
+const string RGWRole::role_name_oid_prefix = "role_names.";
+const string RGWRole::role_oid_prefix = "roles.";
+const string RGWRole::role_path_oid_prefix = "role_paths.";
+const string RGWRole::role_arn_prefix = "arn:aws:iam::";
+
+// Encodes this role and writes it to the info object
+// (info oid prefix + id) in the roles pool.
+// `exclusive` is forwarded to rgw_put_system_obj(); its result is returned.
+int RGWRole::store_info(bool exclusive)
+{
+  using ceph::encode;
+
+  bufferlist encoded_role;
+  encode(*this, encoded_role);
+
+  string info_oid = get_info_oid_prefix() + id;
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  return rgw_put_system_obj(store, roles_pool, info_oid, encoded_role,
+                            exclusive, nullptr, real_time(), nullptr);
+}
+
+// Writes the name -> id mapping object
+// (tenant + names oid prefix + name) to the roles pool.
+// `exclusive` is forwarded to rgw_put_system_obj(); its result is returned.
+int RGWRole::store_name(bool exclusive)
+{
+  using ceph::encode;
+
+  RGWNameToId name_mapping;
+  name_mapping.obj_id = id;
+
+  bufferlist encoded_mapping;
+  encode(name_mapping, encoded_mapping);
+
+  string name_oid = tenant + get_names_oid_prefix() + name;
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  return rgw_put_system_obj(store, roles_pool, name_oid, encoded_mapping,
+                            exclusive, nullptr, real_time(), nullptr);
+}
+
+// Writes the (empty) path object
+// (tenant + path oid prefix + path + info oid prefix + id) to the roles pool.
+// `exclusive` is forwarded to rgw_put_system_obj(); its result is returned.
+int RGWRole::store_path(bool exclusive)
+{
+  // The object carries no payload; only its name matters.
+  bufferlist empty_payload;
+
+  string path_oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id;
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  return rgw_put_system_obj(store, roles_pool, path_oid, empty_payload,
+                            exclusive, nullptr, real_time(), nullptr);
+}
+
+// Creates the role: validates the input, generates a fresh id, fills in the
+// ARN and creation date, then stores the info, name and path objects.
+//
+// exclusive: when true, an already existing role name yields -EEXIST; the
+//            flag is also forwarded to the three store_*() calls.
+//
+// Returns 0 on success, -EINVAL when validate_input() rejects the role,
+// -EEXIST as described above, or the negative error of the failing
+// read/store step. When storing the name or path fails, the objects written
+// by the earlier steps are deleted again (best effort, failures are logged).
+int RGWRole::create(bool exclusive)
+{
+  int ret;
+
+  if (! validate_input()) {
+    return -EINVAL;
+  }
+
+  /* check to see the name is not used */
+  ret = read_id(name, tenant, id);
+  if (exclusive && ret == 0) {
+    ldout(cct, 0) << "ERROR: name " << name << " already in use for role id "
+                  << id << dendl;
+    return -EEXIST;
+  } else if ( ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << "failed reading role id " << id << ": "
+                  << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  /* create unique id */
+  uuid_d new_uuid;
+  char uuid_str[37];
+  new_uuid.generate_random();
+  new_uuid.print(uuid_str);
+  id = uuid_str;
+
+  //arn
+  arn = role_arn_prefix + tenant + ":role" + path + name;
+
+  // Creation time
+  real_clock::time_point t = real_clock::now();
+
+  struct timeval tv;
+  real_clock::to_timeval(t, tv);
+
+  char buf[30];
+  struct tm result;
+  gmtime_r(&tv.tv_sec, &result);
+  const size_t date_len = strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", &result);
+  // Milliseconds are a fraction of a second and must be zero padded to three
+  // digits: 5 ms is ".005", not ".5". snprintf() keeps the write inside buf.
+  snprintf(buf + date_len, sizeof(buf) - date_len, ".%03dZ", (int)tv.tv_usec/1000);
+  creation_date.assign(buf, strlen(buf));
+
+  auto& pool = store->svc.zone->get_zone_params().roles_pool;
+  ret = store_info(exclusive);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: storing role info in pool: " << pool.name << ": "
+                  << id << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = store_name(exclusive);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: storing role name in pool: " << pool.name << ": "
+                  << name << ": " << cpp_strerror(-ret) << dendl;
+
+    //Delete the role info that was stored in the previous call
+    string oid = get_info_oid_prefix() + id;
+    int info_ret = rgw_delete_system_obj(store, pool, oid, NULL);
+    if (info_ret < 0) {
+      ldout(cct, 0) << "ERROR: cleanup of role id from pool: " << pool.name << ": "
+                    << id << ": " << cpp_strerror(-info_ret) << dendl;
+    }
+    return ret;
+  }
+
+  ret = store_path(exclusive);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: storing role path in pool: " << pool.name << ": "
+                  << path << ": " << cpp_strerror(-ret) << dendl;
+    //Delete the role info that was stored in the previous call
+    string oid = get_info_oid_prefix() + id;
+    int info_ret = rgw_delete_system_obj(store, pool, oid, NULL);
+    if (info_ret < 0) {
+      ldout(cct, 0) << "ERROR: cleanup of role id from pool: " << pool.name << ": "
+                    << id << ": " << cpp_strerror(-info_ret) << dendl;
+    }
+    //Delete role name that was stored in previous call
+    oid = tenant + get_names_oid_prefix() + name;
+    int name_ret = rgw_delete_system_obj(store, pool, oid, NULL);
+    if (name_ret < 0) {
+      ldout(cct, 0) << "ERROR: cleanup of role name from pool: " << pool.name << ": "
+                    << name << ": " << cpp_strerror(-name_ret) << dendl;
+    }
+    return ret;
+  }
+  return 0;
+}
+
+// Deletes the role's info, name and path objects from the roles pool.
+//
+// The role is first resolved by name (read_name() + read_info()); a role
+// that still has permission policies attached is refused with
+// -ERR_DELETE_CONFLICT. All three deletions are always attempted and each
+// failure is logged.
+//
+// Returns 0 when everything was deleted, otherwise the error of the first
+// step that failed, so a failed id or name deletion is no longer masked by
+// a later successful one.
+int RGWRole::delete_obj()
+{
+  auto& pool = store->svc.zone->get_zone_params().roles_pool;
+
+  int ret = read_name();
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = read_info();
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (! perm_policy_map.empty()) {
+    return -ERR_DELETE_CONFLICT;
+  }
+
+  int first_error = 0;
+
+  // Delete id
+  string oid = get_info_oid_prefix() + id;
+  ret = rgw_delete_system_obj(store, pool, oid, NULL);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: deleting role id from pool: " << pool.name << ": "
+                  << id << ": " << cpp_strerror(-ret) << dendl;
+    if (first_error == 0) {
+      first_error = ret;
+    }
+  }
+
+  // Delete name
+  oid = tenant + get_names_oid_prefix() + name;
+  ret = rgw_delete_system_obj(store, pool, oid, NULL);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: deleting role name from pool: " << pool.name << ": "
+                  << name << ": " << cpp_strerror(-ret) << dendl;
+    if (first_error == 0) {
+      first_error = ret;
+    }
+  }
+
+  // Delete path
+  oid = tenant + get_path_oid_prefix() + path + get_info_oid_prefix() + id;
+  ret = rgw_delete_system_obj(store, pool, oid, NULL);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: deleting role path from pool: " << pool.name << ": "
+                  << path << ": " << cpp_strerror(-ret) << dendl;
+    if (first_error == 0) {
+      first_error = ret;
+    }
+  }
+  return first_error;
+}
+
+// Loads the role by name: resolves the id via read_name(), then loads the
+// role data via read_info(). Returns 0 on success or the negative error of
+// the step that failed.
+int RGWRole::get()
+{
+  int ret = read_name();
+  if (ret >= 0) {
+    ret = read_info();
+  }
+  return ret < 0 ? ret : 0;
+}
+
+// Loads the role data for the id already set on this object.
+// Returns 0 on success or the negative error from read_info().
+int RGWRole::get_by_id()
+{
+  const int ret = read_info();
+  return ret < 0 ? ret : 0;
+}
+
+// Rewrites the role's info object (non-exclusively).
+// Returns 0 on success or the negative error from store_info(), which is
+// also logged.
+int RGWRole::update()
+{
+  const int ret = store_info(false);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  ldout(cct, 0) << "ERROR: storing info in pool: " << roles_pool.name << ": "
+                << id << ": " << cpp_strerror(-ret) << dendl;
+  return ret;
+}
+
+// Adds the permission policy `perm_policy` under `policy_name`, replacing
+// any policy already stored under that name. Only the in-memory map is
+// changed here.
+void RGWRole::set_perm_policy(const string& policy_name, const string& perm_policy)
+{
+ perm_policy_map[policy_name] = perm_policy;
+}
+
+// Returns the names of all permission policies attached to this role, in
+// the iteration order of perm_policy_map.
+vector<string> RGWRole::get_role_policy_names()
+{
+  vector<string> policy_names;
+  policy_names.reserve(perm_policy_map.size());
+  for (const auto& entry : perm_policy_map) {
+    // Map keys are const, so they can only be copied; the former std::move()
+    // here was a disguised copy.
+    policy_names.push_back(entry.first);
+  }
+
+  return policy_names;
+}
+
+// Copies the permission policy stored under `policy_name` into
+// `perm_policy`. Returns 0 when found; otherwise logs and returns -ENOENT,
+// leaving `perm_policy` untouched.
+int RGWRole::get_role_policy(const string& policy_name, string& perm_policy)
+{
+  const auto found = perm_policy_map.find(policy_name);
+  if (found != perm_policy_map.end()) {
+    perm_policy = found->second;
+    return 0;
+  }
+
+  ldout(cct, 0) << "ERROR: Policy name: " << policy_name << " not found" << dendl;
+  return -ENOENT;
+}
+
+// Removes the permission policy stored under `policy_name` from the
+// in-memory map. Returns 0 when it was removed; otherwise logs and returns
+// -ENOENT.
+int RGWRole::delete_policy(const string& policy_name)
+{
+  auto found = perm_policy_map.find(policy_name);
+  if (found != perm_policy_map.end()) {
+    perm_policy_map.erase(found);
+    return 0;
+  }
+
+  ldout(cct, 0) << "ERROR: Policy name: " << policy_name << " not found" << dendl;
+  return -ENOENT;
+}
+
+// Writes the role's fields to the formatter `f`. The permission policies
+// and the tenant are not part of the output.
+// NOTE(review): the key names used here ("RoleId", "RoleName", ...) differ
+// from the ones decode_json() reads ("id", "name", ...) -- confirm that the
+// two are not expected to round-trip.
+void RGWRole::dump(Formatter *f) const
+{
+ encode_json("RoleId", id , f);
+ encode_json("RoleName", name , f);
+ encode_json("Path", path, f);
+ encode_json("Arn", arn, f);
+ encode_json("CreateDate", creation_date, f);
+ encode_json("MaxSessionDuration", max_session_duration, f);
+ encode_json("AssumeRolePolicyDocument", trust_policy, f);
+}
+
+// Fills the role's fields from the JSON object `obj`, reading the keys
+// "id", "name", "path", "arn", "create_date", "max_session_duration" and
+// "assume_role_policy_document".
+void RGWRole::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("path", path, obj);
+ JSONDecoder::decode_json("arn", arn, obj);
+ JSONDecoder::decode_json("create_date", creation_date, obj);
+ JSONDecoder::decode_json("max_session_duration", max_session_duration, obj);
+ JSONDecoder::decode_json("assume_role_policy_document", trust_policy, obj);
+}
+
+// Looks up the id recorded for role `role_name` of `tenant` and stores it in
+// `role_id`.
+// Returns 0 on success, the negative error from rgw_get_system_obj() when
+// the name object cannot be read (not logged here), or -EIO when its
+// content cannot be decoded.
+int RGWRole::read_id(const string& role_name, const string& tenant, string& role_id)
+{
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  string name_oid = tenant + get_names_oid_prefix() + role_name;
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  bufferlist raw;
+  const int r = rgw_get_system_obj(store, sysobj_ctx, roles_pool, name_oid, raw, nullptr, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  RGWNameToId name_mapping;
+  try {
+    using ceph::decode;
+    auto pos = raw.cbegin();
+    decode(name_mapping, pos);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode role from pool: " << roles_pool.name << ": "
+                  << role_name << dendl;
+    return -EIO;
+  }
+
+  role_id = name_mapping.obj_id;
+  return 0;
+}
+
+// Reads the info object for the current id (info oid prefix + id) and
+// decodes it into this object.
+// Returns 0 on success, the negative error from rgw_get_system_obj() when
+// the object cannot be read, or -EIO when it cannot be decoded; both
+// failures are logged.
+int RGWRole::read_info()
+{
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  string info_oid = get_info_oid_prefix() + id;
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  bufferlist raw;
+  const int r = rgw_get_system_obj(store, sysobj_ctx, roles_pool, info_oid, raw, nullptr, nullptr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed reading role info from pool: " << roles_pool.name <<
+                  ": " << id << ": " << cpp_strerror(-r) << dendl;
+    return r;
+  }
+
+  try {
+    using ceph::decode;
+    auto pos = raw.cbegin();
+    decode(*this, pos);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode role info from pool: " << roles_pool.name <<
+                  ": " << id << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Reads the name object of this role (tenant + names oid prefix + name) and
+// sets `id` from it.
+// Returns 0 on success, the negative error from rgw_get_system_obj() when
+// the object cannot be read, or -EIO when it cannot be decoded; both
+// failures are logged.
+int RGWRole::read_name()
+{
+  auto& roles_pool = store->svc.zone->get_zone_params().roles_pool;
+  string name_oid = tenant + get_names_oid_prefix() + name;
+  auto sysobj_ctx = store->svc.sysobj->init_obj_ctx();
+
+  bufferlist raw;
+  const int r = rgw_get_system_obj(store, sysobj_ctx, roles_pool, name_oid, raw, nullptr, nullptr);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed reading role name from pool: " << roles_pool.name << ": "
+                  << name << ": " << cpp_strerror(-r) << dendl;
+    return r;
+  }
+
+  RGWNameToId name_mapping;
+  try {
+    using ceph::decode;
+    auto pos = raw.cbegin();
+    decode(name_mapping, pos);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode role name from pool: " << roles_pool.name << ": "
+                  << name << dendl;
+    return -EIO;
+  }
+
+  id = name_mapping.obj_id;
+  return 0;
+}
+
+// Checks the role's name, path and maximum session duration.
+// The checks run in this order and the first failing one is logged:
+//   1. name length  <= MAX_ROLE_NAME_LEN
+//   2. path length  <= MAX_PATH_NAME_LEN
+//   3. name consists only of [A-Za-z0-9:=,.@-] (at least one character)
+//   4. path is "/" or "/<printable ASCII>/"
+//   5. SESSION_DURATION_MIN <= max_session_duration <= SESSION_DURATION_MAX
+// Returns true when every check passes.
+bool RGWRole::validate_input()
+{
+  const bool name_too_long = name.length() > MAX_ROLE_NAME_LEN;
+  if (name_too_long) {
+    ldout(cct, 0) << "ERROR: Invalid name length " << dendl;
+    return false;
+  }
+
+  const bool path_too_long = path.length() > MAX_PATH_NAME_LEN;
+  if (path_too_long) {
+    ldout(cct, 0) << "ERROR: Invalid path length " << dendl;
+    return false;
+  }
+
+  std::regex allowed_name("[A-Za-z0-9:=,.@-]+");
+  const bool name_ok = std::regex_match(name, allowed_name);
+  if (!name_ok) {
+    ldout(cct, 0) << "ERROR: Invalid chars in name " << dendl;
+    return false;
+  }
+
+  std::regex allowed_path("(/[!-~]+/)|(/)");
+  const bool path_ok = std::regex_match(path, allowed_path);
+  if (!path_ok) {
+    ldout(cct, 0) << "ERROR: Invalid chars in path " << dendl;
+    return false;
+  }
+
+  const bool duration_ok = !(max_session_duration < SESSION_DURATION_MIN) &&
+                           !(max_session_duration > SESSION_DURATION_MAX);
+  if (!duration_ok) {
+    ldout(cct, 0) << "ERROR: Invalid session duration, should be between 3600 and 43200 seconds " << dendl;
+    return false;
+  }
+
+  return true;
+}
+
+// Splits a "tenant$name" string at the first '$' into `tenant` and `name`.
+// Without a '$' both members are left unchanged.
+void RGWRole::extract_name_tenant(const std::string& str)
+{
+  const size_t sep = str.find('$');
+  if (sep == std::string::npos) {
+    return;
+  }
+
+  // `str` may alias `name` (the constructors pass this->name), so the tenant
+  // has to be taken before `name` is overwritten.
+  tenant = str.substr(0, sep);
+  name = str.substr(sep + 1);
+}
+
+// Replaces the role's trust policy with `trust_policy`. Only the in-memory
+// member is changed here. The argument is copied and not modified.
+void RGWRole::update_trust_policy(string& trust_policy)
+{
+ this->trust_policy = trust_policy;
+}
+
+// Collects into `roles` every role of `tenant` whose path starts with
+// `path_prefix`; an empty `path_prefix` selects all roles of the tenant.
+//
+// Path objects are named
+//   tenant + role_path_oid_prefix + path + role_oid_prefix + id
+// so they are listed by oid prefix and each oid is split back into path and
+// id.
+//
+// Returns 0 on success, or the negative error of the failing listing or
+// read_info() call. Roles appended before a failure remain in `roles`.
+int RGWRole::get_roles_by_path_prefix(RGWRados *store,
+                                      CephContext *cct,
+                                      const string& path_prefix,
+                                      const string& tenant,
+                                      vector<RGWRole>& roles)
+{
+  const auto& pool = store->svc.zone->get_zone_params().roles_pool;
+
+  // Part of every path oid that precedes the role path itself.
+  const string oid_head = tenant + role_path_oid_prefix;
+  // List all roles if path prefix is empty
+  const string prefix = oid_head + path_prefix;
+
+  //Get the filtered objects
+  list<string> result;
+  bool is_truncated = false;
+  RGWListRawObjsCtx ctx;
+  do {
+    list<string> oids;
+    int r = store->list_raw_objects(pool, prefix, 1000, ctx, oids, &is_truncated);
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: listing filtered objects failed: " << pool.name << ": "
+                    << prefix << ": " << cpp_strerror(-r) << dendl;
+      return r;
+    }
+    for (const auto& oid : oids) {
+      // Strip the tenant as well as the path-oid prefix so that what remains
+      // is exactly path + role_oid_prefix + id.
+      if (oid.compare(0, oid_head.size(), oid_head) != 0) {
+        continue;
+      }
+      result.push_back(oid.substr(oid_head.size()));
+    }
+  } while (is_truncated);
+
+  for (const auto& it : result) {
+    //Find the role oid prefix from the end
+    size_t pos = it.rfind(role_oid_prefix);
+    if (pos == string::npos) {
+      continue;
+    }
+    // Split the result into path and info_oid + id
+    string path = it.substr(0, pos);
+
+    /* Make sure that the prefix is part of the path itself: the listing
+       matches on the whole oid, so a prefix could also have matched into the
+       role info oid + id that is appended to the path. */
+    if (path_prefix.empty() ||
+        path.compare(0, path_prefix.size(), path_prefix) == 0) {
+      //Get id from info oid prefix + id
+      string id = it.substr(pos + role_oid_prefix.length());
+
+      RGWRole role(cct, store);
+      role.set_id(id);
+      int ret = role.read_info();
+      if (ret < 0) {
+        return ret;
+      }
+      roles.push_back(std::move(role));
+    }
+  }
+
+  return 0;
+}
+
+// Returns the oid prefix of role name objects (role_name_oid_prefix).
+const string& RGWRole::get_names_oid_prefix()
+{
+ return role_name_oid_prefix;
+}
+
+// Returns the oid prefix of role info objects (role_oid_prefix).
+const string& RGWRole::get_info_oid_prefix()
+{
+ return role_oid_prefix;
+}
+
+// Returns the oid prefix of role path objects (role_path_oid_prefix).
+const string& RGWRole::get_path_oid_prefix()
+{
+ return role_path_oid_prefix;
+}
diff --git a/src/rgw/rgw_role.h b/src/rgw/rgw_role.h
new file mode 100644
index 00000000..90976099
--- /dev/null
+++ b/src/rgw/rgw_role.h
@@ -0,0 +1,161 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ROLE_H
+#define CEPH_RGW_ROLE_H
+
+#include <string>
+
+#include "common/ceph_context.h"
+
+#include "rgw/rgw_rados.h"
+
+// An IAM-style role: identity (id, name, path, ARN, tenant), its trust
+// policy, the attached permission policies and the maximum session duration.
+// The member functions load and store the role through `store`.
+class RGWRole
+{
+  using string = std::string;
+  static const string role_name_oid_prefix;
+  static const string role_oid_prefix;
+  static const string role_path_oid_prefix;
+  static const string role_arn_prefix;
+  static constexpr int MAX_ROLE_NAME_LEN = 64;
+  static constexpr int MAX_PATH_NAME_LEN = 512;
+  static constexpr uint64_t SESSION_DURATION_MIN = 3600; // in seconds
+  static constexpr uint64_t SESSION_DURATION_MAX = 43200; // in seconds
+
+  // Default member initializers: not every constructor sets all members.
+  CephContext *cct{nullptr};
+  RGWRados *store{nullptr};
+  string id;
+  string name;
+  string path;
+  string arn;
+  string creation_date;
+  string trust_policy;
+  map<string, string> perm_policy_map;
+  string tenant;
+  // Also the value kept when decoding an object older than encoding
+  // version 3, which carries no session duration.
+  uint64_t max_session_duration{SESSION_DURATION_MIN};
+
+  int store_info(bool exclusive);
+  int store_name(bool exclusive);
+  int store_path(bool exclusive);
+  int read_id(const string& role_name, const string& tenant, string& role_id);
+  int read_name();
+  int read_info();
+  void set_id(const string& id) { this->id = id; }
+  bool validate_input();
+  void extract_name_tenant(const std::string& str);
+
+public:
+  // Full constructor, used when creating a role. An empty path becomes "/",
+  // a "tenant$name" style name is split into tenant and name, and an empty
+  // max_session_duration_str selects SESSION_DURATION_MIN.
+  // NOTE(review): std::stoull() throws std::invalid_argument or
+  // std::out_of_range on malformed input -- confirm callers validate the
+  // string or handle the exception.
+  RGWRole(CephContext *cct,
+          RGWRados *store,
+          string name,
+          string path,
+          string trust_policy,
+          string tenant,
+          string max_session_duration_str="")
+  : cct(cct),
+    store(store),
+    name(std::move(name)),
+    path(std::move(path)),
+    trust_policy(std::move(trust_policy)),
+    tenant(std::move(tenant)) {
+    if (this->path.empty())
+      this->path = "/";
+    extract_name_tenant(this->name);
+    if (max_session_duration_str.empty()) {
+      max_session_duration = SESSION_DURATION_MIN;
+    } else {
+      max_session_duration = std::stoull(max_session_duration_str);
+    }
+  }
+
+  // Identifies a role by name and tenant; a "tenant$name" style name is
+  // split into tenant and name.
+  RGWRole(CephContext *cct,
+          RGWRados *store,
+          string name,
+          string tenant)
+  : cct(cct),
+    store(store),
+    name(std::move(name)),
+    tenant(std::move(tenant)) {
+    extract_name_tenant(this->name);
+  }
+
+  // Identifies a role by id.
+  RGWRole(CephContext *cct,
+          RGWRados *store,
+          string id)
+  : cct(cct),
+    store(store),
+    id(std::move(id)) {}
+
+  RGWRole(CephContext *cct,
+          RGWRados *store)
+  : cct(cct),
+    store(store) {}
+
+  RGWRole() {}
+
+  ~RGWRole() = default;
+
+  // Encoding version 3, compatible back to version 1.
+  void encode(bufferlist& bl) const {
+    ENCODE_START(3, 1, bl);
+    encode(id, bl);
+    encode(name, bl);
+    encode(path, bl);
+    encode(arn, bl);
+    encode(creation_date, bl);
+    encode(trust_policy, bl);
+    encode(perm_policy_map, bl);
+    encode(tenant, bl);
+    encode(max_session_duration, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  // Decodes up to version 3, matching encode(): `tenant` exists from
+  // version 2 on, `max_session_duration` from version 3 on.
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(3, bl);
+    decode(id, bl);
+    decode(name, bl);
+    decode(path, bl);
+    decode(arn, bl);
+    decode(creation_date, bl);
+    decode(trust_policy, bl);
+    decode(perm_policy_map, bl);
+    if (struct_v >= 2) {
+      decode(tenant, bl);
+    }
+    if (struct_v >= 3) {
+      decode(max_session_duration, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+
+  const string& get_id() const { return id; }
+  const string& get_name() const { return name; }
+  const string& get_path() const { return path; }
+  const string& get_create_date() const { return creation_date; }
+  const string& get_assume_role_policy() const { return trust_policy;}
+  const uint64_t& get_max_session_duration() const { return max_session_duration; }
+
+  int create(bool exclusive);
+  int delete_obj();
+  int get();
+  int get_by_id();
+  int update();
+  void update_trust_policy(string& trust_policy);
+  void set_perm_policy(const string& policy_name, const string& perm_policy);
+  vector<string> get_role_policy_names();
+  int get_role_policy(const string& policy_name, string& perm_policy);
+  int delete_policy(const string& policy_name);
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+
+  static const string& get_names_oid_prefix();
+  static const string& get_info_oid_prefix();
+  static const string& get_path_oid_prefix();
+  static int get_roles_by_path_prefix(RGWRados *store,
+                                      CephContext *cct,
+                                      const string& path_prefix,
+                                      const string& tenant,
+                                      vector<RGWRole>& roles);
+};
+WRITE_CLASS_ENCODER(RGWRole)
+#endif /* CEPH_RGW_ROLE_H */
+
diff --git a/src/rgw/rgw_service.cc b/src/rgw/rgw_service.cc
new file mode 100644
index 00000000..0369806c
--- /dev/null
+++ b/src/rgw/rgw_service.cc
@@ -0,0 +1,191 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_service.h"
+
+#include "services/svc_finisher.h"
+#include "services/svc_notify.h"
+#include "services/svc_rados.h"
+#include "services/svc_zone.h"
+#include "services/svc_zone_utils.h"
+#include "services/svc_quota.h"
+#include "services/svc_sync_modules.h"
+#include "services/svc_sys_obj.h"
+#include "services/svc_sys_obj_cache.h"
+#include "services/svc_sys_obj_core.h"
+
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+// Constructor and destructor are defined here rather than in the header,
+// which only forward-declares the service classes held in the unique_ptr
+// members.
+RGWServices_Def::RGWServices_Def() = default;
+// Destruction shuts the services down; shutdown() itself is a no-op when
+// init() never got far enough or shutdown() already ran.
+RGWServices_Def::~RGWServices_Def()
+{
+ shutdown();
+}
+
+// Creates all service instances, wires them to one another via their
+// init() calls and then starts them in a fixed order.
+//
+// have_cache: also create the sysobj cache service and back `sysobj` with it
+//             instead of `sysobj_core`.
+// raw:        when true, the notify and zone services are not started here.
+//
+// Returns 0 on success, or the negative error of the first service that
+// failed to start; the failure is logged and the remaining services are not
+// started.
+int RGWServices_Def::init(CephContext *cct,
+                          bool have_cache,
+                          bool raw)
+{
+  finisher = std::make_unique<RGWSI_Finisher>(cct);
+  notify = std::make_unique<RGWSI_Notify>(cct);
+  rados = std::make_unique<RGWSI_RADOS>(cct);
+  zone = std::make_unique<RGWSI_Zone>(cct);
+  zone_utils = std::make_unique<RGWSI_ZoneUtils>(cct);
+  quota = std::make_unique<RGWSI_Quota>(cct);
+  sync_modules = std::make_unique<RGWSI_SyncModules>(cct);
+  sysobj = std::make_unique<RGWSI_SysObj>(cct);
+  sysobj_core = std::make_unique<RGWSI_SysObj_Core>(cct);
+
+  if (have_cache) {
+    sysobj_cache = std::make_unique<RGWSI_SysObj_Cache>(cct);
+  }
+  finisher->init();
+  notify->init(zone.get(), rados.get(), finisher.get());
+  rados->init();
+  zone->init(sysobj.get(), rados.get(), sync_modules.get());
+  zone_utils->init(rados.get(), zone.get());
+  quota->init(zone.get());
+  sync_modules->init();
+  sysobj_core->core_init(rados.get(), zone.get());
+  if (have_cache) {
+    sysobj_cache->init(rados.get(), zone.get(), notify.get());
+    sysobj->init(rados.get(), sysobj_cache.get());
+  } else {
+    sysobj->init(rados.get(), sysobj_core.get());
+  }
+
+  // Every service object exists from here on, so shutdown() may run.
+  can_shutdown = true;
+
+  int r = finisher->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start finisher service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  if (!raw) {
+    r = notify->start();
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: failed to start notify service (" << cpp_strerror(-r) << ")" << dendl;
+      return r;
+    }
+  }
+
+  r = rados->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start rados service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  if (!raw) {
+    r = zone->start();
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: failed to start zone service (" << cpp_strerror(-r) << ")" << dendl;
+      return r;
+    }
+  }
+
+  r = zone_utils->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start zone_utils service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = quota->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start quota service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  r = sysobj_core->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start sysobj_core service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  if (have_cache) {
+    r = sysobj_cache->start();
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: failed to start sysobj_cache service (" << cpp_strerror(-r) << ")" << dendl;
+      return r;
+    }
+  }
+
+  r = sysobj->start();
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to start sysobj service (" << cpp_strerror(-r) << ")" << dendl;
+    return r;
+  }
+
+  /* cache or core services will be started by sysobj */
+
+  return 0;
+}
+
+// Shuts the services down in a fixed order. Does nothing unless init() got
+// far enough to set can_shutdown, and does nothing on a repeated call.
+void RGWServices_Def::shutdown()
+{
+  const bool nothing_to_do = !can_shutdown || has_shutdown;
+  if (nothing_to_do) {
+    return;
+  }
+
+  sysobj->shutdown();
+  sysobj_core->shutdown();
+  notify->shutdown();
+  // The cache service only exists when init() was called with have_cache.
+  if (sysobj_cache) {
+    sysobj_cache->shutdown();
+  }
+  quota->shutdown();
+  zone_utils->shutdown();
+  zone->shutdown();
+  rados->shutdown();
+
+  has_shutdown = true;
+}
+
+
+// Initializes the owned RGWServices_Def and, on success, publishes raw
+// pointers to its services through the public members.
+// Returns 0 on success or the negative error from RGWServices_Def::init(),
+// in which case the public pointers are left untouched.
+int RGWServices::do_init(CephContext *cct, bool have_cache, bool raw)
+{
+  const int r = _svc.init(cct, have_cache, raw);
+  if (r < 0) {
+    return r;
+  }
+
+  finisher     = _svc.finisher.get();
+  notify       = _svc.notify.get();
+  rados        = _svc.rados.get();
+  zone         = _svc.zone.get();
+  zone_utils   = _svc.zone_utils.get();
+  quota        = _svc.quota.get();
+  sync_modules = _svc.sync_modules.get();
+  sysobj       = _svc.sysobj.get();
+  cache        = _svc.sysobj_cache.get();
+  core         = _svc.sysobj_core.get();
+
+  return 0;
+}
+
+// Starts the service once: the first call runs do_start(), every later call
+// returns 0 immediately.
+// Returns 0, or the negative error from do_start(); in that case the state
+// stays at StateStarting.
+int RGWServiceInstance::start()
+{
+  if (start_state == StateInit) {
+    /* setting started prior to do_start() on purpose so that circular
+       references can call start() on each other */
+    start_state = StateStarting;
+
+    const int r = do_start();
+    if (r < 0) {
+      return r;
+    }
+
+    start_state = StateStarted;
+  }
+
+  return 0;
+}
diff --git a/src/rgw/rgw_service.h b/src/rgw/rgw_service.h
new file mode 100644
index 00000000..316bacdb
--- /dev/null
+++ b/src/rgw/rgw_service.h
@@ -0,0 +1,112 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SERVICE_H
+#define CEPH_RGW_SERVICE_H
+
+
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "rgw/rgw_common.h"
+
+struct RGWServices_Def;
+
+// Base class of the RGW services. It tracks whether the service has been
+// started and offers two hooks, do_start() and shutdown(), that default to
+// doing nothing.
+class RGWServiceInstance
+{
+ friend struct RGWServices_Def;
+
+protected:
+ CephContext *cct;
+
+ // Start-up progress; start() moves it Init -> Starting -> Started.
+ enum StartState {
+ StateInit = 0,
+ StateStarting = 1,
+ StateStarted = 2,
+ } start_state{StateInit};
+
+ virtual void shutdown() {}
+ // Service-specific start-up work; a negative return value is treated as an
+ // error by start().
+ virtual int do_start() {
+ return 0;
+ }
+public:
+ RGWServiceInstance(CephContext *_cct) : cct(_cct) {}
+ virtual ~RGWServiceInstance() {}
+
+ int start();
+ // True only once do_start() has completed successfully.
+ bool is_started() {
+ return (start_state == StateStarted);
+ }
+
+ CephContext *ctx() {
+ return cct;
+ }
+};
+
+class RGWSI_Finisher;
+class RGWSI_Notify;
+class RGWSI_RADOS;
+class RGWSI_Zone;
+class RGWSI_ZoneUtils;
+class RGWSI_Quota;
+class RGWSI_SyncModules;
+class RGWSI_SysObj;
+class RGWSI_SysObj_Core;
+class RGWSI_SysObj_Cache;
+
+// Owns the service instances through unique_ptr members. init() creates and
+// starts them, shutdown() stops them.
+struct RGWServices_Def
+{
+ // Flags consulted by shutdown(): it only acts when can_shutdown is set and
+ // has_shutdown is not.
+ bool can_shutdown{false};
+ bool has_shutdown{false};
+
+ std::unique_ptr<RGWSI_Finisher> finisher;
+ std::unique_ptr<RGWSI_Notify> notify;
+ std::unique_ptr<RGWSI_RADOS> rados;
+ std::unique_ptr<RGWSI_Zone> zone;
+ std::unique_ptr<RGWSI_ZoneUtils> zone_utils;
+ std::unique_ptr<RGWSI_Quota> quota;
+ std::unique_ptr<RGWSI_SyncModules> sync_modules;
+ std::unique_ptr<RGWSI_SysObj> sysobj;
+ std::unique_ptr<RGWSI_SysObj_Core> sysobj_core;
+ std::unique_ptr<RGWSI_SysObj_Cache> sysobj_cache;
+
+ RGWServices_Def();
+ ~RGWServices_Def();
+
+ int init(CephContext *cct, bool have_cache, bool raw_storage);
+ void shutdown();
+};
+
+
+// Access point to the services: holds the owning RGWServices_Def plus one
+// non-owning pointer per service. The pointers are null until do_init()
+// has succeeded.
+struct RGWServices
+{
+ RGWServices_Def _svc;
+
+ RGWSI_Finisher *finisher{nullptr};
+ RGWSI_Notify *notify{nullptr};
+ RGWSI_RADOS *rados{nullptr};
+ RGWSI_Zone *zone{nullptr};
+ RGWSI_ZoneUtils *zone_utils{nullptr};
+ RGWSI_Quota *quota{nullptr};
+ RGWSI_SyncModules *sync_modules{nullptr};
+ RGWSI_SysObj *sysobj{nullptr};
+ RGWSI_SysObj_Cache *cache{nullptr};
+ RGWSI_SysObj_Core *core{nullptr};
+
+ int do_init(CephContext *cct, bool have_cache, bool raw_storage);
+
+ // Regular initialization (raw_storage = false).
+ int init(CephContext *cct, bool have_cache) {
+ return do_init(cct, have_cache, false);
+ }
+
+ // Initialization with raw_storage = true.
+ int init_raw(CephContext *cct, bool have_cache) {
+ return do_init(cct, have_cache, true);
+ }
+ void shutdown() {
+ _svc.shutdown();
+ }
+};
+
+
+#endif
diff --git a/src/rgw/rgw_string.cc b/src/rgw/rgw_string.cc
new file mode 100644
index 00000000..d49bba71
--- /dev/null
+++ b/src/rgw/rgw_string.cc
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_string.h"
+
+// Case-sensitive character comparison used by match_wildcards().
+static bool char_eq(char c1, char c2)
+{
+ return c1 == c2;
+}
+
+// Case-insensitive character comparison used by match_wildcards().
+static bool ci_char_eq(char c1, char c2)
+{
+  // tolower() is only defined for values representable as unsigned char (or
+  // EOF); a plain char may be negative, hence the casts.
+  return tolower(static_cast<unsigned char>(c1)) ==
+         tolower(static_cast<unsigned char>(c2));
+}
+
+// Matches `input` against `pattern`, where '*' in the pattern stands for any
+// (possibly empty) sequence of characters and '?' for exactly one
+// character. With MATCH_CASE_INSENSITIVE in `flags`, literal characters are
+// compared case-insensitively.
+//
+// Returns true when the whole input is matched by the whole pattern.
+//
+// The matcher backtracks to the most recent '*' when a later part of the
+// pattern fails, so patterns such as "*a" against "aa" or "*?" against "ab"
+// match as expected.
+bool match_wildcards(boost::string_view pattern, boost::string_view input,
+                     uint32_t flags)
+{
+  const auto eq = (flags & MATCH_CASE_INSENSITIVE) ? &ci_char_eq : &char_eq;
+
+  const auto npos = boost::string_view::npos;
+  size_t p = 0;              // current position in pattern
+  size_t i = 0;              // current position in input
+  size_t star = npos;        // position of the most recent '*' in pattern
+  size_t star_input = 0;     // input position that '*' currently extends to
+
+  while (i < input.size()) {
+    if (p < pattern.size() && pattern[p] == '*') {
+      // Start by letting '*' match nothing; remember where to resume.
+      star = p++;
+      star_input = i;
+    } else if (p < pattern.size() &&
+               (pattern[p] == '?' || eq(pattern[p], input[i]))) {
+      ++p;
+      ++i;
+    } else if (star != npos) {
+      // Mismatch: let the last '*' swallow one more character and retry.
+      p = star + 1;
+      i = ++star_input;
+    } else {
+      return false;
+    }
+  }
+
+  // Input is exhausted; only trailing '*' may remain in the pattern.
+  while (p < pattern.size() && pattern[p] == '*') {
+    ++p;
+  }
+  return p == pattern.size();
+}
diff --git a/src/rgw/rgw_string.h b/src/rgw/rgw_string.h
new file mode 100644
index 00000000..c5666753
--- /dev/null
+++ b/src/rgw/rgw_string.h
@@ -0,0 +1,236 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_STRING_H
+#define CEPH_RGW_STRING_H
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/utility/string_view.hpp>
+
+// Strict-weak-ordering functor that orders strings case-insensitively
+// (via strcasecmp).
+struct ltstr_nocase
+{
+  bool operator()(const std::string& s1, const std::string& s2) const
+  {
+    const int order = strcasecmp(s1.c_str(), s2.c_str());
+    return order < 0;
+  }
+};
+
+// Case-insensitive comparison of two strings; result as for strcasecmp().
+static inline int stringcasecmp(const std::string& s1, const std::string& s2)
+{
+  const char *lhs = s1.c_str();
+  const char *rhs = s2.c_str();
+  return strcasecmp(lhs, rhs);
+}
+
+// Case-insensitive comparison of a string with a C string; result as for
+// strcasecmp().
+static inline int stringcasecmp(const std::string& s1, const char *s2)
+{
+  const char *lhs = s1.c_str();
+  return strcasecmp(lhs, s2);
+}
+
+// Case-insensitive comparison of at most `size` characters of `s1`,
+// starting at offset `ofs`, with the beginning of `s2`; result as for
+// strncasecmp(). `ofs` is not range-checked.
+static inline int stringcasecmp(const std::string& s1, int ofs, int size, const std::string& s2)
+{
+  const char *lhs = s1.c_str() + ofs;
+  const char *rhs = s2.c_str();
+  return strncasecmp(lhs, rhs, size);
+}
+
+// Parses `s` as a base-10 signed 64-bit integer.
+//
+// On success stores the value in `*val` and returns 0. Returns -EINVAL, with
+// `*val` untouched, when the value is out of range in either direction or
+// when characters remain after the number. An empty string yields 0, as it
+// always has.
+static inline int stringtoll(const std::string& s, int64_t *val)
+{
+  char *end = nullptr;
+
+  // strtoll() reports overflow and underflow through errno, which
+  // distinguishes a real overflow from a legitimate LLONG_MAX / LLONG_MIN
+  // input.
+  errno = 0;
+  const long long result = strtoll(s.c_str(), &end, 10);
+  if (errno == ERANGE)
+    return -EINVAL;
+
+  if (*end)
+    return -EINVAL;
+
+  *val = static_cast<int64_t>(result);
+
+  return 0;
+}
+
+// Parses `s` as a base-10 unsigned 64-bit integer.
+//
+// On success stores the value in `*val` and returns 0. Returns -EINVAL, with
+// `*val` untouched, when strtoull() yields ULLONG_MAX or when characters
+// remain after the number.
+static inline int stringtoull(const std::string& s, uint64_t *val)
+{
+  char *tail = nullptr;
+  const unsigned long long parsed = strtoull(s.c_str(), &tail, 10);
+
+  const bool saturated = (parsed == ULLONG_MAX);
+  if (saturated) {
+    return -EINVAL;
+  }
+
+  const bool has_trailing_chars = (*tail != '\0');
+  if (has_trailing_chars) {
+    return -EINVAL;
+  }
+
+  *val = static_cast<uint64_t>(parsed);
+  return 0;
+}
+
+// Parses `s` as a base-10 signed 32-bit integer.
+//
+// On success stores the value in `*val` and returns 0. Returns -EINVAL, with
+// `*val` untouched, when the value does not fit into int32_t or when
+// characters remain after the number.
+static inline int stringtol(const std::string& s, int32_t *val)
+{
+  char *end = nullptr;
+
+  errno = 0;
+  const long result = strtol(s.c_str(), &end, 10);
+  // Where long is wider than 32 bits, strtol() itself does not notice values
+  // that exceed int32_t, so the range is checked explicitly instead of being
+  // silently truncated by the cast below.
+  if (errno == ERANGE || result > INT32_MAX || result < INT32_MIN)
+    return -EINVAL;
+
+  if (*end)
+    return -EINVAL;
+
+  *val = static_cast<int32_t>(result);
+
+  return 0;
+}
+
+// Parses `s` as a base-10 unsigned 32-bit integer.
+//
+// On success stores the value in `*val` and returns 0. Returns -EINVAL, with
+// `*val` untouched, when the value does not fit into uint32_t, when
+// strtoul() yields ULONG_MAX, or when characters remain after the number.
+static inline int stringtoul(const std::string& s, uint32_t *val)
+{
+  char *end = nullptr;
+
+  errno = 0;
+  const unsigned long result = strtoul(s.c_str(), &end, 10);
+  // The ULONG_MAX check is kept from the original (it also rejects "-1",
+  // which strtoul() wraps around). The UINT32_MAX check stops values from
+  // being silently truncated where unsigned long is wider than 32 bits.
+  if (errno == ERANGE || result == ULONG_MAX || result > UINT32_MAX)
+    return -EINVAL;
+
+  if (*end)
+    return -EINVAL;
+
+  *val = static_cast<uint32_t>(result);
+
+  return 0;
+}
+
+/* Copies a boost::string_view into a null-terminated character buffer.
+ * The buffer is a small_vector with N characters of in-place capacity, so
+ * views shorter than N need no dynamic allocation. */
+template<std::size_t N = 128>
+static inline boost::container::small_vector<char, N>
+sview2cstr(const boost::string_view& sv)
+{
+  boost::container::small_vector<char, N> buffer;
+
+  // Room for the characters plus the terminator.
+  const std::size_t needed = sv.size() + sizeof('\0');
+  buffer.reserve(needed);
+
+  buffer.insert(buffer.end(), sv.begin(), sv.end());
+  buffer.push_back('\0');
+
+  return buffer;
+}
+
+/* Compile-time length of a character array, excluding its last element
+ * (the terminator of a string literal). This is NOT a drop-in replacement
+ * for strlen(): the result is derived from the array size, so it differs
+ * for arrays containing \0 in the middle. std::strlen() itself isn't
+ * guaranteed to be usable at compile time (newer GCCs manage, Clang
+ * doesn't). */
+template<size_t N>
+static inline constexpr size_t sarrlen(const char (&arr)[N]) {
+  return sizeof(arr) / sizeof(arr[0]) - 1;
+}
+
+// Implementation helpers for string_size(), string_cat_reserve() and
+// string_join_reserve() below.
+namespace detail {
+
+// variadic sum() to add up string lengths for reserve()
+// (the zero-argument overload ends the recursion)
+static inline constexpr size_t sum() { return 0; }
+template <typename... Args>
+constexpr size_t sum(size_t v, Args... args) { return v + sum(args...); }
+
+// traits for string_size()
+// primary template: any type with a size() member
+template <typename T>
+struct string_traits {
+ static constexpr size_t size(const T& s) { return s.size(); }
+};
+// specializations for char*/const char* use strlen()
+template <>
+struct string_traits<const char*> {
+ static size_t size(const char* s) { return std::strlen(s); }
+};
+template <>
+struct string_traits<char*> : string_traits<const char*> {};
+// constexpr specializations for char[]/const char[]
+// size_() scans for the first '\0' within the N array elements and throws
+// when there is none, i.e. the result is the strlen()-style length, not N.
+template <std::size_t N>
+struct string_traits<const char[N]> {
+ static constexpr size_t size_(const char* s, size_t i) {
+ return i < N ? (*(s + i) == '\0' ? i : size_(s, i + 1))
+ : throw std::invalid_argument("Unterminated string constant.");
+ }
+ static constexpr size_t size(const char(&s)[N]) { return size_(s, 0); }
+};
+template <std::size_t N>
+struct string_traits<char[N]> : string_traits<const char[N]> {};
+
+// helpers for string_cat_reserve()
+// append_to() appends each argument to s in order; the overload without
+// further arguments ends the recursion.
+static inline void append_to(std::string& s) {}
+template <typename... Args>
+void append_to(std::string& s, const boost::string_view& v, const Args&... args)
+{
+ s.append(v.begin(), v.end());
+ append_to(s, args...);
+}
+
+// helpers for string_join_reserve()
+// join_next() appends the delimiter d followed by the next argument, for
+// every remaining argument.
+static inline void join_next(std::string& s, const boost::string_view& d) {}
+template <typename... Args>
+void join_next(std::string& s, const boost::string_view& d,
+ const boost::string_view& v, const Args&... args)
+{
+ s.append(d.begin(), d.end());
+ s.append(v.begin(), v.end());
+ join_next(s, d, args...);
+}
+
+// join() appends the first argument without a leading delimiter and hands
+// the rest to join_next(); with no arguments it does nothing.
+static inline void join(std::string& s, const boost::string_view& d) {}
+template <typename... Args>
+void join(std::string& s, const boost::string_view& d,
+ const boost::string_view& v, const Args&... args)
+{
+ s.append(v.begin(), v.end());
+ join_next(s, d, args...);
+}
+
+} // namespace detail
+
+/// return the length of a c string, string literal, or string type
+/// (dispatches on the argument type through detail::string_traits)
+template <typename T>
+constexpr size_t string_size(const T& s)
+{
+ return detail::string_traits<T>::size(s);
+}
+
+/// Concatenates all arguments into one std::string. The combined length is
+/// computed first so the result is allocated once with reserve().
+template <typename... Args>
+std::string string_cat_reserve(const Args&... args)
+{
+  std::string joined;
+  joined.reserve(detail::sum(string_size(args)...));
+  detail::append_to(joined, args...);
+  return joined;
+}
+
+/// Joins all arguments into one std::string, separated by `delim`. The
+/// combined length is computed first so the result is allocated once with
+/// reserve().
+template <typename... Args>
+std::string string_join_reserve(const boost::string_view& delim,
+                                const Args&... args)
+{
+  // One delimiter between each pair of neighbours; none for 0 or 1 argument.
+  constexpr size_t delim_count = sizeof...(args) > 0 ? sizeof...(args) - 1 : 0;
+  const size_t payload_size = detail::sum(string_size(args)...);
+
+  std::string joined;
+  joined.reserve(payload_size + delim.size() * delim_count);
+  detail::join(joined, delim, args...);
+  return joined;
+}
+/// Overload taking a single-character delimiter.
+template <typename... Args>
+std::string string_join_reserve(char delim, const Args&... args)
+{
+  const boost::string_view delim_view{&delim, 1};
+  return string_join_reserve(delim_view, args...);
+}
+
+
+/// flag for match_wildcards(): compare characters case-insensitively
+static constexpr uint32_t MATCH_CASE_INSENSITIVE = 0x01;
+
+/// attempt to match the given input string with the pattern, which may contain
+/// the wildcard characters * and ?
+/// @param pattern  pattern to match against
+/// @param input    string to test
+/// @param flags    0 or MATCH_CASE_INSENSITIVE
+/// @return true if the input matches the pattern
+extern bool match_wildcards(boost::string_view pattern,
+ boost::string_view input,
+ uint32_t flags = 0);
+
+#endif
diff --git a/src/rgw/rgw_sts.cc b/src/rgw/rgw_sts.cc
new file mode 100644
index 00000000..0cef12ac
--- /dev/null
+++ b/src/rgw/rgw_sts.cc
@@ -0,0 +1,427 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <ctime>
+#include <memory>
+#include <regex>
+#include <boost/format.hpp>
+#include <boost/algorithm/string/replace.hpp>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "common/ceph_time.h"
+#include "rgw_rados.h"
+#include "auth/Crypto.h"
+#include "include/ceph_fs.h"
+#include "common/iso_8601.h"
+
+#include "include/types.h"
+#include "rgw_string.h"
+
+#include "rgw_b64.h"
+#include "rgw_common.h"
+#include "rgw_tools.h"
+#include "rgw_role.h"
+#include "rgw_user.h"
+#include "rgw_iam_policy.h"
+#include "rgw_sts.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace STS {
+
+/// Writes the four credential fields to the formatter, in the order
+/// AccessKeyId, Expiration, SecretAccessKey, SessionToken.
+void Credentials::dump(Formatter *f) const
+{
+  encode_json("AccessKeyId", accessKeyId, f);
+  encode_json("Expiration", expiration, f);
+  encode_json("SecretAccessKey", secretAccessKey, f);
+  encode_json("SessionToken", sessionToken, f);
+}
+
+/// Fills this credential set with a freshly generated access key id, secret
+/// access key, expiration timestamp and session token.
+///
+/// The session token is a SessionToken (keys, expiration, policy, role id,
+/// user and identity attributes) that is encrypted with the AES crypto handler
+/// using the configured rgw_sts_key and then base64 encoded, so the
+/// authorization info can be recovered from the token itself.
+///
+/// @param cct       context providing the crypto handler and configuration
+/// @param duration  validity of the credentials in seconds, counted from now
+/// @param policy    optional policy stored in the token (empty when absent)
+/// @param roleId    optional role id stored in the token (empty when absent)
+/// @param user      optional user stored in the token (empty user when absent)
+/// @param identity  optional identity; when null the token carries an empty
+///                  account name, a zero permission mask, no admin flag and
+///                  the account type TYPE_ROLE
+/// @return -EINVAL when no AES crypto handler or key handler is available, the
+///         negative error of validate_secret()/encrypt() when those fail,
+///         otherwise the (non-negative) result of the encrypt() call
+int Credentials::generateCredentials(CephContext* cct,
+                          const uint64_t& duration,
+                          const boost::optional<string>& policy,
+                          const boost::optional<string>& roleId,
+                          boost::optional<rgw_user> user,
+                          rgw::auth::Identity* identity)
+{
+  char accessKeyId_str[MAX_ACCESS_KEY_LEN], secretAccessKey_str[MAX_SECRET_KEY_LEN];
+
+  //AccessKeyId
+  gen_rand_alphanumeric_plain(cct, accessKeyId_str, sizeof(accessKeyId_str));
+  accessKeyId = accessKeyId_str;
+
+  //SecretAccessKey
+  gen_rand_alphanumeric_upper(cct, secretAccessKey_str, sizeof(secretAccessKey_str));
+  secretAccessKey = secretAccessKey_str;
+
+  //Expiration
+  real_clock::time_point t = real_clock::now();
+  real_clock::time_point exp = t + std::chrono::seconds(duration);
+  expiration = ceph::to_iso_8601(exp);
+
+  //Session Token - Encrypt using AES
+  auto* cryptohandler = cct->get_crypto_handler(CEPH_CRYPTO_AES);
+  if (! cryptohandler) {
+    return -EINVAL;
+  }
+  string secret_s = cct->_conf->rgw_sts_key;
+  buffer::ptr secret(secret_s.c_str(), secret_s.length());
+  int ret = 0;
+  if (ret = cryptohandler->validate_secret(secret); ret < 0) {
+    ldout(cct, 0) << "ERROR: Invalid secret key" << dendl;
+    return ret;
+  }
+  string error;
+  // The key handler is handed to us as a raw pointer that nobody else frees;
+  // own it here so it is released on every return path instead of leaking
+  // once per generated credential.
+  std::unique_ptr<CryptoKeyHandler> keyhandler(
+    cryptohandler->get_key_handler(secret, error));
+  if (! keyhandler) {
+    return -EINVAL;
+  }
+  error.clear();
+  //Storing policy and roleId as part of token, so that they can be extracted
+  // from the token itself for policy evaluation.
+  SessionToken token;
+  //authentication info
+  token.access_key_id = accessKeyId;
+  token.secret_access_key = secretAccessKey;
+  token.expiration = expiration;
+
+  //Authorization info
+  if (policy)
+    token.policy = *policy;
+  else
+    token.policy = {};
+
+  if (roleId)
+    token.roleId = *roleId;
+  else
+    token.roleId = {};
+
+  if (user)
+    token.user = *user;
+  else {
+    rgw_user u({}, {});
+    token.user = u;
+  }
+
+  if (identity) {
+    token.acct_name = identity->get_acct_name();
+    token.perm_mask = identity->get_perm_mask();
+    token.is_admin = identity->is_admin_of(token.user);
+    token.acct_type = identity->get_identity_type();
+  } else {
+    token.acct_name = {};
+    token.perm_mask = 0;
+    token.is_admin = 0;
+    token.acct_type = TYPE_ROLE;
+  }
+
+  buffer::list input, enc_output;
+  encode(token, input);
+
+  if (ret = keyhandler->encrypt(input, enc_output, &error); ret < 0) {
+    return ret;
+  }
+
+  // base64 text is NUL-terminated before being read back as a C string
+  bufferlist encoded_op;
+  enc_output.encode_base64(encoded_op);
+  encoded_op.append('\0');
+  sessionToken = encoded_op.c_str();
+
+  return ret;
+}
+
+/// Writes the assumed role user's ARN and id to the formatter as "Arn" and
+/// "AssumeRoleId".
+void AssumedRoleUser::dump(Formatter *f) const
+{
+  encode_json("Arn", arn, f);
+  encode_json("AssumeRoleId", assumeRoleId, f);
+}
+
+/// Derives the ARN and the id of the assumed-role user from the role that is
+/// being assumed and stores both in this object.
+///
+/// The ARN reuses the account of \a roleArn with service sts and partition
+/// aws; its resource is the role's resource with the first occurrence of
+/// "role" replaced by "assumed-role" and "/<roleSessionName>" appended. The id
+/// is "<roleId>:<roleSessionName>".
+///
+/// @param cct              unused by this function
+/// @param store            unused by this function
+/// @param roleId           id of the role being assumed
+/// @param roleArn          parsed ARN of the role being assumed
+/// @param roleSessionName  session name chosen by the caller
+/// @return always 0
+int AssumedRoleUser::generateAssumedRoleUser(CephContext* cct,
+                                              RGWRados *store,
+                                              const string& roleId,
+                                              const rgw::ARN& roleArn,
+                                              const string& roleSessionName)
+{
+  // roleArn is const, so this has to be a copy; the previous std::move() on
+  // the const member only looked like a move and copied anyway
+  string resource = roleArn.resource;
+  boost::replace_first(resource, "role", "assumed-role");
+  resource.append("/");
+  resource.append(roleSessionName);
+
+  rgw::ARN assumed_role_arn(rgw::Partition::aws,
+                            rgw::Service::sts,
+                            "", roleArn.account, resource);
+  arn = assumed_role_arn.to_string();
+
+  //Assumeroleid = roleid:rolesessionname
+  assumeRoleId = roleId + ":" + roleSessionName;
+
+  return 0;
+}
+
+/// Stores the policy, role ARN and session name and converts the textual
+/// duration: an empty string selects DEFAULT_DURATION_IN_SECS, anything else
+/// is parsed as a base-10 number with parse errors recorded in err_msg.
+AssumeRoleRequestBase::AssumeRoleRequestBase( const string& duration,
+                                              const string& iamPolicy,
+                                              const string& roleArn,
+                                              const string& roleSessionName)
+  : iamPolicy(iamPolicy), roleArn(roleArn), roleSessionName(roleSessionName)
+{
+  if (! duration.empty()) {
+    this->duration = strict_strtoll(duration.c_str(), 10, &this->err_msg);
+    return;
+  }
+  this->duration = DEFAULT_DURATION_IN_SECS;
+}
+
+/// Checks the parameters shared by all AssumeRole style requests.
+///
+/// @return 0 when everything is acceptable, -ERR_PACKED_POLICY_TOO_LARGE when
+///         a non-empty policy is outside its size limits, -EINVAL for a
+///         duration that failed to parse or is out of range, a role ARN or
+///         session name of invalid length, or a session name containing
+///         characters other than letters, digits and "_=,.@-"
+int AssumeRoleRequestBase::validate_input() const
+{
+  // true when len lies outside the inclusive range [lo, hi]
+  const auto outside = [](uint64_t len, uint64_t lo, uint64_t hi) {
+    return len < lo || len > hi;
+  };
+
+  // the constructor leaves a message here when the duration did not parse
+  if (!err_msg.empty()) {
+    return -EINVAL;
+  }
+
+  if (outside(duration, MIN_DURATION_IN_SECS, MAX_DURATION_IN_SECS)) {
+    return -EINVAL;
+  }
+
+  if (!iamPolicy.empty() &&
+      outside(iamPolicy.size(), MIN_POLICY_SIZE, MAX_POLICY_SIZE)) {
+    return -ERR_PACKED_POLICY_TOO_LARGE;
+  }
+
+  if (!roleArn.empty() &&
+      outside(roleArn.size(), MIN_ROLE_ARN_SIZE, MAX_ROLE_ARN_SIZE)) {
+    return -EINVAL;
+  }
+
+  if (roleSessionName.empty()) {
+    return 0;
+  }
+
+  if (outside(roleSessionName.size(), MIN_ROLE_SESSION_SIZE, MAX_ROLE_SESSION_SIZE)) {
+    return -EINVAL;
+  }
+
+  std::regex regex_roleSession("[A-Za-z0-9_=,.@-]+");
+  return std::regex_match(roleSessionName, regex_roleSession) ? 0 : -EINVAL;
+}
+
+/// Validates the provider id (only when one was given, its length must lie
+/// between MIN_PROVIDER_ID_LEN and MAX_PROVIDER_ID_LEN) and then runs the
+/// checks of the base class.
+int AssumeRoleWithWebIdentityRequest::validate_input() const
+{
+  const auto id_len = providerId.length();
+  if (! providerId.empty() &&
+      (id_len < MIN_PROVIDER_ID_LEN || id_len > MAX_PROVIDER_ID_LEN)) {
+    return -EINVAL;
+  }
+  return AssumeRoleRequestBase::validate_input();
+}
+
+/// Validates the AssumeRole specific parameters and then runs the checks of
+/// the base class. Every parameter is optional; only non-empty values are
+/// checked.
+///
+///  - externalId:   length within [MIN_EXTERNAL_ID_LEN, MAX_EXTERNAL_ID_LEN]
+///                  and made up of letters, digits and "_=,.@:/-"
+///  - serialNumber: length within [MIN_SERIAL_NUMBER_SIZE,
+///                  MAX_SERIAL_NUMBER_SIZE] and made up of letters, digits
+///                  and "_=/:,.@-"
+///  - tokenCode:    exactly TOKEN_CODE_SIZE characters
+///
+/// @return -EINVAL for the first parameter that fails its check, otherwise
+///         the result of AssumeRoleRequestBase::validate_input()
+int AssumeRoleRequest::validate_input() const
+{
+  if (! externalId.empty()) {
+    if (externalId.length() < MIN_EXTERNAL_ID_LEN ||
+          externalId.length() > MAX_EXTERNAL_ID_LEN) {
+      return -EINVAL;
+    }
+
+    std::regex regex_externalId("[A-Za-z0-9_=,.@:/-]+");
+    if (! std::regex_match(externalId, regex_externalId)) {
+      return -EINVAL;
+    }
+  }
+  if (! serialNumber.empty()){
+    if (serialNumber.size() < MIN_SERIAL_NUMBER_SIZE || serialNumber.size() > MAX_SERIAL_NUMBER_SIZE) {
+      return -EINVAL;
+    }
+
+    std::regex regex_serialNumber("[A-Za-z0-9_=/:,.@-]+");
+    if (! std::regex_match(serialNumber, regex_serialNumber)) {
+      return -EINVAL;
+    }
+  }
+  // A token code has a fixed length. The comparison used to be '==', which
+  // rejected exactly the well-formed codes and let every other length pass.
+  if (! tokenCode.empty() && tokenCode.size() != TOKEN_CODE_SIZE) {
+    return -EINVAL;
+  }
+
+  return AssumeRoleRequestBase::validate_input();
+}
+
+/// Loads the role named by the last '/'-separated component of the resource
+/// of \a arn and caches it in this service object.
+///
+/// @return (0, role) on success; (-EINVAL, cached role) when \a arn does not
+///         parse; (error, cached role) when the lookup fails, with -ENOENT
+///         reported as -ERR_NO_ROLE_FOUND
+std::tuple<int, RGWRole> STSService::getRoleInfo(const string& arn)
+{
+  auto r_arn = rgw::ARN::parse(arn);
+  if (! r_arn) {
+    return make_tuple(-EINVAL, this->role);
+  }
+
+  // everything after the last '/' (or the whole resource when there is none)
+  const auto slash = r_arn->resource.find_last_of('/');
+  string roleName = r_arn->resource.substr(slash + 1);
+
+  RGWRole fetched(cct, store, roleName, r_arn->account);
+  int ret = fetched.get();
+  if (ret < 0) {
+    if (ret == -ENOENT) {
+      ret = -ERR_NO_ROLE_FOUND;
+    }
+    return make_tuple(ret, this->role);
+  }
+
+  this->role = std::move(fetched);
+  return make_tuple(0, this->role);
+}
+
+/// Records \a arn as the assumed role ARN in the stored info of user_id.
+///
+/// @return -ERR_NO_SUCH_ENTITY when the user info cannot be read,
+///         -ERR_INTERNAL_ERROR when writing it back fails, otherwise the
+///         result of the store call
+int STSService::storeARN(string& arn)
+{
+  RGWUserInfo info;
+  if (rgw_get_user_info_by_uid(store, user_id, info) < 0) {
+    return -ERR_NO_SUCH_ENTITY;
+  }
+
+  info.assumed_role_arn = arn;
+
+  RGWObjVersionTracker objv_tracker;
+  const int stored = rgw_store_user_info(store, info, &info, &objv_tracker,
+                                         real_time(), false);
+  if (stored < 0) {
+    return -ERR_INTERNAL_ERROR;
+  }
+  return stored;
+}
+
+/// Handles an AssumeRoleWithWebIdentity request: validates it, derives the
+/// assumed role user and generates temporary credentials.
+///
+/// The role id and maximum session duration are read from the role member of
+/// this service; the function itself does not load the role.
+/// NOTE(review): presumably getRoleInfo() has to be called first - confirm
+/// against the caller.
+///
+/// @param req  the request; its maximum duration is set from the role before
+///             validation
+/// @return the response; assumeRoleResp.retCode is 0 on success or the
+///         negative error of the step that failed (-EINVAL when the role ARN
+///         does not parse)
+AssumeRoleWithWebIdentityResponse STSService::assumeRoleWithWebIdentity(AssumeRoleWithWebIdentityRequest& req)
+{
+  AssumeRoleWithWebIdentityResponse response;
+  response.assumeRoleResp.packedPolicySize = 0;
+
+  // without an explicit provider id the token issuer is reported instead
+  if (req.getProviderId().empty()) {
+    response.providerId = req.getIss();
+  }
+  response.aud = req.getAud();
+  response.sub = req.getSub();
+
+  //Get the role info which is being assumed
+  boost::optional<rgw::ARN> r_arn = rgw::ARN::parse(req.getRoleARN());
+  if (r_arn == boost::none) {
+    response.assumeRoleResp.retCode = -EINVAL;
+    return response;
+  }
+
+  string roleId = role.get_id();
+  uint64_t roleMaxSessionDuration = role.get_max_session_duration();
+  req.setMaxDuration(roleMaxSessionDuration);
+
+  //Validate input
+  response.assumeRoleResp.retCode = req.validate_input();
+  if (response.assumeRoleResp.retCode < 0) {
+    return response;
+  }
+
+  //Calculate PackedPolicySize
+  // Percentage of the maximum policy size in use. Multiply before dividing:
+  // with integer arithmetic "size / max * 100" is 0 for every policy that is
+  // shorter than the maximum.
+  string policy = req.getPolicy();
+  response.assumeRoleResp.packedPolicySize = (policy.size() * 100) / req.getMaxPolicySize();
+
+  //Generate Assumed Role User
+  response.assumeRoleResp.retCode = response.assumeRoleResp.user.generateAssumedRoleUser(cct,
+                                                                                         store,
+                                                                                         roleId,
+                                                                                         r_arn.get(),
+                                                                                         req.getRoleSessionName());
+  if (response.assumeRoleResp.retCode < 0) {
+    return response;
+  }
+
+  //Generate Credentials
+  //Role and Policy provide the authorization info, user id and applier info are not needed
+  response.assumeRoleResp.retCode = response.assumeRoleResp.creds.generateCredentials(cct, req.getDuration(),
+                                                                                      req.getPolicy(), roleId,
+                                                                                      user_id, nullptr);
+  if (response.assumeRoleResp.retCode < 0) {
+    return response;
+  }
+
+  response.assumeRoleResp.retCode = 0;
+  return response;
+}
+
+/// Handles an AssumeRole request: validates it, derives the assumed role
+/// user, generates temporary credentials and stores the assumed role ARN
+/// with the user.
+///
+/// The role id and maximum session duration are read from the role member of
+/// this service; the function itself does not load the role.
+/// NOTE(review): presumably getRoleInfo() has to be called first - confirm
+/// against the caller.
+///
+/// @param req  the request; its maximum duration is set from the role before
+///             validation
+/// @return the response; retCode is 0 on success or the negative error of
+///         the step that failed (-EINVAL when the role ARN does not parse)
+AssumeRoleResponse STSService::assumeRole(AssumeRoleRequest& req)
+{
+  AssumeRoleResponse response;
+  response.packedPolicySize = 0;
+
+  //Get the role info which is being assumed
+  boost::optional<rgw::ARN> r_arn = rgw::ARN::parse(req.getRoleARN());
+  if (r_arn == boost::none) {
+    response.retCode = -EINVAL;
+    return response;
+  }
+
+  string roleId = role.get_id();
+  uint64_t roleMaxSessionDuration = role.get_max_session_duration();
+  req.setMaxDuration(roleMaxSessionDuration);
+
+  //Validate input
+  response.retCode = req.validate_input();
+  if (response.retCode < 0) {
+    return response;
+  }
+
+  //Calculate PackedPolicySize
+  // Percentage of the maximum policy size in use. Multiply before dividing:
+  // with integer arithmetic "size / max * 100" is 0 for every policy that is
+  // shorter than the maximum.
+  string policy = req.getPolicy();
+  response.packedPolicySize = (policy.size() * 100) / req.getMaxPolicySize();
+
+  //Generate Assumed Role User
+  response.retCode = response.user.generateAssumedRoleUser(cct, store, roleId, r_arn.get(), req.getRoleSessionName());
+  if (response.retCode < 0) {
+    return response;
+  }
+
+  //Generate Credentials
+  //Role and Policy provide the authorization info, user id and applier info are not needed
+  response.retCode = response.creds.generateCredentials(cct, req.getDuration(),
+                                                        req.getPolicy(), roleId,
+                                                        user_id, nullptr);
+  if (response.retCode < 0) {
+    return response;
+  }
+
+  //Save ARN with the user
+  string arn = response.user.getARN();
+  response.retCode = storeARN(arn);
+  if (response.retCode < 0) {
+    return response;
+  }
+
+  response.retCode = 0;
+  return response;
+}
+
+/// Stores the serial number and token code and converts the textual duration:
+/// an empty string selects DEFAULT_DURATION_IN_SECS, anything else goes
+/// through stoull(), which throws when the text is not a valid number.
+GetSessionTokenRequest::GetSessionTokenRequest(const string& duration, const string& serialNumber, const string& tokenCode)
+  : serialNumber(serialNumber), tokenCode(tokenCode)
+{
+  if (! duration.empty()) {
+    this->duration = stoull(duration);
+  } else {
+    this->duration = DEFAULT_DURATION_IN_SECS;
+  }
+}
+
+/// Generates temporary credentials for user_id / identity without any role
+/// or policy attached.
+///
+/// @return (error, credentials) when generation fails with a negative code,
+///         (0, credentials) otherwise
+GetSessionTokenResponse STSService::getSessionToken(GetSessionTokenRequest& req)
+{
+  Credentials cred;
+
+  //Generate Credentials
+  const int ret = cred.generateCredentials(cct,
+                                           req.getDuration(),
+                                           boost::none,
+                                           boost::none,
+                                           user_id,
+                                           identity);
+  const int status = (ret < 0) ? ret : 0;
+  return make_tuple(status, cred);
+}
+
+}
diff --git a/src/rgw/rgw_sts.h b/src/rgw/rgw_sts.h
new file mode 100644
index 00000000..1ad48504
--- /dev/null
+++ b/src/rgw/rgw_sts.h
@@ -0,0 +1,222 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_STS_H
+#define CEPH_RGW_STS_H
+
+#include "rgw_role.h"
+#include "rgw_auth.h"
+#include "rgw_web_idp.h"
+
+namespace STS {
+
+/// Parameters shared by the AssumeRole style requests (session duration,
+/// optional IAM policy, role ARN and role session name) together with the
+/// limits that validate_input() checks them against.
+class AssumeRoleRequestBase {
+protected:
+ static constexpr uint64_t MIN_POLICY_SIZE = 1;
+ static constexpr uint64_t MAX_POLICY_SIZE = 2048;
+ static constexpr uint64_t DEFAULT_DURATION_IN_SECS = 3600;
+ static constexpr uint64_t MIN_DURATION_IN_SECS = 900;
+ static constexpr uint64_t MIN_ROLE_ARN_SIZE = 2;
+ static constexpr uint64_t MAX_ROLE_ARN_SIZE = 2048;
+ static constexpr uint64_t MIN_ROLE_SESSION_SIZE = 2;
+ static constexpr uint64_t MAX_ROLE_SESSION_SIZE = 64;
+ // upper bound for duration; has no initializer and is only assigned by
+ // setMaxDuration(), so that has to run before validate_input()
+ uint64_t MAX_DURATION_IN_SECS;
+ // requested duration in seconds
+ uint64_t duration;
+ // filled by the constructor when the duration string fails to parse
+ string err_msg;
+ string iamPolicy;
+ string roleArn;
+ string roleSessionName;
+public:
+ /// \a duration is the textual duration in seconds; an empty string selects
+ /// DEFAULT_DURATION_IN_SECS
+ AssumeRoleRequestBase(const string& duration,
+ const string& iamPolicy,
+ const string& roleArn,
+ const string& roleSessionName);
+ const string& getRoleARN() const { return roleArn; }
+ const string& getRoleSessionName() const { return roleSessionName; }
+ const string& getPolicy() const {return iamPolicy; }
+ static const uint64_t& getMaxPolicySize() { return MAX_POLICY_SIZE; }
+ void setMaxDuration(const uint64_t& maxDuration) { MAX_DURATION_IN_SECS = maxDuration; }
+ const uint64_t& getDuration() const { return duration; }
+ /// returns 0 when all parameters are acceptable, a negative error otherwise
+ int validate_input() const;
+};
+
+/// AssumeRoleWithWebIdentity request: the common AssumeRole parameters plus
+/// the identity provider id and the iss/sub/aud values of the web identity.
+///
+/// The policy is kept in the base class only; this class used to declare a
+/// second, never assigned iamPolicy member that shadowed the base member.
+class AssumeRoleWithWebIdentityRequest : public AssumeRoleRequestBase {
+  static constexpr uint64_t MIN_PROVIDER_ID_LEN = 4;
+  static constexpr uint64_t MAX_PROVIDER_ID_LEN = 2048;
+  string providerId;
+  string iss;
+  string sub;
+  string aud;
+public:
+  AssumeRoleWithWebIdentityRequest( const string& duration,
+                      const string& providerId,
+                      const string& iamPolicy,
+                      const string& roleArn,
+                      const string& roleSessionName,
+                      const string& iss,
+                      const string& sub,
+                      const string& aud)
+    : AssumeRoleRequestBase(duration, iamPolicy, roleArn, roleSessionName),
+      providerId(providerId), iss(iss), sub(sub), aud(aud) {}
+  const string& getProviderId() const { return providerId; }
+  const string& getIss() const { return iss; }
+  const string& getAud() const { return aud; }
+  const string& getSub() const { return sub; }
+  /// checks the provider id length (when given) and the base class parameters
+  int validate_input() const;
+};
+
+/// AssumeRole request: the common AssumeRole parameters plus the optional
+/// external id, serial number and token code, with the limits that
+/// validate_input() checks them against.
+class AssumeRoleRequest : public AssumeRoleRequestBase {
+ static constexpr uint64_t MIN_EXTERNAL_ID_LEN = 2;
+ static constexpr uint64_t MAX_EXTERNAL_ID_LEN = 1224;
+ static constexpr uint64_t MIN_SERIAL_NUMBER_SIZE = 9;
+ static constexpr uint64_t MAX_SERIAL_NUMBER_SIZE = 256;
+ static constexpr uint64_t TOKEN_CODE_SIZE = 6;
+ string externalId;
+ string serialNumber;
+ string tokenCode;
+public:
+ AssumeRoleRequest(const string& duration,
+ const string& externalId,
+ const string& iamPolicy,
+ const string& roleArn,
+ const string& roleSessionName,
+ const string& serialNumber,
+ const string& tokenCode)
+ : AssumeRoleRequestBase(duration, iamPolicy, roleArn, roleSessionName),
+ externalId(externalId), serialNumber(serialNumber), tokenCode(tokenCode){}
+ /// checks externalId, serialNumber and tokenCode (each only when non-empty)
+ /// and then the base class parameters
+ int validate_input() const;
+};
+
+/// GetSessionToken request: session duration in seconds plus the serial
+/// number and token code supplied with the request.
+class GetSessionTokenRequest {
+protected:
+ static constexpr uint64_t MIN_DURATION_IN_SECS = 900;
+ static constexpr uint64_t DEFAULT_DURATION_IN_SECS = 3600;
+ uint64_t duration;
+ string serialNumber;
+ string tokenCode;
+
+public:
+ /// \a duration is the textual duration in seconds; an empty string selects
+ /// DEFAULT_DURATION_IN_SECS
+ GetSessionTokenRequest(const string& duration, const string& serialNumber, const string& tokenCode);
+
+ const uint64_t& getDuration() const { return duration; }
+ static const uint64_t& getMinDuration() { return MIN_DURATION_IN_SECS; }
+};
+
+/// ARN and id of the user that results from assuming a role; both are filled
+/// in by generateAssumedRoleUser().
+class AssumedRoleUser {
+ string arn;
+ string assumeRoleId;
+public:
+ /// derives arn and assumeRoleId from the role's id and ARN and the session
+ /// name chosen by the caller
+ int generateAssumedRoleUser( CephContext* cct,
+ RGWRados *store,
+ const string& roleId,
+ const rgw::ARN& roleArn,
+ const string& roleSessionName);
+ const string& getARN() const { return arn; }
+ const string& getAssumeRoleId() const { return assumeRoleId; }
+ void dump(Formatter *f) const;
+};
+
+/// Payload that is encoded, encrypted and handed out as the session token:
+/// the temporary keys and their expiration plus the authorization info
+/// (policy, role id, user and identity attributes).
+///
+/// encode() and decode() must list the fields in the same order.
+struct SessionToken {
+  string access_key_id;
+  string secret_access_key;
+  string expiration;
+  string policy;
+  string roleId;
+  rgw_user user;
+  string acct_name;
+  // The scalar members get explicit defaults: the empty constructor below
+  // would otherwise leave them indeterminate, and encoding a default
+  // constructed token would read uninitialized values.
+  uint32_t perm_mask = 0;
+  bool is_admin = false;
+  uint32_t acct_type = 0;
+
+  SessionToken() {}
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(access_key_id, bl);
+    encode(secret_access_key, bl);
+    encode(expiration, bl);
+    encode(policy, bl);
+    encode(roleId, bl);
+    encode(user, bl);
+    encode(acct_name, bl);
+    encode(perm_mask, bl);
+    encode(is_admin, bl);
+    encode(acct_type, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(access_key_id, bl);
+    decode(secret_access_key, bl);
+    decode(expiration, bl);
+    decode(policy, bl);
+    decode(roleId, bl);
+    decode(user, bl);
+    decode(acct_name, bl);
+    decode(perm_mask, bl);
+    decode(is_admin, bl);
+    decode(acct_type, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(SessionToken)
+
+/// Temporary credential set handed out by the STS calls: access key id,
+/// secret access key, expiration and session token.
+class Credentials {
+ // sizes of the buffers the random access / secret keys are generated into
+ static constexpr int MAX_ACCESS_KEY_LEN = 20;
+ static constexpr int MAX_SECRET_KEY_LEN = 40;
+ string accessKeyId;
+ string expiration;
+ string secretAccessKey;
+ string sessionToken;
+public:
+ /// generates all four fields; \a duration is the validity in seconds, the
+ /// optional policy / roleId / user and the identity (may be null) are
+ /// stored inside the session token. Returns a negative error on failure.
+ int generateCredentials(CephContext* cct,
+ const uint64_t& duration,
+ const boost::optional<string>& policy,
+ const boost::optional<string>& roleId,
+ boost::optional<rgw_user> user,
+ rgw::auth::Identity* identity);
+ const string& getAccessKeyId() const { return accessKeyId; }
+ const string& getExpiration() const { return expiration; }
+ const string& getSecretAccessKey() const { return secretAccessKey; }
+ const string& getSessionToken() const { return sessionToken; }
+ void dump(Formatter *f) const;
+};
+
+/// Result of STSService::assumeRole().
+struct AssumeRoleResponse {
+ // 0 on success, negative error code otherwise; has no default value
+ int retCode;
+ AssumedRoleUser user;
+ Credentials creds;
+ // size of the request policy relative to the maximum policy size, in percent
+ uint64_t packedPolicySize;
+};
+
+/// Result of STSService::assumeRoleWithWebIdentity(): the plain AssumeRole
+/// result plus the aud / sub values of the request and the provider id.
+struct AssumeRoleWithWebIdentityResponse {
+ AssumeRoleResponse assumeRoleResp;
+ string aud;
+ string providerId;
+ string sub;
+};
+
+// GetSessionTokenResponse is the (return code, credentials) pair produced by
+// STSService::getSessionToken().
+// NOTE(review): the other two aliases name the structs declared above under
+// their own names and look redundant - confirm before removing.
+using AssumeRoleResponse = struct AssumeRoleResponse ;
+using GetSessionTokenResponse = std::tuple<int, Credentials>;
+using AssumeRoleWithWebIdentityResponse = struct AssumeRoleWithWebIdentityResponse;
+
+/// Implements the STS operations (AssumeRole, AssumeRoleWithWebIdentity,
+/// GetSessionToken) on behalf of one user / identity.
+///
+/// The raw pointer members default to nullptr so that a default constructed
+/// service does not carry indeterminate pointers.
+class STSService {
+  CephContext* cct = nullptr;
+  RGWRados *store = nullptr;
+  rgw_user user_id;
+  // role used by the assumeRole*() calls; assigned by getRoleInfo()
+  RGWRole role;
+  rgw::auth::Identity* identity = nullptr;
+  /// stores \a arn as the assumed role ARN of user_id
+  int storeARN(string& arn);
+public:
+  STSService() = default;
+  STSService(CephContext* cct, RGWRados *store, rgw_user user_id, rgw::auth::Identity* identity) : cct(cct), store(store), user_id(user_id), identity(identity) {}
+  /// loads the role named by \a arn and caches it; returns (error, role)
+  std::tuple<int, RGWRole> getRoleInfo(const string& arn);
+  AssumeRoleResponse assumeRole(AssumeRoleRequest& req);
+  GetSessionTokenResponse getSessionToken(GetSessionTokenRequest& req);
+  AssumeRoleWithWebIdentityResponse assumeRoleWithWebIdentity(AssumeRoleWithWebIdentityRequest& req);
+};
+}
+#endif /* CEPH_RGW_STS_H */
+
diff --git a/src/rgw/rgw_swift_auth.cc b/src/rgw/rgw_swift_auth.cc
new file mode 100644
index 00000000..eb0264a3
--- /dev/null
+++ b/src/rgw/rgw_swift_auth.cc
@@ -0,0 +1,759 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <array>
+#include <algorithm>
+
+#include <boost/utility/string_view.hpp>
+#include <boost/container/static_vector.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "rgw_swift_auth.h"
+#include "rgw_rest.h"
+
+#include "common/ceph_crypto.h"
+#include "common/Clock.h"
+
+#include "include/random.h"
+
+#include "rgw_client_io.h"
+#include "rgw_http_client.h"
+#include "include/str_list.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+#define DEFAULT_SWIFT_PREFIX "/swift"
+
+using namespace ceph::crypto;
+
+
+namespace rgw {
+namespace auth {
+namespace swift {
+
+/* TempURL: applier */
+/* Sets the Content-Disposition of a TempURL request: the "inline" query
+ * parameter forces "inline", a "filename" parameter forces an attachment with
+ * that (URL-encoded) name, and otherwise an attachment named after the object
+ * is installed as fallback only. */
+void TempURLApplier::modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const       /* in/out */
+{
+  /* Builds the attachment disposition for an URL-encoded file name. */
+  const auto attachment_for = [](const std::string& name) {
+    std::string encoded;
+    url_encode(name, encoded);
+    return "attachment; filename=\"" + encoded + "\"";
+  };
+
+  bool has_inline = false;
+  const std::string& requested_name = s->info.args.get("filename");
+  s->info.args.get("inline", &has_inline);
+
+  if (has_inline) {
+    s->content_disp.override = "inline";
+  } else if (requested_name.empty()) {
+    s->content_disp.fallback = attachment_for(s->object.name);
+  } else {
+    s->content_disp.override = attachment_for(requested_name);
+  }
+
+  ldpp_dout(dpp, 20) << "finished applying changes to req_state for TempURL: "
+                     << " content_disp override " << s->content_disp.override
+                     << " content_disp fallback " << s->content_disp.fallback
+                     << dendl;
+
+}
+
+/* TempURL: engine */
+/* The engine handles any request that carries at least one of the two
+ * TempURL query parameters. */
+bool TempURLEngine::is_applicable(const req_state* const s) const noexcept
+{
+  const auto& args = s->info.args;
+  if (args.exists("temp_url_sig")) {
+    return true;
+  }
+  return args.exists("temp_url_expires");
+}
+
+/* Looks up the owner of the bucket addressed by the request and stores the
+ * owner's user info in owner_info.
+ *
+ * Errors are reported by throwing an int: -EPERM when bucket or object name
+ * is missing or a user lookup fails, or the error returned by the bucket
+ * info lookup. */
+void TempURLEngine::get_owner_info(const DoutPrefixProvider* dpp, const req_state* const s,
+ RGWUserInfo& owner_info) const
+{
+ /* We cannot use req_state::bucket_name because it isn't available
+ * now. It will be initialized in RGWHandler_REST_SWIFT::postauth_init(). */
+ const string& bucket_name = s->init_state.url_bucket;
+
+ /* TempURL requires that bucket and object names are specified. */
+ if (bucket_name.empty() || s->object.empty()) {
+ throw -EPERM;
+ }
+
+ /* TempURL case is completely different than the Keystone auth - you may
+ * get account name only through extraction from URL. In turn, knowledge
+ * about account is necessary to obtain its bucket tenant. Without that,
+ * the access would be limited to accounts with empty tenant. */
+ string bucket_tenant;
+ if (!s->account_name.empty()) {
+ RGWUserInfo uinfo;
+ bool found = false;
+
+ const rgw_user uid(s->account_name);
+ if (uid.tenant.empty()) {
+ /* First try the user whose tenant carries the same name as its id. */
+ const rgw_user tenanted_uid(uid.id, uid.id);
+
+ if (rgw_get_user_info_by_uid(store, tenanted_uid, uinfo) >= 0) {
+ /* Succeeded. */
+ bucket_tenant = uinfo.user_id.tenant;
+ found = true;
+ }
+ }
+
+ /* Fall back to the account name as given; when the first lookup already
+ * succeeded the else branch just re-assigns the same tenant. */
+ if (!found && rgw_get_user_info_by_uid(store, uid, uinfo) < 0) {
+ throw -EPERM;
+ } else {
+ bucket_tenant = uinfo.user_id.tenant;
+ }
+ }
+
+ /* Need to get user info of bucket owner. */
+ RGWBucketInfo bucket_info;
+ int ret = store->get_bucket_info(*s->sysobj_ctx,
+ bucket_tenant, bucket_name,
+ bucket_info, nullptr);
+ if (ret < 0) {
+ throw ret;
+ }
+
+ ldpp_dout(dpp, 20) << "temp url user (bucket owner): " << bucket_info.owner
+ << dendl;
+
+ if (rgw_get_user_info_by_uid(store, bucket_info.owner, owner_info) < 0) {
+ throw -EPERM;
+ }
+}
+
+/* Normalizes the temp_url_expires value. Swift's TempURL lets clients send
+ * the expiration as an ISO8601-compatible string, yet the HMAC is always
+ * calculated over the plain UNIX timestamp. A value that parses as ISO8601 is
+ * therefore converted to its timestamp; anything else is returned as is. */
+std::string TempURLEngine::convert_from_iso8601(std::string expires) const
+{
+  struct tm parsed;
+  if (parse_iso8601(expires.c_str(), &parsed, nullptr, true)) {
+    return std::to_string(internal_timegm(&parsed));
+  }
+  return expires;
+}
+
+/* Tells whether a TempURL expiration timestamp (seconds since the epoch, as
+ * base-10 text) lies in the past.
+ *
+ * Returns true when the value cannot be parsed, is negative, or is not later
+ * than the current time; false otherwise. */
+bool TempURLEngine::is_expired(const std::string& expires) const
+{
+  string err;
+  const utime_t now = ceph_clock_now();
+  const auto expiration = strict_strtoll(expires.c_str(), 10, &err);
+  if (!err.empty()) {
+    dout(5) << "failed to parse temp_url_expires: " << err << dendl;
+    return true;
+  }
+
+  /* A negative timestamp must not be cast to unsigned: it would wrap around
+   * to a date in the far future and the URL would never expire. */
+  if (expiration < 0 ||
+      static_cast<uint64_t>(expiration) <= (uint64_t)now.sec()) {
+    dout(5) << "temp url expired: " << expiration << " <= " << now.sec() << dendl;
+    return true;
+  }
+
+  return false;
+}
+
+/* Returns true when the request environment contains one of the headers that
+ * must not accompany a TempURL request. */
+bool TempURLEngine::is_disallowed_header_present(const req_info& info) const
+{
+  static const std::array<const char*, 1> disallowed = {{
+    "HTTP_X_OBJECT_MANIFEST",
+  }};
+
+  for (const char* const header : disallowed) {
+    if (info.env->exists(header)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* Returns the part of a Swift user name that follows the first ':' (the
+ * subuser). A name without ':' is returned unchanged. */
+std::string extract_swift_subuser(const std::string& swift_user_name)
+{
+  const auto colon = swift_user_name.find(':');
+  return colon == std::string::npos ? swift_user_name
+                                    : swift_user_name.substr(colon + 1);
+}
+
+/* Computes the TempURL signature - HMAC-SHA1 over "method\nexpires\npath",
+ * rendered as hex - and compares it with the signature sent by the client.
+ * The result of the last calc() is kept inside the object, so is_equal_to()
+ * is only meaningful after calc() has been called. */
+class TempURLEngine::SignatureHelper
+{
+private:
+ /* Two hex digits per digest byte plus the terminating NUL. */
+ static constexpr uint32_t output_size =
+ CEPH_CRYPTO_HMACSHA1_DIGESTSIZE * 2 + 1;
+
+ unsigned char dest[CEPH_CRYPTO_HMACSHA1_DIGESTSIZE]; // 20
+ char dest_str[output_size];
+
+public:
+ SignatureHelper() = default;
+
+ /* Calculates the signature with the given key and returns it as a hex
+ * string that stays owned by this object (overwritten by the next call). */
+ const char* calc(const std::string& key,
+ const boost::string_view& method,
+ const boost::string_view& path,
+ const std::string& expires) {
+
+ using ceph::crypto::HMACSHA1;
+ using UCHARPTR = const unsigned char*;
+
+ HMACSHA1 hmac((UCHARPTR) key.c_str(), key.size());
+ hmac.Update((UCHARPTR) method.data(), method.size());
+ hmac.Update((UCHARPTR) "\n", 1);
+ hmac.Update((UCHARPTR) expires.c_str(), expires.size());
+ hmac.Update((UCHARPTR) "\n", 1);
+ hmac.Update((UCHARPTR) path.data(), path.size());
+ hmac.Final(dest);
+
+ buf_to_hex((UCHARPTR) dest, sizeof(dest), dest_str);
+
+ return dest_str;
+ }
+
+ /* Compares rhs with the signature of the last calc().
+ * NOTE(review): std::string::compare is presumably not constant-time; a
+ * timing-safe comparison may be preferable for signatures - confirm. */
+ bool is_equal_to(const std::string& rhs) const {
+ /* never allow out-of-range exception */
+ if (rhs.size() < (output_size - 1)) {
+ return false;
+ }
+ return rhs.compare(0 /* pos */, output_size, dest_str) == 0;
+ }
+
+}; /* TempURLEngine::SignatureHelper */
+
+/* SignatureHelper variant that also supports prefix-based TempURLs. Without
+ * a prefix it behaves like the base class. With a prefix the signature is
+ * calculated over "prefix:" + container URI + prefix instead of the request
+ * path, and a matching signature additionally requires the request URI to
+ * start with container URI + prefix.
+ *
+ * The object only keeps views of / references to the strings passed to the
+ * constructor, so those have to outlive it. */
+class TempURLEngine::PrefixableSignatureHelper
+ : private TempURLEngine::SignatureHelper {
+ using base_t = SignatureHelper;
+
+ const boost::string_view decoded_uri;
+ const boost::string_view object_name;
+ /* decoded_uri with the trailing object name cut off */
+ boost::string_view no_obj_uri;
+
+ const boost::optional<const std::string&> prefix;
+
+public:
+ PrefixableSignatureHelper(const std::string& _decoded_uri,
+ const std::string& object_name,
+ const boost::optional<const std::string&> prefix)
+ : decoded_uri(_decoded_uri),
+ object_name(object_name),
+ prefix(prefix) {
+ /* Transform: v1/acct/cont/obj - > v1/acct/cont/
+ *
+ * NOTE(rzarzynski): we really want to substr() on boost::string_view,
+ * not std::string. Otherwise we would end with no_obj_uri referencing
+ * a temporary. */
+ /* NOTE(review): assumes the URI is at least as long as the object name;
+ * otherwise the unsigned subtraction would wrap - confirm with callers. */
+ no_obj_uri = \
+ decoded_uri.substr(0, decoded_uri.length() - object_name.length());
+ }
+
+ /* Same contract as SignatureHelper::calc(); with a prefix the given path
+ * is ignored in favour of the prefixed container path. */
+ const char* calc(const std::string& key,
+ const boost::string_view& method,
+ const boost::string_view& path,
+ const std::string& expires) {
+ if (!prefix) {
+ return base_t::calc(key, method, path, expires);
+ } else {
+ const auto prefixed_path = \
+ string_cat_reserve("prefix:", no_obj_uri, *prefix);
+ return base_t::calc(key, method, prefixed_path, expires);
+ }
+ }
+
+ /* True when rhs matches the last calculated signature and, for prefixed
+ * URLs, the request URI actually lies under the signed prefix. */
+ bool is_equal_to(const std::string& rhs) const {
+ bool is_auth_ok = base_t::is_equal_to(rhs);
+
+ if (prefix && is_auth_ok) {
+ const auto prefix_uri = string_cat_reserve(no_obj_uri, *prefix);
+ is_auth_ok = boost::algorithm::starts_with(decoded_uri, prefix_uri);
+ }
+
+ return is_auth_ok;
+ }
+}; /* TempURLEngine::PrefixableSignatureHelper */
+
+/* Authenticates a request by its TempURL signature.
+ *
+ * The request is denied when it carries no usable TempURL parameters and
+ * rejected when the bucket owner cannot be determined, the owner has no
+ * TempURL keys, the link is expired (-EPERM), a disallowed header is present
+ * (-EINVAL) or no key / path / method combination reproduces the signature.
+ * On a match the TempURL applier for the bucket owner is granted. */
+TempURLEngine::result_t
+TempURLEngine::authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const
+{
+ if (! is_applicable(s)) {
+ return result_t::deny();
+ }
+
+ /* NOTE(rzarzynski): RGWHTTPArgs::get(), in contrast to RGWEnv::get(),
+ * never returns nullptr. If the requested parameter is absent, we will
+ * get the empty string. */
+ const std::string& temp_url_sig = s->info.args.get("temp_url_sig");
+ const std::string& temp_url_expires = \
+ convert_from_iso8601(s->info.args.get("temp_url_expires"));
+
+ if (temp_url_sig.empty() || temp_url_expires.empty()) {
+ return result_t::deny();
+ }
+
+ /* Though, for prefixed tempurls we need to differentiate between empty
+ * prefix and lack of prefix. Empty prefix means allowance for whole
+ * container. */
+ const boost::optional<const std::string&> temp_url_prefix = \
+ s->info.args.get_optional("temp_url_prefix");
+
+ RGWUserInfo owner_info;
+ try {
+ get_owner_info(dpp, s, owner_info);
+ } catch (...) {
+ ldpp_dout(dpp, 5) << "cannot get user_info of account's owner" << dendl;
+ return result_t::reject();
+ }
+
+ if (owner_info.temp_url_keys.empty()) {
+ ldpp_dout(dpp, 5) << "user does not have temp url key set, aborting" << dendl;
+ return result_t::reject();
+ }
+
+ if (is_expired(temp_url_expires)) {
+ ldpp_dout(dpp, 5) << "temp url link expired" << dendl;
+ return result_t::reject(-EPERM);
+ }
+
+ if (is_disallowed_header_present(s->info)) {
+ ldout(cct, 5) << "temp url rejected due to disallowed header" << dendl;
+ return result_t::reject(-EINVAL);
+ }
+
+ /* We need to verify two paths because of compliance with Swift, Tempest
+ * and old versions of RadosGW. The second item will have the prefix
+ * of Swift API entry point removed. */
+
+ /* XXX can we search this ONCE? */
+ /* pos is the length of the configured prefix without trailing slashes; the
+ * extra + 1 below presumably skips the leading '/' of the URI.
+ * NOTE(review): boost::string_view::substr throws std::out_of_range when
+ * the URI is shorter than pos + 1 - confirm this cannot happen here. */
+ const size_t pos = g_conf()->rgw_swift_url_prefix.find_last_not_of('/') + 1;
+ const boost::string_view ref_uri = s->decoded_uri;
+ const std::array<boost::string_view, 2> allowed_paths = {
+ ref_uri,
+ ref_uri.substr(pos + 1)
+ };
+
+ /* Account owner calculates the signature also against a HTTP method. */
+ boost::container::static_vector<boost::string_view, 3> allowed_methods;
+ if (strcmp("HEAD", s->info.method) == 0) {
+ /* HEAD requests are specially handled. */
+ /* TODO: after getting a newer boost (with static_vector supporting
+ * initializers lists), get back to the good notation:
+ * allowed_methods = {"HEAD", "GET", "PUT" };
+ * Just for now let's use emplace_back to construct the vector. */
+ allowed_methods.emplace_back("HEAD");
+ allowed_methods.emplace_back("GET");
+ allowed_methods.emplace_back("PUT");
+ } else if (strlen(s->info.method) > 0) {
+ allowed_methods.emplace_back(s->info.method);
+ }
+
+ /* Need to try each combination of keys, allowed path and methods. */
+ PrefixableSignatureHelper sig_helper {
+ s->decoded_uri,
+ s->object.name,
+ temp_url_prefix
+ };
+
+ for (const auto& kv : owner_info.temp_url_keys) {
+ const int temp_url_key_num = kv.first;
+ const string& temp_url_key = kv.second;
+
+ if (temp_url_key.empty()) {
+ continue;
+ }
+
+ for (const auto& path : allowed_paths) {
+ for (const auto& method : allowed_methods) {
+ const char* const local_sig = sig_helper.calc(temp_url_key, method,
+ path, temp_url_expires);
+
+ ldpp_dout(dpp, 20) << "temp url signature [" << temp_url_key_num
+ << "] (calculated): " << local_sig
+ << dendl;
+
+ if (sig_helper.is_equal_to(temp_url_sig)) {
+ auto apl = apl_factory->create_apl_turl(cct, s, owner_info);
+ return result_t::grant(std::move(apl));
+ } else {
+ ldpp_dout(dpp, 5) << "temp url signature mismatch: " << local_sig
+ << " != " << temp_url_sig << dendl;
+ }
+ }
+ }
+ }
+
+ return result_t::reject();
+}
+
+
+/* External token */
+/* The external engine needs both a token and a configured
+ * rgw_swift_auth_url to validate it against. */
+bool ExternalTokenEngine::is_applicable(const std::string& token) const noexcept
+{
+  return !token.empty() && !g_conf()->rgw_swift_auth_url.empty();
+}
+
+/* Validates a token against the external Swift auth service configured in
+ * rgw_swift_auth_url.
+ *
+ * The service is asked via GET <auth_url>/token/<token>. The first entry of
+ * the X-Auth-Groups response header names the Swift user, which is then
+ * mapped to a local user.
+ *
+ * Returns deny() when the engine is not applicable, deny(-EPERM) when the
+ * response names no user, and grants a local applier otherwise. Throws the
+ * (negative) error code when the HTTP request or the user mapping fails. */
+ExternalTokenEngine::result_t
+ExternalTokenEngine::authenticate(const DoutPrefixProvider* dpp,
+                                  const std::string& token,
+                                  const req_state* const s) const
+{
+  if (! is_applicable(token)) {
+    return result_t::deny();
+  }
+
+  /* The URL is assembled in a std::string. The token comes straight from
+   * the client, so sizing a variable-length stack array by it (as the
+   * previous sprintf-based code did) lets a client control stack usage. */
+  std::string validation_url = g_conf()->rgw_swift_auth_url;
+  if (validation_url.back() != '/') {
+    validation_url.append("/");
+  }
+  validation_url.append("token/");
+  validation_url.append(token);
+
+  RGWHTTPHeadersCollector validator(cct, "GET", validation_url.c_str(),
+                                    { "X-Auth-Groups", "X-Auth-Ttl" });
+
+  ldpp_dout(dpp, 10) << "rgw_swift_validate_token url=" << validation_url << dendl;
+
+  int ret = validator.process();
+  if (ret < 0) {
+    throw ret;
+  }
+
+  std::string swift_user;
+  try {
+    std::vector<std::string> swift_groups;
+    get_str_vec(validator.get_header_value("X-Auth-Groups"),
+                ",", swift_groups);
+
+    if (0 == swift_groups.size()) {
+      return result_t::deny(-EPERM);
+    } else {
+      swift_user = std::move(swift_groups[0]);
+    }
+  } catch (const std::out_of_range&) {
+    /* The X-Auth-Groups header isn't present in the response. */
+    return result_t::deny(-EPERM);
+  }
+
+  if (swift_user.empty()) {
+    return result_t::deny(-EPERM);
+  }
+
+  ldpp_dout(dpp, 10) << "swift user=" << swift_user << dendl;
+
+  RGWUserInfo tmp_uinfo;
+  ret = rgw_get_user_info_by_swift(store, swift_user, tmp_uinfo);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "NOTICE: couldn't map swift user" << dendl;
+    throw ret;
+  }
+
+  auto apl = apl_factory->create_apl_local(cct, s, tmp_uinfo,
+                                           extract_swift_subuser(swift_user),
+                                           boost::none);
+  return result_t::grant(std::move(apl));
+}
+
+/* Builds a signed Swift token in bl: the encoded user name, nonce and
+ * expiration followed by their HMAC-SHA1. The HMAC key is derived from the
+ * user's key by OR-folding its bytes into a digest-sized buffer. Always
+ * returns 0. */
+static int build_token(const string& swift_user,
+ const string& key,
+ const uint64_t nonce,
+ const utime_t& expiration,
+ bufferlist& bl)
+{
+ using ceph::encode;
+ encode(swift_user, bl);
+ encode(nonce, bl);
+ encode(expiration, bl);
+
+ /* receives the HMAC that is appended to the token at the end */
+ bufferptr p(CEPH_CRYPTO_HMACSHA1_DIGESTSIZE);
+
+ /* hex dump of the unsigned token, used for the debug message only */
+ char buf[bl.length() * 2 + 1];
+ buf_to_hex((const unsigned char *)bl.c_str(), bl.length(), buf);
+ dout(20) << "build_token token=" << buf << dendl;
+
+ char k[CEPH_CRYPTO_HMACSHA1_DIGESTSIZE];
+ // FIPS zeroization audit 20191116: this memset is not intended to
+ // wipe out a secret after use.
+ memset(k, 0, sizeof(k));
+ /* fold the key into k: byte i is OR-ed into slot i modulo the digest size */
+ const char *s = key.c_str();
+ for (int i = 0; i < (int)key.length(); i++, s++) {
+ k[i % CEPH_CRYPTO_HMACSHA1_DIGESTSIZE] |= *s;
+ }
+ calc_hmac_sha1(k, sizeof(k), bl.c_str(), bl.length(), p.c_str());
+ ::ceph::crypto::zeroize_for_security(k, sizeof(k));
+
+ bl.append(p);
+
+ return 0;
+
+}
+
+static int encode_token(CephContext *cct, string& swift_user, string& key,
+                        bufferlist& bl)
+{
+  /* Issue a fresh signed token into `bl`: a random 64-bit nonce plus an
+   * expiry of now + rgw_swift_token_expiration, signed by build_token(). */
+  const uint64_t token_nonce = ceph::util::generate_random_number<uint64_t>();
+  utime_t valid_until = ceph_clock_now();
+  valid_until += cct->_conf->rgw_swift_token_expiration;
+  return build_token(swift_user, key, token_nonce, valid_until, bl);
+}
+
+
+/* AUTH_rgwtk (signed token): engine */
+bool SignedTokenEngine::is_applicable(const std::string& token) const noexcept
+{
+  /* Signed tokens are recognised purely by their "AUTH_rgwtk" prefix; an
+   * empty token can never match, so it is rejected up front. */
+  static constexpr size_t prefix_len = sizeof("AUTH_rgwtk") - 1;
+
+  return ! token.empty() && token.compare(0, prefix_len, "AUTH_rgwtk") == 0;
+}
+
+SignedTokenEngine::result_t
+SignedTokenEngine::authenticate(const DoutPrefixProvider* dpp,
+                                const std::string& token,
+                                const req_state* const s) const
+{
+  /* Verify an "AUTH_rgwtk<hex>" token: decode (user, nonce, expiration),
+   * reject expired tokens, rebuild the token with the user's Swift key and
+   * require an exact match. Denies with -EPERM on non-applicable, expired or
+   * mismatching tokens; throws a negative errno on malformed input. */
+  if (! is_applicable(token)) {
+    return result_t::deny(-EPERM);
+  }
+
+  /* Effective token string is the part after the prefix. */
+  const std::string etoken = token.substr(strlen("AUTH_rgwtk"));
+  const size_t etoken_len = etoken.length();
+
+  if (etoken_len & 1) {
+    ldpp_dout(dpp, 0) << "NOTICE: failed to verify token: odd token length="
+                      << etoken_len << dendl;
+    throw -EINVAL;
+  }
+
+  ceph::bufferptr p(etoken_len/2);
+  int ret = hex_to_buf(etoken.c_str(), p.c_str(), etoken_len);
+  if (ret < 0) {
+    throw ret;
+  }
+
+  ceph::bufferlist tok_bl;
+  tok_bl.append(p);
+
+  uint64_t nonce;
+  utime_t expiration;
+  std::string swift_user;
+  try {
+    auto iter = tok_bl.cbegin();
+    using ceph::decode;
+    decode(swift_user, iter);
+    decode(nonce, iter);
+    decode(expiration, iter);
+  } catch (buffer::error& err) {
+    ldpp_dout(dpp, 0) << "NOTICE: failed to decode token" << dendl;
+    throw -EINVAL;
+  }
+
+  const utime_t now = ceph_clock_now();
+  if (expiration < now) {
+    ldpp_dout(dpp, 0) << "NOTICE: old timed out token was used now=" << now
+                      << " token.expiration=" << expiration << dendl;
+    return result_t::deny(-EPERM);
+  }
+
+  RGWUserInfo user_info;
+  ret = rgw_get_user_info_by_swift(store, swift_user, user_info);
+  if (ret < 0) {
+    throw ret;
+  }
+  ldpp_dout(dpp, 10) << "swift_user=" << swift_user << dendl;
+
+  const auto siter = user_info.swift_keys.find(swift_user);
+  if (siter == std::end(user_info.swift_keys)) {
+    return result_t::deny(-EPERM);
+  }
+  const auto& swift_key = siter->second;  /* bind, don't copy the secret key */
+  bufferlist local_tok_bl;
+  ret = build_token(swift_user, swift_key.key, nonce, expiration, local_tok_bl);
+  if (ret < 0) {
+    throw ret;
+  }
+
+  if (local_tok_bl.length() != tok_bl.length()) {
+    ldpp_dout(dpp, 0) << "NOTICE: tokens length mismatch:"
+                      << " tok_bl.length()=" << tok_bl.length()
+                      << " local_tok_bl.length()=" << local_tok_bl.length()
+                      << dendl;
+    return result_t::deny(-EPERM);
+  }
+
+  /* Compare every byte with no early exit so timing does not reveal how much
+   * of a forged signature was right. The validly signed local token is not logged. */
+  const char* const expected = local_tok_bl.c_str();
+  const char* const received = tok_bl.c_str();
+  unsigned char diff = 0;
+  for (unsigned i = 0; i < local_tok_bl.length(); ++i) {
+    diff |= static_cast<unsigned char>(expected[i] ^ received[i]);
+  }
+  if (diff != 0) {
+    ldpp_dout(dpp, 0) << "NOTICE: tokens mismatch" << dendl;
+    return result_t::deny(-EPERM);
+  }
+
+  auto apl = apl_factory->create_apl_local(cct, s, user_info,
+                                           extract_swift_subuser(swift_user),
+                                           boost::none);
+  return result_t::grant(std::move(apl));
+}
+
+} /* namespace swift */
+} /* namespace auth */
+} /* namespace rgw */
+
+
+void RGW_SWIFT_Auth_Get::execute()
+{
+  /* Swift v1 auth endpoint: check X-Auth-User / X-Auth-Key against the
+   * stored Swift key and, on success, emit X-Storage-Url plus a freshly
+   * signed AUTH_rgwtk token (X-Storage-Token and X-Auth-Token). */
+  int ret = -EPERM;
+
+  const char *key = s->info.env->get("HTTP_X_AUTH_KEY");
+  const char *user = s->info.env->get("HTTP_X_AUTH_USER");
+
+  s->prot_flags |= RGW_REST_SWIFT;
+
+  string user_str;
+  RGWUserInfo info;
+  bufferlist bl;
+  RGWAccessKey *swift_key;
+  map<string, RGWAccessKey>::iterator siter;
+
+  string swift_url = g_conf()->rgw_swift_url;
+  string swift_prefix = g_conf()->rgw_swift_url_prefix;
+  string tenant_path;
+
+  /*
+   * We did not allow an empty Swift prefix before, but we want it now.
+   * So, we take rgw_swift_url_prefix = "/" to yield the empty prefix.
+   * The rgw_swift_url_prefix = "" is the default and yields "/swift"
+   * in a backwards-compatible way.
+   */
+  if (swift_prefix.size() == 0) {
+    swift_prefix = DEFAULT_SWIFT_PREFIX;
+  } else if (swift_prefix == "/") {
+    swift_prefix.clear();
+  } else if (swift_prefix[0] != '/') {
+    swift_prefix.insert(0, "/");
+  }
+
+  if (swift_url.size() == 0) {
+    bool add_port = false;
+    const char *server_port = s->info.env->get("SERVER_PORT_SECURE");
+    const char *protocol;
+    if (server_port) {
+      add_port = (strcmp(server_port, "443") != 0);
+      protocol = "https";
+    } else {
+      /* SERVER_PORT may be unset as well; strcmp() on a null pointer is
+       * undefined behaviour, so an absent port simply adds no ":port". */
+      server_port = s->info.env->get("SERVER_PORT");
+      add_port = (server_port != nullptr && strcmp(server_port, "80") != 0);
+      protocol = "http";
+    }
+    const char *host = s->info.env->get("HTTP_HOST");
+    if (!host) {
+      dout(0) << "NOTICE: server is misconfigured, missing rgw_swift_url_prefix or rgw_swift_url, HTTP_HOST is not set" << dendl;
+      ret = -EINVAL;
+      goto done;
+    }
+    swift_url = protocol;
+    swift_url.append("://");
+    swift_url.append(host);
+    if (add_port && !strchr(host, ':')) {
+      swift_url.append(":");
+      swift_url.append(server_port);
+    }
+  }
+
+  if (!key || !user)
+    goto done;
+
+  user_str = user;
+
+  if ((ret = rgw_get_user_info_by_swift(store, user_str, info)) < 0) {
+    ret = -EACCES;
+    goto done;
+  }
+
+  siter = info.swift_keys.find(user_str);
+  if (siter == info.swift_keys.end()) {
+    ret = -EPERM;
+    goto done;
+  }
+  swift_key = &siter->second;
+
+  if (swift_key->key.compare(key) != 0) {
+    dout(0) << "NOTICE: RGW_SWIFT_Auth_Get::execute(): bad swift key" << dendl;
+    ret = -EPERM;
+    goto done;
+  }
+
+  if (!g_conf()->rgw_swift_tenant_name.empty()) {
+    tenant_path = "/AUTH_";
+    tenant_path.append(g_conf()->rgw_swift_tenant_name);
+  } else if (g_conf()->rgw_swift_account_in_url) {
+    tenant_path = "/AUTH_";
+    tenant_path.append(info.user_id.to_str());
+  }
+
+  dump_header(s, "X-Storage-Url", swift_url + swift_prefix + "/v1" +
+              tenant_path);
+
+  using rgw::auth::swift::encode_token;
+  if ((ret = encode_token(s->cct, swift_key->id, swift_key->key, bl)) < 0)
+    goto done;
+
+  {
+    static constexpr size_t PREFIX_LEN = sizeof("AUTH_rgwtk") - 1;
+    char token_val[PREFIX_LEN + bl.length() * 2 + 1];
+    snprintf(token_val, PREFIX_LEN + 1, "AUTH_rgwtk");
+    buf_to_hex((const unsigned char *)bl.c_str(), bl.length(),
+               token_val + PREFIX_LEN);
+    dump_header(s, "X-Storage-Token", token_val);
+    dump_header(s, "X-Auth-Token", token_val);
+  }
+
+  ret = STATUS_NO_CONTENT;
+
+done:
+  set_req_state_err(s, ret);
+  dump_errno(s);
+  end_header(s);
+}
+
+int RGWHandler_SWIFT_Auth::init(RGWRados *store, struct req_state *state,
+ rgw::io::BasicClient *cio)
+{ // Tags the request with the "swift-auth" dialect and a JSON formatter, then runs the base-class init.
+ state->dialect = "swift-auth";
+ state->formatter = new JSONFormatter; // raw allocation; nothing in this function releases it
+ state->format = RGW_FORMAT_JSON;
+
+ return RGWHandler::init(store, state, cio); // result of the base init is returned unchanged
+}
+
+int RGWHandler_SWIFT_Auth::authorize(const DoutPrefixProvider *dpp)
+{ // Performs no check at this stage and always reports success; `dpp` is unused.
+ return 0;
+}
+
+RGWOp *RGWHandler_SWIFT_Auth::op_get()
+{ // GET requests are served by a newly allocated RGW_SWIFT_Auth_Get, returned as a raw pointer.
+ return new RGW_SWIFT_Auth_Get;
+}
+
diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h
new file mode 100644
index 00000000..33d1cb22
--- /dev/null
+++ b/src/rgw/rgw_swift_auth.h
@@ -0,0 +1,341 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SWIFT_AUTH_H
+#define CEPH_RGW_SWIFT_AUTH_H
+
+#include "rgw_common.h"
+#include "rgw_user.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_auth.h"
+#include "rgw_auth_keystone.h"
+#include "rgw_auth_filters.h"
+
+#define RGW_SWIFT_TOKEN_EXPIRATION (15 * 60)
+
+namespace rgw {
+namespace auth {
+namespace swift {
+
+/* TempURL: applier. */
+class TempURLApplier : public rgw::auth::LocalApplier { // LocalApplier variant produced for TempURL requests
+public:
+ TempURLApplier(CephContext* const cct,
+ const RGWUserInfo& user_info)
+ : LocalApplier(cct, user_info, LocalApplier::NO_SUBUSER, boost::none) { // no subuser, no permission mask
+ };
+
+ void modify_request_state(const DoutPrefixProvider* dpp, req_state * s) const override; /* in/out */
+
+ struct Factory { // abstract factory interface for TempURL appliers
+ virtual ~Factory() {}
+ virtual aplptr_t create_apl_turl(CephContext* cct,
+ const req_state* s,
+ const RGWUserInfo& user_info) const = 0;
+ };
+};
+
+/* TempURL: engine */
+class TempURLEngine : public rgw::auth::Engine { // authentication engine for Swift TempURL requests
+ using result_t = rgw::auth::Engine::result_t;
+
+ CephContext* const cct;
+ /* const */ RGWRados* const store;
+ const TempURLApplier::Factory* const apl_factory; // stored as given; this class never deletes it
+
+ /* Helper methods. */
+ void get_owner_info(const DoutPrefixProvider* dpp,
+ const req_state* s,
+ RGWUserInfo& owner_info) const; // result is delivered through owner_info
+ std::string convert_from_iso8601(std::string expires) const;
+ bool is_applicable(const req_state* s) const noexcept;
+ bool is_expired(const std::string& expires) const;
+ bool is_disallowed_header_present(const req_info& info) const;
+
+ class SignatureHelper; // declared only; defined outside this header section
+ class PrefixableSignatureHelper;
+
+public:
+ TempURLEngine(CephContext* const cct,
+ /*const*/ RGWRados* const store,
+ const TempURLApplier::Factory* const apl_factory)
+ : cct(cct),
+ store(store),
+ apl_factory(apl_factory) {
+ }
+
+ /* Interface implementations. */
+ const char* get_name() const noexcept override {
+ return "rgw::auth::swift::TempURLEngine";
+ }
+
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const override;
+};
+
+
+/* AUTH_rgwtk */
+class SignedTokenEngine : public rgw::auth::Engine { // engine for locally signed "AUTH_rgwtk" tokens
+ using result_t = rgw::auth::Engine::result_t;
+
+ CephContext* const cct;
+ RGWRados* const store;
+ const rgw::auth::TokenExtractor* const extractor; // pulls the token string out of the request
+ const rgw::auth::LocalApplier::Factory* const apl_factory; // builds the applier once a token is accepted
+
+ bool is_applicable(const std::string& token) const noexcept;
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const std::string& token,
+ const req_state* s) const; // token-level worker behind the public overload
+
+public:
+ SignedTokenEngine(CephContext* const cct,
+ /* const */RGWRados* const store,
+ const rgw::auth::TokenExtractor* const extractor,
+ const rgw::auth::LocalApplier::Factory* const apl_factory)
+ : cct(cct),
+ store(store),
+ extractor(extractor),
+ apl_factory(apl_factory) {
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::swift::SignedTokenEngine";
+ }
+
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const override {
+ return authenticate(dpp, extractor->get_token(s), s); // extract the token, then delegate to the private overload
+ }
+};
+
+
+/* External token */
+class ExternalTokenEngine : public rgw::auth::Engine { // engine that validates tokens against an external auth URL
+ using result_t = rgw::auth::Engine::result_t;
+
+ CephContext* const cct;
+ RGWRados* const store;
+ const rgw::auth::TokenExtractor* const extractor; // pulls the token string out of the request
+ const rgw::auth::LocalApplier::Factory* const apl_factory; // builds the applier once a token is accepted
+
+ bool is_applicable(const std::string& token) const noexcept;
+ result_t authenticate(const DoutPrefixProvider* dpp,
+ const std::string& token,
+ const req_state* s) const; // token-level worker behind the public overload
+
+public:
+ ExternalTokenEngine(CephContext* const cct,
+ /* const */RGWRados* const store,
+ const rgw::auth::TokenExtractor* const extractor,
+ const rgw::auth::LocalApplier::Factory* const apl_factory)
+ : cct(cct),
+ store(store),
+ extractor(extractor),
+ apl_factory(apl_factory) {
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::swift::ExternalTokenEngine";
+ }
+
+ result_t authenticate(const DoutPrefixProvider* dpp, const req_state* const s) const override {
+ return authenticate(dpp, extractor->get_token(s), s); // extract the token, then delegate to the private overload
+ }
+};
+
+/* SwiftAnonymous: applier. */
+class SwiftAnonymousApplier : public rgw::auth::LocalApplier {
+  /* Applier for anonymous Swift requests: never an admin, and an owner only
+   * of what belongs to the anonymous user id (RGW_USER_ANON_ID). */
+public:
+  SwiftAnonymousApplier(CephContext* const cct, const RGWUserInfo& user_info)
+    : LocalApplier(cct, user_info, LocalApplier::NO_SUBUSER, boost::none) {}
+  bool is_admin_of(const rgw_user& uid) const override {return false;}
+  bool is_owner_of(const rgw_user& uid) const override {return uid.id.compare(RGW_USER_ANON_ID) == 0;}
+};
+
+class SwiftAnonymousEngine : public rgw::auth::AnonymousEngine { // anonymous engine that applies only to token-less requests
+ const rgw::auth::TokenExtractor* const extractor;
+
+ bool is_applicable(const req_state* s) const noexcept override {
+ return extractor->get_token(s).empty(); // a request carrying any token is left to the other engines
+ }
+
+public:
+ SwiftAnonymousEngine(CephContext* const cct,
+ const SwiftAnonymousApplier::Factory* const apl_factory,
+ const rgw::auth::TokenExtractor* const extractor)
+ : AnonymousEngine(cct, apl_factory),
+ extractor(extractor) {
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::swift::SwiftAnonymousEngine";
+ }
+};
+
+
+class DefaultStrategy : public rgw::auth::Strategy, // Swift auth strategy; also acts as token extractor and applier factory for its engines
+ public rgw::auth::TokenExtractor,
+ public rgw::auth::RemoteApplier::Factory,
+ public rgw::auth::LocalApplier::Factory,
+ public rgw::auth::swift::TempURLApplier::Factory {
+ RGWRados* const store;
+ ImplicitTenants& implicit_tenant_context;
+
+ /* The engines. */
+ const rgw::auth::swift::TempURLEngine tempurl_engine;
+ const rgw::auth::swift::SignedTokenEngine signed_engine;
+ boost::optional <const rgw::auth::keystone::TokenEngine> keystone_engine; // emplaced only when rgw_keystone_url is set
+ const rgw::auth::swift::ExternalTokenEngine external_engine;
+ const rgw::auth::swift::SwiftAnonymousEngine anon_engine;
+
+ using keystone_config_t = rgw::keystone::CephCtxConfig;
+ using keystone_cache_t = rgw::keystone::TokenCache;
+ using aplptr_t = rgw::auth::IdentityApplier::aplptr_t;
+ using acl_strategy_t = rgw::auth::RemoteApplier::acl_strategy_t;
+
+ /* The method implements TokenExtractor for X-Auth-Token present in req_state. */
+ std::string get_token(const req_state* const s) const override {
+ /* Returning a reference here would end in GCC complaining about a reference
+ * to temporary. */
+ return s->info.env->get("HTTP_X_AUTH_TOKEN", ""); // empty string when the header is absent
+ }
+
+ aplptr_t create_apl_remote(CephContext* const cct,
+ const req_state* const s,
+ acl_strategy_t&& extra_acl_strategy,
+ const rgw::auth::RemoteApplier::AuthInfo &info) const override {
+ auto apl = \
+ rgw::auth::add_3rdparty(store, rgw_user(s->account_name),
+ rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::RemoteApplier(cct, store, std::move(extra_acl_strategy), info,
+ implicit_tenant_context,
+ rgw::auth::ImplicitTenants::IMPLICIT_TENANTS_SWIFT)));
+ /* TODO(rzarzynski): replace with static_ptr. */
+ return aplptr_t(new decltype(apl)(std::move(apl))); // move the decorated applier onto the heap
+ }
+
+ aplptr_t create_apl_local(CephContext* const cct,
+ const req_state* const s,
+ const RGWUserInfo& user_info,
+ const std::string& subuser,
+ const boost::optional<uint32_t>& perm_mask) const override {
+ auto apl = \
+ rgw::auth::add_3rdparty(store, rgw_user(s->account_name),
+ rgw::auth::add_sysreq(cct, store, s,
+ rgw::auth::LocalApplier(cct, user_info, subuser, perm_mask)));
+ /* TODO(rzarzynski): replace with static_ptr. */
+ return aplptr_t(new decltype(apl)(std::move(apl))); // move the decorated applier onto the heap
+ }
+
+ aplptr_t create_apl_turl(CephContext* const cct,
+ const req_state* const s,
+ const RGWUserInfo& user_info) const override {
+ /* TempURL doesn't need any user account override. It's a Swift-specific
+ * mechanism that requires account name internally, so there is no
+ * business with delegating the responsibility outside. */
+ return aplptr_t(new rgw::auth::swift::TempURLApplier(cct, user_info));
+ }
+
+public:
+ DefaultStrategy(CephContext* const cct,
+ ImplicitTenants& implicit_tenant_context,
+ RGWRados* const store)
+ : store(store),
+ implicit_tenant_context(implicit_tenant_context),
+ tempurl_engine(cct,
+ store,
+ static_cast<rgw::auth::swift::TempURLApplier::Factory*>(this)),
+ signed_engine(cct,
+ store,
+ static_cast<rgw::auth::TokenExtractor*>(this),
+ static_cast<rgw::auth::LocalApplier::Factory*>(this)),
+ external_engine(cct,
+ store,
+ static_cast<rgw::auth::TokenExtractor*>(this),
+ static_cast<rgw::auth::LocalApplier::Factory*>(this)),
+ anon_engine(cct,
+ static_cast<SwiftAnonymousApplier::Factory*>(this),
+ static_cast<rgw::auth::TokenExtractor*>(this)) {
+ /* When the constructor's body is being executed, all member engines
+ * should be initialized. Thus, we can safely add them. */
+ using Control = rgw::auth::Strategy::Control;
+
+ add_engine(Control::SUFFICIENT, tempurl_engine); // engines are registered in this order
+ add_engine(Control::SUFFICIENT, signed_engine);
+
+ /* The auth strategy is responsible for deciding whether a particular
+ * engine is disabled or not. */
+ if (! cct->_conf->rgw_keystone_url.empty()) {
+ keystone_engine.emplace(cct,
+ static_cast<rgw::auth::TokenExtractor*>(this),
+ static_cast<rgw::auth::RemoteApplier::Factory*>(this),
+ keystone_config_t::get_instance(),
+ keystone_cache_t::get_instance<keystone_config_t>());
+
+ add_engine(Control::SUFFICIENT, *keystone_engine);
+ }
+ if (! cct->_conf->rgw_swift_auth_url.empty()) { // external engine is registered only when an auth URL is configured
+ add_engine(Control::SUFFICIENT, external_engine);
+ }
+
+ add_engine(Control::SUFFICIENT, anon_engine); // anonymous engine is added last
+ }
+
+ const char* get_name() const noexcept override {
+ return "rgw::auth::swift::DefaultStrategy";
+ }
+};
+
+} /* namespace swift */
+} /* namespace auth */
+} /* namespace rgw */
+
+
+class RGW_SWIFT_Auth_Get : public RGWOp { // operation behind GET on the Swift auth endpoint
+public:
+ RGW_SWIFT_Auth_Get() {}
+ ~RGW_SWIFT_Auth_Get() override {}
+
+ int verify_permission() override { return 0; } // no permission check is made here
+ void execute() override;
+ const char* name() const override { return "swift_auth_get"; }
+ dmc::client_id dmclock_client() override { return dmc::client_id::auth; }
+};
+
+class RGWHandler_SWIFT_Auth : public RGWHandler_REST { // REST handler for the Swift auth endpoint
+public:
+ RGWHandler_SWIFT_Auth() {}
+ ~RGWHandler_SWIFT_Auth() override {}
+ RGWOp *op_get() override;
+
+ int init(RGWRados *store, struct req_state *state, rgw::io::BasicClient *cio) override;
+ int authorize(const DoutPrefixProvider *dpp) override;
+ int postauth_init() override { return 0; } // nothing to do after authorization
+ int read_permissions(RGWOp *op) override { return 0; } // no permissions are read for this handler
+
+ virtual RGWAccessControlPolicy *alloc_policy() { return NULL; } // this handler never allocates a policy object
+ virtual void free_policy(RGWAccessControlPolicy *policy) {} // no-op counterpart of alloc_policy()
+};
+
+class RGWRESTMgr_SWIFT_Auth : public RGWRESTMgr { // REST manager that routes every request to RGWHandler_SWIFT_Auth
+public:
+ RGWRESTMgr_SWIFT_Auth() = default;
+ ~RGWRESTMgr_SWIFT_Auth() override = default;
+
+ RGWRESTMgr *get_resource_mgr(struct req_state* const s,
+ const std::string& uri,
+ std::string* const out_uri) override {
+ return this; // no sub-resources: all arguments are ignored and out_uri is left untouched
+ }
+
+ RGWHandler_REST* get_handler(struct req_state*,
+ const rgw::auth::StrategyRegistry&,
+ const std::string&) override {
+ return new RGWHandler_SWIFT_Auth; // a fresh handler per call, returned as a raw pointer
+ }
+};
+
+
+#endif
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc
new file mode 100644
index 00000000..b0e95959
--- /dev/null
+++ b/src/rgw/rgw_sync.cc
@@ -0,0 +1,3136 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/optional.hpp>
+
+#include "common/ceph_json.h"
+#include "common/RWLock.h"
+#include "common/RefCountedObj.h"
+#include "common/WorkQueue.h"
+#include "common/Throttle.h"
+#include "common/admin_socket.h"
+#include "common/errno.h"
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_sync.h"
+#include "rgw_metadata.h"
+#include "rgw_rest_conn.h"
+#include "rgw_tools.h"
+#include "rgw_cr_rados.h"
+#include "rgw_cr_rest.h"
+#include "rgw_http_client.h"
+#include "rgw_sync_trace.h"
+
+#include "cls/lock/cls_lock_client.h"
+
+#include "services/svc_zone.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "meta sync: ")
+
+static string mdlog_sync_status_oid = "mdlog.sync-status"; // value returned by RGWMetaSyncEnv::status_oid()
+static string mdlog_sync_status_shard_prefix = "mdlog.sync-status.shard"; // prefix used by RGWMetaSyncEnv::shard_obj_name()
+static string mdlog_sync_full_sync_index_prefix = "meta.full-sync.index"; // not referenced in this part of the file
+
+RGWSyncErrorLogger::RGWSyncErrorLogger(RGWRados *_store, const string &oid_prefix, int _num_shards) : store(_store), num_shards(_num_shards) {
+  // Precompute the object id of every error-log shard: "<oid_prefix>.<index>".
+  for (int shard = 0; shard < num_shards; ++shard)
+    oids.push_back(get_shard_oid(oid_prefix, shard));
+}
+string RGWSyncErrorLogger::get_shard_oid(const string& oid_prefix, int shard_id) {
+  /* Returns "<oid_prefix>.<shard_id>". Composed with std::string instead of
+   * a variable-length stack array, which is a compiler extension in C++. */
+  return oid_prefix + "." + std::to_string(shard_id);
+}
+
+RGWCoroutine *RGWSyncErrorLogger::log_error_cr(const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message) {
+  /* Build a timelog entry describing the sync error and return a coroutine
+   * that appends it to one of the error-log shard objects. */
+  rgw_sync_error_info err_info(source_zone, error_code, message);
+  bufferlist payload;
+  encode(err_info, payload);
+
+  cls_log_entry log_entry;
+  store->time_log_prepare_entry(log_entry, real_clock::now(), section, name, payload);
+  // The counter advances on every call, so consecutive entries change shard.
+  const uint32_t target_shard = ++counter % num_shards;
+  return new RGWRadosTimelogAddCR(store, oids[target_shard], log_entry);
+}
+
+void RGWSyncBackoff::update_wait_time()
+{
+  /* The first wait is 1; every later call doubles the previous wait, and
+   * the result is capped at max_secs. */
+  cur_wait = (cur_wait == 0) ? 1 : (cur_wait << 1);
+
+  const bool over_cap = (cur_wait >= max_secs);
+  if (over_cap) {
+    cur_wait = max_secs;
+  }
+}
+
+void RGWSyncBackoff::backoff_sleep()
+{ // Advances the wait time, then blocks the calling thread with sleep(cur_wait).
+ update_wait_time();
+ sleep(cur_wait);
+}
+
+void RGWSyncBackoff::backoff(RGWCoroutine *op)
+{ // Coroutine variant: advances the wait time and asks `op` to wait instead of sleeping the thread.
+ update_wait_time();
+ op->wait(utime_t(cur_wait, 0));
+}
+
+int RGWBackoffControlCR::operate() { // Runs coroutines from alloc_cr() in a retry loop with backoff, then the finisher.
+ reenter(this) {
+ // retry the operation until it succeeds
+ while (true) {
+ yield {
+ Mutex::Locker l(lock);
+ cr = alloc_cr();
+ cr->get(); // reference taken here is released by cr->put() after the child returns
+ call(cr);
+ }
+ {
+ Mutex::Locker l(lock);
+ cr->put();
+ cr = NULL;
+ }
+ if (retcode >= 0) {
+ break;
+ }
+ if (retcode != -EBUSY && retcode != -EAGAIN) { // -EBUSY / -EAGAIN are retried without logging
+ ldout(cct, 0) << "ERROR: RGWBackoffControlCR called coroutine returned " << retcode << dendl;
+ if (exit_on_error) {
+ return set_cr_error(retcode);
+ }
+ }
+ if (reset_backoff) {
+ backoff.reset();
+ }
+ yield backoff.backoff(this); // wait before the next attempt
+ }
+
+ // run an optional finisher
+ yield call(alloc_finisher_cr());
+ if (retcode < 0) {
+ ldout(cct, 0) << "ERROR: call to finisher_cr() failed: retcode=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+void rgw_mdlog_info::decode_json(JSONObj *obj) { // Fills this struct from the JSON object `obj`.
+ JSONDecoder::decode_json("num_objects", num_shards, obj); // NOTE(review): key is "num_objects" but the field is num_shards — presumably the sender's naming; confirm
+ JSONDecoder::decode_json("period", period, obj);
+ JSONDecoder::decode_json("realm_epoch", realm_epoch, obj);
+}
+
+void rgw_mdlog_entry::decode_json(JSONObj *obj) { // Fills this log entry from the JSON object `obj`.
+ JSONDecoder::decode_json("id", id, obj);
+ JSONDecoder::decode_json("section", section, obj);
+ JSONDecoder::decode_json("name", name, obj);
+ utime_t ut; // the timestamp is decoded as utime_t and converted below
+ JSONDecoder::decode_json("timestamp", ut, obj);
+ timestamp = ut.to_real_time();
+ JSONDecoder::decode_json("data", log_data, obj);
+}
+
+void rgw_mdlog_shard_data::decode_json(JSONObj *obj) { // Fills marker, truncated and entries from the JSON object `obj`.
+ JSONDecoder::decode_json("marker", marker, obj);
+ JSONDecoder::decode_json("truncated", truncated, obj);
+ JSONDecoder::decode_json("entries", entries, obj);
+};
+
+int RGWShardCollectCR::operate() { // Spawns children through spawn_next(), waits for them, and reports the last child error (except -ENOENT).
+ reenter(this) {
+ while (spawn_next()) { // spawn_next() returning false ends the spawning phase
+ current_running++;
+
+ while (current_running >= max_concurrent) { // at the limit: collect a child before spawning another
+ int child_ret;
+ yield wait_for_child();
+ if (collect_next(&child_ret)) {
+ current_running--;
+ if (child_ret < 0 && child_ret != -ENOENT) { // -ENOENT from a child is not treated as a failure
+ ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl;
+ status = child_ret;
+ }
+ }
+ }
+ }
+ while (current_running > 0) { // drain the children that are still running
+ int child_ret;
+ yield wait_for_child();
+ if (collect_next(&child_ret)) {
+ current_running--;
+ if (child_ret < 0 && child_ret != -ENOENT) {
+ ldout(cct, 10) << __func__ << ": failed to fetch log status, ret=" << child_ret << dendl;
+ status = child_ret;
+ }
+ }
+ }
+ if (status < 0) {
+ return set_cr_error(status);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+class RGWReadRemoteMDLogInfoCR : public RGWShardCollectCR { // collector; spawn_next() is defined outside this section
+ RGWMetaSyncEnv *sync_env;
+
+ const std::string& period; // held by reference: the caller's string must outlive this coroutine
+ int num_shards;
+ map<int, RGWMetadataLogInfo> *mdlog_info; // caller-provided output map, keyed by int
+
+ int shard_id; // starts at 0 (see constructor)
+#define READ_MDLOG_MAX_CONCURRENT 10
+
+public:
+ RGWReadRemoteMDLogInfoCR(RGWMetaSyncEnv *_sync_env,
+ const std::string& period, int _num_shards,
+ map<int, RGWMetadataLogInfo> *_mdlog_info) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT),
+ sync_env(_sync_env),
+ period(period), num_shards(_num_shards),
+ mdlog_info(_mdlog_info), shard_id(0) {}
+ bool spawn_next() override;
+};
+
+class RGWListRemoteMDLogCR : public RGWShardCollectCR { // collector; spawn_next() is defined outside this section
+ RGWMetaSyncEnv *sync_env;
+
+ const std::string& period; // held by reference: the caller's string must outlive this coroutine
+ map<int, string> shards; // takes over the contents of the constructor's _shards argument
+ int max_entries_per_shard;
+ map<int, rgw_mdlog_shard_data> *result; // caller-provided output map
+
+ map<int, string>::iterator iter; // positioned at shards.begin() by the constructor
+#define READ_MDLOG_MAX_CONCURRENT 10
+
+public:
+ RGWListRemoteMDLogCR(RGWMetaSyncEnv *_sync_env,
+ const std::string& period, map<int, string>& _shards,
+ int _max_entries_per_shard,
+ map<int, rgw_mdlog_shard_data> *_result) : RGWShardCollectCR(_sync_env->cct, READ_MDLOG_MAX_CONCURRENT),
+ sync_env(_sync_env), period(period),
+ max_entries_per_shard(_max_entries_per_shard),
+ result(_result) {
+ shards.swap(_shards); // the caller's map is left holding this object's previously empty map
+ iter = shards.begin();
+ }
+ bool spawn_next() override;
+};
+
+RGWRemoteMetaLog::~RGWRemoteMetaLog()
+{ // Releases the error logger allocated with new in init().
+ delete error_logger;
+}
+
+int RGWRemoteMetaLog::read_log_info(rgw_mdlog_info *log_info)
+{ // Fetches "/admin/log" with type=metadata over `conn` into *log_info; returns 0 or the negative error.
+ rgw_http_param_pair pairs[] = { { "type", "metadata" },
+ { NULL, NULL } }; // the NULL pair terminates the parameter list
+
+ int ret = conn->get_json_resource("/admin/log", pairs, *log_info);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to fetch mdlog info" << dendl;
+ return ret;
+ }
+
+ ldpp_dout(dpp, 20) << "remote mdlog, num_shards=" << log_info->num_shards << dendl;
+
+ return 0;
+}
+
+int RGWRemoteMetaLog::read_master_log_shards_info(const string &master_period, map<int, RGWMetadataLogInfo> *shards_info)
+{ // Reads the remote shard count, then runs RGWReadRemoteMDLogInfoCR to fill *shards_info.
+ if (store->svc.zone->is_meta_master()) {
+ return 0; // on the metadata master nothing is read and *shards_info is left untouched
+ }
+
+ rgw_mdlog_info log_info;
+ int ret = read_log_info(&log_info);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return run(new RGWReadRemoteMDLogInfoCR(&sync_env, master_period, log_info.num_shards, shards_info));
+}
+
+int RGWRemoteMetaLog::read_master_log_shards_next(const string& period, map<int, string> shard_markers, map<int, rgw_mdlog_shard_data> *result)
+{ // Runs RGWListRemoteMDLogCR with a per-shard entry limit of 1 for the given shard markers.
+ if (store->svc.zone->is_meta_master()) {
+ return 0; // on the metadata master nothing is listed and *result is left untouched
+ }
+
+ return run(new RGWListRemoteMDLogCR(&sync_env, period, shard_markers, 1, result)); // shard_markers is a by-value copy, which the coroutine swaps out
+}
+
+int RGWRemoteMetaLog::init()
+{ // Sets up the master connection, HTTP manager, error logger, sync env and trace node; returns 0 or a negative error.
+ conn = store->svc.zone->get_master_conn();
+
+ int ret = http_manager.start();
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+ return ret;
+ }
+
+ error_logger = new RGWSyncErrorLogger(store, RGW_SYNC_ERROR_LOG_SHARD_PREFIX, ERROR_LOGGER_SHARDS); // deleted in the destructor
+
+ init_sync_env(&sync_env);
+
+ tn = sync_env.sync_tracer->add_node(sync_env.sync_tracer->root_node, "meta"); // trace node named "meta" under the tracer's root
+
+ return 0;
+}
+
+void RGWRemoteMetaLog::finish()
+{ // Raises the going_down flag first, then calls stop().
+ going_down = true;
+ stop();
+}
+
+#define CLONE_MAX_ENTRIES 100
+
+int RGWMetaSyncStatusManager::init()
+{ // Prepares metadata sync state on a non-master zone; returns 0 or a negative error code.
+  if (store->svc.zone->is_meta_master()) {
+    return 0;  // nothing to set up on the metadata master
+  }
+  // Every other zone needs a REST connection to the master zone.
+  if (!store->svc.zone->get_master_conn()) {
+    lderr(store->ctx()) << "no REST connection to master zone" << dendl;
+    return -EIO;
+  }
+
+  int r = rgw_init_ioctx(store->get_rados_handle(), store->svc.zone->get_zone_params().log_pool, ioctx, true);
+  if (r < 0) {
+    lderr(store->ctx()) << "ERROR: failed to open log pool (" << store->svc.zone->get_zone_params().log_pool << ") ret=" << r << dendl;
+    return r;
+  }
+
+  r = master_log.init();
+  if (r < 0) {
+    lderr(store->ctx()) << "ERROR: failed to init remote log, r=" << r << dendl;
+    return r;
+  }
+
+  RGWMetaSyncEnv& sync_env = master_log.get_sync_env();
+  // A missing status object (-ENOENT) is tolerated: sync_status keeps its defaults.
+  rgw_meta_sync_status sync_status;
+  r = read_sync_status(&sync_status);
+  if (r < 0 && r != -ENOENT) {
+    lderr(store->ctx()) << "ERROR: failed to read sync status, r=" << r << dendl;
+    return r;
+  }
+
+  int num_shards = sync_status.sync_info.num_shards;
+  // One raw status object per shard, located in the zone's log pool.
+  for (int i = 0; i < num_shards; i++) {
+    shard_objs[i] = rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env.shard_obj_name(i));
+  }
+  // The write lock covers the per-shard marker and timestamp tables below.
+  RWLock::WLocker wl(ts_to_shard_lock);
+  for (int i = 0; i < num_shards; i++) {
+    clone_markers.push_back(string());
+    utime_shard ut;
+    ut.shard_id = i;
+    ts_to_shard[ut] = i;
+  }
+
+  return 0;
+}
+
+unsigned RGWMetaSyncStatusManager::get_subsys() const
+{ // Returns dout_subsys, which this file defines as ceph_subsys_rgw.
+ return dout_subsys;
+}
+
+std::ostream& RGWMetaSyncStatusManager::gen_prefix(std::ostream& out) const
+{ // Writes the same "meta sync: " text that this file's dout_prefix macro uses, and returns the stream.
+ return out << "meta sync: ";
+}
+
+void RGWMetaSyncEnv::init(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWRados *_store, RGWRESTConn *_conn,
+ RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager,
+ RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer) { // Plain setter: copies each pointer argument into the member of the same name.
+ dpp = _dpp;
+ cct = _cct;
+ store = _store;
+ conn = _conn;
+ async_rados = _async_rados;
+ http_manager = _http_manager;
+ error_logger = _error_logger;
+ sync_tracer = _sync_tracer;
+}
+
+string RGWMetaSyncEnv::status_oid()
+{ // Returns a copy of the file-level constant mdlog_sync_status_oid ("mdlog.sync-status").
+ return mdlog_sync_status_oid;
+}
+
+string RGWMetaSyncEnv::shard_obj_name(int shard_id)
+{
+  /* Returns "<mdlog_sync_status_shard_prefix>.<shard_id>", the name of the
+   * per-shard sync status object. Composed with std::string instead of a
+   * variable-length stack array (a compiler extension in C++). */
+  return mdlog_sync_status_shard_prefix + "." + std::to_string(shard_id);
+}
+
+class RGWAsyncReadMDLogEntries : public RGWAsyncRadosRequest { // async request that lists entries of one metadata log shard
+ RGWRados *store; // kept but not used by _send_request()
+ RGWMetadataLog *mdlog;
+ int shard_id;
+ int max_entries;
+
+protected:
+ int _send_request() override {
+ real_time from_time; // default-constructed time bounds are passed to init_list_entries()
+ real_time end_time;
+
+ void *handle;
+
+ mdlog->init_list_entries(shard_id, from_time, end_time, marker, &handle);
+
+ int ret = mdlog->list_entries(handle, max_entries, entries, &marker, &truncated); // marker and truncated are passed by address as outputs
+
+ mdlog->complete_list_entries(handle);
+
+ return ret;
+ }
+public:
+ string marker; // starting marker on input; also handed to list_entries() as an output
+ list<cls_log_entry> entries;
+ bool truncated; // not initialised here; passed to list_entries() as an output
+
+ RGWAsyncReadMDLogEntries(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+ RGWMetadataLog* mdlog, int _shard_id,
+ std::string _marker, int _max_entries)
+ : RGWAsyncRadosRequest(caller, cn), store(_store), mdlog(mdlog),
+ shard_id(_shard_id), max_entries(_max_entries), marker(std::move(_marker)) {}
+};
+
+class RGWReadMDLogEntriesCR : public RGWSimpleCoroutine { // coroutine wrapper around RGWAsyncReadMDLogEntries
+ RGWMetaSyncEnv *sync_env;
+ RGWMetadataLog *const mdlog;
+ int shard_id;
+ string marker; // local copy of *pmarker taken in send_request()
+ string *pmarker; // caller's marker: read on send, overwritten on completion
+ int max_entries;
+ list<cls_log_entry> *entries; // caller-provided output list
+ bool *truncated; // caller-provided output flag
+
+ RGWAsyncReadMDLogEntries *req{nullptr};
+
+public:
+ RGWReadMDLogEntriesCR(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog,
+ int _shard_id, string*_marker, int _max_entries,
+ list<cls_log_entry> *_entries, bool *_truncated)
+ : RGWSimpleCoroutine(_sync_env->cct), sync_env(_sync_env), mdlog(mdlog),
+ shard_id(_shard_id), pmarker(_marker), max_entries(_max_entries),
+ entries(_entries), truncated(_truncated) {}
+
+ ~RGWReadMDLogEntriesCR() override {
+ if (req) {
+ req->finish(); // the request is finished via finish(), not deleted directly
+ }
+ }
+
+ int send_request() override {
+ marker = *pmarker;
+ req = new RGWAsyncReadMDLogEntries(this, stack->create_completion_notifier(),
+ sync_env->store, mdlog, shard_id, marker,
+ max_entries);
+ sync_env->async_rados->queue(req);
+ return 0;
+ }
+
+ int request_complete() override {
+ *pmarker = std::move(req->marker); // results are moved out of the request into the caller's objects
+ *entries = std::move(req->entries);
+ *truncated = req->truncated;
+ return req->get_ret_status();
+ }
+};
+
+
+class RGWReadRemoteMDLogShardInfoCR : public RGWCoroutine { // reads the info of one remote mdlog shard over REST
+ RGWMetaSyncEnv *env;
+ RGWRESTReadResource *http_op;
+
+ const std::string& period; // held by reference: the caller's string must outlive this coroutine
+ int shard_id;
+ RGWMetadataLogInfo *shard_info; // caller-provided output, filled by http_op->wait()
+
+public:
+ RGWReadRemoteMDLogShardInfoCR(RGWMetaSyncEnv *env, const std::string& period,
+ int _shard_id, RGWMetadataLogInfo *_shard_info)
+ : RGWCoroutine(env->store->ctx()), env(env), http_op(NULL),
+ period(period), shard_id(_shard_id), shard_info(_shard_info) {}
+
+ int operate() override {
+ auto store = env->store;
+ RGWRESTConn *conn = store->svc.zone->get_master_conn(); // connection is looked up from the zone service rather than taken from env->conn
+ reenter(this) {
+ yield {
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", shard_id);
+ rgw_http_param_pair pairs[] = { { "type" , "metadata" },
+ { "id", buf },
+ { "period", period.c_str() },
+ { "info" , NULL }, // "info" is sent with a NULL value
+ { NULL, NULL } };
+
+ string p = "/admin/log/";
+
+ http_op = new RGWRESTReadResource(conn, p, pairs, NULL,
+ env->http_manager);
+
+ init_new_io(http_op);
+
+ int ret = http_op->aio_read();
+ if (ret < 0) {
+ ldpp_dout(env->dpp, 0) << "ERROR: failed to read from " << p << dendl;
+ log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+ http_op->put(); // drop the operation on a failed send
+ return set_cr_error(ret);
+ }
+
+ return io_block(0);
+ }
+ yield {
+ int ret = http_op->wait(shard_info);
+ http_op->put(); // released on both the success and the error path
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ return set_cr_done();
+ }
+ }
+ return 0;
+ }
+};
+
+/**
+ * Lists entries of a single remote metadata-log shard.
+ *
+ * send_request() starts an asynchronous REST read of "/admin/log/" on
+ * sync_env->conn; request_complete() waits for it and decodes the reply
+ * into *result. An ENOENT reply is not reported as an error.
+ */
+class RGWListRemoteMDLogShardCR : public RGWSimpleCoroutine {
+  RGWMetaSyncEnv *sync_env;
+  RGWRESTReadResource *http_op;
+
+  const std::string& period;  // stored by reference; must outlive this coroutine
+  int shard_id;
+  string marker;
+  uint32_t max_entries;
+  rgw_mdlog_shard_data *result;
+
+public:
+  /**
+   * @param env          sync environment (connection, http manager, logging)
+   * @param period       period id sent as the "period" parameter
+   * @param _shard_id    shard to list, sent as "id"
+   * @param _marker      position to list from; copied
+   * @param _max_entries value sent as "max-entries"
+   * @param _result      out: receives the decoded listing
+   */
+  RGWListRemoteMDLogShardCR(RGWMetaSyncEnv *env, const std::string& period,
+                            int _shard_id, const string& _marker, uint32_t _max_entries,
+                            rgw_mdlog_shard_data *_result)
+    : RGWSimpleCoroutine(env->store->ctx()),
+      sync_env(env),
+      http_op(NULL),
+      period(period),
+      shard_id(_shard_id),
+      marker(_marker),
+      max_entries(_max_entries),
+      result(_result)
+  {}
+
+  /**
+   * Build the query parameters and start the asynchronous read.
+   *
+   * @return 0 once the read has been started, otherwise the negative error
+   *         returned by aio_read()
+   */
+  int send_request() override {
+    char shard_id_str[32];
+    snprintf(shard_id_str, sizeof(shard_id_str), "%d", shard_id);
+
+    char max_entries_str[32];
+    snprintf(max_entries_str, sizeof(max_entries_str), "%d", (int)max_entries);
+
+    // an empty marker is sent under an empty key instead of "marker"
+    const char *marker_key = "marker";
+    if (marker.empty()) {
+      marker_key = "";
+    }
+
+    rgw_http_param_pair pairs[] = { { "type", "metadata" },
+                                    { "id", shard_id_str },
+                                    { "period", period.c_str() },
+                                    { "max-entries", max_entries_str },
+                                    { marker_key, marker.c_str() },
+                                    { NULL, NULL } };
+
+    string p = "/admin/log/";
+
+    RGWRESTConn *remote = sync_env->conn;
+    http_op = new RGWRESTReadResource(remote, p, pairs, NULL, sync_env->http_manager);
+    init_new_io(http_op);
+
+    const int r = http_op->aio_read();
+    if (r >= 0) {
+      return 0;
+    }
+
+    ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to read from " << p << dendl;
+    log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << r << std::endl;
+    http_op->put();
+    return r;
+  }
+
+  /**
+   * Wait for the response, decode it into *result and release the http op.
+   *
+   * @return 0 on success or ENOENT, otherwise the negative error from wait()
+   */
+  int request_complete() override {
+    const int r = http_op->wait(result);
+    http_op->put();
+
+    const bool acceptable = (r >= 0 || r == -ENOENT);
+    if (acceptable) {
+      return 0;
+    }
+
+    ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to list remote mdlog shard, ret=" << r << dendl;
+    return r;
+  }
+};
+
+/**
+ * Spawn the info reader for the next shard, if any shard is left.
+ *
+ * @return true if a child coroutine was spawned, false once shard_id has
+ *         reached num_shards
+ */
+bool RGWReadRemoteMDLogInfoCR::spawn_next() {
+  const bool shards_left = (shard_id < num_shards);
+  if (shards_left) {
+    spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, period, shard_id, &(*mdlog_info)[shard_id]), false);
+    shard_id++;
+  }
+  return shards_left;
+}
+
+/**
+ * Spawn the listing coroutine for the next (shard id, marker) pair in
+ * `shards`, if any is left.
+ *
+ * @return true if a child coroutine was spawned, false once the iterator
+ *         has reached the end of `shards`
+ */
+bool RGWListRemoteMDLogCR::spawn_next() {
+  const bool shards_left = (iter != shards.end());
+  if (shards_left) {
+    spawn(new RGWListRemoteMDLogShardCR(sync_env, period, iter->first, iter->second, max_entries_per_shard, &(*result)[iter->first]), false);
+    ++iter;
+  }
+  return shards_left;
+}
+
+/**
+ * Coroutine that initializes metadata sync status. After taking the
+ * "sync_lock" lease on the sync status object it: writes the initial
+ * rgw_meta_sync_info, reads the info of every remote mdlog shard, writes
+ * one rgw_meta_sync_marker per shard seeded from that info, and finally
+ * rewrites the status with state StateBuildingFullSyncMaps before
+ * releasing the lease.
+ */ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
+ RGWMetaSyncEnv *sync_env;
+
+ rgw_meta_sync_info status;
+ vector<RGWMetadataLogInfo> shards_info; // one slot per shard, filled by the RGWReadRemoteMDLogShardInfoCR children
+ boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+ boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
+public:
+ RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
+ const rgw_meta_sync_info &status)
+ : RGWCoroutine(_sync_env->store->ctx()), sync_env(_sync_env),
+ status(status), shards_info(status.num_shards),
+ lease_cr(nullptr), lease_stack(nullptr) {}
+
+ ~RGWInitSyncStatusCoroutine() override {
+ if (lease_cr) {
+ lease_cr->abort();
+ }
+ }
+
+ int operate() override {
+ int ret;
+ reenter(this) {
+ yield {
+ set_status("acquiring sync lock");
+ uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+ string lock_name = "sync_lock";
+ RGWRados *store = sync_env->store;
+ lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env->status_oid()),
+ lock_name, lock_duration, this));
+ lease_stack.reset(spawn(lease_cr.get(), false));
+ }
+ while (!lease_cr->is_locked()) { // wait until the lease is held, or give up if the lease cr finished without it
+ if (lease_cr->is_done()) {
+ ldpp_dout(sync_env->dpp, 5) << "lease cr failed, done early " << dendl;
+ set_status("lease lock failed, early abort");
+ return set_cr_error(lease_cr->get_ret_status());
+ }
+ set_sleeping(true); // NOTE(review): presumably woken by the lease cr, which was given `this` as its caller - confirm
+ yield;
+ }
+ yield {
+ set_status("writing sync status");
+ RGWRados *store = sync_env->store;
+ call(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env->status_oid()),
+ status));
+ }
+
+ if (retcode < 0) {
+ set_status("failed to write sync status");
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to write sync status, retcode=" << retcode << dendl;
+ yield lease_cr->go_down();
+ return set_cr_error(retcode);
+ }
+ /* fetch current position in logs */
+ set_status("fetching remote log position");
+ yield {
+ for (int i = 0; i < (int)status.num_shards; i++) {
+ spawn(new RGWReadRemoteMDLogShardInfoCR(sync_env, status.period, i,
+ &shards_info[i]), false);
+ }
+ }
+
+ drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */
+
+ yield {
+ set_status("updating sync status");
+ for (int i = 0; i < (int)status.num_shards; i++) {
+ rgw_meta_sync_marker marker;
+ RGWMetadataLogInfo& info = shards_info[i];
+ marker.next_step_marker = info.marker; // remote log position; full_sync() moves it into `marker` when the shard switches to incremental sync
+ marker.timestamp = info.last_update;
+ RGWRados *store = sync_env->store;
+ spawn(new RGWSimpleRadosWriteCR<rgw_meta_sync_marker>(sync_env->async_rados,
+ store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env->shard_obj_name(i)),
+ marker), true);
+ }
+ }
+ yield {
+ set_status("changing sync state: build full sync maps");
+ status.state = rgw_meta_sync_info::StateBuildingFullSyncMaps;
+ RGWRados *store = sync_env->store;
+ call(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env->status_oid()),
+ status));
+ }
+ set_status("drop lock lease");
+ yield lease_cr->go_down();
+ while (collect(&ret, NULL)) { // reap spawned children; the first negative result fails this coroutine
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ yield;
+ }
+ drain_all();
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/**
+ * Shard-collect coroutine that reads the sync marker object of every shard
+ * into `markers`, running at most MAX_CONCURRENT_SHARDS reads at a time.
+ */
+class RGWReadSyncStatusMarkersCR : public RGWShardCollectCR {
+  static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+  RGWMetaSyncEnv *env;
+  const int num_shards;
+  int shard_id{0};  // next shard to spawn a read for
+  map<uint32_t, rgw_meta_sync_marker>& markers;  // output, keyed by shard id
+
+public:
+  /**
+   * @param env        sync environment (context, store, async rados)
+   * @param num_shards number of shard marker objects to read
+   * @param markers    out: map that receives one marker per shard
+   */
+  RGWReadSyncStatusMarkersCR(RGWMetaSyncEnv *env, int num_shards,
+                             map<uint32_t, rgw_meta_sync_marker>& markers)
+    : RGWShardCollectCR(env->cct, MAX_CONCURRENT_SHARDS),
+      env(env),
+      num_shards(num_shards),
+      markers(markers)
+  {}
+
+  bool spawn_next() override;
+};
+
+/**
+ * Spawn a read of the next shard's sync marker object from the zone's log
+ * pool into markers[shard_id].
+ *
+ * @return true if a read was spawned, false once all shards were handled
+ */
+bool RGWReadSyncStatusMarkersCR::spawn_next()
+{
+  const bool shards_left = (shard_id < num_shards);
+  if (shards_left) {
+    using CR = RGWSimpleRadosReadCR<rgw_meta_sync_marker>;
+    rgw_raw_obj obj{env->store->svc.zone->get_zone_params().log_pool,
+                    env->shard_obj_name(shard_id)};
+    spawn(new CR(env->async_rados, env->store->svc.sysobj, obj, &markers[shard_id]), false);
+    shard_id++;
+  }
+  return shards_left;
+}
+
+/**
+ * Coroutine that loads the metadata sync status (sync info plus per-shard
+ * markers) into a caller-provided rgw_meta_sync_status.
+ */
+class RGWReadSyncStatusCoroutine : public RGWCoroutine {
+  RGWMetaSyncEnv *sync_env;
+  rgw_meta_sync_status *sync_status;  // output
+
+public:
+  /**
+   * @param _sync_env sync environment (context, store, async rados)
+   * @param _status   out: receives the sync info and shard markers
+   */
+  RGWReadSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
+                             rgw_meta_sync_status *_status)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      sync_status(_status)
+  {}
+
+  int operate() override;
+};
+
+/**
+ * Reads the sync info object first and then, using its num_shards, the
+ * per-shard markers. Fails with the first negative retcode; a missing
+ * sync info object (ENOENT) is an error here.
+ */ int RGWReadSyncStatusCoroutine::operate()
+{
+ reenter(this) {
+ // read sync info
+ using ReadInfoCR = RGWSimpleRadosReadCR<rgw_meta_sync_info>;
+ yield {
+ bool empty_on_enoent = false; // fail on ENOENT
+ rgw_raw_obj obj{sync_env->store->svc.zone->get_zone_params().log_pool,
+ sync_env->status_oid()};
+ call(new ReadInfoCR(sync_env->async_rados, sync_env->store->svc.sysobj, obj,
+ &sync_status->sync_info, empty_on_enoent));
+ }
+ if (retcode < 0) {
+ ldpp_dout(sync_env->dpp, 4) << "failed to read sync status info with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // read shard markers
+ using ReadMarkersCR = RGWReadSyncStatusMarkersCR;
+ yield call(new ReadMarkersCR(sync_env, sync_status->sync_info.num_shards,
+ sync_status->sync_markers));
+ if (retcode < 0) {
+ ldpp_dout(sync_env->dpp, 4) << "failed to read sync status markers with "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/**
+ * Coroutine that builds the full-sync index. After taking the "sync_lock"
+ * lease on the sync status object it fetches the list of metadata sections
+ * from the remote ("/admin/metadata"), lists the keys of each section in
+ * chunks, and appends every "section:key" to a sharded omap index. When the
+ * index was written successfully it stores total_entries in each shard's
+ * sync marker, then releases the lease.
+ */ class RGWFetchAllMetaCR : public RGWCoroutine {
+ RGWMetaSyncEnv *sync_env;
+
+ int num_shards;
+
+
+ int ret_status;
+
+ list<string> sections;
+ list<string>::iterator sections_iter;
+
+ struct meta_list_result { // one decoded chunk of a section's key listing
+ list<string> keys;
+ string marker;
+ uint64_t count{0};
+ bool truncated{false};
+
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("keys", keys, obj);
+ JSONDecoder::decode_json("marker", marker, obj);
+ JSONDecoder::decode_json("count", count, obj);
+ JSONDecoder::decode_json("truncated", truncated, obj);
+ }
+ } result;
+ list<string>::iterator iter;
+
+ std::unique_ptr<RGWShardedOmapCRManager> entries_index;
+
+ boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+ boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
+ bool lost_lock; // set when the lease is found not locked while indexing keys
+ bool failed; // set when entries_index->finish() reports failure
+
+ string marker;
+
+ map<uint32_t, rgw_meta_sync_marker>& markers;
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWFetchAllMetaCR(RGWMetaSyncEnv *_sync_env, int _num_shards,
+ map<uint32_t, rgw_meta_sync_marker>& _markers,
+ RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ num_shards(_num_shards),
+ ret_status(0), lease_cr(nullptr), lease_stack(nullptr),
+ lost_lock(false), failed(false), markers(_markers) {
+ tn = sync_env->sync_tracer->add_node(_tn_parent, "fetch_all_meta");
+ }
+
+ ~RGWFetchAllMetaCR() override {
+ }
+
+ void append_section_from_set(set<string>& all_sections, const string& name) { // moves `name` from the set to the end of `sections`, if present
+ set<string>::iterator iter = all_sections.find(name);
+ if (iter != all_sections.end()) {
+ sections.emplace_back(std::move(*iter)); // NOTE(review): *iter is a const set element, so this std::move still copies
+ all_sections.erase(iter);
+ }
+ }
+ /*
+ * meta sync should go in the following order: user, bucket.instance, bucket
+ * then whatever other sections exist (if any)
+ */
+ void rearrange_sections() {
+ set<string> all_sections;
+ std::move(sections.begin(), sections.end(),
+ std::inserter(all_sections, all_sections.end()));
+ sections.clear();
+
+ append_section_from_set(all_sections, "user");
+ append_section_from_set(all_sections, "bucket.instance");
+ append_section_from_set(all_sections, "bucket");
+
+ std::move(all_sections.begin(), all_sections.end(),
+ std::back_inserter(sections)); // remaining sections follow in the set's sorted order
+ }
+
+ int operate() override {
+ RGWRESTConn *conn = sync_env->conn;
+
+ reenter(this) {
+ yield {
+ set_status(string("acquiring lock (") + sync_env->status_oid() + ")");
+ uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+ string lock_name = "sync_lock";
+ lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados,
+ sync_env->store,
+ rgw_raw_obj(sync_env->store->svc.zone->get_zone_params().log_pool, sync_env->status_oid()),
+ lock_name, lock_duration, this));
+ lease_stack.reset(spawn(lease_cr.get(), false));
+ }
+ while (!lease_cr->is_locked()) { // wait until the lease is held, or give up if the lease cr finished without it
+ if (lease_cr->is_done()) {
+ ldpp_dout(sync_env->dpp, 5) << "lease cr failed, done early " << dendl;
+ set_status("failed acquiring lock");
+ return set_cr_error(lease_cr->get_ret_status());
+ }
+ set_sleeping(true);
+ yield;
+ }
+ entries_index.reset(new RGWShardedOmapCRManager(sync_env->async_rados, sync_env->store, this, num_shards,
+ sync_env->store->svc.zone->get_zone_params().log_pool,
+ mdlog_sync_full_sync_index_prefix));
+ yield {
+ call(new RGWReadRESTResourceCR<list<string> >(cct, conn, sync_env->http_manager,
+ "/admin/metadata", NULL, &sections));
+ }
+ if (get_ret_status() < 0) {
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to fetch metadata sections" << dendl;
+ yield entries_index->finish();
+ yield lease_cr->go_down();
+ drain_all();
+ return set_cr_error(get_ret_status());
+ }
+ rearrange_sections();
+ sections_iter = sections.begin();
+ for (; sections_iter != sections.end(); ++sections_iter) {
+ do { // one iteration per chunk of keys of the current section
+ yield {
+#define META_FULL_SYNC_CHUNK_SIZE "1000"
+ string entrypoint = string("/admin/metadata/") + *sections_iter;
+ rgw_http_param_pair pairs[] = { { "max-entries", META_FULL_SYNC_CHUNK_SIZE },
+ { "marker", result.marker.c_str() }, // marker decoded from the previous response
+ { NULL, NULL } };
+ result.keys.clear();
+ call(new RGWReadRESTResourceCR<meta_list_result >(cct, conn, sync_env->http_manager,
+ entrypoint, pairs, &result));
+ }
+ ret_status = get_ret_status();
+ if (ret_status == -ENOENT) {
+ set_retcode(0); /* reset coroutine status so that we don't return it */
+ ret_status = 0;
+ }
+ if (ret_status < 0) {
+ tn->log(0, SSTR("ERROR: failed to fetch metadata section: " << *sections_iter));
+ yield entries_index->finish();
+ yield lease_cr->go_down();
+ drain_all();
+ return set_cr_error(ret_status);
+ }
+ iter = result.keys.begin();
+ for (; iter != result.keys.end(); ++iter) {
+ if (!lease_cr->is_locked()) {
+ lost_lock = true;
+ break;
+ }
+ yield; // allow entries_index consumer to make progress
+
+ tn->log(20, SSTR("list metadata: section=" << *sections_iter << " key=" << *iter));
+ string s = *sections_iter + ":" + *iter;
+ int shard_id;
+ RGWRados *store = sync_env->store;
+ int ret = store->meta_mgr->get_log_shard_id(*sections_iter, *iter, &shard_id);
+ if (ret < 0) {
+ tn->log(0, SSTR("ERROR: could not determine shard id for " << *sections_iter << ":" << *iter));
+ ret_status = ret;
+ break; // NOTE(review): only leaves the key loop; if result.truncated the next chunk's status overwrites ret_status - confirm intended
+ }
+ if (!entries_index->append(s, shard_id)) {
+ break;
+ }
+ }
+ } while (result.truncated);
+ }
+ yield {
+ if (!entries_index->finish()) {
+ failed = true;
+ }
+ }
+ if (!failed) {
+ for (map<uint32_t, rgw_meta_sync_marker>::iterator iter = markers.begin(); iter != markers.end(); ++iter) {
+ int shard_id = (int)iter->first;
+ rgw_meta_sync_marker& marker = iter->second;
+ marker.total_entries = entries_index->get_total_entries(shard_id);
+ spawn(new RGWSimpleRadosWriteCR<rgw_meta_sync_marker>(sync_env->async_rados, sync_env->store->svc.sysobj,
+ rgw_raw_obj(sync_env->store->svc.zone->get_zone_params().log_pool, sync_env->shard_obj_name(shard_id)),
+ marker), true);
+ }
+ }
+
+ drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */
+
+ yield lease_cr->go_down();
+
+ int ret;
+ while (collect(&ret, NULL)) {
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ yield;
+ }
+ drain_all();
+ if (failed) { // result priority: index failure, then lost lease, then the last recorded error
+ yield return set_cr_error(-EIO);
+ }
+ if (lost_lock) {
+ yield return set_cr_error(-EBUSY);
+ }
+
+ if (ret_status < 0) {
+ yield return set_cr_error(ret_status);
+ }
+
+ yield return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/**
+ * Name of the object that holds the full-sync index of one shard.
+ *
+ * @param shard_id shard number
+ * @return mdlog_sync_full_sync_index_prefix, a '.', and shard_id in decimal
+ */
+static string full_sync_index_shard_oid(int shard_id)
+{
+  // Built with std::string rather than a char array sized at run time: a
+  // variable-length array is a compiler extension, not standard C++.
+  return mdlog_sync_full_sync_index_prefix + "." + std::to_string(shard_id);
+}
+
+/**
+ * Coroutine that fetches one metadata entry from the remote zone: it issues
+ * a REST read of "/admin/metadata/<section>/<key>" (with the key also sent
+ * as the "key" parameter) on sync_env->conn and stores the response in *pbl.
+ */ class RGWReadRemoteMetadataCR : public RGWCoroutine {
+ RGWMetaSyncEnv *sync_env;
+
+ RGWRESTReadResource *http_op;
+
+ string section;
+ string key;
+
+ bufferlist *pbl; // output; filled by http_op->wait() in operate()
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWReadRemoteMetadataCR(RGWMetaSyncEnv *_sync_env,
+ const string& _section, const string& _key, bufferlist *_pbl,
+ const RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ http_op(NULL),
+ section(_section),
+ key(_key),
+ pbl(_pbl) {
+ tn = sync_env->sync_tracer->add_node(_tn_parent, "read_remote_meta",
+ section + ":" + key);
+ }
+
+ int operate() override {
+ RGWRESTConn *conn = sync_env->conn;
+ reenter(this) {
+ yield { // step 1: send the request
+ rgw_http_param_pair pairs[] = { { "key" , key.c_str()},
+ { NULL, NULL } };
+
+ string p = string("/admin/metadata/") + section + "/" + key;
+
+ http_op = new RGWRESTReadResource(conn, p, pairs, NULL, sync_env->http_manager);
+
+ init_new_io(http_op);
+
+ int ret = http_op->aio_read();
+ if (ret < 0) {
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl;
+ log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+ http_op->put(); // the op was never started; drop our reference before failing
+ return set_cr_error(ret);
+ }
+
+ return io_block(0); // NOTE(review): presumably suspends until the http op completes - confirm against RGWCoroutine::io_block
+ }
+ yield { // step 2: collect the response
+ int ret = http_op->wait(pbl);
+ http_op->put();
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ return set_cr_done();
+ }
+ }
+ return 0;
+ }
+};
+
+/**
+ * Async rados request that writes one metadata entry through the store's
+ * metadata manager, using the APPLY_ALWAYS policy.
+ */
+class RGWAsyncMetaStoreEntry : public RGWAsyncRadosRequest {
+  RGWRados *store;
+  string raw_key;
+  bufferlist bl;
+
+protected:
+  /** @return 0 on success, otherwise the negative error returned by put() */
+  int _send_request() override {
+    const int r = store->meta_mgr->put(raw_key, bl, RGWMetadataHandler::APPLY_ALWAYS);
+    if (r >= 0) {
+      return 0;
+    }
+    ldout(store->ctx(), 0) << "ERROR: can't store key: " << raw_key << " ret=" << r << dendl;
+    return r;
+  }
+
+public:
+  /**
+   * @param caller   coroutine issuing the request
+   * @param cn       completion notifier passed to the base request
+   * @param _store   store whose metadata manager performs the write
+   * @param _raw_key metadata key to write; copied
+   * @param _bl      entry payload; copied
+   */
+  RGWAsyncMetaStoreEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+                         const string& _raw_key,
+                         bufferlist& _bl)
+    : RGWAsyncRadosRequest(caller, cn),
+      store(_store),
+      raw_key(_raw_key),
+      bl(_bl)
+  {}
+};
+
+
+/**
+ * Simple coroutine that stores one metadata entry locally by queueing an
+ * RGWAsyncMetaStoreEntry on the async rados processor.
+ */
+class RGWMetaStoreEntryCR : public RGWSimpleCoroutine {
+  RGWMetaSyncEnv *sync_env;
+  string raw_key;
+  bufferlist bl;
+
+  RGWAsyncMetaStoreEntry *req;  // null until send_request() runs
+
+public:
+  /**
+   * @param _sync_env sync environment (context, store, async rados)
+   * @param _raw_key  metadata key to write; copied
+   * @param _bl       entry payload; copied
+   */
+  RGWMetaStoreEntryCR(RGWMetaSyncEnv *_sync_env,
+                      const string& _raw_key,
+                      bufferlist& _bl)
+    : RGWSimpleCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      raw_key(_raw_key),
+      bl(_bl),
+      req(NULL)
+  {}
+
+  /** Calls finish() on the async request if one was created. */
+  ~RGWMetaStoreEntryCR() override {
+    if (req != nullptr) {
+      req->finish();
+    }
+  }
+
+  /** Create and queue the async store request. @return always 0 */
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncMetaStoreEntry(this, notifier, sync_env->store, raw_key, bl);
+    sync_env->async_rados->queue(req);
+    return 0;
+  }
+
+  /** @return the async request's return status */
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return status;
+  }
+};
+
+/**
+ * Async rados request that removes one metadata entry through the store's
+ * metadata manager.
+ */
+class RGWAsyncMetaRemoveEntry : public RGWAsyncRadosRequest {
+  RGWRados *store;
+  string raw_key;
+
+protected:
+  /** @return 0 on success, otherwise the negative error returned by remove() */
+  int _send_request() override {
+    const int r = store->meta_mgr->remove(raw_key);
+    if (r >= 0) {
+      return 0;
+    }
+    ldout(store->ctx(), 0) << "ERROR: can't remove key: " << raw_key << " ret=" << r << dendl;
+    return r;
+  }
+
+public:
+  /**
+   * @param caller   coroutine issuing the request
+   * @param cn       completion notifier passed to the base request
+   * @param _store   store whose metadata manager performs the removal
+   * @param _raw_key metadata key to remove; copied
+   */
+  RGWAsyncMetaRemoveEntry(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
+                          const string& _raw_key)
+    : RGWAsyncRadosRequest(caller, cn),
+      store(_store),
+      raw_key(_raw_key)
+  {}
+};
+
+
+/**
+ * Simple coroutine that removes one local metadata entry by queueing an
+ * RGWAsyncMetaRemoveEntry on the async rados processor. A missing entry
+ * (ENOENT) counts as success.
+ */
+class RGWMetaRemoveEntryCR : public RGWSimpleCoroutine {
+  RGWMetaSyncEnv *sync_env;
+  string raw_key;
+
+  RGWAsyncMetaRemoveEntry *req;  // null until send_request() runs
+
+public:
+  /**
+   * @param _sync_env sync environment (context, store, async rados)
+   * @param _raw_key  metadata key to remove; copied
+   */
+  RGWMetaRemoveEntryCR(RGWMetaSyncEnv *_sync_env,
+                       const string& _raw_key)
+    : RGWSimpleCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      raw_key(_raw_key),
+      req(NULL)
+  {}
+
+  /** Calls finish() on the async request if one was created. */
+  ~RGWMetaRemoveEntryCR() override {
+    if (req != nullptr) {
+      req->finish();
+    }
+  }
+
+  /** Create and queue the async remove request. @return always 0 */
+  int send_request() override {
+    auto notifier = stack->create_completion_notifier();
+    req = new RGWAsyncMetaRemoveEntry(this, notifier, sync_env->store, raw_key);
+    sync_env->async_rados->queue(req);
+    return 0;
+  }
+
+  /** @return the request's status, with -ENOENT mapped to 0 */
+  int request_complete() override {
+    const int status = req->get_ret_status();
+    return (status == -ENOENT) ? 0 : status;
+  }
+};
+
+#define META_SYNC_UPDATE_MARKER_WINDOW 10
+
+
+/**
+ * Runs the coroutine stored in `cr`, and keeps going for as long as a new
+ * one has been stored by the time the previous call returns.
+ */ int RGWLastCallerWinsCR::operate() {
+ RGWCoroutine *call_cr;
+ reenter(this) {
+ while (cr) {
+ call_cr = cr;
+ cr = nullptr; // cleared before the call so a coroutine stored meanwhile is seen by the loop test
+ yield call(call_cr);
+ /* cr might have been modified at this point */
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/**
+ * Marker tracker for one metadata sync shard. It keeps a private copy of
+ * the shard's sync marker and produces the coroutine that persists it to
+ * marker_oid in the zone's log pool.
+ */
+class RGWMetaSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<string, string> {
+  RGWMetaSyncEnv *sync_env;
+
+  string marker_oid;
+  rgw_meta_sync_marker sync_marker;
+
+  RGWSyncTraceNodeRef tn;
+
+public:
+  /**
+   * @param _sync_env   sync environment (context, store, async rados)
+   * @param _marker_oid object name the marker is written to
+   * @param _marker     initial marker state; copied
+   * @param _tn         trace node used for logging
+   */
+  RGWMetaSyncShardMarkerTrack(RGWMetaSyncEnv *_sync_env,
+                              const string& _marker_oid,
+                              const rgw_meta_sync_marker& _marker,
+                              RGWSyncTraceNodeRef& _tn)
+    : RGWSyncShardMarkerTrack(META_SYNC_UPDATE_MARKER_WINDOW),
+      sync_env(_sync_env),
+      marker_oid(_marker_oid),
+      sync_marker(_marker),
+      tn(_tn)
+  {}
+
+  /**
+   * Update the tracked marker and build the coroutine that writes it out.
+   *
+   * @param new_marker new marker position; always applied
+   * @param index_pos  new position; applied only when greater than 0
+   * @param timestamp  new timestamp; applied only when it is not zero
+   * @return a newly allocated write coroutine for the updated marker
+   */
+  RGWCoroutine *store_marker(const string& new_marker, uint64_t index_pos, const real_time& timestamp) override {
+    sync_marker.marker = new_marker;
+
+    const bool have_pos = (index_pos > 0);
+    if (have_pos) {
+      sync_marker.pos = index_pos;
+    }
+
+    const bool have_timestamp = !real_clock::is_zero(timestamp);
+    if (have_timestamp) {
+      sync_marker.timestamp = timestamp;
+    }
+
+    ldpp_dout(sync_env->dpp, 20) << __func__ << "(): updating marker marker_oid=" << marker_oid << " marker=" << new_marker << " realm_epoch=" << sync_marker.realm_epoch << dendl;
+    tn->log(20, SSTR("new marker=" << new_marker));
+
+    using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>;
+    RGWRados *store = sync_env->store;
+    return new WriteMarkerCR(sync_env->async_rados,
+                             store->svc.sysobj,
+                             rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, marker_oid),
+                             sync_marker);
+  }
+
+  /** @return a newly allocated RGWLastCallerWinsCR used as order-control coroutine */
+  RGWOrderCallCR *allocate_order_control_cr() override {
+    return new RGWLastCallerWinsCR(sync_env->cct);
+  }
+};
+
+/**
+ * @param _sync_env       sync environment (context, tracer, connection)
+ * @param _raw_key        "section:key" of the entry to sync; copied
+ * @param _entry_marker   marker passed to the tracker's finish(); copied
+ * @param _op_status      mdlog status of the operation; copied
+ * @param _marker_tracker tracker that is told when the entry is finished
+ * @param _tn_parent      parent trace node; an "entry" child is added to it
+ */
+RGWMetaSyncSingleEntryCR::RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env,
+                                                   const string& _raw_key, const string& _entry_marker,
+                                                   const RGWMDLogStatus& _op_status,
+                                                   RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent)
+  : RGWCoroutine(_sync_env->cct),
+    sync_env(_sync_env),
+    raw_key(_raw_key),
+    entry_marker(_entry_marker),
+    op_status(_op_status),
+    pos(0),
+    sync_status(0),
+    marker_tracker(_marker_tracker),
+    tries(0)
+{
+  // error injection is active whenever the configured probability is positive
+  const auto inject_probability = sync_env->cct->_conf->rgw_sync_meta_inject_err_probability;
+  error_injection = (inject_probability > 0);
+
+  tn = sync_env->sync_tracer->add_node(_tn_parent, "entry", raw_key);
+}
+
+/**
+ * Sync one metadata entry from the remote zone.
+ *
+ * Steps:
+ *  1. optionally fail with -EIO when error injection is enabled;
+ *  2. if the logged operation is not MDLOG_STATUS_COMPLETE, skip it and only
+ *     tell the marker tracker (when there is one) that the entry is finished;
+ *  3. fetch the entry from the remote, retrying on -EAGAIN / -ECANCELED up to
+ *     NUM_TRANSIENT_ERROR_RETRIES tries; other errors are sent to the error
+ *     logger and fail the coroutine;
+ *  4. store the fetched entry locally, or remove the local entry when the
+ *     remote answered -ENOENT, with the same retry policy;
+ *  5. on success tell the marker tracker (when there is one) that the entry
+ *     is finished.
+ *
+ * The marker tracker is treated as optional on both paths that use it; the
+ * skip path previously dereferenced it without the null check that the
+ * success path already had.
+ */
+int RGWMetaSyncSingleEntryCR::operate() {
+  reenter(this) {
+#define NUM_TRANSIENT_ERROR_RETRIES 10
+
+    if (error_injection &&
+        rand() % 10000 < cct->_conf->rgw_sync_meta_inject_err_probability * 10000.0) {
+      ldpp_dout(sync_env->dpp, 0) << __FILE__ << ":" << __LINE__ << ": injecting meta sync error on key=" << raw_key << dendl;
+      return set_cr_error(-EIO);
+    }
+
+    if (op_status != MDLOG_STATUS_COMPLETE) {
+      tn->log(20, "skipping pending operation");
+      if (marker_tracker) {
+        yield call(marker_tracker->finish(entry_marker));
+        if (retcode < 0) {
+          return set_cr_error(retcode);
+        }
+      }
+      return set_cr_done();
+    }
+    tn->set_flag(RGW_SNS_FLAG_ACTIVE);
+    for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) {
+      yield {
+        // raw_key is "section:key"; split at the first ':'
+        pos = raw_key.find(':');
+        section = raw_key.substr(0, pos);
+        key = raw_key.substr(pos + 1);
+        tn->log(10, SSTR("fetching remote metadata entry" << (tries == 0 ? "" : " (retry)")));
+        call(new RGWReadRemoteMetadataCR(sync_env, section, key, &md_bl, tn));
+      }
+
+      sync_status = retcode;
+
+      if (sync_status == -ENOENT) {
+        /* FIXME: do we need to remove the entry from the local zone? */
+        break;
+      }
+
+      if ((sync_status == -EAGAIN || sync_status == -ECANCELED) && (tries < NUM_TRANSIENT_ERROR_RETRIES - 1)) {
+        ldpp_dout(sync_env->dpp, 20) << *this << ": failed to fetch remote metadata: " << section << ":" << key << ", will retry" << dendl;
+        continue;
+      }
+
+      if (sync_status < 0) {
+        tn->log(10, SSTR("failed to send read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status));
+        log_error() << "failed to send read remote metadata entry: section=" << section << " key=" << key << " status=" << sync_status << std::endl;
+        yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), section, key, -sync_status,
+                                                        string("failed to read remote metadata entry: ") + cpp_strerror(-sync_status)));
+        return set_cr_error(sync_status);
+      }
+
+      break;
+    }
+
+    retcode = 0;
+    for (tries = 0; tries < NUM_TRANSIENT_ERROR_RETRIES; tries++) {
+      // sync_status still holds the fetch result: -ENOENT means the entry
+      // no longer exists remotely, so the local copy is removed instead
+      if (sync_status != -ENOENT) {
+        tn->log(10, SSTR("storing local metadata entry"));
+        yield call(new RGWMetaStoreEntryCR(sync_env, raw_key, md_bl));
+      } else {
+        tn->log(10, SSTR("removing local metadata entry"));
+        yield call(new RGWMetaRemoveEntryCR(sync_env, raw_key));
+      }
+      if ((retcode == -EAGAIN || retcode == -ECANCELED) && (tries < NUM_TRANSIENT_ERROR_RETRIES - 1)) {
+        ldpp_dout(sync_env->dpp, 20) << *this << ": failed to store metadata: " << section << ":" << key << ", got retcode=" << retcode << dendl;
+        continue;
+      }
+      break;
+    }
+
+    sync_status = retcode;
+
+    if (sync_status == 0 && marker_tracker) {
+      /* update marker */
+      yield call(marker_tracker->finish(entry_marker));
+      sync_status = retcode;
+    }
+    if (sync_status < 0) {
+      tn->log(10, SSTR("failed, status=" << sync_status));
+      return set_cr_error(sync_status);
+    }
+    tn->log(10, "success");
+    return set_cr_done();
+  }
+  return 0;
+}
+
+/**
+ * Coroutine that clones one shard of the remote metadata log into the local
+ * mdlog. The work is split into the state_*() steps declared below, which
+ * are defined outside this class.
+ */
+class RGWCloneMetaLogCoroutine : public RGWCoroutine {
+  RGWMetaSyncEnv *sync_env;
+  RGWMetadataLog *mdlog;
+
+  const std::string& period;  // stored by reference; must outlive this coroutine
+  int shard_id;
+  string marker;
+  bool truncated = false;
+  string *new_marker;  // optional output, may be null
+
+  int max_entries = CLONE_MAX_ENTRIES;
+
+  RGWRESTReadResource *http_op = nullptr;
+  boost::intrusive_ptr<RGWMetadataLogInfoCompletion> completion;
+
+  RGWMetadataLogInfo shard_info;
+  rgw_mdlog_shard_data data;
+
+public:
+  /**
+   * @param _sync_env   sync environment
+   * @param mdlog       local metadata log
+   * @param period      period id of the log being cloned
+   * @param _id         shard to clone
+   * @param _marker     starting marker; copied
+   * @param _new_marker optional output; when non-null it is initialized to
+   *                    the starting marker here
+   */
+  RGWCloneMetaLogCoroutine(RGWMetaSyncEnv *_sync_env, RGWMetadataLog* mdlog,
+                           const std::string& period, int _id,
+                           const string& _marker, string *_new_marker)
+    : RGWCoroutine(_sync_env->cct),
+      sync_env(_sync_env),
+      mdlog(mdlog),
+      period(period),
+      shard_id(_id),
+      marker(_marker),
+      new_marker(_new_marker)
+  {
+    if (new_marker != nullptr) {
+      *new_marker = marker;
+    }
+  }
+
+  /** Drops the http op reference and cancels the completion, when present. */
+  ~RGWCloneMetaLogCoroutine() override {
+    if (http_op != nullptr) {
+      http_op->put();
+    }
+    if (completion) {
+      completion->cancel();
+    }
+  }
+
+  int operate() override;
+
+  int state_init();
+  int state_read_shard_status();
+  int state_read_shard_status_complete();
+  int state_send_rest_request();
+  int state_receive_rest_response();
+  int state_store_mdlog_entries();
+  int state_store_mdlog_entries_complete();
+};
+
+class RGWMetaSyncShardCR : public RGWCoroutine {
+ RGWMetaSyncEnv *sync_env;
+
+ const rgw_pool& pool;
+ const std::string& period; //< currently syncing period id
+ const epoch_t realm_epoch; //< realm_epoch of period
+ RGWMetadataLog* mdlog; //< log of syncing period
+ uint32_t shard_id;
+ rgw_meta_sync_marker& sync_marker;
+ boost::optional<rgw_meta_sync_marker> temp_marker; //< for pending updates
+ string marker;
+ string max_marker;
+ const std::string& period_marker; //< max marker stored in next period
+
+ RGWRadosGetOmapKeysCR::ResultPtr omapkeys;
+ std::set<std::string> entries;
+ std::set<std::string>::iterator iter;
+
+ string oid;
+
+ RGWMetaSyncShardMarkerTrack *marker_tracker = nullptr;
+
+ list<cls_log_entry> log_entries;
+ list<cls_log_entry>::iterator log_iter;
+ bool truncated = false;
+
+ string mdlog_marker;
+ string raw_key;
+ rgw_mdlog_entry mdlog_entry;
+
+ Mutex inc_lock;
+ Cond inc_cond;
+
+ boost::asio::coroutine incremental_cr;
+ boost::asio::coroutine full_cr;
+
+ boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
+ boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
+
+ bool lost_lock = false;
+
+ bool *reset_backoff;
+
+ // hold a reference to the cr stack while it's in the map
+ using StackRef = boost::intrusive_ptr<RGWCoroutinesStack>;
+ map<StackRef, string> stack_to_pos;
+ map<string, string> pos_to_prev;
+
+ bool can_adjust_marker = false;
+ bool done_with_period = false;
+
+ int total_entries = 0;
+
+ RGWSyncTraceNodeRef tn;
+public:
+ /**
+  * @param _sync_env      sync environment
+  * @param _pool          pool holding the shard's marker and index objects;
+  *                       stored by reference
+  * @param period         id of the period being synced; stored by reference
+  * @param realm_epoch    realm epoch of that period
+  * @param mdlog          metadata log of that period
+  * @param _shard_id      shard handled by this coroutine
+  * @param _marker        the shard's sync marker; stored by reference and
+  *                       updated as sync progresses
+  * @param period_marker  max marker stored in the next period; stored by
+  *                       reference
+  * @param _reset_backoff out flag; set to false here
+  * @param _tn            trace node used for logging
+  */
+ RGWMetaSyncShardCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool,
+                    const std::string& period, epoch_t realm_epoch,
+                    RGWMetadataLog* mdlog, uint32_t _shard_id,
+                    rgw_meta_sync_marker& _marker,
+                    const std::string& period_marker, bool *_reset_backoff,
+                    RGWSyncTraceNodeRef& _tn)
+   : RGWCoroutine(_sync_env->cct),
+     sync_env(_sync_env),
+     pool(_pool),
+     period(period),
+     realm_epoch(realm_epoch),
+     mdlog(mdlog),
+     shard_id(_shard_id),
+     sync_marker(_marker),
+     period_marker(period_marker),
+     inc_lock("RGWMetaSyncShardCR::inc_lock"),
+     reset_backoff(_reset_backoff),
+     tn(_tn)
+ {
+   *reset_backoff = false;
+ }
+
+ /** Deletes the owned marker tracker and aborts the lease coroutine, if any. */
+ ~RGWMetaSyncShardCR() override {
+   delete marker_tracker;
+
+   const bool have_lease = static_cast<bool>(lease_cr);
+   if (have_lease) {
+     lease_cr->abort();
+   }
+ }
+
+ /**
+  * Replace the owned marker tracker; the previous one (if any) is deleted.
+  *
+  * @param mt new tracker; this object takes ownership of it
+  */
+ void set_marker_tracker(RGWMetaSyncShardMarkerTrack *mt) {
+   RGWMetaSyncShardMarkerTrack *previous = marker_tracker;
+   marker_tracker = mt;
+   delete previous;
+ }
+
+ /**
+  * Run one step of shard sync, dispatching on sync_marker.state to
+  * full_sync() or incremental_sync().
+  *
+  * A state that is neither FullSync nor IncrementalSync now fails the
+  * coroutine with -EINVAL; the previous `while (true)` around a switch
+  * without a default would have spun forever on such a value.
+  *
+  * @return 0 when the step function returned a non-negative value, otherwise
+  *         the result of set_cr_error() for the failure
+  */
+ int operate() override {
+   int r;
+   switch (sync_marker.state) {
+   case rgw_meta_sync_marker::FullSync:
+     r = full_sync();
+     if (r < 0) {
+       ldpp_dout(sync_env->dpp, 10) << "sync: full_sync: shard_id=" << shard_id << " r=" << r << dendl;
+       return set_cr_error(r);
+     }
+     return 0;
+   case rgw_meta_sync_marker::IncrementalSync:
+     r = incremental_sync();
+     if (r < 0) {
+       ldpp_dout(sync_env->dpp, 10) << "sync: incremental_sync: shard_id=" << shard_id << " r=" << r << dendl;
+       return set_cr_error(r);
+     }
+     return 0;
+   default:
+     ldpp_dout(sync_env->dpp, 0) << "ERROR: sync: unexpected sync marker state=" << static_cast<int>(sync_marker.state) << " shard_id=" << shard_id << dendl;
+     return set_cr_error(-EINVAL);
+   }
+ }
+
+ /**
+  * Reap finished child stacks and advance sync_marker.marker accordingly.
+  *
+  * Only stacks recorded in stack_to_pos are considered. For each of them the
+  * matching entry is removed from pos_to_prev; while can_adjust_marker is
+  * set, the marker becomes the finished entry's position when it was the
+  * last pending one, otherwise the value of the first remaining entry in
+  * pos_to_prev. A child result of -EAGAIN clears can_adjust_marker.
+  */
+ void collect_children()
+ {
+   int child_ret;
+   RGWCoroutinesStack *child;
+   while (collect_next(&child_ret, &child)) {
+     auto tracked = stack_to_pos.find(child);
+     if (tracked == stack_to_pos.end()) {
+       /* some other stack that we don't care about */
+       continue;
+     }
+
+     string& pos = tracked->second;
+
+     if (child_ret < 0) {
+       ldpp_dout(sync_env->dpp, 0) << *this << ": child operation stack=" << child << " entry=" << pos << " returned " << child_ret << dendl;
+     }
+
+     map<string, string>::iterator pending = pos_to_prev.find(pos);
+     ceph_assert(pending != pos_to_prev.end());
+
+     /*
+      * -EAGAIN signals a transient error that should be retried: stop moving
+      * the marker so the entry is seen again. Permanent errors are recorded
+      * in the error log shard and retried separately.
+      */
+     if (child_ret == -EAGAIN) {
+       can_adjust_marker = false;
+     }
+
+     const bool was_last_pending = (pos_to_prev.size() == 1);
+     if (!was_last_pending) {
+       ceph_assert(pos_to_prev.size() > 1);
+     }
+     pos_to_prev.erase(pending);
+
+     if (can_adjust_marker) {
+       // `pos` refers into stack_to_pos, so it is still valid after the erase
+       if (was_last_pending) {
+         sync_marker.marker = pos;
+       } else {
+         sync_marker.marker = pos_to_prev.begin()->second;
+       }
+     }
+
+     ldpp_dout(sync_env->dpp, 4) << *this << ": adjusting marker pos=" << sync_marker.marker << dendl;
+     stack_to_pos.erase(tracked);
+   }
+ }
+
+ /**
+  * Full-sync step function, resumed through the full_cr coroutine state.
+  * After taking the "sync_lock" lease on the shard's marker object it reads
+  * the shard's full-sync index (omap keys) in batches, spawns one
+  * RGWMetaSyncSingleEntryCR per key, and finally writes a marker that
+  * switches the shard to IncrementalSync.
+  *
+  * Returns 0 while in progress and on completion, a negative retcode on
+  * failure, -EAGAIN when a child asked for a retry and -EBUSY when the lease
+  * was lost.
+  */ int full_sync() {
+#define OMAP_GET_MAX_ENTRIES 100
+ int max_entries = OMAP_GET_MAX_ENTRIES; // a plain local: re-initialized on every re-entry
+ reenter(&full_cr) {
+ set_status("full_sync");
+ tn->log(10, "start full sync");
+ oid = full_sync_index_shard_oid(shard_id);
+ can_adjust_marker = true;
+ /* grab lock */
+ yield {
+ uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+ string lock_name = "sync_lock";
+ RGWRados *store = sync_env->store;
+ lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
+ rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
+ lock_name, lock_duration, this));
+ lease_stack.reset(spawn(lease_cr.get(), false));
+ lost_lock = false;
+ }
+ while (!lease_cr->is_locked()) {
+ if (lease_cr->is_done()) {
+ drain_all();
+ tn->log(5, "failed to take lease");
+ return lease_cr->get_ret_status();
+ }
+ set_sleeping(true);
+ yield;
+ }
+ tn->log(10, "took lease");
+
+ /* lock succeeded, a retry now should avoid previous backoff status */
+ *reset_backoff = true;
+
+ /* prepare marker tracker */
+ set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env,
+ sync_env->shard_obj_name(shard_id),
+ sync_marker, tn));
+
+ marker = sync_marker.marker;
+
+ total_entries = sync_marker.pos;
+
+ /* sync! */
+ do {
+ if (!lease_cr->is_locked()) {
+ tn->log(10, "lost lease");
+ lost_lock = true;
+ break;
+ }
+ omapkeys = std::make_shared<RGWRadosGetOmapKeysCR::Result>();
+ yield call(new RGWRadosGetOmapKeysCR(sync_env->store, rgw_raw_obj(pool, oid),
+ marker, max_entries, omapkeys));
+ if (retcode < 0) {
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: " << __func__ << "(): RGWRadosGetOmapKeysCR() returned ret=" << retcode << dendl;
+ tn->log(0, SSTR("ERROR: failed to list omap keys, status=" << retcode));
+ yield lease_cr->go_down();
+ drain_all();
+ return retcode;
+ }
+ entries = std::move(omapkeys->entries);
+ tn->log(20, SSTR("retrieved " << entries.size() << " entries to sync"));
+ if (entries.size() > 0) {
+ tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+ }
+ iter = entries.begin();
+ for (; iter != entries.end(); ++iter) {
+ marker = *iter; // also the starting point of the next omap listing
+ tn->log(20, SSTR("full sync: " << marker));
+ total_entries++;
+ if (!marker_tracker->start(marker, total_entries, real_time())) {
+ tn->log(0, SSTR("ERROR: cannot start syncing " << marker << ". Duplicate entry?"));
+ } else {
+ // fetch remote and write locally
+ yield {
+ RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, marker, marker, MDLOG_STATUS_COMPLETE, marker_tracker, tn), false);
+ // stack_to_pos holds a reference to the stack
+ stack_to_pos[stack] = marker;
+ pos_to_prev[marker] = marker;
+ }
+ }
+ }
+ collect_children();
+ } while (omapkeys->more && can_adjust_marker);
+
+ tn->unset_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+
+ while (num_spawned() > 1) { // NOTE(review): presumably the one remaining stack is the lease - confirm
+ yield wait_for_child();
+ collect_children();
+ }
+
+ if (!lost_lock) {
+ /* update marker to reflect we're done with full sync */
+ if (can_adjust_marker) {
+ // apply updates to a temporary marker, or operate() will send us
+ // to incremental_sync() after we yield
+ temp_marker = sync_marker;
+ temp_marker->state = rgw_meta_sync_marker::IncrementalSync;
+ temp_marker->marker = std::move(temp_marker->next_step_marker);
+ temp_marker->next_step_marker.clear();
+ temp_marker->realm_epoch = realm_epoch;
+ ldpp_dout(sync_env->dpp, 4) << *this << ": saving marker pos=" << temp_marker->marker << " realm_epoch=" << realm_epoch << dendl;
+
+ using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>;
+ yield call(new WriteMarkerCR(sync_env->async_rados, sync_env->store->svc.sysobj,
+ rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
+ *temp_marker));
+ }
+
+ if (retcode < 0) { // NOTE(review): when can_adjust_marker is false no write was issued and retcode is left over from the last call - confirm
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to set sync marker: retcode=" << retcode << dendl;
+ yield lease_cr->go_down();
+ drain_all();
+ return retcode;
+ }
+ }
+
+ /*
+ * this point is reached on every non-error path: the new state is only
+ * staged in temp_marker, so sync_marker.state is still FullSync and
+ * operate() keeps resuming this function until the lease is released
+ * and the staged marker is applied below
+ */
+
+ yield lease_cr->go_down();
+
+ lease_cr.reset();
+
+ drain_all();
+
+ if (!can_adjust_marker) {
+ return -EAGAIN;
+ }
+
+ if (lost_lock) {
+ return -EBUSY;
+ }
+
+ tn->log(10, "full sync complete");
+
+ // apply the sync marker update
+ ceph_assert(temp_marker);
+ sync_marker = std::move(*temp_marker);
+ temp_marker = boost::none;
+ // must not yield after this point!
+ }
+ return 0;
+ }
+
+
+ int incremental_sync() { // coroutine body: take the shard lease, then repeatedly clone the remote mdlog and spawn a sync for each new entry
+ reenter(&incremental_cr) {
+ set_status("incremental_sync");
+ tn->log(10, "start incremental sync");
+ can_adjust_marker = true;
+ /* grab lock */
+ if (!lease_cr) { /* could have had a lease_cr lock from previous state */
+ yield {
+ uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
+ string lock_name = "sync_lock";
+ RGWRados *store = sync_env->store;
+ lease_cr.reset( new RGWContinuousLeaseCR(sync_env->async_rados, store,
+ rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
+ lock_name, lock_duration, this));
+ lease_stack.reset(spawn(lease_cr.get(), false));
+ lost_lock = false;
+ }
+ while (!lease_cr->is_locked()) { // park until the lease is held or the lease coroutine has finished
+ if (lease_cr->is_done()) {
+ drain_all();
+ tn->log(10, "failed to take lease");
+ return lease_cr->get_ret_status();
+ }
+ set_sleeping(true);
+ yield;
+ }
+ }
+ tn->log(10, "took lease");
+ // if the period has advanced, we can't use the existing marker
+ if (sync_marker.realm_epoch < realm_epoch) {
+ ldpp_dout(sync_env->dpp, 4) << "clearing marker=" << sync_marker.marker
+ << " from old realm_epoch=" << sync_marker.realm_epoch
+ << " (now " << realm_epoch << ')' << dendl;
+ sync_marker.realm_epoch = realm_epoch;
+ sync_marker.marker.clear();
+ }
+ mdlog_marker = sync_marker.marker;
+ set_marker_tracker(new RGWMetaSyncShardMarkerTrack(sync_env,
+ sync_env->shard_obj_name(shard_id),
+ sync_marker, tn));
+
+ /*
+ * mdlog_marker: the remote sync marker position
+ * sync_marker: the local sync marker position
+ * max_marker: the max mdlog position that we fetched
+ * marker: the current position we try to sync
+ * period_marker: the last marker before the next period begins (optional)
+ */
+ marker = max_marker = sync_marker.marker;
+ /* inc sync */
+ do {
+ if (!lease_cr->is_locked()) {
+ lost_lock = true;
+ tn->log(10, "lost lease");
+ break;
+ }
+#define INCREMENTAL_MAX_ENTRIES 100
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl;
+ if (!period_marker.empty() && period_marker <= mdlog_marker) {
+ tn->log(10, SSTR("finished syncing current period: mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker << " period_marker=" << period_marker));
+ done_with_period = true;
+ break;
+ }
+ if (mdlog_marker <= max_marker) {
+ /* we're at the tip, try to bring more entries */
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " syncing mdlog for shard_id=" << shard_id << dendl;
+ yield call(new RGWCloneMetaLogCoroutine(sync_env, mdlog,
+ period, shard_id,
+ mdlog_marker, &mdlog_marker));
+ }
+ if (retcode < 0) { // NOTE(review): also evaluated when the clone above was skipped, so retcode may be left over from an earlier call
+ tn->log(10, SSTR(*this << ": failed to fetch more log entries, retcode=" << retcode));
+ yield lease_cr->go_down();
+ drain_all();
+ *reset_backoff = false; // back off and try again later
+ return retcode;
+ }
+ *reset_backoff = true; /* if we got to this point, all systems function */
+ if (mdlog_marker > max_marker) {
+ tn->set_flag(RGW_SNS_FLAG_ACTIVE); /* actually have entries to sync */
+ tn->log(20, SSTR("mdlog_marker=" << mdlog_marker << " sync_marker=" << sync_marker.marker));
+ marker = max_marker;
+ yield call(new RGWReadMDLogEntriesCR(sync_env, mdlog, shard_id,
+ &max_marker, INCREMENTAL_MAX_ENTRIES,
+ &log_entries, &truncated));
+ if (retcode < 0) {
+ tn->log(10, SSTR("failed to list mdlog entries, retcode=" << retcode));
+ yield lease_cr->go_down();
+ drain_all();
+ *reset_backoff = false; // back off and try again later
+ return retcode;
+ }
+ for (log_iter = log_entries.begin(); log_iter != log_entries.end() && !done_with_period; ++log_iter) {
+ if (!period_marker.empty() && period_marker <= log_iter->id) {
+ done_with_period = true;
+ if (period_marker < log_iter->id) {
+ tn->log(10, SSTR("found key=" << log_iter->id
+ << " past period_marker=" << period_marker));
+ break;
+ }
+ ldpp_dout(sync_env->dpp, 10) << "found key at period_marker=" << period_marker << dendl;
+ // sync this entry, then return control to RGWMetaSyncCR
+ }
+ if (!mdlog_entry.convert_from(*log_iter)) {
+ tn->log(0, SSTR("ERROR: failed to convert mdlog entry, shard_id=" << shard_id << " log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp << " ... skipping entry"));
+ continue;
+ }
+ tn->log(20, SSTR("log_entry: " << log_iter->id << ":" << log_iter->section << ":" << log_iter->name << ":" << log_iter->timestamp));
+ if (!marker_tracker->start(log_iter->id, 0, log_iter->timestamp.to_real_time())) {
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: cannot start syncing " << log_iter->id << ". Duplicate entry?" << dendl;
+ } else {
+ raw_key = log_iter->section + ":" + log_iter->name;
+ yield {
+ RGWCoroutinesStack *stack = spawn(new RGWMetaSyncSingleEntryCR(sync_env, raw_key, log_iter->id, mdlog_entry.log_data.status, marker_tracker, tn), false);
+ ceph_assert(stack);
+ // stack_to_pos holds a reference to the stack
+ stack_to_pos[stack] = log_iter->id;
+ pos_to_prev[log_iter->id] = marker; // remember the marker that preceded this entry
+ }
+ }
+ marker = log_iter->id; // advances even when marker_tracker->start() rejected the entry
+ }
+ }
+ collect_children();
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " mdlog_marker=" << mdlog_marker << " max_marker=" << max_marker << " sync_marker.marker=" << sync_marker.marker << " period_marker=" << period_marker << dendl;
+ if (done_with_period) {
+ // return control to RGWMetaSyncCR and advance to the next period
+ tn->log(10, SSTR(*this << ": done with period"));
+ break;
+ }
+ if (mdlog_marker == max_marker && can_adjust_marker) { // nothing left to replay: clear the active flag and wait before polling again
+ tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+#define INCREMENTAL_INTERVAL 20
+ yield wait(utime_t(INCREMENTAL_INTERVAL, 0));
+ }
+ } while (can_adjust_marker);
+
+ tn->unset_flag(RGW_SNS_FLAG_ACTIVE);
+
+ while (num_spawned() > 1) { // wait for the spawned entry syncs; presumably the one remaining child is the lease stack - confirm
+ yield wait_for_child();
+ collect_children();
+ }
+
+ yield lease_cr->go_down();
+
+ drain_all();
+
+ if (lost_lock) {
+ return -EBUSY;
+ }
+
+ if (!can_adjust_marker) {
+ return -EAGAIN;
+ }
+
+ return set_cr_done();
+ }
+ /* TODO */
+ return 0;
+ }
+};
+
+class RGWMetaSyncShardControlCR : public RGWBackoffControlCR // backoff-controlled wrapper that creates the RGWMetaSyncShardCR for one mdlog shard
+{
+ RGWMetaSyncEnv *sync_env;
+
+ const rgw_pool& pool; // stored as a reference: the caller's object must outlive this coroutine
+ const std::string& period; // stored as a reference as well
+ epoch_t realm_epoch;
+ RGWMetadataLog* mdlog;
+ uint32_t shard_id;
+ rgw_meta_sync_marker sync_marker; // private copy; alloc_finisher_cr() re-reads it from the shard object
+ const std::string period_marker; // moved in by the constructor, then passed to each RGWMetaSyncShardCR
+
+ RGWSyncTraceNodeRef tn; // trace node named "shard", created under the parent's node in the constructor
+
+ static constexpr bool exit_on_error = false; // retry on all errors
+public:
+ RGWMetaSyncShardControlCR(RGWMetaSyncEnv *_sync_env, const rgw_pool& _pool,
+ const std::string& period, epoch_t realm_epoch,
+ RGWMetadataLog* mdlog, uint32_t _shard_id,
+ const rgw_meta_sync_marker& _marker,
+ std::string&& period_marker,
+ RGWSyncTraceNodeRef& _tn_parent)
+ : RGWBackoffControlCR(_sync_env->cct, exit_on_error), sync_env(_sync_env),
+ pool(_pool), period(period), realm_epoch(realm_epoch), mdlog(mdlog),
+ shard_id(_shard_id), sync_marker(_marker),
+ period_marker(std::move(period_marker)) {
+ tn = sync_env->sync_tracer->add_node(_tn_parent, "shard",
+ std::to_string(shard_id));
+ }
+
+ RGWCoroutine *alloc_cr() override { // builds a fresh shard sync coroutine from the stored parameters
+ return new RGWMetaSyncShardCR(sync_env, pool, period, realm_epoch, mdlog,
+ shard_id, sync_marker, period_marker, backoff_ptr(), tn);
+ }
+
+ RGWCoroutine *alloc_finisher_cr() override { // builds a coroutine that reads the shard's marker object back into sync_marker
+ RGWRados *store = sync_env->store;
+ return new RGWSimpleRadosReadCR<rgw_meta_sync_marker>(sync_env->async_rados, store->svc.sysobj,
+ rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
+ &sync_marker);
+ }
+};
+
+class RGWMetaSyncCR : public RGWCoroutine { // syncs metadata one period at a time, spawning one RGWMetaSyncShardControlCR per shard marker
+ RGWMetaSyncEnv *sync_env;
+ const rgw_pool& pool;
+ RGWPeriodHistory::Cursor cursor; //< sync position in period history
+ RGWPeriodHistory::Cursor next; //< next period in history
+ rgw_meta_sync_status sync_status;
+ RGWSyncTraceNodeRef tn;
+
+ std::mutex mutex; //< protect access to shard_crs
+
+ // TODO: it should be enough to hold a reference on the stack only, as calling
+ // RGWCoroutinesStack::wakeup() doesn't refer to the RGWCoroutine if it has
+ // already completed
+ using ControlCRRef = boost::intrusive_ptr<RGWMetaSyncShardControlCR>;
+ using StackRef = boost::intrusive_ptr<RGWCoroutinesStack>;
+ using RefPair = std::pair<ControlCRRef, StackRef>;
+ map<int, RefPair> shard_crs; // shard id -> (control coroutine, its stack); consulted by wakeup()
+ int ret{0}; // first error collected from a shard coroutine, 0 while all have succeeded
+
+public:
+ RGWMetaSyncCR(RGWMetaSyncEnv *_sync_env, const RGWPeriodHistory::Cursor &cursor,
+ const rgw_meta_sync_status& _sync_status, RGWSyncTraceNodeRef& _tn)
+ : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ pool(sync_env->store->svc.zone->get_zone_params().log_pool),
+ cursor(cursor), sync_status(_sync_status), tn(_tn) {}
+
+ ~RGWMetaSyncCR() {
+ }
+
+ int operate() override {
+ reenter(this) {
+ // loop through one period at a time
+ tn->log(1, "start");
+ for (;;) {
+ if (cursor == sync_env->store->period_history->get_current()) {
+ next = RGWPeriodHistory::Cursor{}; // on the current period there is no next period yet
+ if (cursor) {
+ ldpp_dout(sync_env->dpp, 10) << "RGWMetaSyncCR on current period="
+ << cursor.get_period().get_id() << dendl;
+ } else {
+ ldpp_dout(sync_env->dpp, 10) << "RGWMetaSyncCR with no period" << dendl;
+ }
+ } else {
+ next = cursor;
+ next.next();
+ ldpp_dout(sync_env->dpp, 10) << "RGWMetaSyncCR on period="
+ << cursor.get_period().get_id() << ", next="
+ << next.get_period().get_id() << dendl;
+ }
+
+ yield {
+ // get the mdlog for the current period (may be empty)
+ auto& period_id = sync_status.sync_info.period;
+ auto realm_epoch = sync_status.sync_info.realm_epoch;
+ auto mdlog = sync_env->store->meta_mgr->get_log(period_id);
+
+ tn->log(1, SSTR("realm epoch=" << realm_epoch << " period id=" << period_id));
+
+ // prevent wakeup() from accessing shard_crs while we're spawning them
+ std::lock_guard<std::mutex> lock(mutex);
+
+ // sync this period on each shard
+ for (const auto& m : sync_status.sync_markers) {
+ uint32_t shard_id = m.first;
+ auto& marker = m.second;
+
+ std::string period_marker;
+ if (next) {
+ // read the maximum marker from the next period's sync status
+ period_marker = next.get_period().get_sync_status()[shard_id];
+ if (period_marker.empty()) {
+ // no metadata changes have occurred on this shard, skip it
+ ldpp_dout(sync_env->dpp, 10) << "RGWMetaSyncCR: skipping shard " << shard_id
+ << " with empty period marker" << dendl;
+ continue;
+ }
+ }
+
+ using ShardCR = RGWMetaSyncShardControlCR;
+ auto cr = new ShardCR(sync_env, pool, period_id, realm_epoch,
+ mdlog, shard_id, marker,
+ std::move(period_marker), tn);
+ auto stack = spawn(cr, false);
+ shard_crs[shard_id] = RefPair{cr, stack};
+ }
+ }
+ // wait for each shard to complete
+ while (ret == 0 && num_spawned() > 0) { // stops waiting at the first error collected into ret
+ yield wait_for_child();
+ collect(&ret, nullptr);
+ }
+ drain_all();
+ {
+ // drop shard cr refs under lock
+ std::lock_guard<std::mutex> lock(mutex);
+ shard_crs.clear();
+ }
+ if (ret < 0) {
+ return set_cr_error(ret);
+ }
+ // advance to the next period
+ ceph_assert(next); // NOTE(review): next is empty when syncing the current period, so shards are presumably never expected to all succeed there - confirm
+ cursor = next;
+
+ // write the updated sync info
+ sync_status.sync_info.period = cursor.get_period().get_id();
+ sync_status.sync_info.realm_epoch = cursor.get_epoch();
+ yield call(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(sync_env->async_rados,
+ sync_env->store->svc.sysobj,
+ rgw_raw_obj(pool, sync_env->status_oid()),
+ sync_status.sync_info));
+ }
+ }
+ return 0;
+ }
+
+ void wakeup(int shard_id) { // wakes the control coroutine registered for shard_id; no-op if none is registered
+ std::lock_guard<std::mutex> lock(mutex);
+ auto iter = shard_crs.find(shard_id);
+ if (iter == shard_crs.end()) {
+ return;
+ }
+ iter->second.first->wakeup();
+ }
+};
+
+void RGWRemoteMetaLog::init_sync_env(RGWMetaSyncEnv *env) { // fills *env with this object's store, connection and helper handles
+ env->dpp = dpp;
+ env->cct = store->ctx();
+ env->store = store;
+ env->conn = conn;
+ env->async_rados = async_rados;
+ env->http_manager = &http_manager;
+ env->error_logger = error_logger;
+ env->sync_tracer = store->get_sync_tracer();
+}
+
+int RGWRemoteMetaLog::read_sync_status(rgw_meta_sync_status *sync_status) // reads the sync status into *sync_status; returns 0 or a negative error code
+{
+ if (store->svc.zone->is_meta_master()) { // on the metadata master nothing is read and *sync_status is left untouched
+ return 0;
+ }
+ // cannot run concurrently with run_sync(), so run in a separate manager
+ RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry());
+ RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr());
+ int ret = http_manager.start();
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "failed in http_manager.start() ret=" << ret << dendl;
+ return ret;
+ }
+ RGWMetaSyncEnv sync_env_local = sync_env; // copy of the shared env that points at the local http manager
+ sync_env_local.http_manager = &http_manager;
+ tn->log(20, "read sync status");
+ ret = crs.run(new RGWReadSyncStatusCoroutine(&sync_env_local, sync_status));
+ http_manager.stop();
+ return ret;
+}
+
+int RGWRemoteMetaLog::init_sync_status() // builds a sync info from the master's log info and runs RGWInitSyncStatusCoroutine with it
+{
+ if (store->svc.zone->is_meta_master()) { // no-op on the metadata master
+ return 0;
+ }
+
+ rgw_mdlog_info mdlog_info;
+ int r = read_log_info(&mdlog_info);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
+ return r;
+ }
+
+ rgw_meta_sync_info sync_info;
+ sync_info.num_shards = mdlog_info.num_shards;
+ auto cursor = store->period_history->get_current();
+ if (cursor) { // period and realm_epoch keep their defaults when there is no current period
+ sync_info.period = cursor.get_period().get_id();
+ sync_info.realm_epoch = cursor.get_epoch();
+ }
+
+ return run(new RGWInitSyncStatusCoroutine(&sync_env, sync_info));
+}
+
+int RGWRemoteMetaLog::store_sync_info(const rgw_meta_sync_info& sync_info) // writes sync_info to the status object in the zone's log pool; returns the result of run()
+{
+ tn->log(20, "store sync info");
+ return run(new RGWSimpleRadosWriteCR<rgw_meta_sync_info>(async_rados, store->svc.sysobj,
+ rgw_raw_obj(store->svc.zone->get_zone_params().log_pool, sync_env.status_oid()),
+ sync_info));
+}
+
+// return a cursor to the period at our sync position (empty cursor if info has no period; a cursor carrying a negative error on failure)
+static RGWPeriodHistory::Cursor get_period_at(RGWRados* store,
+ const rgw_meta_sync_info& info)
+{
+ if (info.period.empty()) {
+ // return an empty cursor with error=0
+ return RGWPeriodHistory::Cursor{};
+ }
+
+ // look for an existing period in our history
+ auto cursor = store->period_history->lookup(info.realm_epoch);
+ if (cursor) {
+ // verify that the period ids match
+ auto& existing = cursor.get_period().get_id();
+ if (existing != info.period) {
+ lderr(store->ctx()) << "ERROR: sync status period=" << info.period
+ << " does not match period=" << existing
+ << " in history at realm epoch=" << info.realm_epoch << dendl;
+ return RGWPeriodHistory::Cursor{-EEXIST};
+ }
+ return cursor;
+ }
+
+ // read the period from rados or pull it from the master
+ RGWPeriod period;
+ int r = store->period_puller->pull(info.period, period);
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: failed to read period id "
+ << info.period << ": " << cpp_strerror(r) << dendl;
+ return RGWPeriodHistory::Cursor{r};
+ }
+ // attach the period to our history
+ cursor = store->period_history->attach(std::move(period));
+ if (!cursor) { // log only; the failed cursor is still returned so the caller can read its error
+ r = cursor.get_error();
+ lderr(store->ctx()) << "ERROR: failed to read period history back to "
+ << info.period << ": " << cpp_strerror(r) << dendl;
+ }
+ return cursor;
+}
+
+int RGWRemoteMetaLog::run_sync() // main metadata sync driver: waits for the master, initializes sync status if needed, then runs full-sync map building and per-period sync until going_down
+{
+ if (store->svc.zone->is_meta_master()) { // the metadata master has nothing to sync from
+ return 0;
+ }
+
+ int r = 0;
+
+ // get shard count and oldest log period from master
+ rgw_mdlog_info mdlog_info;
+ for (;;) {
+ if (going_down) {
+ ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl;
+ return 0;
+ }
+ r = read_log_info(&mdlog_info);
+ if (r == -EIO || r == -ENOENT) {
+ // keep retrying if master isn't alive or hasn't initialized the log
+ ldpp_dout(dpp, 10) << __func__ << "(): waiting for master.." << dendl;
+ backoff.backoff_sleep();
+ continue;
+ }
+ backoff.reset();
+ if (r < 0) {
+ lderr(store->ctx()) << "ERROR: fail to fetch master log info (r=" << r << ")" << dendl;
+ return r;
+ }
+ break;
+ }
+
+ rgw_meta_sync_status sync_status;
+ do { // repeat until the stored sync state is no longer StateInit
+ if (going_down) {
+ ldpp_dout(dpp, 1) << __func__ << "(): going down" << dendl;
+ return 0;
+ }
+ r = run(new RGWReadSyncStatusCoroutine(&sync_env, &sync_status));
+ if (r < 0 && r != -ENOENT) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to fetch sync status r=" << r << dendl;
+ return r;
+ }
+
+ if (!mdlog_info.period.empty()) {
+ // restart sync if the remote has a period, but:
+ // a) our status does not, or
+ // b) our sync period comes before the remote's oldest log period
+ if (sync_status.sync_info.period.empty() ||
+ sync_status.sync_info.realm_epoch < mdlog_info.realm_epoch) {
+ sync_status.sync_info.state = rgw_meta_sync_info::StateInit;
+ string reason;
+ if (sync_status.sync_info.period.empty()) {
+ reason = "period is empty";
+ } else {
+ reason = SSTR("sync_info realm epoch is behind: " << sync_status.sync_info.realm_epoch << " < " << mdlog_info.realm_epoch);
+ }
+ tn->log(1, "initialize sync (reason: " + reason + ")");
+ ldpp_dout(dpp, 1) << "epoch=" << sync_status.sync_info.realm_epoch
+ << " in sync status comes before remote's oldest mdlog epoch="
+ << mdlog_info.realm_epoch << ", restarting sync" << dendl;
+ }
+ }
+
+ if (sync_status.sync_info.state == rgw_meta_sync_info::StateInit) {
+ ldpp_dout(dpp, 20) << __func__ << "(): init" << dendl;
+ sync_status.sync_info.num_shards = mdlog_info.num_shards;
+ auto cursor = store->period_history->get_current();
+ if (cursor) {
+ // run full sync, then start incremental from the current period/epoch
+ sync_status.sync_info.period = cursor.get_period().get_id();
+ sync_status.sync_info.realm_epoch = cursor.get_epoch();
+ }
+ r = run(new RGWInitSyncStatusCoroutine(&sync_env, sync_status.sync_info));
+ if (r == -EBUSY) { // back off and go round the loop again (state is still StateInit)
+ backoff.backoff_sleep();
+ continue;
+ }
+ backoff.reset();
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to init sync status r=" << r << dendl;
+ return r;
+ }
+ }
+ } while (sync_status.sync_info.state == rgw_meta_sync_info::StateInit);
+
+ auto num_shards = sync_status.sync_info.num_shards;
+ if (num_shards != mdlog_info.num_shards) {
+ lderr(store->ctx()) << "ERROR: can't sync, mismatch between num shards, master num_shards=" << mdlog_info.num_shards << " local num_shards=" << num_shards << dendl;
+ return -EINVAL;
+ }
+
+ RGWPeriodHistory::Cursor cursor;
+ do { // re-read the status and dispatch on its state until going_down is set
+ r = run(new RGWReadSyncStatusCoroutine(&sync_env, &sync_status));
+ if (r < 0 && r != -ENOENT) {
+ tn->log(0, SSTR("ERROR: failed to fetch sync status r=" << r));
+ return r;
+ }
+
+ switch ((rgw_meta_sync_info::SyncState)sync_status.sync_info.state) {
+ case rgw_meta_sync_info::StateBuildingFullSyncMaps:
+ tn->log(20, "building full sync maps");
+ r = run(new RGWFetchAllMetaCR(&sync_env, num_shards, sync_status.sync_markers, tn));
+ if (r == -EBUSY || r == -EAGAIN) {
+ backoff.backoff_sleep();
+ continue;
+ }
+ backoff.reset();
+ if (r < 0) {
+ tn->log(0, SSTR("ERROR: failed to fetch all metadata keys (r=" << r << ")"));
+ return r;
+ }
+
+ sync_status.sync_info.state = rgw_meta_sync_info::StateSync;
+ r = store_sync_info(sync_status.sync_info);
+ if (r < 0) {
+ tn->log(0, SSTR("ERROR: failed to update sync status (r=" << r << ")"));
+ return r;
+ }
+ /* fall through */
+ case rgw_meta_sync_info::StateSync:
+ tn->log(20, "sync");
+ // find our position in the period history (if any)
+ cursor = get_period_at(store, sync_status.sync_info);
+ r = cursor.get_error();
+ if (r < 0) {
+ return r;
+ }
+ meta_sync_cr = new RGWMetaSyncCR(&sync_env, cursor, sync_status, tn);
+ r = run(meta_sync_cr);
+ if (r < 0) { // this failure comes from RGWMetaSyncCR, not from the key-fetch step above
+ tn->log(0, SSTR("ERROR: metadata sync failed (r=" << r << ")"));
+ return r;
+ }
+ break;
+ default:
+ tn->log(0, "ERROR: bad sync state!");
+ return -EIO;
+ }
+ } while (!going_down);
+
+ return 0;
+}
+
+void RGWRemoteMetaLog::wakeup(int shard_id) // forwards the wakeup to the running RGWMetaSyncCR, if one has been created
+{
+ if (!meta_sync_cr) {
+ return;
+ }
+ meta_sync_cr->wakeup(shard_id);
+}
+
+int RGWCloneMetaLogCoroutine::operate() // state machine: each yield block returns one state handler's result; the next call resumes at the following block
+{
+ reenter(this) {
+ do { // one pass fetches and stores a page of entries; repeats while truncated is set
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": init request" << dendl;
+ return state_init();
+ }
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status" << dendl;
+ return state_read_shard_status();
+ }
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": reading shard status complete" << dendl;
+ return state_read_shard_status_complete();
+ }
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": sending rest request" << dendl;
+ return state_send_rest_request();
+ }
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": receiving rest response" << dendl;
+ return state_receive_rest_response();
+ }
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries" << dendl;
+ return state_store_mdlog_entries();
+ }
+ } while (truncated);
+ yield {
+ ldpp_dout(sync_env->dpp, 20) << __func__ << ": shard_id=" << shard_id << ": storing mdlog entries complete" << dendl;
+ return state_store_mdlog_entries_complete();
+ }
+ }
+
+ return 0;
+}
+
+int RGWCloneMetaLogCoroutine::state_init() // resets the response buffer before a new request; always returns 0
+{
+ data = rgw_mdlog_shard_data();
+
+ return 0;
+}
+
+int RGWCloneMetaLogCoroutine::state_read_shard_status() // starts an async read of the local mdlog shard header and blocks on io until the callback fires
+{
+ const bool add_ref = false; // default constructs with refs=1
+
+ completion.reset(new RGWMetadataLogInfoCompletion(
+ [this](int ret, const cls_log_header& header) {
+ if (ret < 0) {
+ if (ret != -ENOENT) { // -ENOENT is not logged; shard_info is left unchanged on any error
+ ldpp_dout(sync_env->dpp, 1) << "ERROR: failed to read mdlog info with "
+ << cpp_strerror(ret) << dendl;
+ }
+ } else {
+ shard_info.marker = header.max_marker;
+ shard_info.last_update = header.max_time.to_real_time();
+ }
+ // wake up parent stack
+ io_complete();
+ }), add_ref);
+
+ int ret = mdlog->get_info_async(shard_id, completion.get());
+ if (ret < 0) {
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: mdlog->get_info_async() returned ret=" << ret << dendl;
+ return set_cr_error(ret);
+ }
+
+ return io_block(0);
+}
+
+int RGWCloneMetaLogCoroutine::state_read_shard_status_complete() // drops the completion and adopts the shard's stored marker as the fetch position
+{
+ completion.reset();
+
+ ldpp_dout(sync_env->dpp, 20) << "shard_id=" << shard_id << " marker=" << shard_info.marker << " last_update=" << shard_info.last_update << dendl;
+
+ marker = shard_info.marker;
+
+ return 0;
+}
+
+int RGWCloneMetaLogCoroutine::state_send_rest_request() // issues an async read of /admin/log on the remote for this shard, period and marker
+{
+ RGWRESTConn *conn = sync_env->conn;
+
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%d", shard_id);
+
+ char max_entries_buf[32];
+ snprintf(max_entries_buf, sizeof(max_entries_buf), "%d", max_entries);
+
+ const char *marker_key = (marker.empty() ? "" : "marker"); // empty key when there is no marker; presumably dropped by the request builder - confirm
+
+ rgw_http_param_pair pairs[] = { { "type", "metadata" },
+ { "id", buf },
+ { "period", period.c_str() },
+ { "max-entries", max_entries_buf },
+ { marker_key, marker.c_str() },
+ { NULL, NULL } };
+
+ http_op = new RGWRESTReadResource(conn, "/admin/log", pairs, NULL, sync_env->http_manager);
+
+ init_new_io(http_op);
+
+ int ret = http_op->aio_read();
+ if (ret < 0) { // release the op on failure so it is not used by a later state
+ ldpp_dout(sync_env->dpp, 0) << "ERROR: failed to fetch mdlog data" << dendl;
+ log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
+ http_op->put();
+ http_op = NULL;
+ return set_cr_error(ret);
+ }
+
+ return io_block(0);
+}
+
+int RGWCloneMetaLogCoroutine::state_receive_rest_response() // collects the response into data, updates truncated and *new_marker, and finishes early when no entries came back
+{
+ int ret = http_op->wait(&data);
+ if (ret < 0) {
+ error_stream << "http operation failed: " << http_op->to_str() << " status=" << http_op->get_http_status() << std::endl;
+ ldpp_dout(sync_env->dpp, 5) << "failed to wait for op, ret=" << ret << dendl;
+ http_op->put();
+ http_op = NULL;
+ return set_cr_error(ret);
+ }
+ http_op->put();
+ http_op = NULL;
+
+ ldpp_dout(sync_env->dpp, 20) << "remote mdlog, shard_id=" << shard_id << " num of shard entries: " << data.entries.size() << dendl;
+
+ truncated = ((int)data.entries.size() == max_entries); // a full page is treated as a sign that more entries remain
+
+ if (data.entries.empty()) {
+ if (new_marker) {
+ *new_marker = marker;
+ }
+ return set_cr_done();
+ }
+
+ if (new_marker) {
+ *new_marker = data.entries.back().id;
+ }
+
+ return 0;
+}
+
+
+int RGWCloneMetaLogCoroutine::state_store_mdlog_entries() // converts the fetched entries to cls_log_entry and stores them asynchronously in the local mdlog shard
+{
+ list<cls_log_entry> dest_entries;
+
+ vector<rgw_mdlog_entry>::iterator iter;
+ for (iter = data.entries.begin(); iter != data.entries.end(); ++iter) {
+ rgw_mdlog_entry& entry = *iter;
+ ldpp_dout(sync_env->dpp, 20) << "entry: name=" << entry.name << dendl;
+
+ cls_log_entry dest_entry;
+ dest_entry.id = entry.id;
+ dest_entry.section = entry.section;
+ dest_entry.name = entry.name;
+ dest_entry.timestamp = utime_t(entry.timestamp);
+
+ encode(entry.log_data, dest_entry.data);
+
+ dest_entries.push_back(std::move(dest_entry)); // dest_entry is not used again, so move instead of copying
+
+ marker = entry.id; // after the loop, marker is the id of the last entry handed to the store call
+ }
+
+ RGWAioCompletionNotifier *cn = stack->create_completion_notifier();
+
+ int ret = mdlog->store_entries_in_shard(dest_entries, shard_id, cn->completion());
+ if (ret < 0) {
+ cn->put(); // the request was not queued, so drop our notifier reference
+ ldpp_dout(sync_env->dpp, 10) << "failed to store md log entries shard_id=" << shard_id << " ret=" << ret << dendl;
+ return set_cr_error(ret);
+ }
+ return io_block(0);
+}
+
+int RGWCloneMetaLogCoroutine::state_store_mdlog_entries_complete() // final state: marks the coroutine as done
+{
+ return set_cr_done();
+}
+
+
+// TODO: move into rgw_sync_trim.cc
+#undef dout_prefix
+#define dout_prefix (*_dout << "meta trim: ")
+
+/// purge all log shards for the given mdlog, spawning one RGWRadosRemoveCR per shard object
+class PurgeLogShardsCR : public RGWShardCollectCR {
+ RGWRados *const store;
+ const RGWMetadataLog* mdlog;
+ const int num_shards;
+ rgw_raw_obj obj; // reused for every shard: only obj.oid changes between spawns
+ int i{0}; // index of the next shard to remove
+
+ static constexpr int max_concurrent = 16; // passed to the RGWShardCollectCR base
+
+ public:
+ PurgeLogShardsCR(RGWRados *store, const RGWMetadataLog* mdlog,
+ const rgw_pool& pool, int num_shards)
+ : RGWShardCollectCR(store->ctx(), max_concurrent),
+ store(store), mdlog(mdlog), num_shards(num_shards), obj(pool, "")
+ {}
+
+ bool spawn_next() override { // returns false once a remove has been spawned for every shard
+ if (i == num_shards) {
+ return false;
+ }
+ mdlog->get_shard_oid(i++, obj.oid);
+ spawn(new RGWRadosRemoveCR(store, obj), false);
+ return true;
+ }
+};
+
+using Cursor = RGWPeriodHistory::Cursor;
+
+/// purge mdlogs from the oldest up to (but not including) the given realm_epoch
+class PurgePeriodLogsCR : public RGWCoroutine {
+ RGWRados *const store;
+ RGWMetadataManager *const metadata;
+ RGWObjVersionTracker objv; // passed to the read and trim calls on the mdlog history
+ Cursor cursor; // period currently being purged
+ epoch_t realm_epoch; // exclusive upper bound for purging
+ epoch_t *last_trim_epoch; //< update last trim on success
+
+ public:
+ PurgePeriodLogsCR(RGWRados *store, epoch_t realm_epoch, epoch_t *last_trim)
+ : RGWCoroutine(store->ctx()), store(store), metadata(store->meta_mgr),
+ realm_epoch(realm_epoch), last_trim_epoch(last_trim)
+ {}
+
+ int operate() override;
+};
+
+int PurgePeriodLogsCR::operate() // for each period older than realm_epoch: remove its log shards, then trim it from the mdlog history
+{
+ reenter(this) {
+ // read our current oldest log period
+ yield call(metadata->read_oldest_log_period_cr(&cursor, &objv));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ ceph_assert(cursor);
+ ldout(cct, 20) << "oldest log realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+
+ // trim -up to- the given realm_epoch
+ while (cursor.get_epoch() < realm_epoch) {
+ ldout(cct, 4) << "purging log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+ yield {
+ const auto mdlog = metadata->get_log(cursor.get_period().get_id());
+ const auto& pool = store->svc.zone->get_zone_params().log_pool;
+ auto num_shards = cct->_conf->rgw_md_log_max_shards;
+ call(new PurgeLogShardsCR(store, mdlog, pool, num_shards));
+ }
+ if (retcode < 0) {
+ ldout(cct, 1) << "failed to remove log shards: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ ldout(cct, 10) << "removed log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+
+ // update our mdlog history
+ yield call(metadata->trim_log_period_cr(cursor, &objv));
+ if (retcode == -ENOENT) {
+ // must have raced to update mdlog history. return success and allow the
+ // winner to continue purging
+ ldout(cct, 10) << "already removed log shards for realm_epoch=" << cursor.get_epoch()
+ << " period=" << cursor.get_period().get_id() << dendl;
+ return set_cr_done();
+ } else if (retcode < 0) {
+ ldout(cct, 1) << "failed to remove log shards for realm_epoch="
+ << cursor.get_epoch() << " period=" << cursor.get_period().get_id()
+ << " with: " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ if (*last_trim_epoch < cursor.get_epoch()) { // only ever moves *last_trim_epoch forward
+ *last_trim_epoch = cursor.get_epoch();
+ }
+
+ ceph_assert(cursor.has_next()); // get_current() should always come after
+ cursor.next();
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+namespace {
+
+using connection_map = std::map<std::string, std::unique_ptr<RGWRESTConn>>;
+
+/// construct a RGWRESTConn for each zone of every zonegroup in the given map, keyed by the zone's map key
+template <typename Zonegroups>
+connection_map make_peer_connections(RGWRados *store,
+ const Zonegroups& zonegroups)
+{
+ connection_map connections;
+ for (auto& g : zonegroups) {
+ for (auto& z : g.second.zones) {
+ std::unique_ptr<RGWRESTConn> conn{
+ new RGWRESTConn(store->ctx(), store->svc.zone, z.first, z.second.endpoints)};
+ connections.emplace(z.first, std::move(conn)); // emplace keeps an existing entry if the same key appears twice
+ }
+ }
+ return connections;
+}
+
+/// return the marker that it's safe to trim up to: next_step_marker while in FullSync, otherwise marker
+const std::string& get_stable_marker(const rgw_meta_sync_marker& m)
+{
+ return m.state == m.FullSync ? m.next_step_marker : m.marker;
+}
+
+/// comparison operator for take_min_status(): orders markers by their stable marker string
+bool operator<(const rgw_meta_sync_marker& lhs, const rgw_meta_sync_marker& rhs)
+{
+ // sort by stable marker
+ return get_stable_marker(lhs) < get_stable_marker(rhs);
+}
+
+/// populate the status with the minimum stable marker of each shard for any
+/// peer whose realm_epoch matches the minimum realm_epoch in the input (returns -EINVAL on empty input or a shard-count mismatch)
+template <typename Iter>
+int take_min_status(CephContext *cct, Iter first, Iter last,
+ rgw_meta_sync_status *status)
+{
+ if (first == last) {
+ return -EINVAL;
+ }
+ const size_t num_shards = cct->_conf->rgw_md_log_max_shards;
+
+ status->sync_info.realm_epoch = std::numeric_limits<epoch_t>::max(); // sentinel so the first peer with a smaller epoch is taken whole
+ for (auto p = first; p != last; ++p) {
+ // validate peer's shard count
+ if (p->sync_markers.size() != num_shards) {
+ ldout(cct, 1) << "take_min_status got peer status with "
+ << p->sync_markers.size() << " shards, expected "
+ << num_shards << dendl;
+ return -EINVAL;
+ }
+ if (p->sync_info.realm_epoch < status->sync_info.realm_epoch) {
+ // earlier epoch, take its entire status
+ *status = std::move(*p); // moves from the input range: *p must not be relied on afterwards
+ } else if (p->sync_info.realm_epoch == status->sync_info.realm_epoch) {
+ // same epoch, take any earlier markers
+ auto m = status->sync_markers.begin(); // NOTE(review): walked in lockstep with p->sync_markers; only the sizes are validated, not that the keys match
+ for (auto& shard : p->sync_markers) {
+ if (shard.second < m->second) {
+ m->second = std::move(shard.second);
+ }
+ ++m;
+ }
+ }
+ }
+ return 0;
+}
+
+struct TrimEnv { // state common to the master and peer trim environments below
+ const DoutPrefixProvider *dpp;
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ int num_shards;
+ const std::string& zone; // NOTE(review): held by reference to the id from the zone params, not copied - assumes get_id() returns a reference that outlives this env; confirm
+ Cursor current; //< cursor to current period
+ epoch_t last_trim_epoch{0}; //< epoch of last mdlog that was purged
+
+ TrimEnv(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : dpp(dpp), store(store), http(http), num_shards(num_shards),
+ zone(store->svc.zone->get_zone_params().get_id()),
+ current(store->period_history->get_current()) // captured once at construction
+ {}
+};
+
+struct MasterTrimEnv : public TrimEnv { // trim state used by the MetaMaster* coroutines
+ connection_map connections; //< peer connections
+ std::vector<rgw_meta_sync_status> peer_status; //< sync status for each peer
+ /// last trim marker for each shard, only applies to current period's mdlog
+ std::vector<std::string> last_trim_markers;
+
+ MasterTrimEnv(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : TrimEnv(dpp, store, http, num_shards),
+ last_trim_markers(num_shards) // one empty marker per shard
+ {
+ auto& period = current.get_period();
+ connections = make_peer_connections(store, period.get_map().zonegroups);
+ connections.erase(zone); // drop the entry keyed by our own zone id so only peers remain
+ peer_status.resize(connections.size()); // one status slot per remaining connection
+ }
+};
+
+struct PeerTrimEnv : public TrimEnv { // trim state used by the MetaPeer* coroutines
+ /// last trim timestamp for each shard, only applies to current period's mdlog
+ std::vector<ceph::real_time> last_trim_timestamps;
+
+ PeerTrimEnv(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : TrimEnv(dpp, store, http, num_shards),
+ last_trim_timestamps(num_shards) // value-initialized timestamps, one per shard
+ {}
+
+ void set_num_shards(int num_shards) { // update the shard count and resize the per-shard timestamps to match
+ this->num_shards = num_shards;
+ last_trim_timestamps.resize(num_shards);
+ }
+};
+
+} // anonymous namespace
+
+
+/// spawn a trim cr for each shard that needs it, while limiting the number
+/// of concurrent shards (MAX_CONCURRENT_SHARDS is passed to the RGWShardCollectCR base)
+class MetaMasterTrimShardCollectCR : public RGWShardCollectCR {
+ private:
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+ MasterTrimEnv& env;
+ RGWMetadataLog *mdlog;
+ int shard_id{0}; //< next shard for spawn_next() to consider
+ std::string oid; //< filled by mdlog->get_shard_oid() before each spawn
+ const rgw_meta_sync_status& sync_status; //< held by reference; the caller's object must outlive this coroutine
+
+ public:
+ MetaMasterTrimShardCollectCR(MasterTrimEnv& env, RGWMetadataLog *mdlog,
+ const rgw_meta_sync_status& sync_status)
+ : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+ env(env), mdlog(mdlog), sync_status(sync_status)
+ {}
+
+ bool spawn_next() override;
+};
+
+bool MetaMasterTrimShardCollectCR::spawn_next() // spawn a trim for the next shard that needs one; returns true if one was spawned, false once all shards were considered
+{
+ while (shard_id < env.num_shards) {
+ auto m = sync_status.sync_markers.find(shard_id);
+ if (m == sync_status.sync_markers.end()) { // no marker for this shard in the status, nothing to trim to
+ shard_id++;
+ continue;
+ }
+ auto& stable = get_stable_marker(m->second);
+ auto& last_trim = env.last_trim_markers[shard_id];
+
+ if (stable <= last_trim) { // string comparison of markers
+ // already trimmed
+ ldout(cct, 20) << "skipping log shard " << shard_id
+ << " at marker=" << stable
+ << " last_trim=" << last_trim
+ << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl;
+ shard_id++;
+ continue;
+ }
+
+ mdlog->get_shard_oid(shard_id, oid);
+
+ ldout(cct, 10) << "trimming log shard " << shard_id
+ << " at marker=" << stable
+ << " last_trim=" << last_trim
+ << " realm_epoch=" << sync_status.sync_info.realm_epoch << dendl;
+ spawn(new RGWSyncLogTrimCR(env.store, oid, stable, &last_trim), false); // passes a pointer to env's last_trim slot; presumably updated by the trim cr on success - confirm in RGWSyncLogTrimCR
+ shard_id++;
+ return true; // at most one spawn per call
+ }
+ return false;
+}
+
+/// spawn rest requests to read each peer's sync status; results are written into env.peer_status in the iteration order of env.connections
+class MetaMasterStatusCollectCR : public RGWShardCollectCR {
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+ MasterTrimEnv& env;
+ connection_map::iterator c; //< next connection to query
+ std::vector<rgw_meta_sync_status>::iterator s; //< status slot for that connection, advanced together with c
+ public:
+ explicit MetaMasterStatusCollectCR(MasterTrimEnv& env)
+ : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+ env(env), c(env.connections.begin()), s(env.peer_status.begin())
+ {}
+
+ bool spawn_next() override { // spawn one status request per call; returns false when every connection has been queried
+ if (c == env.connections.end()) {
+ return false;
+ }
+ static rgw_http_param_pair params[] = { // static, so the array is shared by all calls and outlives the spawned request
+ { "type", "metadata" },
+ { "status", nullptr },
+ { nullptr, nullptr }
+ };
+
+ ldout(cct, 20) << "query sync status from " << c->first << dendl;
+ auto conn = c->second.get(); // non-owning pointer; env.connections keeps ownership
+ using StatusCR = RGWReadRESTResourceCR<rgw_meta_sync_status>;
+ spawn(new StatusCR(cct, conn, env.http, "/admin/log/", params, &*s),
+ false);
+ ++c;
+ ++s;
+ return true;
+ }
+};
+
+class MetaMasterTrimCR : public RGWCoroutine { // one trim pass on the metadata master: read peer status, take the minimum, purge/trim
+ MasterTrimEnv& env;
+ rgw_meta_sync_status min_status; //< minimum sync status of all peers
+ int ret{0}; //< result of take_min_status() in operate()
+
+ public:
+ explicit MetaMasterTrimCR(MasterTrimEnv& env)
+ : RGWCoroutine(env.store->ctx()), env(env)
+ {}
+
+ int operate() override;
+};
+
+int MetaMasterTrimCR::operate() // fetch every peer's sync status, compute the minimum, then purge old period logs and trim the current period's shards
+{
+ reenter(this) {
+ // TODO: detect this and fail before we spawn the trim thread?
+ if (env.connections.empty()) {
+ ldout(cct, 4) << "no peers, exiting" << dendl;
+ return set_cr_done();
+ }
+
+ ldout(cct, 10) << "fetching sync status for zone " << env.zone << dendl;
+ // query mdlog sync status from peers
+ yield call(new MetaMasterStatusCollectCR(env));
+
+ // must get a successful reply from all peers to consider trimming
+ if (retcode < 0) { // result of the call() above; the member `ret` is still 0 here and must not be used for this check
+ ldout(cct, 4) << "failed to fetch sync status from all peers" << dendl;
+ return set_cr_error(retcode);
+ }
+
+ // determine the minimum epoch and markers
+ ret = take_min_status(env.store->ctx(), env.peer_status.begin(),
+ env.peer_status.end(), &min_status);
+ if (ret < 0) {
+ ldout(cct, 4) << "failed to calculate min sync status from peers" << dendl;
+ return set_cr_error(ret);
+ }
+ yield {
+ auto store = env.store;
+ auto epoch = min_status.sync_info.realm_epoch;
+ ldout(cct, 4) << "realm epoch min=" << epoch
+ << " current=" << env.current.get_epoch()<< dendl;
+ if (epoch > env.last_trim_epoch + 1) {
+ // delete any prior mdlog periods
+ spawn(new PurgePeriodLogsCR(store, epoch, &env.last_trim_epoch), true);
+ } else {
+ ldout(cct, 10) << "mdlogs already purged up to realm_epoch "
+ << env.last_trim_epoch << dendl;
+ }
+
+ // if realm_epoch == current, trim mdlog based on markers
+ if (epoch == env.current.get_epoch()) {
+ auto mdlog = store->meta_mgr->get_log(env.current.get_period().get_id());
+ spawn(new MetaMasterTrimShardCollectCR(env, mdlog, min_status), true); // min_status is a member, so the reference handed to the child stays valid while this cr lives
+ }
+ }
+ // ignore any errors during purge/trim because we want to hold the lock open
+ return set_cr_done();
+ }
+ return 0;
+}
+
+
+/// read the first entry of the master's mdlog shard and trim to that position
+class MetaPeerTrimShardCR : public RGWCoroutine {
+ RGWMetaSyncEnv& env;
+ RGWMetadataLog *mdlog;
+ const std::string& period_id; //< held by reference; the caller's string must outlive this coroutine
+ const int shard_id;
+ RGWMetadataLogInfo info; //< shard info from the master, read only when the first listing is empty
+ ceph::real_time stable; //< safe timestamp to trim, according to master
+ ceph::real_time *last_trim; //< last trimmed timestamp, updated on trim
+ rgw_mdlog_shard_data result; //< result from master's mdlog listing
+
+ public:
+ MetaPeerTrimShardCR(RGWMetaSyncEnv& env, RGWMetadataLog *mdlog,
+ const std::string& period_id, int shard_id,
+ ceph::real_time *last_trim)
+ : RGWCoroutine(env.store->ctx()), env(env), mdlog(mdlog),
+ period_id(period_id), shard_id(shard_id), last_trim(last_trim)
+ {}
+
+ int operate() override;
+};
+
+int MetaPeerTrimShardCR::operate() // pick a safe timestamp from the master's view of this shard and trim the local shard up to it
+{
+ reenter(this) {
+ // query master's first mdlog entry for this shard
+ yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id,
+ "", 1, &result));
+ if (retcode < 0) {
+ ldpp_dout(env.dpp, 5) << "failed to read first entry from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ if (result.entries.empty()) {
+ // if there are no mdlog entries, we don't have a timestamp to compare. we
+ // can't just trim everything, because there could be racing updates since
+ // this empty reply. query the mdlog shard info to read its max timestamp,
+ // then retry the listing to make sure it's still empty before trimming to
+ // that
+ ldpp_dout(env.dpp, 10) << "empty master mdlog shard " << shard_id
+ << ", reading last timestamp from shard info" << dendl;
+ // read the mdlog shard info for the last timestamp
+ using ShardInfoCR = RGWReadRemoteMDLogShardInfoCR;
+ yield call(new ShardInfoCR(&env, period_id, shard_id, &info));
+ if (retcode < 0) {
+ ldpp_dout(env.dpp, 5) << "failed to read info from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ if (ceph::real_clock::is_zero(info.last_update)) {
+ return set_cr_done(); // nothing to trim
+ }
+ ldpp_dout(env.dpp, 10) << "got mdlog shard info with last update="
+ << info.last_update << dendl;
+ // re-read the master's first mdlog entry to make sure it hasn't changed
+ yield call(new RGWListRemoteMDLogShardCR(&env, period_id, shard_id,
+ "", 1, &result));
+ if (retcode < 0) {
+ ldpp_dout(env.dpp, 5) << "failed to read first entry from master's mdlog shard "
+ << shard_id << " for period " << period_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // if the mdlog is still empty, trim to max marker
+ if (result.entries.empty()) {
+ stable = info.last_update;
+ } else {
+ stable = result.entries.front().timestamp;
+
+ // can only trim -up to- master's first timestamp, so subtract a second.
+ // (this is why we use timestamps instead of markers for the peers)
+ stable -= std::chrono::seconds(1);
+ }
+ } else {
+ stable = result.entries.front().timestamp;
+ stable -= std::chrono::seconds(1); // same one-second margin as the branch above
+ }
+
+ if (stable <= *last_trim) { // already trimmed at least this far
+ ldpp_dout(env.dpp, 10) << "skipping log shard " << shard_id
+ << " at timestamp=" << stable
+ << " last_trim=" << *last_trim << dendl;
+ return set_cr_done();
+ }
+
+ ldpp_dout(env.dpp, 10) << "trimming log shard " << shard_id
+ << " at timestamp=" << stable
+ << " last_trim=" << *last_trim << dendl;
+ yield {
+ std::string oid;
+ mdlog->get_shard_oid(shard_id, oid);
+ call(new RGWRadosTimelogTrimCR(env.store, oid, real_time{}, stable, "", "")); // time range from a default real_time up to stable, with empty markers
+ }
+ if (retcode < 0 && retcode != -ENODATA) { // -ENODATA is not treated as a failure
+ ldpp_dout(env.dpp, 1) << "failed to trim mdlog shard " << shard_id
+ << ": " << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ *last_trim = stable; // remember the trim position for the next pass
+ return set_cr_done();
+ }
+ return 0;
+}
+
+class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { // spawns a MetaPeerTrimShardCR for every shard, limited to MAX_CONCURRENT_SHARDS
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+
+ PeerTrimEnv& env;
+ RGWMetadataLog *mdlog;
+ const std::string& period_id; //< reference to the id of env.current's period
+ RGWMetaSyncEnv meta_env; //< for RGWListRemoteMDLogShardCR
+ int shard_id{0}; //< next shard to spawn
+
+ public:
+ MetaPeerTrimShardCollectCR(PeerTrimEnv& env, RGWMetadataLog *mdlog)
+ : RGWShardCollectCR(env.store->ctx(), MAX_CONCURRENT_SHARDS),
+ env(env), mdlog(mdlog), period_id(env.current.get_period().get_id())
+ {
+ meta_env.init(env.dpp, cct, env.store, env.store->svc.zone->get_master_conn(), // remote requests go to the master's connection
+ env.store->get_async_rados(), env.http, nullptr, // nullptr: no error logger
+ env.store->get_sync_tracer());
+ }
+
+ bool spawn_next() override;
+};
+
+bool MetaPeerTrimShardCollectCR::spawn_next() // spawn the trim for the next shard; returns false once shard_id reaches env.num_shards
+{
+ if (shard_id >= env.num_shards) {
+ return false;
+ }
+ auto& last_trim = env.last_trim_timestamps[shard_id]; // per-shard slot that the child updates after a successful trim
+ spawn(new MetaPeerTrimShardCR(meta_env, mdlog, period_id, shard_id, &last_trim),
+ false);
+ shard_id++;
+ return true;
+}
+
+class MetaPeerTrimCR : public RGWCoroutine { // one trim pass on a non-master zone, driven by the master's mdlog info
+ PeerTrimEnv& env;
+ rgw_mdlog_info mdlog_info; //< master's mdlog info
+
+ public:
+ explicit MetaPeerTrimCR(PeerTrimEnv& env) : RGWCoroutine(env.store->ctx()), env(env) {}
+
+ int operate() override;
+};
+
+int MetaPeerTrimCR::operate() // read the master's mdlog info, purge older period logs, then trim the current period's shards
+{
+ reenter(this) {
+ ldout(cct, 10) << "fetching master mdlog info" << dendl;
+ yield {
+ // query mdlog_info from master for oldest_log_period
+ rgw_http_param_pair params[] = { // NOTE(review): local array - assumes LogInfoCR copies the params during construction; confirm
+ { "type", "metadata" },
+ { nullptr, nullptr }
+ };
+
+ using LogInfoCR = RGWReadRESTResourceCR<rgw_mdlog_info>;
+ call(new LogInfoCR(cct, env.store->svc.zone->get_master_conn(), env.http,
+ "/admin/log/", params, &mdlog_info));
+ }
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to read mdlog info from master" << dendl;
+ return set_cr_error(retcode);
+ }
+ // use master's shard count instead
+ env.set_num_shards(mdlog_info.num_shards); // also resizes env.last_trim_timestamps
+
+ if (mdlog_info.realm_epoch > env.last_trim_epoch + 1) {
+ // delete any prior mdlog periods
+ yield call(new PurgePeriodLogsCR(env.store, mdlog_info.realm_epoch,
+ &env.last_trim_epoch)); // retcode from the purge is not checked
+ } else {
+ ldout(cct, 10) << "mdlogs already purged through realm_epoch "
+ << env.last_trim_epoch << dendl;
+ }
+
+ // if realm_epoch == current, trim mdlog based on master's markers
+ if (mdlog_info.realm_epoch == env.current.get_epoch()) {
+ yield {
+ auto meta_mgr = env.store->meta_mgr;
+ auto mdlog = meta_mgr->get_log(env.current.get_period().get_id());
+ call(new MetaPeerTrimShardCollectCR(env, mdlog));
+ // ignore any errors during purge/trim because we want to hold the lock open
+ }
+ }
+ return set_cr_done(); // always reports success once the master's info has been read
+ }
+ return 0;
+}
+
+class MetaTrimPollCR : public RGWCoroutine { // loops forever: wait, take the trim lock, run the coroutine from alloc_cr()
+ RGWRados *const store;
+ const utime_t interval; //< polling interval
+ const rgw_raw_obj obj; //< object the lock is taken on: RGWMetadataLogHistory::oid in the zone's log_pool
+ const std::string name{"meta_trim"}; //< lock name
+ const std::string cookie; //< random lock cookie generated once at construction
+
+ protected:
+ /// allocate the coroutine to run within the lease
+ virtual RGWCoroutine* alloc_cr() = 0;
+
+ public:
+ MetaTrimPollCR(RGWRados *store, utime_t interval)
+ : RGWCoroutine(store->ctx()), store(store), interval(interval),
+ obj(store->svc.zone->get_zone_params().log_pool, RGWMetadataLogHistory::oid),
+ cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct))
+ {}
+
+ int operate() override;
+};
+
+int MetaTrimPollCR::operate() // never completes on its own: each iteration waits, locks, trims, and unlocks only if the trim failed
+{
+ reenter(this) {
+ for (;;) {
+ set_status("sleeping");
+ wait(interval);
+
+ // prevent others from trimming for our entire wait interval
+ set_status("acquiring trim lock");
+ yield call(new RGWSimpleRadosLockCR(store->get_async_rados(), store,
+ obj, name, cookie, interval.sec())); // presumably interval.sec() is the lock duration in seconds - confirm in RGWSimpleRadosLockCR
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl;
+ continue; // skip this round and wait again
+ }
+
+ set_status("trimming");
+ yield call(alloc_cr());
+
+ if (retcode < 0) {
+ // on errors, unlock so other gateways can try
+ set_status("unlocking");
+ yield call(new RGWSimpleRadosUnlockCR(store->get_async_rados(), store,
+ obj, name, cookie));
+ } // on success no unlock is issued here
+ }
+ }
+ return 0;
+}
+
+class MetaMasterTrimPollCR : public MetaTrimPollCR { // polling trim loop that runs MetaMasterTrimCR
+ MasterTrimEnv env; //< trim state to share between calls
+ RGWCoroutine* alloc_cr() override {
+ return new MetaMasterTrimCR(env); // each call allocates a new cr over the same env
+ }
+ public:
+ MetaMasterTrimPollCR(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http,
+ int num_shards, utime_t interval)
+ : MetaTrimPollCR(store, interval),
+ env(dpp, store, http, num_shards)
+ {}
+};
+
+class MetaPeerTrimPollCR : public MetaTrimPollCR { // polling trim loop that runs MetaPeerTrimCR
+ PeerTrimEnv env; //< trim state to share between calls
+ RGWCoroutine* alloc_cr() override {
+ return new MetaPeerTrimCR(env); // each call allocates a new cr over the same env
+ }
+ public:
+ MetaPeerTrimPollCR(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http,
+ int num_shards, utime_t interval)
+ : MetaTrimPollCR(store, interval),
+ env(dpp, store, http, num_shards)
+ {}
+};
+
+RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, // returns a newly allocated polling trim coroutine
+ int num_shards, utime_t interval)
+{
+ if (store->svc.zone->is_meta_master()) { // the metadata master trims based on its peers' sync status
+ return new MetaMasterTrimPollCR(dpp, store, http, num_shards, interval);
+ }
+ return new MetaPeerTrimPollCR(dpp, store, http, num_shards, interval); // every other zone trims based on the master's mdlog
+}
+
+
+struct MetaMasterAdminTrimCR : private MasterTrimEnv, public MetaMasterTrimCR { // MasterTrimEnv is listed first so it is constructed before the cr that holds a reference to it
+ MetaMasterAdminTrimCR(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : MasterTrimEnv(dpp, store, http, num_shards),
+ MetaMasterTrimCR(*static_cast<MasterTrimEnv*>(this)) // the cr's env is this object's own private base
+ {}
+};
+
+struct MetaPeerAdminTrimCR : private PeerTrimEnv, public MetaPeerTrimCR { // PeerTrimEnv is listed first so it is constructed before the cr that holds a reference to it
+ MetaPeerAdminTrimCR(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http, int num_shards)
+ : PeerTrimEnv(dpp, store, http, num_shards),
+ MetaPeerTrimCR(*static_cast<PeerTrimEnv*>(this)) // the cr's env is this object's own private base
+ {}
+};
+
+RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, RGWRados *store, // returns a newly allocated trim coroutine that owns its env; no polling loop or lock is involved
+ RGWHTTPManager *http,
+ int num_shards)
+{
+ if (store->svc.zone->is_meta_master()) {
+ return new MetaMasterAdminTrimCR(dpp, store, http, num_shards);
+ }
+ return new MetaPeerAdminTrimCR(dpp, store, http, num_shards);
+}
diff --git a/src/rgw/rgw_sync.h b/src/rgw/rgw_sync.h
new file mode 100644
index 00000000..7774e164
--- /dev/null
+++ b/src/rgw/rgw_sync.h
@@ -0,0 +1,534 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_H
+#define CEPH_RGW_SYNC_H
+
+#include <atomic>
+
+#include "include/stringify.h"
+#include "common/RWLock.h"
+
+#include "rgw_coroutine.h"
+#include "rgw_http_client.h"
+#include "rgw_metadata.h"
+#include "rgw_meta_sync_status.h"
+#include "rgw_rados.h"
+#include "rgw_sync_trace.h"
+
+
+#define ERROR_LOGGER_SHARDS 32
+#define RGW_SYNC_ERROR_LOG_SHARD_PREFIX "sync.error-log"
+
+struct rgw_mdlog_info { // metadata log summary, decodable from JSON
+ uint32_t num_shards; //< number of mdlog shards
+ std::string period; //< period id of the master's oldest metadata log
+ epoch_t realm_epoch; //< realm epoch of oldest metadata log
+
+ rgw_mdlog_info() : num_shards(0), realm_epoch(0) {}
+
+ void decode_json(JSONObj *obj);
+};
+
+
+struct rgw_mdlog_entry { // one metadata log entry
+ string id;
+ string section;
+ string name;
+ ceph::real_time timestamp;
+ RGWMetadataLogData log_data; //< decoded from the cls log entry's data payload
+
+ void decode_json(JSONObj *obj);
+
+ bool convert_from(cls_log_entry& le) { // fill this entry from a cls_log_entry; returns false if the payload fails to decode
+ id = le.id;
+ section = le.section;
+ name = le.name;
+ timestamp = le.timestamp.to_real_time();
+ try {
+ auto iter = le.data.cbegin();
+ decode(log_data, iter);
+ } catch (buffer::error& err) {
+ return false; // the fields assigned above keep their new values
+ }
+ return true;
+ }
+};
+
+struct rgw_mdlog_shard_data { // one listing of a metadata log shard
+ string marker;
+ bool truncated; // NOTE(review): no initializer here - presumably always set by decode_json(); confirm
+ vector<rgw_mdlog_entry> entries;
+
+ void decode_json(JSONObj *obj);
+};
+
+class RGWAsyncRadosProcessor;
+class RGWMetaSyncStatusManager;
+class RGWMetaSyncCR;
+class RGWRESTConn;
+class RGWSyncTraceManager;
+
+class RGWSyncErrorLogger { // creates coroutines that log sync errors; holds one oid per shard
+ RGWRados *store;
+
+ vector<string> oids;
+ int num_shards;
+
+ std::atomic<int64_t> counter = { 0 }; // presumably used to pick the shard for the next entry - confirm in the .cc
+public:
+ RGWSyncErrorLogger(RGWRados *_store, const string &oid_prefix, int _num_shards);
+ RGWCoroutine *log_error_cr(const string& source_zone, const string& section, const string& name, uint32_t error_code, const string& message);
+
+ static string get_shard_oid(const string& oid_prefix, int shard_id);
+};
+
+struct rgw_sync_error_info { // encodable record of a sync error: source zone, error code and message
+ string source_zone;
+ uint32_t error_code;
+ string message;
+
+ rgw_sync_error_info() : error_code(0) {}
+ rgw_sync_error_info(const string& _source_zone, uint32_t _error_code, const string& _message) : source_zone(_source_zone), error_code(_error_code), message(_message) {}
+
+ void encode(bufferlist& bl) const { // version 1 encoding; field order must match decode() below
+ ENCODE_START(1, 1, bl);
+ encode(source_zone, bl);
+ encode(error_code, bl);
+ encode(message, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // reads the fields in the order encode() wrote them
+ DECODE_START(1, bl);
+ decode(source_zone, bl);
+ decode(error_code, bl);
+ decode(message, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(rgw_sync_error_info)
+
+#define DEFAULT_BACKOFF_MAX 30
+
+class RGWSyncBackoff { // tracks a backoff wait time between retries; max_secs defaults to DEFAULT_BACKOFF_MAX
+ int cur_wait; //< current wait; 0 after construction and after reset()
+ int max_secs; // presumably the upper bound applied by update_wait_time() - confirm in the .cc
+
+ void update_wait_time();
+public:
+ explicit RGWSyncBackoff(int _max_secs = DEFAULT_BACKOFF_MAX) : cur_wait(0), max_secs(_max_secs) {}
+
+ void backoff_sleep();
+ void reset() { // start over from a zero wait
+ cur_wait = 0;
+ }
+
+ void backoff(RGWCoroutine *op);
+};
+
+class RGWBackoffControlCR : public RGWCoroutine // base for coroutines that run a child from alloc_cr() with backoff handling; operate() is defined elsewhere
+{
+ RGWCoroutine *cr; //< child coroutine; a reference is dropped in the destructor if set
+ Mutex lock;
+
+ RGWSyncBackoff backoff;
+ bool reset_backoff; //< exposed to subclasses through backoff_ptr()
+
+ bool exit_on_error;
+
+protected:
+ bool *backoff_ptr() {
+ return &reset_backoff;
+ }
+
+ Mutex& cr_lock() {
+ return lock;
+ }
+
+ RGWCoroutine *get_cr() {
+ return cr;
+ }
+
+public:
+ RGWBackoffControlCR(CephContext *_cct, bool _exit_on_error) : RGWCoroutine(_cct), cr(NULL), lock("RGWBackoffControlCR::lock:" + stringify(this)), // lock name includes this object's address
+ reset_backoff(false), exit_on_error(_exit_on_error) {
+ }
+
+ ~RGWBackoffControlCR() override {
+ if (cr) {
+ cr->put(); // release the reference held on the child
+ }
+ }
+
+ virtual RGWCoroutine *alloc_cr() = 0;
+ virtual RGWCoroutine *alloc_finisher_cr() { return NULL; } // optional hook; the default supplies no finisher
+
+ int operate() override;
+};
+
+struct RGWMetaSyncEnv { // bundle of non-owning pointers shared by the metadata sync coroutines; all are null until init() is called
+ const DoutPrefixProvider *dpp{nullptr}; // initialized like the other members so a default-constructed env never holds an indeterminate pointer
+ CephContext *cct{nullptr};
+ RGWRados *store{nullptr};
+ RGWRESTConn *conn{nullptr};
+ RGWAsyncRadosProcessor *async_rados{nullptr};
+ RGWHTTPManager *http_manager{nullptr};
+ RGWSyncErrorLogger *error_logger{nullptr};
+ RGWSyncTraceManager *sync_tracer{nullptr};
+
+ RGWMetaSyncEnv() {}
+
+ void init(const DoutPrefixProvider *_dpp, CephContext *_cct, RGWRados *_store, RGWRESTConn *_conn,
+ RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager,
+ RGWSyncErrorLogger *_error_logger, RGWSyncTraceManager *_sync_tracer);
+
+ string shard_obj_name(int shard_id);
+ string status_oid();
+};
+
+class RGWRemoteMetaLog : public RGWCoroutinesManager { // coroutine manager for reading the master's metadata log and running metadata sync
+ const DoutPrefixProvider *dpp;
+ RGWRados *store;
+ RGWRESTConn *conn; //< null after construction; presumably set by init() - confirm
+ RGWAsyncRadosProcessor *async_rados;
+
+ RGWHTTPManager http_manager;
+ RGWMetaSyncStatusManager *status_manager;
+ RGWSyncErrorLogger *error_logger{nullptr};
+ RGWSyncTraceManager *sync_tracer{nullptr};
+
+ RGWMetaSyncCR *meta_sync_cr{nullptr};
+
+ RGWSyncBackoff backoff;
+
+ RGWMetaSyncEnv sync_env; //< returned by reference from get_sync_env()
+
+ void init_sync_env(RGWMetaSyncEnv *env);
+ int store_sync_info(const rgw_meta_sync_info& sync_info);
+
+ std::atomic<bool> going_down = { false };
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWRemoteMetaLog(const DoutPrefixProvider *dpp, RGWRados *_store,
+ RGWAsyncRadosProcessor *async_rados,
+ RGWMetaSyncStatusManager *_sm)
+ : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()),
+ dpp(dpp), store(_store), conn(NULL), async_rados(async_rados),
+ http_manager(store->ctx(), completion_mgr), // reads the member `store`, which is declared (and so initialized) before http_manager
+ status_manager(_sm) {}
+
+ ~RGWRemoteMetaLog() override;
+
+ int init();
+ void finish();
+
+ int read_log_info(rgw_mdlog_info *log_info);
+ int read_master_log_shards_info(const string& master_period, map<int, RGWMetadataLogInfo> *shards_info);
+ int read_master_log_shards_next(const string& period, map<int, string> shard_markers, map<int, rgw_mdlog_shard_data> *result); // shard_markers is taken by value
+ int read_sync_status(rgw_meta_sync_status *sync_status);
+ int init_sync_status();
+ int run_sync();
+
+ void wakeup(int shard_id);
+
+ RGWMetaSyncEnv& get_sync_env() {
+ return sync_env;
+ }
+};
+
+class RGWMetaSyncStatusManager : public DoutPrefixProvider { // owns the RGWRemoteMetaLog and forwards the public sync operations to it
+ RGWRados *store;
+ librados::IoCtx ioctx;
+
+ RGWRemoteMetaLog master_log;
+
+ map<int, rgw_raw_obj> shard_objs;
+
+ struct utime_shard { // (timestamp, shard id) pair ordered by timestamp, then shard id
+ real_time ts;
+ int shard_id;
+
+ utime_shard() : shard_id(-1) {}
+
+ bool operator<(const utime_shard& rhs) const {
+ if (ts == rhs.ts) {
+ return shard_id < rhs.shard_id; // equal timestamps fall back to the shard id
+ }
+ return ts < rhs.ts;
+ }
+ };
+
+ RWLock ts_to_shard_lock;
+ map<utime_shard, int> ts_to_shard;
+ vector<string> clone_markers;
+
+public:
+ RGWMetaSyncStatusManager(RGWRados *_store, RGWAsyncRadosProcessor *async_rados)
+ : store(_store), master_log(this, store, async_rados, this), // `this` is passed as both the DoutPrefixProvider and the status manager
+ ts_to_shard_lock("ts_to_shard_lock") {}
+ int init();
+
+ int read_sync_status(rgw_meta_sync_status *sync_status) {
+ return master_log.read_sync_status(sync_status);
+ }
+ int init_sync_status() { return master_log.init_sync_status(); }
+ int read_log_info(rgw_mdlog_info *log_info) {
+ return master_log.read_log_info(log_info);
+ }
+ int read_master_log_shards_info(const string& master_period, map<int, RGWMetadataLogInfo> *shards_info) {
+ return master_log.read_master_log_shards_info(master_period, shards_info);
+ }
+ int read_master_log_shards_next(const string& period, map<int, string> shard_markers, map<int, rgw_mdlog_shard_data> *result) {
+ return master_log.read_master_log_shards_next(period, shard_markers, result);
+ }
+
+ int run() { return master_log.run_sync(); }
+
+
+ // implements DoutPrefixProvider
+ CephContext *get_cct() const override { return store->ctx(); }
+ unsigned get_subsys() const override;
+ std::ostream& gen_prefix(std::ostream& out) const override;
+
+ void wakeup(int shard_id) { return master_log.wakeup(shard_id); }
+ void stop() {
+ master_log.finish();
+ }
+};
+
+class RGWOrderCallCR : public RGWCoroutine // abstract coroutine that is handed other coroutines through call_cr(); used by RGWSyncShardMarkerTrack::order()
+{
+public:
+ RGWOrderCallCR(CephContext *cct) : RGWCoroutine(cct) {}
+
+ virtual void call_cr(RGWCoroutine *_cr) = 0;
+};
+
+class RGWLastCallerWinsCR : public RGWOrderCallCR // keeps only the most recent coroutine passed to call_cr(); operate() is defined elsewhere
+{
+ RGWCoroutine *cr{nullptr}; //< pending coroutine, replaced by each call_cr()
+
+public:
+ explicit RGWLastCallerWinsCR(CephContext *cct) : RGWOrderCallCR(cct) {}
+ ~RGWLastCallerWinsCR() {
+ if (cr) {
+ cr->put(); // release a coroutine that is still pending
+ }
+ }
+
+ int operate() override;
+
+ void call_cr(RGWCoroutine *_cr) override {
+ if (cr) {
+ cr->put(); // drop the previously stored coroutine in favor of the new one
+ }
+ cr = _cr; // stores the pointer without calling get() on it
+ }
+};
+
+template <class T, class K>
+class RGWSyncShardMarkerTrack { // tracks in-flight and finished markers (T) for a shard and decides when the highest safely-finished marker is stored; K keys the need-retry set
+ struct marker_entry {
+ uint64_t pos;
+ real_time timestamp;
+
+ marker_entry() : pos(0) {}
+ marker_entry(uint64_t _p, const real_time& _ts) : pos(_p), timestamp(_ts) {}
+ };
+ typename std::map<T, marker_entry> pending; // markers started but not yet finished
+
+ map<T, marker_entry> finish_markers; // markers finished but not yet flushed
+
+ int window_size; // number of finish() calls after which a flush is attempted
+ int updates_since_flush;
+
+ RGWOrderCallCR *order_cr{nullptr}; // created lazily in order(); released in the destructor
+
+protected:
+ typename std::set<K> need_retry_set;
+
+ virtual RGWCoroutine *store_marker(const T& new_marker, uint64_t index_pos, const real_time& timestamp) = 0;
+ virtual RGWOrderCallCR *allocate_order_control_cr() = 0;
+ virtual void handle_finish(const T& marker) { }
+
+public:
+ RGWSyncShardMarkerTrack(int _window_size) : window_size(_window_size), updates_since_flush(0) {}
+ virtual ~RGWSyncShardMarkerTrack() {
+ if (order_cr) {
+ order_cr->put();
+ }
+ }
+
+ bool start(const T& pos, int index_pos, const real_time& timestamp) { // register pos as pending; returns false if it is already pending
+ if (pending.find(pos) != pending.end()) {
+ return false;
+ }
+ pending[pos] = marker_entry(index_pos, timestamp);
+ return true;
+ }
+
+ void try_update_high_marker(const T& pos, int index_pos, const real_time& timestamp) { // record pos as finished without it having been pending
+ finish_markers[pos] = marker_entry(index_pos, timestamp);
+ }
+
+ RGWCoroutine *finish(const T& pos) { // mark pos finished; returns the coroutine from flush() when one is due, otherwise NULL
+ if (pending.empty()) {
+ /* can happen, due to a bug that ended up with multiple objects with the same name and version
+ * -- which can happen when versioning is enabled and the version is 'null'.
+ */
+ return NULL;
+ }
+
+ typename std::map<T, marker_entry>::iterator iter = pending.begin();
+
+ bool is_first = (pos == iter->first); // true when pos is the lowest pending marker
+
+ typename std::map<T, marker_entry>::iterator pos_iter = pending.find(pos);
+ if (pos_iter == pending.end()) {
+ /* see pending.empty() comment */
+ return NULL;
+ }
+
+ finish_markers[pos] = pos_iter->second;
+
+ pending.erase(pos);
+
+ handle_finish(pos);
+
+ updates_since_flush++;
+
+ if (is_first && (updates_since_flush >= window_size || pending.empty())) { // flush only when the lowest pending marker completed, and the window filled or nothing is left pending
+ return flush();
+ }
+ return NULL;
+ }
+
+ RGWCoroutine *flush() { // store the highest finished marker that sorts below every pending marker; returns NULL if there is none, or if order() handed the cr to an existing order_cr
+ if (finish_markers.empty()) {
+ return NULL;
+ }
+
+ typename std::map<T, marker_entry>::iterator i;
+
+ if (pending.empty()) {
+ i = finish_markers.end();
+ } else {
+ i = finish_markers.lower_bound(pending.begin()->first); // first finished marker not below the lowest pending one
+ }
+ if (i == finish_markers.begin()) {
+ return NULL; // every finished marker is at or above the lowest pending one
+ }
+ updates_since_flush = 0;
+
+ auto last = i;
+ --i; // step back to the highest finished marker below the lowest pending one
+ const T& high_marker = i->first;
+ marker_entry& high_entry = i->second;
+ RGWCoroutine *cr = order(store_marker(high_marker, high_entry.pos, high_entry.timestamp));
+ finish_markers.erase(finish_markers.begin(), last); // drop everything up to and including the stored marker
+ return cr;
+ }
+
+ /*
+ * a key needs retry if it was processing when another marker that points
+ * to the same bucket shards arrives. Instead of processing it, we mark
+ * it as need_retry so that when we finish processing the original, we
+ * retry the processing on the same bucket shard, in case there are more
+ * entries to process. This closes a race that can happen.
+ */
+ bool need_retry(const K& key) {
+ return (need_retry_set.find(key) != need_retry_set.end());
+ }
+
+ void set_need_retry(const K& key) {
+ need_retry_set.insert(key);
+ }
+
+ void reset_need_retry(const K& key) {
+ need_retry_set.erase(key);
+ }
+
+ RGWCoroutine *order(RGWCoroutine *cr) {
+ /* either returns a new order control cr (e.g. RGWLastCallerWinsCR), or updates the existing one, in which case it returns
+ * nothing and the existing one will call the cr
+ */
+ if (order_cr && order_cr->is_done()) { // a finished order_cr is released so a new one gets allocated below
+ order_cr->put();
+ order_cr = nullptr;
+ }
+ if (!order_cr) {
+ order_cr = allocate_order_control_cr();
+ order_cr->get(); // keep our own reference in addition to the one returned to the caller
+ order_cr->call_cr(cr);
+ return order_cr;
+ }
+ order_cr->call_cr(cr);
+ return nullptr; /* don't call it a second time */
+ }
+};
+
+class RGWMetaSyncShardMarkerTrack;
+
+class RGWMetaSyncSingleEntryCR : public RGWCoroutine { // coroutine for a single metadata entry; the constructor and operate() are defined elsewhere
+ RGWMetaSyncEnv *sync_env;
+
+ string raw_key;
+ string entry_marker;
+ RGWMDLogStatus op_status;
+
+ ssize_t pos;
+ string section;
+ string key;
+
+ int sync_status;
+
+ bufferlist md_bl;
+
+ RGWMetaSyncShardMarkerTrack *marker_tracker;
+
+ int tries;
+
+ bool error_injection;
+
+ RGWSyncTraceNodeRef tn;
+
+public:
+ RGWMetaSyncSingleEntryCR(RGWMetaSyncEnv *_sync_env,
+ const string& _raw_key, const string& _entry_marker,
+ const RGWMDLogStatus& _op_status,
+ RGWMetaSyncShardMarkerTrack *_marker_tracker, const RGWSyncTraceNodeRef& _tn_parent);
+
+ int operate() override;
+};
+
+class RGWShardCollectCR : public RGWCoroutine { // base for coroutines that spawn one child per unit of work via spawn_next(), constructed with a concurrency limit
+ int cur_shard{0}; // previously left uninitialized by the constructor, unlike every other member
+ int current_running; //< starts at 0
+ int max_concurrent; //< limit supplied by the subclass
+ int status; //< starts at 0
+
+public:
+ RGWShardCollectCR(CephContext *_cct, int _max_concurrent) : RGWCoroutine(_cct),
+ current_running(0),
+ max_concurrent(_max_concurrent),
+ status(0) {}
+
+ virtual bool spawn_next() = 0; // subclasses in this file return true after spawning a child and false when there is nothing left
+ int operate() override;
+};
+
+// factory function for the polling mdlog trim coroutine (master or peer variant, chosen by the zone's role)
+RGWCoroutine* create_meta_log_trim_cr(const DoutPrefixProvider *dpp, RGWRados *store, RGWHTTPManager *http,
+ int num_shards, utime_t interval);
+
+// factory function for mdlog trim via radosgw-admin
+RGWCoroutine* create_admin_meta_log_trim_cr(const DoutPrefixProvider *dpp, RGWRados *store,
+ RGWHTTPManager *http,
+ int num_shards);
+
+#endif
diff --git a/src/rgw/rgw_sync_counters.cc b/src/rgw/rgw_sync_counters.cc
new file mode 100644
index 00000000..b4130068
--- /dev/null
+++ b/src/rgw/rgw_sync_counters.cc
@@ -0,0 +1,28 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_context.h"
+#include "rgw_sync_counters.h"
+
+namespace sync_counters {
+
+PerfCountersRef build(CephContext *cct, const std::string& name) // create the sync perf counters under `name`, add them to cct's collection, and return the owning ref
+{
+ PerfCountersBuilder b(cct, name, l_first, l_last);
+
+ // share these counters with ceph-mgr
+ b.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
+
+ b.add_u64_avg(l_fetch, "fetch_bytes", "Number of object bytes replicated");
+ b.add_u64_counter(l_fetch_not_modified, "fetch_not_modified", "Number of objects already replicated");
+ b.add_u64_counter(l_fetch_err, "fetch_errors", "Number of object replication errors");
+
+ b.add_time_avg(l_poll, "poll_latency", "Average latency of replication log requests");
+ b.add_u64_counter(l_poll_err, "poll_errors", "Number of replication log request errors");
+
+ auto logger = PerfCountersRef{ b.create_perf_counters(), cct };
+ cct->get_perfcounters_collection()->add(logger.get()); // the collection gets the raw pointer; logger keeps ownership
+ return logger;
+}
+
+} // namespace sync_counters
diff --git a/src/rgw/rgw_sync_counters.h b/src/rgw/rgw_sync_counters.h
new file mode 100644
index 00000000..4c270241
--- /dev/null
+++ b/src/rgw/rgw_sync_counters.h
@@ -0,0 +1,25 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "common/perf_counters_collection.h"
+
+namespace sync_counters {
+
+enum { // perf counter indices; l_first and l_last are the bounds passed to PerfCountersBuilder in build()
+ l_first = 805000,
+
+ l_fetch,
+ l_fetch_not_modified,
+ l_fetch_err,
+
+ l_poll,
+ l_poll_err,
+
+ l_last,
+};
+
+PerfCountersRef build(CephContext *cct, const std::string& name); // defined in rgw_sync_counters.cc
+
+} // namespace sync_counters
diff --git a/src/rgw/rgw_sync_log_trim.cc b/src/rgw/rgw_sync_log_trim.cc
new file mode 100644
index 00000000..a8a3fdee
--- /dev/null
+++ b/src/rgw/rgw_sync_log_trim.cc
@@ -0,0 +1,1094 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat, Inc
+ *
+ * Author: Casey Bodley <cbodley@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#include <mutex>
+#include <boost/circular_buffer.hpp>
+#include <boost/container/flat_map.hpp>
+
+#include "include/scope_guard.h"
+#include "common/bounded_key_counter.h"
+#include "common/errno.h"
+#include "rgw_sync_log_trim.h"
+#include "rgw_cr_rados.h"
+#include "rgw_cr_rest.h"
+#include "rgw_data_sync.h"
+#include "rgw_metadata.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_sync.h"
+
+#include "services/svc_zone.h"
+
+#include <boost/asio/yield.hpp>
+#include "include/ceph_assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#undef dout_prefix
+#define dout_prefix (*_dout << "trim: ")
+
+using rgw::BucketTrimConfig;
+using BucketChangeCounter = BoundedKeyCounter<std::string, int>;
+
+const std::string rgw::BucketTrimStatus::oid = "bilog.trim";
+using rgw::BucketTrimStatus;
+
+
+// watch/notify api for gateways to coordinate about which buckets to trim.
+// the type is encoded at the front of each notify payload and is used to
+// select the handler that decodes the rest of the message
+enum TrimNotifyType {
+ // request for bucket change counters, served by TrimCounters::Handler
+ NotifyTrimCounters = 0,
+ // announcement that a trim pass finished, served by TrimComplete::Handler
+ NotifyTrimComplete,
+};
+WRITE_RAW_ENCODER(TrimNotifyType);
+
+/// interface for handlers of a single TrimNotifyType. handle() receives an
+/// iterator positioned after the encoded type and writes its reply to output
+struct TrimNotifyHandler {
+ virtual ~TrimNotifyHandler() = default;
+
+ virtual void handle(bufferlist::const_iterator& input, bufferlist& output) = 0;
+};
+
+/// api to share the bucket trim counters between gateways in the same zone.
+/// each gateway will process different datalog shards, so the gateway that runs
+/// the trim process needs to accumulate their counters
+struct TrimCounters {
+ /// counter for a single bucket
+ struct BucketCounter {
+ std::string bucket; //< bucket instance metadata key
+ // value of the change counter reported for this bucket
+ int count{0};
+
+ BucketCounter() = default;
+ BucketCounter(const std::string& bucket, int count)
+ : bucket(bucket), count(count) {}
+
+ // encoded without a version header (see the definitions)
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+ };
+ using Vector = std::vector<BucketCounter>;
+
+ /// request bucket trim counters from peer gateways
+ struct Request {
+ // note: the Handler caps the number it actually returns at 128
+ uint16_t max_buckets; //< maximum number of bucket counters to return
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+ };
+
+ /// return the current bucket trim counters
+ struct Response {
+ Vector bucket_counters;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+ };
+
+ /// server interface to query the hottest buckets
+ struct Server {
+ virtual ~Server() = default;
+
+ // append up to 'count' of the highest bucket counters to 'counters'
+ virtual void get_bucket_counters(int count, Vector& counters) = 0;
+ // called when a peer announces that trim has completed
+ virtual void reset_bucket_counters() = 0;
+ };
+
+ /// notify handler
+ class Handler : public TrimNotifyHandler {
+ // not owned by the handler
+ Server *const server;
+ public:
+ explicit Handler(Server *server) : server(server) {}
+
+ // decode a Request, query the server and encode a Response
+ void handle(bufferlist::const_iterator& input, bufferlist& output) override;
+ };
+};
+/// print a bucket counter as "<bucket>:<count>"
+std::ostream& operator<<(std::ostream& out, const TrimCounters::BucketCounter& rhs)
+{
+  out << rhs.bucket << ":" << rhs.count;
+  return out;
+}
+
+// encode the bucket key followed by its count. the field order here must
+// match decode() below
+void TrimCounters::BucketCounter::encode(bufferlist& bl) const
+{
+ using ceph::encode;
+ // no versioning to save space
+ encode(bucket, bl);
+ encode(count, bl);
+}
+// decode the fields in the same order that encode() wrote them
+void TrimCounters::BucketCounter::decode(bufferlist::const_iterator& p)
+{
+ using ceph::decode;
+ decode(bucket, p);
+ decode(count, p);
+}
+WRITE_CLASS_ENCODER(TrimCounters::BucketCounter);
+
+// versioned encoding (v1) of the counters request: just max_buckets
+void TrimCounters::Request::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ encode(max_buckets, bl);
+ ENCODE_FINISH(bl);
+}
+// counterpart of encode() above
+void TrimCounters::Request::decode(bufferlist::const_iterator& p)
+{
+ DECODE_START(1, p);
+ decode(max_buckets, p);
+ DECODE_FINISH(p);
+}
+WRITE_CLASS_ENCODER(TrimCounters::Request);
+
+// versioned encoding (v1) of the counters response: the vector of counters
+void TrimCounters::Response::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ encode(bucket_counters, bl);
+ ENCODE_FINISH(bl);
+}
+// counterpart of encode() above
+void TrimCounters::Response::decode(bufferlist::const_iterator& p)
+{
+ DECODE_START(1, p);
+ decode(bucket_counters, p);
+ DECODE_FINISH(p);
+}
+WRITE_CLASS_ENCODER(TrimCounters::Response);
+
+/// serve a NotifyTrimCounters message: decode the request, ask the server for
+/// its highest bucket counters and encode them as the reply
+void TrimCounters::Handler::handle(bufferlist::const_iterator& input,
+                                   bufferlist& output)
+{
+  Request req;
+  decode(req, input);
+
+  // never return more than 128 counters, whatever the peer asked for
+  const auto limit = std::min<uint16_t>(req.max_buckets, 128);
+
+  Response reply;
+  server->get_bucket_counters(limit, reply.bucket_counters);
+  encode(reply, output);
+}
+
+/// api to notify peer gateways that trim has completed and their bucket change
+/// counters can be reset
+struct TrimComplete {
+ // the request has no fields; only its versioned envelope is encoded
+ struct Request {
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+ };
+ // the response has no fields either
+ struct Response {
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& p);
+ };
+
+ /// server interface to reset bucket counters
+ using Server = TrimCounters::Server;
+
+ /// notify handler
+ class Handler : public TrimNotifyHandler {
+ // not owned by the handler
+ Server *const server;
+ public:
+ explicit Handler(Server *server) : server(server) {}
+
+ // decode a Request, reset the server's counters and encode a Response
+ void handle(bufferlist::const_iterator& input, bufferlist& output) override;
+ };
+};
+
+// versioned encoding (v1) of an empty request body
+void TrimComplete::Request::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ENCODE_FINISH(bl);
+}
+// counterpart of encode() above
+void TrimComplete::Request::decode(bufferlist::const_iterator& p)
+{
+ DECODE_START(1, p);
+ DECODE_FINISH(p);
+}
+WRITE_CLASS_ENCODER(TrimComplete::Request);
+
+// versioned encoding (v1) of an empty response body
+void TrimComplete::Response::encode(bufferlist& bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ENCODE_FINISH(bl);
+}
+// counterpart of encode() above
+void TrimComplete::Response::decode(bufferlist::const_iterator& p)
+{
+ DECODE_START(1, p);
+ DECODE_FINISH(p);
+}
+WRITE_CLASS_ENCODER(TrimComplete::Response);
+
+/// serve a NotifyTrimComplete message: reset the server's bucket counters and
+/// reply with an empty response
+void TrimComplete::Handler::handle(bufferlist::const_iterator& input,
+                                   bufferlist& output)
+{
+  // the request has no fields, but its envelope still has to be decoded
+  Request req;
+  decode(req, input);
+
+  server->reset_bucket_counters();
+
+  Response reply;
+  encode(reply, output);
+}
+
+
+/// rados watcher for bucket trim notifications. dispatches each notification
+/// to the handler registered for its TrimNotifyType and acks with the
+/// handler's reply
+class BucketTrimWatcher : public librados::WatchCtx2 {
+  RGWRados *const store;
+  const rgw_raw_obj& obj; //< referenced, not copied: must outlive the watcher
+  rgw_rados_ref ref;
+  uint64_t handle{0}; //< watch handle, or 0 when no watch is established
+
+  using HandlerPtr = std::unique_ptr<TrimNotifyHandler>;
+  boost::container::flat_map<TrimNotifyType, HandlerPtr> handlers;
+
+ public:
+  BucketTrimWatcher(RGWRados *store, const rgw_raw_obj& obj,
+                    TrimCounters::Server *counters)
+    : store(store), obj(obj) {
+    handlers.emplace(NotifyTrimCounters, new TrimCounters::Handler(counters));
+    handlers.emplace(NotifyTrimComplete, new TrimComplete::Handler(counters));
+  }
+
+  ~BucketTrimWatcher() {
+    stop();
+  }
+
+  /// open the object and establish the watch, creating the object if it
+  /// doesn't exist yet. returns 0 on success or a negative error code
+  int start() {
+    int r = store->get_raw_obj_ref(obj, &ref);
+    if (r < 0) {
+      return r;
+    }
+
+    // register a watch on the realm's control object
+    r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
+    if (r == -ENOENT) {
+      constexpr bool exclusive = true;
+      r = ref.ioctx.create(ref.obj.oid, exclusive);
+      // EEXIST means someone else created it first, which is fine
+      if (r == -EEXIST || r == 0) {
+        r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
+      }
+    }
+    if (r < 0) {
+      lderr(store->ctx()) << "Failed to watch " << ref.obj
+          << " with " << cpp_strerror(-r) << dendl;
+      ref.ioctx.close();
+      // the ioctx is closed, so make sure stop() won't try to use it
+      handle = 0;
+      return r;
+    }
+
+    ldout(store->ctx(), 10) << "Watching " << ref.obj.oid << dendl;
+    return 0;
+  }
+
+  /// drop the current watch and establish a new one. returns the result of
+  /// the new watch; a failure to unwatch is only logged
+  int restart() {
+    int r = ref.ioctx.unwatch2(handle);
+    if (r < 0) {
+      lderr(store->ctx()) << "Failed to unwatch on " << ref.obj
+          << " with " << cpp_strerror(-r) << dendl;
+    }
+    r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
+    if (r < 0) {
+      lderr(store->ctx()) << "Failed to restart watch on " << ref.obj
+          << " with " << cpp_strerror(-r) << dendl;
+      ref.ioctx.close();
+      // the ioctx is closed, so make sure stop() won't try to use it
+      handle = 0;
+    }
+    return r;
+  }
+
+  /// tear down the watch and close the ioctx. safe to call more than once
+  /// (the destructor calls it too): the handle is cleared after the first call
+  void stop() {
+    if (handle) {
+      ref.ioctx.unwatch2(handle);
+      ref.ioctx.close();
+      handle = 0;
+    }
+  }
+
+  /// respond to bucket trim notifications
+  void handle_notify(uint64_t notify_id, uint64_t cookie,
+                     uint64_t notifier_id, bufferlist& bl) override {
+    // ignore notifications for any watch other than the current one
+    if (cookie != handle) {
+      return;
+    }
+    bufferlist reply;
+    try {
+      auto p = bl.cbegin();
+      TrimNotifyType type;
+      decode(type, p);
+
+      auto handler = handlers.find(type);
+      if (handler != handlers.end()) {
+        handler->second->handle(p, reply);
+      } else {
+        lderr(store->ctx()) << "no handler for notify type " << type << dendl;
+      }
+    } catch (const buffer::error& e) {
+      lderr(store->ctx()) << "Failed to decode notification: " << e.what() << dendl;
+    }
+    // always ack, even with an empty reply after an error
+    ref.ioctx.notify_ack(ref.obj.oid, notify_id, cookie, reply);
+  }
+
+  /// reestablish the watch if it gets disconnected
+  void handle_error(uint64_t cookie, int err) override {
+    if (cookie != handle) {
+      return;
+    }
+    if (err == -ENOTCONN) {
+      ldout(store->ctx(), 4) << "Disconnected watch on " << ref.obj << dendl;
+      restart();
+    }
+  }
+};
+
+
+/// Interface to communicate with the trim manager about completed operations
+struct BucketTrimObserver {
+ virtual ~BucketTrimObserver() = default;
+
+ // called after the bilog of the given bucket instance was trimmed
+ virtual void on_bucket_trimmed(std::string&& bucket_instance) = 0;
+ // returns true if the bucket instance is remembered as recently trimmed
+ virtual bool trimmed_recently(const boost::string_view& bucket_instance) = 0;
+};
+
+/// reduce the per-peer shard status in [first, last) into 'status', leaving
+/// each entry at the smallest incremental sync marker seen for that shard.
+/// shards that aren't in incremental sync leave their entry untouched.
+/// returns -EINVAL if a peer's shard count differs from status->size()
+template <typename Iter>
+int take_min_status(CephContext *cct, Iter first, Iter last,
+                    std::vector<std::string> *status)
+{
+  for (auto peer = first; peer != last; ++peer) {
+    // all peers must agree on the number of shards
+    if (peer->size() != status->size()) {
+      return -EINVAL;
+    }
+    const bool is_first_peer = (peer == first);
+    auto out = status->begin();
+    for (auto shard = peer->begin(); shard != peer->end(); ++shard, ++out) {
+      // only consider incremental sync markers
+      if (shard->state != rgw_bucket_shard_sync_info::StateIncrementalSync) {
+        continue;
+      }
+      auto& position = shard->inc_marker.position;
+      // always take the first marker, or any later marker that's smaller
+      if (is_first_peer || *out > position) {
+        *out = std::move(position);
+      }
+    }
+  }
+  return 0;
+}
+
+/// trim each bilog shard to the given marker, while limiting the number of
+/// concurrent requests
+class BucketTrimShardCollectCR : public RGWShardCollectCR {
+ static constexpr int MAX_CONCURRENT_SHARDS = 16;
+ RGWRados *const store;
+ // bucket_info and markers are held by reference, so the caller has to keep
+ // them alive while this coroutine runs
+ const RGWBucketInfo& bucket_info;
+ const std::vector<std::string>& markers; //< shard markers to trim
+ size_t i{0}; //< index of current shard marker
+ public:
+ BucketTrimShardCollectCR(RGWRados *store, const RGWBucketInfo& bucket_info,
+ const std::vector<std::string>& markers)
+ : RGWShardCollectCR(store->ctx(), MAX_CONCURRENT_SHARDS),
+ store(store), bucket_info(bucket_info), markers(markers)
+ {}
+ // spawn the trim for the next shard with a non-empty marker; returns false
+ // once every marker has been consumed
+ bool spawn_next() override;
+};
+
+/// spawn a bilog trim for the next shard that has a marker. returns true if a
+/// request was spawned, or false when all shards have been visited
+bool BucketTrimShardCollectCR::spawn_next()
+{
+  while (i < markers.size()) {
+    // consume this shard's slot before deciding whether to trim it
+    const auto shard_id = i;
+    const auto& marker = markers[shard_id];
+    ++i;
+
+    // skip empty markers
+    if (marker.empty()) {
+      continue;
+    }
+    ldout(cct, 10) << "trimming bilog shard " << shard_id
+        << " of " << bucket_info.bucket << " at marker " << marker << dendl;
+    spawn(new RGWRadosBILogTrimCR(store, bucket_info, shard_id,
+                                  std::string{}, marker),
+          false);
+    return true;
+  }
+  return false;
+}
+
+/// trim the bilog of all of the given bucket instance's shards
+class BucketTrimInstanceCR : public RGWCoroutine {
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ BucketTrimObserver *const observer;
+ // copied, because it is moved into the observer once trim succeeds
+ std::string bucket_instance;
+ const std::string& zone_id; //< my zone id
+ RGWBucketInfo bucket_info; //< bucket instance info to locate bucket indices
+ int child_ret = 0; //< result of the most recently collected child
+
+ using StatusShards = std::vector<rgw_bucket_shard_sync_info>;
+ std::vector<StatusShards> peer_status; //< sync status for each peer
+ std::vector<std::string> min_markers; //< min marker per shard
+
+ public:
+ // peer_status gets one slot per entry of the zone's data notify map;
+ // operate() fills the slots in the same iteration order
+ BucketTrimInstanceCR(RGWRados *store, RGWHTTPManager *http,
+ BucketTrimObserver *observer,
+ const std::string& bucket_instance)
+ : RGWCoroutine(store->ctx()), store(store),
+ http(http), observer(observer),
+ bucket_instance(bucket_instance),
+ zone_id(store->svc.zone->get_zone().id),
+ peer_status(store->svc.zone->get_zone_data_notify_to_map().size())
+ {}
+
+ int operate() override;
+};
+
+// coroutine body: fetch the bucket's sync status from every peer together
+// with the local bucket instance info, compute the minimum marker per shard,
+// trim each bilog shard to that marker, and report success to the observer.
+// any failed child or step ends the coroutine with that error
+int BucketTrimInstanceCR::operate()
+{
+ reenter(this) {
+ ldout(cct, 4) << "starting trim on bucket=" << bucket_instance << dendl;
+
+ // query peers for sync status
+ set_status("fetching sync status from peers");
+ yield {
+ // query data sync status from each sync peer
+ rgw_http_param_pair params[] = {
+ { "type", "bucket-index" },
+ { "status", nullptr },
+ { "bucket", bucket_instance.c_str() },
+ { "source-zone", zone_id.c_str() },
+ { nullptr, nullptr }
+ };
+
+ // each peer writes its reply into its own slot of peer_status
+ auto p = peer_status.begin();
+ for (auto& c : store->svc.zone->get_zone_data_notify_to_map()) {
+ using StatusCR = RGWReadRESTResourceCR<StatusShards>;
+ spawn(new StatusCR(cct, c.second, http, "/admin/log/", params, &*p),
+ false);
+ ++p;
+ }
+ // in parallel, read the local bucket instance info
+ spawn(new RGWGetBucketInstanceInfoCR(store->get_async_rados(), store,
+ bucket_instance, &bucket_info),
+ false);
+ }
+ // wait for a response from each peer. all must respond to attempt trim
+ while (num_spawned()) {
+ yield wait_for_child();
+ collect(&child_ret, nullptr);
+ if (child_ret < 0) {
+ drain_all();
+ return set_cr_error(child_ret);
+ }
+ }
+
+ // initialize each shard with the maximum marker, which is only used when
+ // there are no peers syncing from us
+ // (num_shards of 0 is treated as a single shard)
+ min_markers.assign(std::max(1u, bucket_info.num_shards),
+ RGWSyncLogTrimCR::max_marker);
+
+ // determine the minimum marker for each shard
+ retcode = take_min_status(cct, peer_status.begin(), peer_status.end(),
+ &min_markers);
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to correlate bucket sync status from peers" << dendl;
+ return set_cr_error(retcode);
+ }
+
+ // trim shards with a ShardCollectCR
+ ldout(cct, 10) << "trimming bilogs for bucket=" << bucket_info.bucket
+ << " markers=" << min_markers << ", shards=" << min_markers.size() << dendl;
+ set_status("trimming bilog shards");
+ yield call(new BucketTrimShardCollectCR(store, bucket_info, min_markers));
+ // ENODATA just means there were no keys to trim
+ if (retcode == -ENODATA) {
+ retcode = 0;
+ }
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to trim bilog shards: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ // bucket_instance is moved from here; it isn't used again afterwards
+ observer->on_bucket_trimmed(std::move(bucket_instance));
+ return set_cr_done();
+ }
+ return 0;
+}
+
+/// trim each bucket instance while limiting the number of concurrent operations
+class BucketTrimInstanceCollectCR : public RGWShardCollectCR {
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ BucketTrimObserver *const observer;
+ // iterators into the caller's vector, which therefore has to stay alive
+ // and unmodified while this coroutine runs
+ std::vector<std::string>::const_iterator bucket;
+ std::vector<std::string>::const_iterator end;
+ public:
+ BucketTrimInstanceCollectCR(RGWRados *store, RGWHTTPManager *http,
+ BucketTrimObserver *observer,
+ const std::vector<std::string>& buckets,
+ int max_concurrent)
+ : RGWShardCollectCR(store->ctx(), max_concurrent),
+ store(store), http(http), observer(observer),
+ bucket(buckets.begin()), end(buckets.end())
+ {}
+ // spawn the trim for the next bucket; returns false when none are left
+ bool spawn_next() override;
+};
+
+/// spawn a BucketTrimInstanceCR for the next bucket in the list. returns
+/// false once the list is exhausted
+bool BucketTrimInstanceCollectCR::spawn_next()
+{
+  if (bucket != end) {
+    spawn(new BucketTrimInstanceCR(store, http, observer, *bucket), false);
+    ++bucket;
+    return true;
+  }
+  return false;
+}
+
+/// correlate the replies from each peer gateway into the given counter. the
+/// counter is cleared first. returns -EIO if the notify result or any of the
+/// replies fails to decode
+int accumulate_peer_counters(bufferlist& bl, BucketChangeCounter& counter)
+{
+  using PeerId = std::pair<uint64_t, uint64_t>;
+
+  counter.clear();
+
+  try {
+    // decode notify responses; the timeouts are decoded but not used
+    std::map<PeerId, bufferlist> replies;
+    std::set<PeerId> timeouts;
+    auto in = bl.cbegin();
+    decode(replies, in);
+    decode(timeouts, in);
+
+    for (auto reply = replies.begin(); reply != replies.end(); ++reply) {
+      TrimCounters::Response response;
+      auto body = reply->second.cbegin();
+      decode(response, body);
+      for (const auto& entry : response.bucket_counters) {
+        counter.insert(entry.bucket, entry.count);
+      }
+    }
+  } catch (const buffer::error&) {
+    return -EIO;
+  }
+  return 0;
+}
+
+/// metadata callback has the signature bool(string&& key, string&& marker)
+using MetadataListCallback = std::function<bool(std::string&&, std::string&&)>;
+
+/// lists metadata keys, passing each to a callback until it returns false.
+/// on reaching the end, it will restart at the beginning and list up to the
+/// initial marker
+class AsyncMetadataList : public RGWAsyncRadosRequest {
+ CephContext *const cct;
+ RGWMetadataManager *const mgr;
+ // section, start_marker and callback are stored by value
+ const std::string section;
+ const std::string start_marker;
+ MetadataListCallback callback;
+
+ // performs the listing; returns 0 or a negative error code
+ int _send_request() override;
+ public:
+ AsyncMetadataList(CephContext *cct, RGWCoroutine *caller,
+ RGWAioCompletionNotifier *cn, RGWMetadataManager *mgr,
+ const std::string& section, const std::string& start_marker,
+ const MetadataListCallback& callback)
+ : RGWAsyncRadosRequest(caller, cn), cct(cct), mgr(mgr),
+ section(section), start_marker(start_marker), callback(callback)
+ {}
+};
+
+// list keys one at a time in two passes: first from start_marker to the end
+// of the section, then (unless start_marker was empty) from the beginning
+// until the listing's marker passes start_marker. either pass ends early, with
+// 0, as soon as the callback returns false. listing errors are returned as-is
+int AsyncMetadataList::_send_request()
+{
+ void* handle = nullptr;
+ std::list<std::string> keys;
+ bool truncated{false};
+ std::string marker;
+
+ // start a listing at the given marker
+ int r = mgr->list_keys_init(section, start_marker, &handle);
+ if (r == -EINVAL) {
+ // restart with empty marker below
+ } else if (r < 0) {
+ ldout(cct, 10) << "failed to init metadata listing: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ } else {
+ ldout(cct, 20) << "starting metadata listing at " << start_marker << dendl;
+
+ // release the handle when scope exits
+ // (the lambda captures the current value of 'handle' by copy)
+ auto g = make_scope_guard([=] { mgr->list_keys_complete(handle); });
+
+ do {
+ // get the next key and marker
+ r = mgr->list_keys_next(handle, 1, keys, &truncated);
+ if (r < 0) {
+ ldout(cct, 10) << "failed to list metadata: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ marker = mgr->get_marker(handle);
+
+ if (!keys.empty()) {
+ ceph_assert(keys.size() == 1);
+ auto& key = keys.front();
+ if (!callback(std::move(key), std::move(marker))) {
+ return 0;
+ }
+ }
+ } while (truncated);
+
+ if (start_marker.empty()) {
+ // already listed all keys
+ return 0;
+ }
+ }
+
+ // restart the listing from the beginning (empty marker)
+ handle = nullptr;
+
+ r = mgr->list_keys_init(section, "", &handle);
+ if (r < 0) {
+ ldout(cct, 10) << "failed to restart metadata listing: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ ldout(cct, 20) << "restarting metadata listing" << dendl;
+
+ // release the handle when scope exits
+ auto g = make_scope_guard([=] { mgr->list_keys_complete(handle); });
+ do {
+ // get the next key and marker
+ r = mgr->list_keys_next(handle, 1, keys, &truncated);
+ if (r < 0) {
+ ldout(cct, 10) << "failed to list metadata: "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ marker = mgr->get_marker(handle);
+
+ if (!keys.empty()) {
+ ceph_assert(keys.size() == 1);
+ auto& key = keys.front();
+ // stop at original marker
+ if (marker > start_marker) {
+ return 0;
+ }
+ if (!callback(std::move(key), std::move(marker))) {
+ return 0;
+ }
+ }
+ } while (truncated);
+
+ return 0;
+}
+
+/// coroutine wrapper for AsyncMetadataList
+class MetadataListCR : public RGWSimpleCoroutine {
+ RGWAsyncRadosProcessor *const async_rados;
+ RGWMetadataManager *const mgr;
+ // held by reference until send_request() passes them to AsyncMetadataList,
+ // which stores its own copies
+ const std::string& section;
+ const std::string& start_marker;
+ MetadataListCallback callback;
+ RGWAsyncRadosRequest *req{nullptr}; //< outstanding request, if any
+ public:
+ MetadataListCR(CephContext *cct, RGWAsyncRadosProcessor *async_rados,
+ RGWMetadataManager *mgr, const std::string& section,
+ const std::string& start_marker,
+ const MetadataListCallback& callback)
+ : RGWSimpleCoroutine(cct), async_rados(async_rados), mgr(mgr),
+ section(section), start_marker(start_marker), callback(callback)
+ {}
+ ~MetadataListCR() override {
+ request_cleanup();
+ }
+
+ // create the async request and queue it on the async rados processor
+ int send_request() override {
+ req = new AsyncMetadataList(cct, this, stack->create_completion_notifier(),
+ mgr, section, start_marker, callback);
+ async_rados->queue(req);
+ return 0;
+ }
+ // report the result of the listing
+ int request_complete() override {
+ return req->get_ret_status();
+ }
+ // finish the request, if one is outstanding. called again from the destructor
+ void request_cleanup() override {
+ if (req) {
+ req->finish();
+ req = nullptr;
+ }
+ }
+};
+
+/// coroutine for a single pass of bucket index log trimming: selects hot
+/// buckets from the peers' counters and cold buckets from the metadata
+/// listing, trims them, then records the listing position and notifies peers
+class BucketTrimCR : public RGWCoroutine {
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ // config and obj are held by reference and have to outlive the coroutine
+ const BucketTrimConfig& config;
+ BucketTrimObserver *const observer;
+ const rgw_raw_obj& obj; //< object used for notify and for the trim status
+ ceph::mono_time start_time;
+ bufferlist notify_replies; //< raw replies to the counters notify
+ BucketChangeCounter counter; //< counters accumulated from all peers
+ std::vector<std::string> buckets; //< buckets selected for trim
+ BucketTrimStatus status;
+ RGWObjVersionTracker objv; //< version tracker for trim status object
+ std::string last_cold_marker; //< position for next trim marker
+
+ static const std::string section; //< metadata section for bucket instances
+ public:
+ BucketTrimCR(RGWRados *store, RGWHTTPManager *http,
+ const BucketTrimConfig& config, BucketTrimObserver *observer,
+ const rgw_raw_obj& obj)
+ : RGWCoroutine(store->ctx()), store(store), http(http), config(config),
+ observer(observer), obj(obj), counter(config.counter_size)
+ {}
+
+ int operate() override;
+};
+
+const std::string BucketTrimCR::section{"bucket.instance"};
+
+/// coroutine body for one trim pass:
+///  1. ask peer gateways for their bucket change counters and select the
+///     hottest buckets, leaving room for min_cold_buckets_per_interval
+///  2. fill the remaining slots with cold buckets from the bucket instance
+///     metadata listing, continuing from the stored status marker
+///  3. trim the selected buckets with limited concurrency (per-bucket errors
+///     are ignored)
+///  4. persist the new listing marker and notify peers that trim completed
+/// any other failure ends the coroutine with that error
+int BucketTrimCR::operate()
+{
+  reenter(this) {
+    start_time = ceph::mono_clock::now();
+
+    if (config.buckets_per_interval) {
+      // query watch/notify for hot buckets
+      ldout(cct, 10) << "fetching active bucket counters" << dendl;
+      set_status("fetching active bucket counters");
+      yield {
+        // request the top bucket counters from each peer gateway
+        const TrimNotifyType type = NotifyTrimCounters;
+        TrimCounters::Request request{32};
+        bufferlist bl;
+        encode(type, bl);
+        encode(request, bl);
+        call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms,
+                                  &notify_replies));
+      }
+      if (retcode < 0) {
+        ldout(cct, 10) << "failed to fetch peer bucket counters" << dendl;
+        return set_cr_error(retcode);
+      }
+
+      // select the hottest buckets for trim
+      retcode = accumulate_peer_counters(notify_replies, counter);
+      if (retcode < 0) {
+        ldout(cct, 4) << "failed to correlate peer bucket counters" << dendl;
+        return set_cr_error(retcode);
+      }
+      buckets.reserve(config.buckets_per_interval);
+
+      // leave min_cold_buckets_per_interval slots for cold buckets. both
+      // settings are unsigned, so clamp at zero rather than letting the
+      // subtraction wrap when min_cold exceeds the total
+      const uint32_t max_count =
+          config.buckets_per_interval > config.min_cold_buckets_per_interval
+          ? config.buckets_per_interval - config.min_cold_buckets_per_interval
+          : 0;
+      counter.get_highest(max_count,
+        [this] (const std::string& bucket, int count) {
+          buckets.push_back(bucket);
+        });
+    }
+
+    if (buckets.size() < config.buckets_per_interval) {
+      // read BucketTrimStatus for marker position
+      set_status("reading trim status");
+      using ReadStatus = RGWSimpleRadosReadCR<BucketTrimStatus>;
+      yield call(new ReadStatus(store->get_async_rados(), store->svc.sysobj, obj,
+                                &status, true, &objv));
+      if (retcode < 0) {
+        ldout(cct, 10) << "failed to read bilog trim status: "
+            << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+      if (status.marker == "MAX") {
+        status.marker.clear(); // restart at the beginning
+      }
+      ldout(cct, 10) << "listing cold buckets from marker="
+          << status.marker << dendl;
+
+      set_status("listing cold buckets for trim");
+      yield {
+        // capture a reference so 'this' remains valid in the callback
+        auto ref = boost::intrusive_ptr<RGWCoroutine>{this};
+        // list cold buckets to consider for trim
+        auto cb = [this, ref] (std::string&& bucket, std::string&& marker) {
+          // filter out keys that we trimmed recently
+          if (observer->trimmed_recently(bucket)) {
+            return true;
+          }
+          // filter out active buckets that we've already selected
+          auto i = std::find(buckets.begin(), buckets.end(), bucket);
+          if (i != buckets.end()) {
+            return true;
+          }
+          buckets.emplace_back(std::move(bucket));
+          // remember the last cold bucket spawned to update the status marker
+          last_cold_marker = std::move(marker);
+          // return true if there's room for more
+          return buckets.size() < config.buckets_per_interval;
+        };
+
+        call(new MetadataListCR(cct, store->get_async_rados(), store->meta_mgr,
+                                section, status.marker, cb));
+      }
+      if (retcode < 0) {
+        ldout(cct, 4) << "failed to list bucket instance metadata: "
+            << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+    }
+
+    // trim bucket instances with limited concurrency
+    set_status("trimming buckets");
+    ldout(cct, 4) << "collected " << buckets.size() << " buckets for trim" << dendl;
+    yield call(new BucketTrimInstanceCollectCR(store, http, observer, buckets,
+                                               config.concurrent_buckets));
+    // ignore errors from individual buckets
+
+    // write updated trim status
+    if (!last_cold_marker.empty() && status.marker != last_cold_marker) {
+      set_status("writing updated trim status");
+      status.marker = std::move(last_cold_marker);
+      ldout(cct, 20) << "writing bucket trim marker=" << status.marker << dendl;
+      using WriteStatus = RGWSimpleRadosWriteCR<BucketTrimStatus>;
+      yield call(new WriteStatus(store->get_async_rados(), store->svc.sysobj, obj,
+                                 status, &objv));
+      if (retcode < 0) {
+        ldout(cct, 4) << "failed to write updated trim status: "
+            << cpp_strerror(retcode) << dendl;
+        return set_cr_error(retcode);
+      }
+    }
+
+    // notify peers that trim completed
+    set_status("trim completed");
+    yield {
+      const TrimNotifyType type = NotifyTrimComplete;
+      TrimComplete::Request request;
+      bufferlist bl;
+      encode(type, bl);
+      encode(request, bl);
+      call(new RGWRadosNotifyCR(store, obj, bl, config.notify_timeout_ms,
+                                nullptr));
+    }
+    if (retcode < 0) {
+      ldout(cct, 10) << "failed to notify peers of trim completion" << dendl;
+      return set_cr_error(retcode);
+    }
+
+    ldout(cct, 4) << "bucket index log processing completed in "
+        << ceph::mono_clock::now() - start_time << dendl;
+    return set_cr_done();
+  }
+  return 0;
+}
+
+/// coroutine that loops forever: sleep for the trim interval, take the trim
+/// lock on the status object, and run a BucketTrimCR pass
+class BucketTrimPollCR : public RGWCoroutine {
+ RGWRados *const store;
+ RGWHTTPManager *const http;
+ // config and obj are held by reference and have to outlive the coroutine
+ const BucketTrimConfig& config;
+ BucketTrimObserver *const observer;
+ const rgw_raw_obj& obj;
+ const std::string name{"trim"}; //< lock name
+ const std::string cookie; //< random lock cookie generated at construction
+
+ public:
+ BucketTrimPollCR(RGWRados *store, RGWHTTPManager *http,
+ const BucketTrimConfig& config,
+ BucketTrimObserver *observer, const rgw_raw_obj& obj)
+ : RGWCoroutine(store->ctx()), store(store), http(http),
+ config(config), observer(observer), obj(obj),
+ cookie(RGWSimpleRadosLockCR::gen_random_cookie(cct))
+ {}
+
+ int operate() override;
+};
+
+// coroutine body: never completes on its own. a failed lock attempt just goes
+// back to sleeping. the lock is only released explicitly when the trim pass
+// fails; after a successful pass it is left in place
+int BucketTrimPollCR::operate()
+{
+ reenter(this) {
+ for (;;) {
+ set_status("sleeping");
+ // NOTE(review): wait() isn't preceded by 'yield' here; presumably the
+ // sleep takes effect at the 'yield call' below - confirm
+ wait(utime_t{static_cast<time_t>(config.trim_interval_sec), 0});
+
+ // prevent others from trimming for our entire wait interval
+ set_status("acquiring trim lock");
+ yield call(new RGWSimpleRadosLockCR(store->get_async_rados(), store,
+ obj, name, cookie,
+ config.trim_interval_sec));
+ if (retcode < 0) {
+ ldout(cct, 4) << "failed to lock: " << cpp_strerror(retcode) << dendl;
+ continue;
+ }
+
+ set_status("trimming");
+ yield call(new BucketTrimCR(store, http, config, observer, obj));
+ if (retcode < 0) {
+ // on errors, unlock so other gateways can try
+ set_status("unlocking");
+ yield call(new RGWSimpleRadosUnlockCR(store->get_async_rados(), store,
+ obj, name, cookie));
+ }
+ }
+ }
+ return 0;
+}
+
+/// bounded, time-ordered list of events. values are stored together with the
+/// time they were inserted, can be searched by key, and can be expired once
+/// they're older than max_duration. expiration relies on events having been
+/// inserted in temporal order
+template <typename T, typename Clock = ceph::coarse_mono_clock>
+class RecentEventList {
+ public:
+  using clock_type = Clock;
+  using time_point = typename clock_type::time_point;
+
+  RecentEventList(size_t max_size, const ceph::timespan& max_duration)
+    : events(max_size), max_duration(max_duration)
+  {}
+
+  /// record an event at the given point in time, which must not be earlier
+  /// than the time of the previously inserted event
+  void insert(T&& value, const time_point& now) {
+    // ceph_assert(events.empty() || now >= events.back().time)
+    Event entry{std::move(value), now};
+    events.push_back(std::move(entry));
+  }
+
+  /// linear search for an event whose value matches the key. the key type U
+  /// only has to provide operator==(U, T)
+  template <typename U>
+  bool lookup(const U& key) const {
+    auto i = events.begin();
+    while (i != events.end()) {
+      if (key == i->value) {
+        return true;
+      }
+      ++i;
+    }
+    return false;
+  }
+
+  /// drop events from the front that are older than max_duration relative to
+  /// the given point in time
+  void expire_old(const time_point& now) {
+    const auto cutoff = now - max_duration;
+    for (;;) {
+      if (events.empty() || !(events.front().time < cutoff)) {
+        break;
+      }
+      events.pop_front();
+    }
+  }
+
+ private:
+  struct Event {
+    T value;
+    time_point time;
+  };
+  boost::circular_buffer<Event> events;
+  const ceph::timespan max_duration;
+};
+
+namespace rgw {
+
+// fill out the bucket trim configuration: the interval and bucket limits come
+// from the ceph context's config options, the rest are fixed values
+void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config)
+{
+  const auto& conf = cct->_conf;
+  auto option = [&conf] (const char *key) {
+    return conf.get_val<int64_t>(key);
+  };
+
+  // values read from configuration
+  config.trim_interval_sec = option("rgw_sync_log_trim_interval");
+  config.buckets_per_interval = option("rgw_sync_log_trim_max_buckets");
+  config.min_cold_buckets_per_interval =
+      option("rgw_sync_log_trim_min_cold_buckets");
+  config.concurrent_buckets = option("rgw_sync_log_trim_concurrent_buckets");
+
+  // fixed values
+  config.counter_size = 512;
+  config.notify_timeout_ms = 10000;
+  config.recent_size = 128;
+  config.recent_duration = std::chrono::hours(2);
+}
+
+/// private implementation of BucketTrimManager. owns the bucket change
+/// counter, the list of recently trimmed buckets and the watcher, and serves
+/// both the watch/notify server interface and the trim observer interface
+class BucketTrimManager::Impl : public TrimCounters::Server,
+                                public BucketTrimObserver {
+ public:
+  RGWRados *const store;
+  const BucketTrimConfig config;
+
+  /// trim status object in the zone's log pool. declared before the watcher,
+  /// which keeps a reference to it
+  const rgw_raw_obj status_obj;
+
+  /// count frequency of bucket instance entries in the data changes log
+  BucketChangeCounter counter;
+
+  using RecentlyTrimmedBucketList = RecentEventList<std::string>;
+  using clock_type = RecentlyTrimmedBucketList::clock_type;
+  /// track recently trimmed buckets to focus trim activity elsewhere
+  RecentlyTrimmedBucketList trimmed;
+
+  /// serve the bucket trim watch/notify api
+  BucketTrimWatcher watcher;
+
+  /// protect data shared between data sync, trim, and watch/notify threads
+  std::mutex mutex;
+
+  Impl(RGWRados *store, const BucketTrimConfig& config)
+    : store(store), config(config),
+      status_obj(store->svc.zone->get_zone_params().log_pool, BucketTrimStatus::oid),
+      counter(config.counter_size),
+      trimmed(config.recent_size, config.recent_duration),
+      watcher(store, status_obj, this)
+  {}
+
+  /// TrimCounters::Server interface for watch/notify api: append up to
+  /// 'count' of the highest bucket counters to 'buckets'
+  void get_bucket_counters(int count, TrimCounters::Vector& buckets) override {
+    buckets.reserve(count);
+    std::lock_guard<std::mutex> lock(mutex);
+    counter.get_highest(count, [&buckets] (const std::string& key, int changes) {
+        buckets.emplace_back(key, changes);
+      });
+    ldout(store->ctx(), 20) << "get_bucket_counters: " << buckets << dendl;
+  }
+
+  /// a trim pass completed: clear the change counters and expire old entries
+  /// from the recently trimmed list
+  void reset_bucket_counters() override {
+    ldout(store->ctx(), 20) << "bucket trim completed" << dendl;
+    std::lock_guard<std::mutex> lock(mutex);
+    counter.clear();
+    trimmed.expire_old(clock_type::now());
+  }
+
+  /// BucketTrimObserver interface to remember successfully-trimmed buckets
+  void on_bucket_trimmed(std::string&& bucket_instance) override {
+    ldout(store->ctx(), 20) << "trimmed bucket instance " << bucket_instance << dendl;
+    std::lock_guard<std::mutex> lock(mutex);
+    trimmed.insert(std::move(bucket_instance), clock_type::now());
+  }
+
+  /// returns true if the bucket instance is in the recently trimmed list
+  bool trimmed_recently(const boost::string_view& bucket_instance) override {
+    std::lock_guard<std::mutex> lock(mutex);
+    return trimmed.lookup(bucket_instance);
+  }
+};
+
+// construct the implementation; the watch isn't started until init()
+BucketTrimManager::BucketTrimManager(RGWRados *store,
+ const BucketTrimConfig& config)
+ : impl(new Impl(store, config))
+{
+}
+// defined here, where Impl is a complete type
+BucketTrimManager::~BucketTrimManager() = default;
+
+// start watching the trim status object. returns the watcher's result
+int BucketTrimManager::init()
+{
+ return impl->watcher.start();
+}
+
+/// count a change to the given bucket instance, unless it was trimmed recently
+void BucketTrimManager::on_bucket_changed(const boost::string_view& bucket)
+{
+  std::lock_guard<std::mutex> lock(impl->mutex);
+  // filter recently trimmed bucket instances out of bucket change counter
+  const bool recently_trimmed = impl->trimmed.lookup(bucket);
+  if (!recently_trimmed) {
+    impl->counter.insert(bucket.to_string());
+  }
+}
+
+// return a new polling coroutine that trims once per interval. the coroutine
+// is given pointers/references into impl
+RGWCoroutine* BucketTrimManager::create_bucket_trim_cr(RGWHTTPManager *http)
+{
+ return new BucketTrimPollCR(impl->store, http, impl->config,
+ impl.get(), impl->status_obj);
+}
+
+// return a new coroutine that runs a single trim pass
+RGWCoroutine* BucketTrimManager::create_admin_bucket_trim_cr(RGWHTTPManager *http)
+{
+ // return the trim coroutine without any polling
+ return new BucketTrimCR(impl->store, http, impl->config,
+ impl.get(), impl->status_obj);
+}
+
+} // namespace rgw
diff --git a/src/rgw/rgw_sync_log_trim.h b/src/rgw/rgw_sync_log_trim.h
new file mode 100644
index 00000000..13d1f63a
--- /dev/null
+++ b/src/rgw/rgw_sync_log_trim.h
@@ -0,0 +1,110 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat, Inc
+ *
+ * Author: Casey Bodley <cbodley@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#ifndef RGW_SYNC_LOG_TRIM_H
+#define RGW_SYNC_LOG_TRIM_H
+
+#include <memory>
+#include <boost/utility/string_view.hpp>
+#include "include/encoding.h"
+#include "common/ceph_time.h"
+
+class CephContext;
+class RGWCoroutine;
+class RGWHTTPManager;
+class RGWRados;
+
+namespace rgw {
+
+/// Interface to inform the trim process about which buckets are most active
+struct BucketChangeObserver {
+ virtual ~BucketChangeObserver() = default; // virtual: implementations may be destroyed through this interface
+
+ virtual void on_bucket_changed(const boost::string_view& bucket_instance) = 0; // notification hook; implementations decide what to record
+};
+
+/// Configuration for BucketTrimManager
+struct BucketTrimConfig { // every field is zero-initialized here; configure_bucket_trim() is declared below to fill it in
+ /// time interval in seconds between bucket trim attempts
+ uint32_t trim_interval_sec{0};
+ /// maximum number of buckets to track with BucketChangeObserver
+ size_t counter_size{0};
+ /// maximum number of buckets to process each trim interval
+ uint32_t buckets_per_interval{0};
+ /// minimum number of buckets to choose from the global bucket instance list
+ uint32_t min_cold_buckets_per_interval{0};
+ /// maximum number of buckets to process in parallel
+ uint32_t concurrent_buckets{0};
+ /// timeout in ms for bucket trim notify replies
+ uint64_t notify_timeout_ms{0};
+ /// maximum number of recently trimmed buckets to remember (should be small
+ /// enough for a linear search)
+ size_t recent_size{0};
+ /// maximum duration to consider a trim as 'recent' (should be some multiple
+ /// of the trim interval, at least)
+ ceph::timespan recent_duration{0}; // a ceph::timespan, unlike the plain integer settings above
+};
+
+/// fill out the BucketTrimConfig from the ceph context
+void configure_bucket_trim(CephContext *cct, BucketTrimConfig& config);
+
+/// Determines the buckets on which to focus trim activity, using two sources of
+/// input: the frequency of entries read from the data changes log, and a global
+/// listing of the bucket.instance metadata. This allows us to trim active
+/// buckets quickly, while also ensuring that all buckets will eventually trim
+class BucketTrimManager : public BucketChangeObserver {
+ class Impl; // forward declaration only; the implementation type is not exposed by this header
+ std::unique_ptr<Impl> impl;
+ public:
+ BucketTrimManager(RGWRados *store, const BucketTrimConfig& config);
+ ~BucketTrimManager(); // declared here, defined where Impl is a complete type
+
+ int init(); // returns an int status from the implementation
+
+ /// increment a counter for the given bucket instance
+ void on_bucket_changed(const boost::string_view& bucket_instance) override;
+
+ /// create a coroutine to run the bucket trim process every trim interval
+ RGWCoroutine* create_bucket_trim_cr(RGWHTTPManager *http);
+
+ /// create a coroutine to trim buckets directly via radosgw-admin
+ RGWCoroutine* create_admin_bucket_trim_cr(RGWHTTPManager *http);
+};
+
+/// provides persistent storage for the trim manager's current position in the
+/// list of bucket instance metadata
+struct BucketTrimStatus {
+ std::string marker; //< metadata key of current bucket instance
+
+ void encode(bufferlist& bl) const { // versioned encoding whose only payload is `marker`
+ ENCODE_START(1, 1, bl);
+ encode(marker, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& p) { // mirror of encode(): reads back `marker`
+ DECODE_START(1, p);
+ decode(marker, p);
+ DECODE_FINISH(p);
+ }
+
+ static const std::string oid; // only declared here; the value is defined outside this header
+};
+
+} // namespace rgw
+
+WRITE_CLASS_ENCODER(rgw::BucketTrimStatus);
+
+#endif // RGW_SYNC_LOG_TRIM_H
diff --git a/src/rgw/rgw_sync_module.cc b/src/rgw/rgw_sync_module.cc
new file mode 100644
index 00000000..91f31adb
--- /dev/null
+++ b/src/rgw/rgw_sync_module.cc
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+#include "rgw_cr_rados.h"
+#include "rgw_sync_module.h"
+#include "rgw_data_sync.h"
+#include "rgw_bucket.h"
+
+#include "rgw_sync_module_log.h"
+#include "rgw_sync_module_es.h"
+#include "rgw_sync_module_aws.h"
+#include "rgw_sync_module_pubsub.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+RGWMetadataHandler *RGWSyncModuleInstance::alloc_bucket_meta_handler() // base implementation of the virtual hook
+{
+ return RGWBucketMetaHandlerAllocator::alloc(); // delegates to the stock bucket metadata handler allocator
+}
+
+RGWMetadataHandler *RGWSyncModuleInstance::alloc_bucket_instance_meta_handler() // base implementation of the virtual hook
+{
+ return RGWBucketInstanceMetaHandlerAllocator::alloc(); // delegates to the stock bucket instance metadata handler allocator
+}
+
+RGWStatRemoteObjCBCR::RGWStatRemoteObjCBCR(RGWDataSyncEnv *_sync_env, // dereferenced right here for its cct, so it must not be null
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env), // kept as a raw, non-owning pointer
+ bucket_info(_bucket_info), key(_key) { // both are copied into by-value members
+}
+
+RGWCallStatRemoteObjCR::RGWCallStatRemoteObjCR(RGWDataSyncEnv *_sync_env, // dereferenced right here for its cct, so it must not be null
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env), // kept as a raw, non-owning pointer
+ bucket_info(_bucket_info), key(_key) { // both are copied into by-value members
+}
+
+int RGWCallStatRemoteObjCR::operate() { // stats the remote object, then runs the optional callback coroutine with the result
+ reenter(this) {
+ yield {
+ call(new RGWStatRemoteObjCR(sync_env->async_rados, sync_env->store,
+ sync_env->source_zone,
+ bucket_info, key, &mtime, &size, &etag, &attrs, &headers)); // the stat results are written into this coroutine's members
+ }
+ if (retcode < 0) {
+ ldout(sync_env->cct, 10) << "RGWStatRemoteObjCR() returned " << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ ldout(sync_env->cct, 20) << "stat of remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key
+ << " size=" << size << " mtime=" << mtime << dendl;
+ yield {
+ RGWStatRemoteObjCBCR *cb = allocate_callback(); // the base class returns nullptr, i.e. no callback is run
+ if (cb) {
+ cb->set_result(mtime, size, etag, std::move(attrs), std::move(headers)); // attrs and headers are moved from; they are not read again below
+ call(cb);
+ }
+ }
+ if (retcode < 0) { // when no callback ran, retcode still holds the (non-negative) stat result
+ ldout(sync_env->cct, 10) << "RGWStatRemoteObjCR() callback returned " << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+}
+
+void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager)
+{
+  // register_module() takes the module ref as a non-const lvalue, so each
+  // shared_ptr is bound to a named lambda parameter first
+  auto add = [modules_manager](const string& name, RGWSyncModuleRef module,
+                               bool is_default = false) {
+    modules_manager->register_module(name, module, is_default);
+  };
+
+  // "rgw" is the default module: register_module() also stores it under the
+  // empty name
+  add("rgw", std::make_shared<RGWDefaultSyncModule>(), true);
+
+  // all other modules are stored under their own name only
+  add("archive", std::make_shared<RGWArchiveSyncModule>());
+  add("log", std::make_shared<RGWLogSyncModule>());
+  add("elasticsearch", std::make_shared<RGWElasticSyncModule>());
+  add("cloud", std::make_shared<RGWAWSSyncModule>());
+  add("pubsub", std::make_shared<RGWPSSyncModule>());
+}
diff --git a/src/rgw/rgw_sync_module.h b/src/rgw/rgw_sync_module.h
new file mode 100644
index 00000000..aa68934c
--- /dev/null
+++ b/src/rgw/rgw_sync_module.h
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_MODULE_H
+#define CEPH_RGW_SYNC_MODULE_H
+
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+
+class RGWBucketInfo;
+class RGWRemoteDataLog;
+struct RGWDataSyncEnv;
+struct rgw_bucket_entry_owner;
+struct rgw_obj_key;
+
+
+class RGWDataSyncModule { // data sync hooks of a sync module; the three object operations are pure virtual
+public:
+ RGWDataSyncModule() {}
+ virtual ~RGWDataSyncModule() {}
+
+ virtual void init(RGWDataSyncEnv *sync_env, uint64_t instance_id) {} // default: nothing to set up
+
+ virtual RGWCoroutine *init_sync(RGWDataSyncEnv *sync_env) { // default: no coroutine (nullptr)
+ return nullptr;
+ }
+
+ virtual RGWCoroutine *start_sync(RGWDataSyncEnv *sync_env) { // default: no coroutine (nullptr)
+ return nullptr;
+ }
+ virtual RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) = 0;
+ virtual RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0;
+ virtual RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) = 0;
+};
+
+class RGWRESTMgr;
+class RGWMetadataHandler;
+
+class RGWSyncModuleInstance { // a configured instance of a sync module; only get_data_handler() must be overridden
+public:
+ RGWSyncModuleInstance() {}
+ virtual ~RGWSyncModuleInstance() {}
+ virtual RGWDataSyncModule *get_data_handler() = 0;
+ virtual RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) { // default: hand back the manager that was passed in
+ return orig;
+ }
+ virtual bool supports_user_writes() { // default: false
+ return false;
+ }
+ virtual RGWMetadataHandler *alloc_bucket_meta_handler(); // default implementations live in rgw_sync_module.cc
+ virtual RGWMetadataHandler *alloc_bucket_instance_meta_handler();
+
+ // indication whether the sync module start with full sync (default behavior)
+ // incremental sync would follow anyway
+ virtual bool should_full_sync() const {
+ return true;
+ }
+};
+
+typedef std::shared_ptr<RGWSyncModuleInstance> RGWSyncModuleInstanceRef;
+
+class JSONFormattable;
+
+class RGWSyncModule { // factory interface for one kind of sync module
+
+public:
+ RGWSyncModule() {}
+ virtual ~RGWSyncModule() {}
+
+ virtual bool supports_writes() { // default: false
+ return false;
+ }
+ virtual bool supports_data_export() = 0;
+ virtual int create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) = 0; // int status; the instance is returned through *instance
+};
+
+typedef std::shared_ptr<RGWSyncModule> RGWSyncModuleRef;
+
+
+class RGWSyncModulesManager {
+  Mutex lock;
+  // registered modules by name; the default module is also stored under ""
+  map<string, RGWSyncModuleRef> modules;
+public:
+  RGWSyncModulesManager() : lock("RGWSyncModulesManager") {}
+
+  // Stores `module` under `name` (replacing any previous entry); with is_default also under "".
+  void register_module(const string& name, RGWSyncModuleRef& module, bool is_default = false) {
+    Mutex::Locker guard(lock);
+    modules[name] = module;
+    if (is_default) {
+      modules[string()] = module;
+    }
+  }
+
+  // Returns whether `name` is registered; copies the ref into *module when module is non-null.
+  bool get_module(const string& name, RGWSyncModuleRef *module) {
+    Mutex::Locker guard(lock);
+    const auto found = modules.find(name);
+    const bool known = (found != modules.end());
+    if (known && module != nullptr) {
+      *module = found->second;
+    }
+    return known;
+  }
+
+  // -ENOENT for an unknown module, otherwise its supports_data_export() result as an int.
+  int supports_data_export(const string& name) {
+    RGWSyncModuleRef found;
+    if (get_module(name, &found)) {
+      return found->supports_data_export();
+    }
+    return -ENOENT;
+  }
+
+  // -ENOENT for an unknown module, otherwise the result of the module's create_instance().
+  int create_instance(CephContext *cct, const string& name, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) {
+    RGWSyncModuleRef found;
+    if (get_module(name, &found)) {
+      return found->create_instance(cct, config, instance);
+    }
+    return -ENOENT;
+  }
+
+  // Lists every registered name except the empty default alias.
+  // Note: reads `modules` without taking `lock`.
+  vector<string> get_registered_module_names() const {
+    vector<string> names;
+    for (const auto& entry : modules) {
+      if (entry.first.empty()) continue;
+      names.push_back(entry.first);
+    }
+    return names;
+  }
+};
+
+class RGWStatRemoteObjCBCR : public RGWCoroutine { // base for callback coroutines that receive a remote object's stat result
+protected:
+ RGWDataSyncEnv *sync_env;
+
+ RGWBucketInfo bucket_info; // own copy, taken in the constructor
+ rgw_obj_key key; // own copy, taken in the constructor
+
+ ceph::real_time mtime; // the fields below are filled in by set_result()
+ uint64_t size = 0;
+ string etag;
+ map<string, bufferlist> attrs;
+ map<string, string> headers;
+public:
+ RGWStatRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key);
+ ~RGWStatRemoteObjCBCR() override {}
+
+ void set_result(ceph::real_time& _mtime, // stores the stat result: mtime/size/etag are copied, attrs/headers are moved in
+ uint64_t _size,
+ const string& _etag,
+ map<string, bufferlist>&& _attrs,
+ map<string, string>&& _headers) {
+ mtime = _mtime;
+ size = _size;
+ etag = _etag;
+ attrs = std::move(_attrs);
+ headers = std::move(_headers);
+ }
+};
+
+class RGWCallStatRemoteObjCR : public RGWCoroutine { // stats a remote object and passes the result to the coroutine from allocate_callback()
+ ceph::real_time mtime; // stat results, written by the stat call issued in operate()
+ uint64_t size{0};
+ string etag;
+ map<string, bufferlist> attrs;
+ map<string, string> headers;
+
+protected:
+ RGWDataSyncEnv *sync_env;
+
+ RGWBucketInfo bucket_info; // own copy, taken in the constructor
+ rgw_obj_key key; // own copy, taken in the constructor
+
+public:
+ RGWCallStatRemoteObjCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key);
+
+ ~RGWCallStatRemoteObjCR() override {}
+
+ int operate() override;
+
+ virtual RGWStatRemoteObjCBCR *allocate_callback() { // override to supply a callback; nullptr means none
+ return nullptr;
+ }
+};
+
+void rgw_register_sync_modules(RGWSyncModulesManager *modules_manager);
+
+#endif
diff --git a/src/rgw/rgw_sync_module_aws.cc b/src/rgw/rgw_sync_module_aws.cc
new file mode 100644
index 00000000..e8074d8b
--- /dev/null
+++ b/src/rgw/rgw_sync_module_aws.cc
@@ -0,0 +1,1807 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+#include "rgw_sync_module.h"
+#include "rgw_data_sync.h"
+#include "rgw_sync_module_aws.h"
+#include "rgw_cr_rados.h"
+#include "rgw_rest_conn.h"
+#include "rgw_cr_rest.h"
+#include "rgw_acl.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+
+#define DEFAULT_MULTIPART_SYNC_PART_SIZE (32 * 1024 * 1024) // 32 MiB; initial value of both multipart settings in AWSSyncConfig_S3
+
+static string default_target_path = "rgw-${zonegroup}-${sid}/${bucket}"; // used when neither the profile nor the default profile sets a target_path
+
+static string get_key_oid(const rgw_obj_key& key)
+{
+  // keys with no instance, or for which have_null_instance() holds, map to
+  // the bare object name; otherwise the result is "<name>:<instance>"
+  if (key.instance.empty() || key.have_null_instance()) {
+    return key.name;
+  }
+  return key.name + string(":") + key.instance;
+}
+
+static string obj_to_aws_path(const rgw_obj& obj)
+{
+  // "<bucket name>/<key oid>"; get_key_oid() supplies the part after the slash
+  const string& bucket_name = obj.bucket.name;
+  string oid = get_key_oid(obj.key);
+  return bucket_name + "/" + oid;
+}
+
+/*
+
+ json configuration definition:
+
+ {
+ "connection": {
+ "access_key": <access>,
+ "secret": <secret>,
+ "endpoint": <endpoint>,
+ "host_style": <path | virtual>,
+ },
+ "acls": [ { "type": <id | email | uri>,
+ "source_id": <source_id>,
+ "dest_id": <dest_id> } ... ], # optional, acl mappings, no mappings if does not exist
+ "target_path": <target_path>, # override default
+
+
+ # anything below here is for non trivial configuration
+ # can be used in conjunction with the above
+
+ "default": {
+ "connection": {
+ "access_key": <access>,
+ "secret": <secret>,
+ "endpoint": <endpoint>,
+ "host_style": <path | virtual>,
+ },
+ "acls": [ # list of source uids and how they map into destination uids in the dest objects acls
+ {
+ "type" : <id | email | uri>, # optional, default is id
+ "source_id": <id>,
+ "dest_id": <id>
+ } ... ]
+ "target_path": "rgwx-${sid}/${bucket}" # how a bucket name is mapped to destination path,
+ # final object name will be target_path + "/" + obj
+ },
+ "connections": [
+ {
+ "id": <id>,
+ "access_key": <access>,
+ "secret": <secret>,
+ "endpoint": <endpoint>,
+ } ... ],
+ "acl_profiles": [
+ {
+ "id": <id>, # acl mappings
+ "acls": [ {
+ "type": <id | email | uri>,
+ "source_id": <id>,
+ "dest_id": <id>
+ } ... ]
+ }
+ ],
+ "profiles": [
+ {
+ "source_bucket": <source>, # can specify either specific bucket name (foo), or prefix (foo*)
+ "target_path": <dest>, # (override default)
+ "connection_id": <connection_id>, # optional, if empty references default connection
+ "acls_id": <mappings_id>, # optional, if empty references default mappings
+ } ... ],
+ }
+
+target path optional variables:
+
+(evaluated at init)
+sid: sync instance id, randomly generated by sync process on first sync initialization
+zonegroup: zonegroup name
+zonegroup_id: zonegroup id
+zone: zone name
+zone_id: zone id
+
+(evaluated when syncing)
+bucket: bucket name
+owner: bucket owner
+
+*/
+
+struct ACLMapping {
+  // one acl grantee translation: grantee type plus source -> destination id
+  ACLGranteeTypeEnum type{ACL_TYPE_CANON_USER};
+  string source_id;
+  string dest_id;
+
+  ACLMapping() = default;
+
+  ACLMapping(ACLGranteeTypeEnum t, const string& s, const string& d)
+    : type(t), source_id(s), dest_id(d) {}
+
+  // Reads "type", "source_id" and "dest_id" from one acl mapping entry.
+  // "email" and "uri" select the email / group grantee types; any other
+  // type string falls back to the canonical user type.
+  void init(const JSONFormattable& config) {
+    const string& type_str = config["type"];
+
+    type = ACL_TYPE_CANON_USER;
+    if (type_str == "email") {
+      type = ACL_TYPE_EMAIL_USER;
+    }
+    if (type_str == "uri") {
+      type = ACL_TYPE_GROUP;
+    }
+
+    source_id = config["source_id"];
+    dest_id = config["dest_id"];
+  }
+
+  // Emits this mapping as an "acl_mapping" object; the type is written as
+  // "email", "uri" or, for everything else, "id".
+  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+    Formatter::ObjectSection os(jf, "acl_mapping");
+    string type_str;
+    if (type == ACL_TYPE_EMAIL_USER) {
+      type_str = "email";
+    } else if (type == ACL_TYPE_GROUP) {
+      type_str = "uri";
+    } else {
+      type_str = "id";
+    }
+
+    encode_json("type", type_str, &jf);
+    encode_json("source_id", source_id, &jf);
+    encode_json("dest_id", dest_id, &jf);
+  }
+};
+
+struct ACLMappings {
+  map<string, ACLMapping> acl_mappings;  // keyed by source_id
+
+  // Parses an array of acl mapping entries; if a source_id repeats, the
+  // first entry is kept (emplace does not overwrite).
+  void init(const JSONFormattable& config) {
+    for (auto& entry : config.array()) {
+      ACLMapping mapping;
+      mapping.init(entry);
+      acl_mappings.emplace(mapping.source_id, mapping);
+    }
+  }
+  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+    Formatter::ArraySection os(jf, "acls");
+    for (const auto& kv : acl_mappings) {
+      kv.second.dump_conf(cct, jf);
+    }
+  }
+};
+
+struct AWSSyncConfig_ACLProfiles {
+  map<string, std::shared_ptr<ACLMappings> > acl_profiles;  // keyed by profile id
+
+  // Parses the "acl_profiles" array; a repeated id replaces the earlier entry.
+  void init(const JSONFormattable& config) {
+    for (auto& entry : config.array()) {
+      const string& profile_id = entry["id"];
+      std::shared_ptr<ACLMappings> mappings{new ACLMappings};
+      mappings->init(entry["acls"]);
+      acl_profiles[profile_id] = mappings;
+    }
+  }
+
+  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+    Formatter::ArraySection profiles_section(jf, "acl_profiles");
+    for (const auto& kv : acl_profiles) {
+      Formatter::ObjectSection profile_section(jf, "profile");
+      encode_json("id", kv.first, &jf);
+      kv.second->dump_conf(cct, jf);
+    }
+  }
+
+  // Copies the mappings stored under profile_id into *result; false (and *result untouched) if unknown.
+  bool find(const string& profile_id, ACLMappings *result) const {
+    const auto iter = acl_profiles.find(profile_id);
+    if (iter != acl_profiles.end()) {
+      *result = *iter->second;
+      return true;
+    }
+    return false;
+  }
+};
+
+struct AWSSyncConfig_Connection {
+  string connection_id;
+  string endpoint;
+  RGWAccessKey key;
+  HostStyle host_style{PathStyle};
+
+  // which settings were present in the parsed config section (set by init())
+  bool has_endpoint{false};
+  bool has_key{false};
+  bool has_host_style{false};
+
+  // Loads one connection section. host_style becomes VirtualStyle only for
+  // the exact string "virtual"; anything else means PathStyle.
+  void init(const JSONFormattable& config) {
+    has_endpoint = config.exists("endpoint");
+    has_key = config.exists("access_key") || config.exists("secret");
+    has_host_style = config.exists("host_style");
+
+    connection_id = config["id"];
+    endpoint = config["endpoint"];
+    key = RGWAccessKey(config["access_key"], config["secret"]);
+
+    string host_style_str = config["host_style"];
+    host_style = (host_style_str == "virtual" ? VirtualStyle : PathStyle);
+  }
+
+  // Dumps the connection; a non-empty secret is masked as "******".
+  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+    Formatter::ObjectSection section(jf, "connection");
+    encode_json("id", connection_id, &jf);
+    encode_json("endpoint", endpoint, &jf);
+    string style_str = (host_style == PathStyle ? "path" : "virtual");
+    encode_json("host_style", style_str, &jf);
+
+    // nested "key" object; it is closed before "connection" at function exit
+    Formatter::ObjectSection key_section(jf, "key");
+    encode_json("access_key", key.id, &jf);
+    string secret = (key.key.empty() ? "" : "******");
+    encode_json("secret", secret, &jf);
+  }
+};
+
+// Optional non-negative integer setting: returns 0 with *pval untouched if `key` is absent, 0 with *pval set if it parses, -EINVAL if malformed or negative.
+static int conf_to_uint64(CephContext *cct, const JSONFormattable& config, const string& key, uint64_t *pval)
+{
+  string sval;
+  if (!config.find(key, &sval)) return 0;
+  string err;
+  const long long val = strict_strtoll(sval.c_str(), 10, &err); // the parse itself is signed
+  if (!err.empty() || val < 0) { // a negative value would otherwise wrap to a huge uint64_t
+    ldout(cct, 0) << "ERROR: could not parse configurable value for cloud sync module: " << key << ": " << sval << dendl;
+    return -EINVAL;
+  }
+  *pval = static_cast<uint64_t>(val);
+  return 0;
+}
+
+struct AWSSyncConfig_S3 {
+  uint64_t multipart_sync_threshold{DEFAULT_MULTIPART_SYNC_PART_SIZE};
+  uint64_t multipart_min_part_size{DEFAULT_MULTIPART_SYNC_PART_SIZE};
+
+  // Loads both optional multipart settings and returns the first parse error,
+  // if any. A configured part size below the 5 MiB floor is raised to it.
+  int init(CephContext *cct, const JSONFormattable& config) {
+    int r = conf_to_uint64(cct, config, "multipart_sync_threshold", &multipart_sync_threshold);
+    if (r >= 0) {
+      r = conf_to_uint64(cct, config, "multipart_min_part_size", &multipart_min_part_size);
+    }
+    if (r < 0) {
+      return r;
+    }
+#define MULTIPART_MIN_POSSIBLE_PART_SIZE (5 * 1024 * 1024)
+    if (multipart_min_part_size < MULTIPART_MIN_POSSIBLE_PART_SIZE) {
+      multipart_min_part_size = MULTIPART_MIN_POSSIBLE_PART_SIZE;
+    }
+    return 0;
+  }
+
+  void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+    Formatter::ObjectSection section(jf, "s3");
+    encode_json("multipart_sync_threshold", multipart_sync_threshold, &jf);
+    encode_json("multipart_min_part_size", multipart_min_part_size, &jf);
+  }
+};
+
+struct AWSSyncConfig_Profile {
+  string source_bucket;  // bucket name, or a name prefix when `prefix` is set
+  bool prefix{false};
+  string target_path;
+  string connection_id;
+  string acls_id;
+
+  std::shared_ptr<AWSSyncConfig_Connection> conn_conf;
+  std::shared_ptr<ACLMappings> acls;
+
+  std::shared_ptr<RGWRESTConn> conn;
+
+  // Loads a profile section. A trailing '*' on source_bucket marks a prefix
+  // match and is stripped from the stored name. Inline "connection" and
+  // "acls" sections are parsed only when present.
+  void init(const JSONFormattable& config) {
+    source_bucket = config["source_bucket"];
+
+    prefix = (!source_bucket.empty() && source_bucket.back() == '*');
+    if (prefix) {
+      source_bucket.pop_back();
+    }
+
+    target_path = config["target_path"];
+    connection_id = config["connection_id"];
+    acls_id = config["acls_id"];
+
+    if (config.exists("connection")) {
+      conn_conf = make_shared<AWSSyncConfig_Connection>();
+      conn_conf->init(config["connection"]);
+    }
+
+    if (config.exists("acls")) {
+      acls = make_shared<ACLMappings>();
+      acls->init(config["acls"]);
+    }
+  }
+
+  // Dumps the profile under `section`; a prefix profile gets its '*' back.
+  void dump_conf(CephContext *cct, JSONFormatter& jf, const char *section = "config") const {
+    Formatter::ObjectSection config(jf, section);
+    const string sb = (prefix ? source_bucket + "*" : source_bucket);
+    encode_json("source_bucket", sb, &jf);
+    encode_json("target_path", target_path, &jf);
+    encode_json("connection_id", connection_id, &jf);
+    encode_json("acls_id", acls_id, &jf);
+    if (conn_conf) {
+      conn_conf->dump_conf(cct, jf);
+    }
+    if (acls) {
+      acls->dump_conf(cct, jf);
+    }
+  }
+};
+
+// Replaces every occurrence of `find` in `src` with `replace` and stores the
+// result in *dest (dest may alias src). Inserted text is never rescanned, and
+// an empty `find` leaves the text unchanged.
+static void find_and_replace(const string& src, const string& find, const string& replace, string *dest)
+{
+  string s = src;
+  size_t pos = find.empty() ? string::npos : s.find(find);
+  while (pos != string::npos) {
+    s.replace(pos, find.size(), replace);
+    pos = s.find(find, pos + replace.size()); // resume right after the inserted text
+  }
+  *dest = s;
+}
+
+static void apply_meta_param(const string& src, const string& param, const string& val, string *dest)
+{
+  // substitute the placeholder "${<param>}" in src with val, result in *dest
+  find_and_replace(src, "${" + param + "}", val, dest);
+}
+
+
+struct AWSSyncConfig {
+ AWSSyncConfig_Profile default_profile;
+ std::shared_ptr<AWSSyncConfig_Profile> root_profile;
+
+ map<string, std::shared_ptr<AWSSyncConfig_Connection> > connections;
+ AWSSyncConfig_ACLProfiles acl_profiles;
+
+ map<string, std::shared_ptr<AWSSyncConfig_Profile> > explicit_profiles;
+
+ AWSSyncConfig_S3 s3;
+
+ int init_profile(CephContext *cct, const JSONFormattable& profile_conf, AWSSyncConfig_Profile& profile, // resolves the profile's connection, acls and target_path; profile_conf is not read here
+ bool connection_must_exist) { // returns 0, or -EINVAL for an ambiguous/unknown reference or a missing mandatory connection
+ if (!profile.connection_id.empty()) {
+ if (profile.conn_conf) { // both a connection_id and an inline "connection" section were given
+ ldout(cct, 0) << "ERROR: ambiguous profile connection configuration, connection_id=" << profile.connection_id << dendl;
+ return -EINVAL;
+ }
+ if (connections.find(profile.connection_id) == connections.end()) {
+ ldout(cct, 0) << "ERROR: profile configuration reference non-existent connection_id=" << profile.connection_id << dendl;
+ return -EINVAL;
+ }
+ profile.conn_conf = connections[profile.connection_id]; // shares the named connection object, no copy
+ } else if (!profile.conn_conf) { // nothing specified: fall back to the default profile's connection_id
+ profile.connection_id = default_profile.connection_id;
+ auto i = connections.find(profile.connection_id);
+ if (i != connections.end()) {
+ profile.conn_conf = i->second;
+ }
+ }
+
+ if (connection_must_exist && !profile.conn_conf) {
+ ldout(cct, 0) << "ERROR: remote connection undefined for sync profile" << dendl;
+ return -EINVAL;
+ }
+
+ if (profile.conn_conf && default_profile.conn_conf) { // inherit whatever this connection did not set explicitly
+ if (!profile.conn_conf->has_endpoint) {
+ profile.conn_conf->endpoint = default_profile.conn_conf->endpoint; // written through the shared_ptr, so visible to every profile sharing this connection
+ }
+ if (!profile.conn_conf->has_host_style) {
+ profile.conn_conf->host_style = default_profile.conn_conf->host_style;
+ }
+ if (!profile.conn_conf->has_key) {
+ profile.conn_conf->key = default_profile.conn_conf->key;
+ }
+ }
+
+ ACLMappings acl_mappings; // only serves as the output argument of find() below
+
+ if (!profile.acls_id.empty()) {
+ if (!acl_profiles.find(profile.acls_id, &acl_mappings)) {
+ ldout(cct, 0) << "ERROR: profile configuration reference non-existent acls id=" << profile.acls_id << dendl;
+ return -EINVAL;
+ }
+ profile.acls = acl_profiles.acl_profiles[profile.acls_id]; // an acls_id replaces any inline "acls" parsed earlier
+ } else if (!profile.acls) {
+ if (default_profile.acls) {
+ profile.acls = default_profile.acls;
+ profile.acls_id = default_profile.acls_id;
+ }
+ }
+
+ if (profile.target_path.empty()) { // precedence: own target_path, then the default profile's, then default_target_path
+ profile.target_path = default_profile.target_path;
+ }
+ if (profile.target_path.empty()) {
+ profile.target_path = default_target_path;
+ }
+
+ return 0;
+ }
+
+ // Builds a profile from profile_conf, resolves it via init_profile() with a
+ // mandatory connection, and stores it in explicit_profiles keyed by its source
+ // bucket (a duplicate key is logged, then replaced). Returns 0 or the error.
+ int init_target(CephContext *cct, const JSONFormattable& profile_conf, std::shared_ptr<AWSSyncConfig_Profile> *ptarget) {
+   std::shared_ptr<AWSSyncConfig_Profile> profile(new AWSSyncConfig_Profile);
+   profile->init(profile_conf);
+
+   const int ret = init_profile(cct, profile_conf, *profile, true);
+   if (ret < 0) {
+     return ret;
+   }
+
+   const string& bucket_key = profile->source_bucket;
+   if (explicit_profiles.count(bucket_key) > 0) {
+     ldout(cct, 0) << "WARNING: duplicate target configuration in sync module" << dendl;
+   }
+   explicit_profiles[bucket_key] = profile;
+   if (ptarget != nullptr) {
+     *ptarget = profile;
+   }
+   return 0;
+ }
+
+ // Finds the explicit profile for `bucket`: an exact source_bucket match wins,
+ // otherwise the longest prefix profile ("foo*") whose prefix starts the name.
+ // Returns false, leaving *result untouched, when nothing matches.
+ //
+ // Every prefix of the name is probed, longest first. A single
+ // upper_bound() probe is not enough: the nearest key at or below the name
+ // may be an unrelated or exact-only entry (e.g. "fooa" for "foozzz") that
+ // hides a shorter prefix profile such as "foo*".
+ bool do_find_profile(const rgw_bucket bucket, std::shared_ptr<AWSSyncConfig_Profile> *result) {
+   const string& name = bucket.name;
+   size_t len = name.size();
+   while (true) {
+     auto iter = explicit_profiles.find(name.substr(0, len));
+     // a non-prefix profile only matches the complete bucket name
+     if (iter != explicit_profiles.end() &&
+         (len == name.size() || iter->second->prefix)) {
+       *result = iter->second;
+       return true;
+     }
+     if (len == 0) {
+       return false;
+     }
+     --len;
+   }
+ }
+
+ void find_profile(const rgw_bucket bucket, std::shared_ptr<AWSSyncConfig_Profile> *result) { // explicit profile for the bucket, else root_profile
+ if (!do_find_profile(bucket, result)) {
+ *result = root_profile; // no explicit profile matched this bucket
+ }
+ }
+
+ AWSSyncConfig() {} // members keep their default state; init() does the actual parsing
+
+ int init(CephContext *cct, const JSONFormattable& config) { // parses the whole module config; returns 0 or the error from s3.init()/init_target()
+ auto& default_conf = config["default"];
+
+ if (config.exists("default")) {
+ default_profile.init(default_conf);
+ init_profile(cct, default_conf, default_profile, false); // NOTE(review): result ignored, and `connections` is still empty here, so a connection_id in "default" cannot resolve at this point - confirm intended
+ }
+
+ for (auto& conn : config["connections"].array()) {
+ auto new_conn = conn; // works on a copy of the array element
+
+ std::shared_ptr<AWSSyncConfig_Connection> c{new AWSSyncConfig_Connection};
+ c->init(new_conn);
+
+ connections[new_conn["id"]] = c; // a repeated id replaces the earlier connection
+ }
+
+ acl_profiles.init(config["acl_profiles"]);
+
+ int r = s3.init(cct, config["s3"]);
+ if (r < 0) {
+ return r;
+ }
+
+ auto new_root_conf = config; // the top-level config doubles as the root profile's section
+
+ r = init_target(cct, new_root_conf, &root_profile); /* the root profile config */
+ if (r < 0) {
+ return r;
+ }
+
+ for (auto target_conf : config["profiles"].array()) {
+ int r = init_target(cct, target_conf, nullptr); // shadows the outer r; explicit profiles are only stored in explicit_profiles
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ JSONFormatter jf(true);
+ dump_conf(cct, jf);
+ stringstream ss;
+ jf.flush(ss);
+
+ ldout(cct, 5) << "sync module config (parsed representation):\n" << ss.str() << dendl; // secrets are masked by AWSSyncConfig_Connection::dump_conf()
+
+ return 0;
+ }
+
+ // Expands the init-time variables of `path` into *dest. Every step after the
+ // first continues from *dest, so all of them apply even if dest != &path.
+ void expand_target(RGWDataSyncEnv *sync_env, const string& sid, const string& path, string *dest) {
+   apply_meta_param(path, "sid", sid, dest);
+   const RGWZoneGroup& zg = sync_env->store->svc.zone->get_zonegroup();
+   apply_meta_param(*dest, "zonegroup", zg.get_name(), dest);
+   apply_meta_param(*dest, "zonegroup_id", zg.get_id(), dest);
+   const RGWZone& zone = sync_env->store->svc.zone->get_zone();
+   apply_meta_param(*dest, "zone", zone.name, dest);
+   apply_meta_param(*dest, "zone_id", zone.id, dest);
+ }
+
+ void update_config(RGWDataSyncEnv *sync_env, const string& sid) { // expands the init-time variables of every profile's target_path in place
+ expand_target(sync_env, sid, root_profile->target_path, &root_profile->target_path); // source and destination are the same string
+ ldout(sync_env->cct, 20) << "updated target: (root) -> " << root_profile->target_path << dendl;
+ for (auto& t : explicit_profiles) { // init_target() stores the root profile in this map too, so it can be expanded a second time here
+ expand_target(sync_env, sid, t.second->target_path, &t.second->target_path);
+ ldout(sync_env->cct, 20) << "updated target: " << t.first << " -> " << t.second->target_path << dendl;
+ }
+ }
+
+ // Dumps the parsed configuration: root profile, connections, acl profiles,
+ // then every entry of explicit_profiles together with its map key.
+ void dump_conf(CephContext *cct, JSONFormatter& jf) const {
+   Formatter::ObjectSection config(jf, "config");
+   root_profile->dump_conf(cct, jf);
+   jf.open_array_section("connections");
+   for (const auto& c : connections) {
+     c.second->dump_conf(cct, jf);
+   }
+   jf.close_section();
+
+   acl_profiles.dump_conf(cct, jf);
+
+   Formatter::ArraySection profiles(jf, "profiles");
+   for (const auto& t : explicit_profiles) {
+     Formatter::ObjectSection target_section(jf, "profile");
+     encode_json("name", t.first, &jf);
+     t.second->dump_conf(cct, jf);
+   }
+ }
+
+ string get_path(std::shared_ptr<AWSSyncConfig_Profile>& profile, // returns the profile's target_path with ${bucket}/${owner} expanded, plus "/" and the key oid
+ const RGWBucketInfo& bucket_info,
+ const rgw_obj_key& obj) {
+ string bucket_str;
+ string owner;
+ if (!bucket_info.owner.tenant.empty()) {
+ bucket_str = owner = bucket_info.owner.tenant + "-"; // tenanted owners: both values start with "<tenant>-"
+ owner += bucket_info.owner.id; // NOTE(review): owner is only filled in here, so ${owner} expands to "" for non-tenanted owners - confirm intended
+ }
+ bucket_str += bucket_info.bucket.name;
+
+ const string& path = profile->target_path;
+
+ string new_path;
+ apply_meta_param(path, "bucket", bucket_str, &new_path);
+ apply_meta_param(new_path, "owner", owner, &new_path); // second pass works on the first pass's output
+
+ new_path += string("/") + get_key_oid(obj);
+
+ return new_path;
+ }
+
+ // Splits get_path() at its first '/': the text before it goes to *bucket_name, the rest to *obj_name.
+ void get_target(std::shared_ptr<AWSSyncConfig_Profile>& profile,
+                 const RGWBucketInfo& bucket_info,
+                 const rgw_obj_key& obj,
+                 string *bucket_name,
+                 string *obj_name) {
+   const string full_path = get_path(profile, bucket_info, obj);
+   const size_t slash = full_path.find('/');
+   bucket_name->assign(full_path, 0, slash);
+   obj_name->assign(full_path, slash + 1, string::npos);
+ }
+
+ // Expands the target paths, then creates an S3RESTConn for the root profile
+ // and for every explicit profile from that profile's connection settings.
+ void init_conns(RGWDataSyncEnv *sync_env, const string& id) {
+   update_config(sync_env, id);
+
+   // each profile gets its own connection object, built from its conn_conf
+   auto connect = [sync_env, &id](AWSSyncConfig_Profile& profile) {
+     const auto& conf = profile.conn_conf;
+     profile.conn.reset(new S3RESTConn(sync_env->cct,
+                                       sync_env->store->svc.zone,
+                                       id,
+                                       { conf->endpoint },
+                                       conf->key,
+                                       conf->host_style));
+   };
+
+   connect(*root_profile);
+   // init_target() also stores the root profile in explicit_profiles, so it
+   // is normally visited (and reconnected) once more by this loop
+   for (auto& entry : explicit_profiles) {
+     connect(*entry.second);
+   }
+ }
+};
+
+
+struct AWSSyncInstanceEnv {
+  AWSSyncConfig conf;  // own copy of the parsed module configuration
+  string id;           // instance id rendered as lower-case hex, set by init()
+
+  explicit AWSSyncInstanceEnv(AWSSyncConfig& _conf) : conf(_conf) {}
+
+  void init(RGWDataSyncEnv *sync_env, uint64_t instance_id) { // derives `id`, then sets up the per-profile connections
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%llx", (unsigned long long)instance_id);
+    id = buf;
+    conf.init_conns(sync_env, id);
+  }
+
+  // Stores the profile for `bucket` in *ptarget (the root profile if no explicit one matches).
+  void get_profile(const rgw_bucket& bucket, std::shared_ptr<AWSSyncConfig_Profile> *ptarget) {
+    ceph_assert(ptarget);  // checked first: find_profile() writes through the pointer
+    conf.find_profile(bucket, ptarget);
+  }
+};
+
+// Fills *info from a remote object's headers and attrs: RGWX_OBJECT_SIZE sets content_len, every other header is copied
+// to info->attrs, and the ACL attr (if present) is decoded into info->acls. Returns 0, or -EIO if the ACL cannot be decoded.
+static int do_decode_rest_obj(CephContext *cct, map<string, bufferlist>& attrs, map<string, string>& headers, rgw_rest_obj *info)
+{
+  for (const auto& header : headers) {
+    const string& val = header.second;
+    if (header.first == "RGWX_OBJECT_SIZE") {
+      // parsed as unsigned long long: atoi() cannot represent sizes beyond int range
+      info->content_len = strtoull(val.c_str(), nullptr, 10);
+    } else {
+      info->attrs[header.first] = val;
+    }
+  }
+  info->acls.set_ctx(cct);
+  auto aiter = attrs.find(RGW_ATTR_ACL);
+  if (aiter == attrs.end()) {
+    ldout(cct, 0) << "WARNING: acl attrs not provided" << dendl;
+    return 0;
+  }
+  auto bliter = aiter->second.cbegin();
+  try {
+    info->acls.decode(bliter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode policy off attrs" << dendl;
+    return -EIO;
+  }
+  return 0;
+}
+
+class RGWRESTStreamGetCRF : public RGWStreamReadHTTPResourceCRF // stream-read resource that fetches src_obj through conn
+{
+ RGWDataSyncEnv *sync_env;
+ RGWRESTConn *conn; // raw, non-owning pointer
+ rgw_obj src_obj;
+ RGWRESTConn::get_obj_params req_params; // filled in by init()
+
+ rgw_sync_aws_src_obj_properties src_properties; // own copy; req_params.unmod_ptr points into it
+public:
+ RGWRESTStreamGetCRF(CephContext *_cct,
+ RGWCoroutinesEnv *_env,
+ RGWCoroutine *_caller,
+ RGWDataSyncEnv *_sync_env,
+ RGWRESTConn *_conn,
+ rgw_obj& _src_obj,
+ const rgw_sync_aws_src_obj_properties& _src_properties) : RGWStreamReadHTTPResourceCRF(_cct, _env, _caller,
+ _sync_env->http_manager, _src_obj.key),
+ sync_env(_sync_env), conn(_conn), src_obj(_src_obj),
+ src_properties(_src_properties) {
+ }
+
+ int init() override { // prepares the GET request (not sent here) and passes it to the base class; returns its init() result or the get_obj() error
+ /* init input connection */
+
+
+ req_params.get_op = true;
+ req_params.prepend_metadata = true;
+
+ req_params.unmod_ptr = &src_properties.mtime;
+ req_params.etag = src_properties.etag;
+ req_params.mod_zone_id = src_properties.zone_short_id;
+ req_params.mod_pg_ver = src_properties.pg_ver;
+
+ if (range.is_set) {
+ req_params.range_is_set = true;
+ req_params.range_start = range.ofs;
+ req_params.range_end = range.ofs + range.size - 1; // inclusive end offset
+ }
+
+ RGWRESTStreamRWRequest *in_req; // set by get_obj(); only used after it succeeded
+ int ret = conn->get_obj(src_obj, req_params, false /* send */, &in_req);
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: " << __func__ << "(): conn->get_obj() returned ret=" << ret << dendl;
+ return ret;
+ }
+
+ set_req(in_req);
+
+ return RGWStreamReadHTTPResourceCRF::init();
+ }
+
+ int decode_rest_obj(map<string, string>& headers, bufferlist& extra_data) override { // builds rest_obj from the response headers and the JSON "attrs" in extra_data
+ map<string, bufferlist> src_attrs;
+
+ ldout(sync_env->cct, 20) << __func__ << ":" << " headers=" << headers << " extra_data.length()=" << extra_data.length() << dendl;
+
+ if (extra_data.length() > 0) {
+ JSONParser jp;
+ if (!jp.parse(extra_data.c_str(), extra_data.length())) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to parse response extra data. len=" << extra_data.length() << " data=" << extra_data.c_str() << dendl;
+ return -EIO;
+ }
+
+ JSONDecoder::decode_json("attrs", src_attrs, &jp); // the result of this call is not checked here
+ }
+ return do_decode_rest_obj(sync_env->cct, src_attrs, headers, &rest_obj); // src_attrs is empty when no extra data was received
+ }
+
+ bool need_extra_data() override { // always true for this resource
+ return true;
+ }
+};
+
+static std::set<string> keep_headers = { "CONTENT_TYPE", // Header names that RGWAWSStreamPutCRF::keep_attr() accepts by exact match.
+ "CONTENT_ENCODING",
+ "CONTENT_DISPOSITION",
+ "CONTENT_LANGUAGE" };
+
+/*
+ * Write side of a streamed transfer: sends an object, or one part of a
+ * multipart upload, to the target profile's connection as an S3 PUT.
+ */
+class RGWAWSStreamPutCRF : public RGWStreamWriteHTTPResourceCRF
+{
+  RGWDataSyncEnv *sync_env;
+  rgw_sync_aws_src_obj_properties src_properties;
+  std::shared_ptr<AWSSyncConfig_Profile> target;
+  rgw_obj dest_obj;
+  string etag; /* value of the "ETAG" response header, see handle_headers() */
+public:
+  RGWAWSStreamPutCRF(CephContext *_cct,
+                     RGWCoroutinesEnv *_env,
+                     RGWCoroutine *_caller,
+                     RGWDataSyncEnv *_sync_env,
+                     const rgw_sync_aws_src_obj_properties& _src_properties,
+                     std::shared_ptr<AWSSyncConfig_Profile>& _target,
+                     rgw_obj& _dest_obj) : RGWStreamWriteHTTPResourceCRF(_cct, _env, _caller, _sync_env->http_manager),
+                                           sync_env(_sync_env), src_properties(_src_properties), target(_target), dest_obj(_dest_obj) {
+  }
+
+  /*
+   * Create the outgoing PUT request. For a multipart part the uploadId and
+   * partNumber query parameters are attached. Returns a negative error if the
+   * request cannot be created, so a null request is never installed.
+   */
+  int init() override {
+    /* init output connection */
+    RGWRESTStreamS3PutObj *out_req{nullptr};
+    int ret;
+
+    if (multipart.is_multipart) {
+      char buf[32];
+      snprintf(buf, sizeof(buf), "%d", multipart.part_num);
+      rgw_http_param_pair params[] = { { "uploadId", multipart.upload_id.c_str() },
+                                       { "partNumber", buf },
+                                       { nullptr, nullptr } };
+      ret = target->conn->put_obj_send_init(dest_obj, params, &out_req);
+    } else {
+      ret = target->conn->put_obj_send_init(dest_obj, nullptr, &out_req);
+    }
+    if (ret < 0) {
+      ldout(sync_env->cct, 0) << "ERROR: " << __func__ << "(): put_obj_send_init() returned ret=" << ret << dendl;
+      return ret;
+    }
+
+    set_req(out_req);
+
+    return RGWStreamWriteHTTPResourceCRF::init();
+  }
+
+  /* true for headers listed in keep_headers and for any "X_AMZ_" header */
+  static bool keep_attr(const string& h) {
+    return (keep_headers.find(h) != keep_headers.end() ||
+            boost::algorithm::starts_with(h, "X_AMZ_"));
+  }
+
+  /*
+   * Build the headers sent along with the object into *attrs (cleared first):
+   *  - source attrs accepted by keep_attr()
+   *  - x-amz-grant-* headers derived from the source ACL, for grantees that
+   *    have an entry in the target's acl_mappings (others are ignored)
+   *  - x-amz-meta-rgwx-* headers describing the source object
+   */
+  static void init_send_attrs(CephContext *cct,
+                              const rgw_rest_obj& rest_obj,
+                              const rgw_sync_aws_src_obj_properties& src_properties,
+                              const AWSSyncConfig_Profile *target,
+                              map<string, string> *attrs) {
+    auto& new_attrs = *attrs;
+
+    new_attrs.clear();
+
+    for (auto& hi : rest_obj.attrs) {
+      if (keep_attr(hi.first)) {
+        new_attrs.insert(hi);
+      }
+    }
+
+    auto acl = rest_obj.acls.get_acl();
+
+    /* permission -> list of "type=grantee" entries */
+    map<int, vector<string> > access_map;
+
+    if (target->acls) {
+      for (auto& grant : acl.get_grant_map()) {
+        auto& orig_grantee = grant.first;
+        auto& perm = grant.second;
+
+        string grantee;
+
+        const auto& am = target->acls->acl_mappings;
+
+        auto iter = am.find(orig_grantee);
+        if (iter == am.end()) {
+          ldout(cct, 20) << "acl_mappings: Could not find " << orig_grantee << " .. ignoring" << dendl;
+          continue;
+        }
+
+        grantee = iter->second.dest_id;
+
+        string type;
+
+        switch (iter->second.type) {
+          case ACL_TYPE_CANON_USER:
+            type = "id";
+            break;
+          case ACL_TYPE_EMAIL_USER:
+            type = "emailAddress";
+            break;
+          case ACL_TYPE_GROUP:
+            type = "uri";
+            break;
+          default:
+            continue;
+        }
+
+        string tv = type + "=" + grantee;
+
+        int flags = perm.get_permission().get_permissions();
+        if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) {
+          /*
+           * key by the canonical value so that any extra bits in flags cannot
+           * produce a key the header switch below does not recognize
+           */
+          access_map[RGW_PERM_FULL_CONTROL].push_back(tv);
+          continue;
+        }
+
+        for (int i = 1; i <= RGW_PERM_WRITE_ACP; i <<= 1) {
+          if (flags & i) {
+            access_map[i].push_back(tv);
+          }
+        }
+      }
+    }
+
+    for (const auto& aiter : access_map) {
+      int grant_type = aiter.first;
+
+      string header_str("x-amz-grant-");
+
+      switch (grant_type) {
+        case RGW_PERM_READ:
+          header_str.append("read");
+          break;
+        case RGW_PERM_WRITE:
+          header_str.append("write");
+          break;
+        case RGW_PERM_READ_ACP:
+          header_str.append("read-acp");
+          break;
+        case RGW_PERM_WRITE_ACP:
+          header_str.append("write-acp");
+          break;
+        case RGW_PERM_FULL_CONTROL:
+          header_str.append("full-control");
+          break;
+        default:
+          /* never emit a header without a permission suffix */
+          continue;
+      }
+
+      string s;
+
+      for (const auto& viter : aiter.second) {
+        if (!s.empty()) {
+          s.append(", ");
+        }
+        s.append(viter);
+      }
+
+      ldout(cct, 20) << "acl_mappings: set acl: " << header_str << "=" << s << dendl;
+
+      new_attrs[header_str] = s;
+    }
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%llu", (unsigned long long)src_properties.versioned_epoch);
+    new_attrs["x-amz-meta-rgwx-versioned-epoch"] = buf;
+
+    utime_t ut(src_properties.mtime);
+    snprintf(buf, sizeof(buf), "%lld.%09lld",
+             (long long)ut.sec(),
+             (long long)ut.nsec());
+
+    new_attrs["x-amz-meta-rgwx-source-mtime"] = buf;
+    new_attrs["x-amz-meta-rgwx-source-etag"] = src_properties.etag;
+    new_attrs["x-amz-meta-rgwx-source-key"] = rest_obj.key.name;
+    if (!rest_obj.key.instance.empty()) {
+      new_attrs["x-amz-meta-rgwx-source-version-id"] = rest_obj.key.instance;
+    }
+  }
+
+  /*
+   * Set the send length from rest_obj and start the request. The attrs from
+   * init_send_attrs() are attached only when this is not a multipart part.
+   */
+  void send_ready(const rgw_rest_obj& rest_obj) override {
+    RGWRESTStreamS3PutObj *r = static_cast<RGWRESTStreamS3PutObj *>(req);
+
+    map<string, string> new_attrs;
+    if (!multipart.is_multipart) {
+      init_send_attrs(sync_env->cct, rest_obj, src_properties, target.get(), &new_attrs);
+    }
+
+    r->set_send_length(rest_obj.content_len);
+
+    RGWAccessControlPolicy policy;
+
+    r->send_ready(target->conn->get_key(), new_attrs, policy, false);
+  }
+
+  /* remember the "ETAG" response header, if present */
+  void handle_headers(const map<string, string>& headers) {
+    auto iter = headers.find("ETAG");
+    if (iter != headers.end()) {
+      etag = iter->second;
+    }
+  }
+
+  /* copy the remembered etag into *petag; false if none was received */
+  bool get_etag(string *petag) {
+    if (etag.empty()) {
+      return false;
+    }
+    *petag = etag;
+    return true;
+  }
+};
+
+
+class RGWAWSStreamObjToCloudPlainCR : public RGWCoroutine { // Coroutine that transfers one object in a single pass by splicing a source GET stream into a destination PUT stream.
+ RGWDataSyncEnv *sync_env;
+ RGWRESTConn *source_conn; // connection the object is read from
+ std::shared_ptr<AWSSyncConfig_Profile> target; // profile handed to the PUT side
+ rgw_obj src_obj;
+ rgw_obj dest_obj;
+
+ rgw_sync_aws_src_obj_properties src_properties;
+
+ std::shared_ptr<RGWStreamReadHTTPResourceCRF> in_crf; // created in operate()
+ std::shared_ptr<RGWStreamWriteHTTPResourceCRF> out_crf; // created in operate()
+
+public:
+ RGWAWSStreamObjToCloudPlainCR(RGWDataSyncEnv *_sync_env,
+ RGWRESTConn *_source_conn,
+ const rgw_obj& _src_obj,
+ const rgw_sync_aws_src_obj_properties& _src_properties,
+ std::shared_ptr<AWSSyncConfig_Profile> _target,
+ const rgw_obj& _dest_obj) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ source_conn(_source_conn),
+ target(_target),
+ src_obj(_src_obj),
+ dest_obj(_dest_obj),
+ src_properties(_src_properties) {}
+
+ int operate() override { // builds both stream ends, runs the splice, and reports its result
+ reenter(this) {
+ /* init input */
+ in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sync_env,
+ source_conn, src_obj,
+ src_properties));
+
+ /* init output */
+ out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sync_env,
+ src_properties, target, dest_obj));
+
+ yield call(new RGWStreamSpliceCR(cct, sync_env->http_manager, in_crf, out_crf));
+ if (retcode < 0) { // splice failed: propagate its error code
+ return set_cr_error(retcode);
+ }
+
+ return set_cr_done();
+ }
+
+ return 0;
+ }
+};
+
+class RGWAWSStreamObjToCloudMultipartPartCR : public RGWCoroutine { // Coroutine that transfers one part of a multipart upload: a ranged source GET spliced into a part PUT.
+ RGWDataSyncEnv *sync_env;
+ RGWRESTConn *source_conn;
+ std::shared_ptr<AWSSyncConfig_Profile> target;
+ rgw_obj src_obj;
+ rgw_obj dest_obj;
+
+ rgw_sync_aws_src_obj_properties src_properties;
+
+ string upload_id; // multipart upload this part belongs to
+
+ rgw_sync_aws_multipart_part_info part_info; // supplies part_num, ofs and size of the part
+
+ std::shared_ptr<RGWStreamReadHTTPResourceCRF> in_crf;
+ std::shared_ptr<RGWStreamWriteHTTPResourceCRF> out_crf;
+
+ string *petag; // out: etag reported by the PUT side for this part
+
+public:
+ RGWAWSStreamObjToCloudMultipartPartCR(RGWDataSyncEnv *_sync_env,
+ RGWRESTConn *_source_conn,
+ const rgw_obj& _src_obj,
+ std::shared_ptr<AWSSyncConfig_Profile>& _target,
+ const rgw_obj& _dest_obj,
+ const rgw_sync_aws_src_obj_properties& _src_properties,
+ const string& _upload_id,
+ const rgw_sync_aws_multipart_part_info& _part_info,
+ string *_petag) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ source_conn(_source_conn),
+ target(_target),
+ src_obj(_src_obj),
+ dest_obj(_dest_obj),
+ src_properties(_src_properties),
+ upload_id(_upload_id),
+ part_info(_part_info),
+ petag(_petag) {}
+
+ int operate() override { // streams the part and stores its etag in *petag; -EIO if no etag was received
+ reenter(this) {
+ /* init input */
+ in_crf.reset(new RGWRESTStreamGetCRF(cct, get_env(), this, sync_env,
+ source_conn, src_obj,
+ src_properties));
+
+ in_crf->set_range(part_info.ofs, part_info.size); // read only this part's byte range
+
+ /* init output */
+ out_crf.reset(new RGWAWSStreamPutCRF(cct, get_env(), this, sync_env,
+ src_properties, target, dest_obj));
+
+ out_crf->set_multipart(upload_id, part_info.part_num, part_info.size);
+
+ yield call(new RGWStreamSpliceCR(cct, sync_env->http_manager, in_crf, out_crf));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+
+ if (!(static_cast<RGWAWSStreamPutCRF *>(out_crf.get()))->get_etag(petag)) { // out_crf was created above as RGWAWSStreamPutCRF
+ ldout(sync_env->cct, 0) << "ERROR: failed to get etag from PUT request" << dendl;
+ return set_cr_error(-EIO);
+ }
+
+ return set_cr_done();
+ }
+
+ return 0;
+ }
+};
+
+/*
+ * Coroutine that aborts a multipart upload on the destination by issuing a
+ * DELETE on the object's path with the uploadId parameter.
+ */
+class RGWAWSAbortMultipartCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  RGWRESTConn *dest_conn;
+  rgw_obj dest_obj;
+
+  string upload_id;
+
+public:
+  RGWAWSAbortMultipartCR(RGWDataSyncEnv *_sync_env,
+                         RGWRESTConn *_dest_conn,
+                         const rgw_obj& _dest_obj,
+                         const string& _upload_id) : RGWCoroutine(_sync_env->cct),
+                                                     sync_env(_sync_env),
+                                                     dest_conn(_dest_conn),
+                                                     dest_obj(_dest_obj),
+                                                     upload_id(_upload_id) {}
+
+  /* completes with the DELETE's error code on failure, done otherwise */
+  int operate() override {
+    reenter(this) {
+
+      yield {
+        rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} };
+        call(new RGWDeleteRESTResourceCR(sync_env->cct, dest_conn, sync_env->http_manager,
+                                         obj_to_aws_path(dest_obj), params));
+      }
+
+      if (retcode < 0) {
+        ldout(sync_env->cct, 0) << "ERROR: failed to abort multipart upload for dest object=" << dest_obj << " (retcode=" << retcode << ")" << dendl;
+        return set_cr_error(retcode);
+      }
+
+      return set_cr_done();
+    }
+
+    return 0;
+  }
+};
+
+class RGWAWSInitMultipartCR : public RGWCoroutine { // Coroutine that starts a multipart upload on the destination and returns the upload id from the XML reply.
+ RGWDataSyncEnv *sync_env;
+ RGWRESTConn *dest_conn;
+ rgw_obj dest_obj;
+
+ uint64_t obj_size; // stored from the constructor; not read by operate()
+ map<string, string> attrs; // passed along with the POST request
+
+ bufferlist out_bl; // raw response body
+
+ string *upload_id; // out: receives result.upload_id on success
+
+ struct InitMultipartResult { // fields decoded from the InitiateMultipartUploadResult element
+ string bucket;
+ string key;
+ string upload_id;
+
+ void decode_xml(XMLObj *obj) {
+ RGWXMLDecoder::decode_xml("Bucket", bucket, obj);
+ RGWXMLDecoder::decode_xml("Key", key, obj);
+ RGWXMLDecoder::decode_xml("UploadId", upload_id, obj);
+ }
+ } result;
+
+public:
+ RGWAWSInitMultipartCR(RGWDataSyncEnv *_sync_env,
+ RGWRESTConn *_dest_conn,
+ const rgw_obj& _dest_obj,
+ uint64_t _obj_size,
+ const map<string, string>& _attrs,
+ string *_upload_id) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ dest_conn(_dest_conn),
+ dest_obj(_dest_obj),
+ obj_size(_obj_size),
+ attrs(_attrs),
+ upload_id(_upload_id) {}
+
+ int operate() override { // POSTs "?uploads" with an empty body, then parses the reply; -EIO on any parse failure
+ reenter(this) {
+
+ yield {
+ rgw_http_param_pair params[] = { { "uploads", nullptr }, {nullptr, nullptr} };
+ bufferlist bl; // empty request body
+ call(new RGWPostRawRESTResourceCR <bufferlist> (sync_env->cct, dest_conn, sync_env->http_manager,
+ obj_to_aws_path(dest_obj), params, &attrs, bl, &out_bl));
+ }
+
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to initialize multipart upload for dest object=" << dest_obj << dendl;
+ return set_cr_error(retcode);
+ }
+ {
+ /*
+ * If one of the following fails we cannot abort upload, as we cannot
+ * extract the upload id. If one of these fail it's very likely that that's
+ * the least of our problem.
+ */
+ RGWXMLDecoder::XMLParser parser;
+ if (!parser.init()) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl;
+ return set_cr_error(-EIO);
+ }
+
+ if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) {
+ string str(out_bl.c_str(), out_bl.length());
+ ldout(sync_env->cct, 5) << "ERROR: failed to parse xml: " << str << dendl;
+ return set_cr_error(-EIO);
+ }
+
+ try {
+ RGWXMLDecoder::decode_xml("InitiateMultipartUploadResult", result, &parser, true);
+ } catch (RGWXMLDecoder::err& err) {
+ string str(out_bl.c_str(), out_bl.length());
+ ldout(sync_env->cct, 5) << "ERROR: unexpected xml: " << str << dendl;
+ return set_cr_error(-EIO);
+ }
+ }
+
+ ldout(sync_env->cct, 20) << "init multipart result: bucket=" << result.bucket << " key=" << result.key << " upload_id=" << result.upload_id << dendl;
+
+ *upload_id = result.upload_id; // hand the new upload id back to the caller
+
+ return set_cr_done();
+ }
+
+ return 0;
+ }
+};
+
+/*
+ * Coroutine that completes a multipart upload on the destination: POSTs the
+ * list of part numbers and etags for upload_id and parses the XML reply.
+ */
+class RGWAWSCompleteMultipartCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  RGWRESTConn *dest_conn;
+  rgw_obj dest_obj;
+
+  bufferlist out_bl; /* raw response body */
+
+  string upload_id;
+
+  /* request body: one <Part> element (PartNumber, ETag) per uploaded part */
+  struct CompleteMultipartReq {
+    map<int, rgw_sync_aws_multipart_part_info> parts;
+
+    explicit CompleteMultipartReq(const map<int, rgw_sync_aws_multipart_part_info>& _parts) : parts(_parts) {}
+
+    void dump_xml(Formatter *f) const {
+      for (const auto& p : parts) {
+        f->open_object_section("Part");
+        encode_xml("PartNumber", p.first, f);
+        encode_xml("ETag", p.second.etag, f);
+        f->close_section();
+      }
+    }
+  } req_enc;
+
+  /* fields decoded from the CompleteMultipartUploadResult element */
+  struct CompleteMultipartResult {
+    string location;
+    string bucket;
+    string key;
+    string etag;
+
+    void decode_xml(XMLObj *obj) {
+      RGWXMLDecoder::decode_xml("Location", location, obj);
+      RGWXMLDecoder::decode_xml("Bucket", bucket, obj);
+      RGWXMLDecoder::decode_xml("Key", key, obj);
+      RGWXMLDecoder::decode_xml("ETag", etag, obj);
+    }
+  } result;
+
+public:
+  RGWAWSCompleteMultipartCR(RGWDataSyncEnv *_sync_env,
+                            RGWRESTConn *_dest_conn,
+                            const rgw_obj& _dest_obj,
+                            string _upload_id,
+                            const map<int, rgw_sync_aws_multipart_part_info>& _parts) : RGWCoroutine(_sync_env->cct),
+                                                                                        sync_env(_sync_env),
+                                                                                        dest_conn(_dest_conn),
+                                                                                        dest_obj(_dest_obj),
+                                                                                        upload_id(_upload_id),
+                                                                                        req_enc(_parts) {}
+
+  /*
+   * Completes with the POST's error code if the request fails, with -EIO if
+   * the reply cannot be parsed, and done otherwise.
+   */
+  int operate() override {
+    reenter(this) {
+
+      yield {
+        rgw_http_param_pair params[] = { { "uploadId", upload_id.c_str() }, {nullptr, nullptr} };
+        stringstream ss;
+        XMLFormatter formatter;
+
+        encode_xml("CompleteMultipartUpload", req_enc, &formatter);
+
+        formatter.flush(ss);
+
+        bufferlist bl;
+        bl.append(ss.str());
+
+        call(new RGWPostRawRESTResourceCR <bufferlist> (sync_env->cct, dest_conn, sync_env->http_manager,
+                                                        obj_to_aws_path(dest_obj), params, nullptr, bl, &out_bl));
+      }
+
+      if (retcode < 0) {
+        ldout(sync_env->cct, 0) << "ERROR: failed to complete multipart upload for dest object=" << dest_obj << dendl;
+        return set_cr_error(retcode);
+      }
+      {
+        /*
+         * The completion request itself succeeded; an unreadable reply is
+         * still reported to the caller as -EIO.
+         */
+        RGWXMLDecoder::XMLParser parser;
+        if (!parser.init()) {
+          ldout(sync_env->cct, 0) << "ERROR: failed to initialize xml parser for parsing multipart complete response from server" << dendl;
+          return set_cr_error(-EIO);
+        }
+
+        if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) {
+          string str(out_bl.c_str(), out_bl.length());
+          ldout(sync_env->cct, 5) << "ERROR: failed to parse xml: " << str << dendl;
+          return set_cr_error(-EIO);
+        }
+
+        try {
+          RGWXMLDecoder::decode_xml("CompleteMultipartUploadResult", result, &parser, true);
+        } catch (RGWXMLDecoder::err& err) {
+          string str(out_bl.c_str(), out_bl.length());
+          ldout(sync_env->cct, 5) << "ERROR: unexpected xml: " << str << dendl;
+          return set_cr_error(-EIO);
+        }
+      }
+
+      ldout(sync_env->cct, 20) << "complete multipart result: location=" << result.location << " bucket=" << result.bucket << " key=" << result.key << " etag=" << result.etag << dendl;
+
+      return set_cr_done();
+    }
+
+    return 0;
+  }
+};
+
+
+class RGWAWSStreamAbortMultipartUploadCR : public RGWCoroutine { // Best-effort cleanup: aborts the multipart upload on the destination, then removes the upload status object.
+ RGWDataSyncEnv *sync_env;
+ RGWRESTConn *dest_conn;
+ const rgw_obj dest_obj;
+ const rgw_raw_obj status_obj; // object removed after the abort
+
+ string upload_id;
+
+public:
+
+ RGWAWSStreamAbortMultipartUploadCR(RGWDataSyncEnv *_sync_env,
+ RGWRESTConn *_dest_conn,
+ const rgw_obj& _dest_obj,
+ const rgw_raw_obj& _status_obj,
+ const string& _upload_id) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ dest_conn(_dest_conn),
+ dest_obj(_dest_obj),
+ status_obj(_status_obj),
+ upload_id(_upload_id) {}
+
+ int operate() override { // always finishes with set_cr_done(); failures of either step are only logged
+ reenter(this) {
+ yield call(new RGWAWSAbortMultipartCR(sync_env, dest_conn, dest_obj, upload_id));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to abort multipart upload dest obj=" << dest_obj << " upload_id=" << upload_id << " retcode=" << retcode << dendl;
+ /* ignore error, best effort */
+ }
+ yield call(new RGWRadosRemoveCR(sync_env->store, status_obj));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " retcode=" << retcode << dendl;
+ /* ignore error, best effort */
+ }
+ return set_cr_done();
+ }
+
+ return 0;
+ }
+};
+
+/*
+ * Coroutine that transfers one object to the destination as a multipart
+ * upload. Progress is kept in `status`, which is stored in status_obj after
+ * every part so that an interrupted upload can be continued.
+ */
+class RGWAWSStreamObjToCloudMultipartCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  AWSSyncConfig& conf;
+  RGWRESTConn *source_conn;
+  std::shared_ptr<AWSSyncConfig_Profile> target;
+  rgw_obj src_obj;
+  rgw_obj dest_obj;
+
+  uint64_t obj_size;
+  string src_etag;
+  rgw_sync_aws_src_obj_properties src_properties;
+  rgw_rest_obj rest_obj;
+
+  rgw_sync_aws_multipart_upload_info status;
+
+  map<string, string> new_attrs;
+
+  rgw_sync_aws_multipart_part_info *pcur_part_info{nullptr};
+
+  int ret_err{0}; /* keeps the original error across the cleanup coroutine */
+
+  rgw_raw_obj status_obj;
+
+public:
+  RGWAWSStreamObjToCloudMultipartCR(RGWDataSyncEnv *_sync_env,
+                                    AWSSyncConfig& _conf,
+                                    RGWRESTConn *_source_conn,
+                                    const rgw_obj& _src_obj,
+                                    std::shared_ptr<AWSSyncConfig_Profile>& _target,
+                                    const rgw_obj& _dest_obj,
+                                    uint64_t _obj_size,
+                                    const rgw_sync_aws_src_obj_properties& _src_properties,
+                                    const rgw_rest_obj& _rest_obj) : RGWCoroutine(_sync_env->cct),
+                                                                     sync_env(_sync_env),
+                                                                     conf(_conf),
+                                                                     source_conn(_source_conn),
+                                                                     target(_target),
+                                                                     src_obj(_src_obj),
+                                                                     dest_obj(_dest_obj),
+                                                                     obj_size(_obj_size),
+                                                                     src_properties(_src_properties),
+                                                                     rest_obj(_rest_obj),
+                                                                     status_obj(sync_env->store->svc.zone->get_zone_params().log_pool,
+                                                                                RGWBucketSyncStatusManager::obj_status_oid(sync_env->source_zone, src_obj)) {
+  }
+
+
+  /*
+   * Steps:
+   *  1. read the stored upload status; if it describes a different version of
+   *     the source object (mtime, size or etag differ) abort that upload
+   *  2. when there is no usable status, start a new upload and reset the
+   *     progress fields
+   *  3. send the parts one by one, storing the status after each
+   *  4. complete the upload and remove the status object
+   * A failed part or a failed completion aborts the upload and reports the
+   * original error.
+   */
+  int operate() override {
+    reenter(this) {
+      yield call(new RGWSimpleRadosReadCR<rgw_sync_aws_multipart_upload_info>(sync_env->async_rados, sync_env->store->svc.sysobj,
+                                                                              status_obj, &status, false));
+
+      if (retcode < 0 && retcode != -ENOENT) {
+        ldout(sync_env->cct, 0) << "ERROR: failed to read sync status of object " << src_obj << " retcode=" << retcode << dendl;
+        /* mark the coroutine as failed, like every other error path here */
+        return set_cr_error(retcode);
+      }
+
+      if (retcode >= 0) {
+        /* check here that mtime and size did not change */
+
+        if (status.src_properties.mtime != src_properties.mtime || status.obj_size != obj_size ||
+            status.src_properties.etag != src_properties.etag) {
+          yield call(new RGWAWSStreamAbortMultipartUploadCR(sync_env, target->conn.get(), dest_obj, status_obj, status.upload_id));
+          retcode = -ENOENT;
+        }
+      }
+
+      if (retcode == -ENOENT) {
+        RGWAWSStreamPutCRF::init_send_attrs(sync_env->cct, rest_obj, src_properties, target.get(), &new_attrs);
+
+        /* status.obj_size is not valid yet at this point, pass the real size */
+        yield call(new RGWAWSInitMultipartCR(sync_env, target->conn.get(), dest_obj, obj_size, new_attrs, &status.upload_id));
+        if (retcode < 0) {
+          return set_cr_error(retcode);
+        }
+
+        status.obj_size = obj_size;
+        status.src_properties = src_properties;
+#define MULTIPART_MAX_PARTS 10000
+        uint64_t min_part_size = obj_size / MULTIPART_MAX_PARTS;
+        status.part_size = std::max(conf.s3.multipart_min_part_size, min_part_size);
+        status.num_parts = (obj_size + status.part_size - 1) / status.part_size;
+        status.cur_part = 1;
+        /* drop any progress left over from an aborted upload */
+        status.cur_ofs = 0;
+        status.parts.clear();
+      }
+
+      for (; (uint32_t)status.cur_part <= status.num_parts; ++status.cur_part) {
+        yield {
+          rgw_sync_aws_multipart_part_info& cur_part_info = status.parts[status.cur_part];
+          cur_part_info.part_num = status.cur_part;
+          /*
+           * Derive the offset from the part number. The stored status holds
+           * the number of the part that was just sent together with an offset
+           * that already points past it, so a continued upload must not take
+           * the offset from cur_ofs.
+           */
+          cur_part_info.ofs = (uint64_t)(status.cur_part - 1) * status.part_size;
+          cur_part_info.size = std::min((uint64_t)status.part_size, status.obj_size - cur_part_info.ofs);
+
+          pcur_part_info = &cur_part_info;
+
+          status.cur_ofs = cur_part_info.ofs + status.part_size;
+
+          call(new RGWAWSStreamObjToCloudMultipartPartCR(sync_env,
+                                                         source_conn, src_obj,
+                                                         target,
+                                                         dest_obj,
+                                                         status.src_properties,
+                                                         status.upload_id,
+                                                         cur_part_info,
+                                                         &cur_part_info.etag));
+        }
+
+        if (retcode < 0) {
+          ldout(sync_env->cct, 0) << "ERROR: failed to sync obj=" << src_obj << ", sync via multipart upload, upload_id=" << status.upload_id << " part number " << status.cur_part << " (error: " << cpp_strerror(-retcode) << ")" << dendl;
+          ret_err = retcode;
+          yield call(new RGWAWSStreamAbortMultipartUploadCR(sync_env, target->conn.get(), dest_obj, status_obj, status.upload_id));
+          return set_cr_error(ret_err);
+        }
+
+        yield call(new RGWSimpleRadosWriteCR<rgw_sync_aws_multipart_upload_info>(sync_env->async_rados, sync_env->store->svc.sysobj, status_obj, status));
+        if (retcode < 0) {
+          ldout(sync_env->cct, 0) << "ERROR: failed to store multipart upload state, retcode=" << retcode << dendl;
+          /* continue with upload anyway */
+        }
+        ldout(sync_env->cct, 20) << "sync of object=" << src_obj << " via multipart upload, finished sending part #" << status.cur_part << " etag=" << pcur_part_info->etag << dendl;
+      }
+
+      yield call(new RGWAWSCompleteMultipartCR(sync_env, target->conn.get(), dest_obj, status.upload_id, status.parts));
+      if (retcode < 0) {
+        ldout(sync_env->cct, 0) << "ERROR: failed to complete multipart upload of obj=" << src_obj << " (error: " << cpp_strerror(-retcode) << ")" << dendl;
+        ret_err = retcode;
+        yield call(new RGWAWSStreamAbortMultipartUploadCR(sync_env, target->conn.get(), dest_obj, status_obj, status.upload_id));
+        return set_cr_error(ret_err);
+      }
+
+      /* remove status obj */
+      yield call(new RGWRadosRemoveCR(sync_env->store, status_obj));
+      if (retcode < 0) {
+        ldout(sync_env->cct, 0) << "ERROR: failed to remove sync status obj obj=" << status_obj << " upload_id=" << status.upload_id << " (" << cpp_strerror(-retcode) << ")" << dendl;
+        /* ignore error, best effort */
+      }
+      return set_cr_done();
+    }
+
+    return 0;
+  }
+};
+/*
+ * Decode the attr called attr_name from attrs into *result.
+ *
+ * A missing or zero-length attr yields def_val. Returns 0 on success and
+ * -EIO when the stored value cannot be decoded.
+ */
+template <class T>
+int decode_attr(map<string, bufferlist>& attrs, const char *attr_name, T *result, T def_val)
+{
+  auto found = attrs.find(attr_name);
+  if (found == attrs.end() || found->second.length() == 0) {
+    *result = def_val;
+    return 0;
+  }
+
+  auto pos = found->second.cbegin();
+  try {
+    decode(*result, pos);
+  } catch (buffer::error& err) {
+    return -EIO;
+  }
+  return 0;
+}
+
+// maybe use Fetch Remote Obj instead?
+class RGWAWSHandleRemoteObjCBCR: public RGWStatRemoteObjCBCR { // Coroutine that sends one source object to its target: creates the target bucket, then streams the object as a plain PUT or a multipart upload depending on its size.
+ AWSSyncInstanceEnv& instance;
+
+ uint64_t versioned_epoch{0};
+
+ RGWRESTConn *source_conn{nullptr}; // looked up from sync_env->source_zone in operate()
+ std::shared_ptr<AWSSyncConfig_Profile> target; // filled by instance.get_profile()
+ bufferlist res;
+ unordered_map <string, bool> bucket_created; // target buckets this coroutine object has already created
+ string target_bucket_name;
+ string target_obj_name;
+ rgw_rest_obj rest_obj;
+ int ret{0};
+
+ uint32_t src_zone_short_id{0}; // decoded from RGW_ATTR_SOURCE_ZONE
+ uint64_t src_pg_ver{0}; // decoded from RGW_ATTR_PG_VER
+
+ bufferlist out_bl; // response body of the create-bucket request
+
+ struct CreateBucketResult { // "Code" field of the Error element returned by a failed bucket creation
+ string code;
+
+ void decode_xml(XMLObj *obj) {
+ RGWXMLDecoder::decode_xml("Code", code, obj);
+ }
+ } result;
+
+public:
+ RGWAWSHandleRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info,
+ rgw_obj_key& _key,
+ AWSSyncInstanceEnv& _instance,
+ uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sync_env, _bucket_info, _key),
+ instance(_instance), versioned_epoch(_versioned_epoch)
+ {}
+
+ ~RGWAWSHandleRemoteObjCBCR(){
+ }
+
+ int operate() override {
+ reenter(this) {
+ ret = decode_attr(attrs, RGW_ATTR_PG_VER, &src_pg_ver, (uint64_t)0); // decode failures below are logged and otherwise ignored
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to decode pg ver attr, ignoring" << dendl;
+ } else {
+ ret = decode_attr(attrs, RGW_ATTR_SOURCE_ZONE, &src_zone_short_id, (uint32_t)0);
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to decode source zone short_id attr, ignoring" << dendl;
+ src_pg_ver = 0; /* all or nothing */
+ }
+ }
+ ldout(sync_env->cct, 4) << "AWS: download begin: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key << " size=" << size
+ << " mtime=" << mtime << " etag=" << etag
+ << " zone_short_id=" << src_zone_short_id << " pg_ver=" << src_pg_ver
+ << dendl;
+
+
+ source_conn = sync_env->store->svc.zone->get_zone_conn_by_id(sync_env->source_zone);
+ if (!source_conn) {
+ ldout(sync_env->cct, 0) << "ERROR: cannot find http connection to zone " << sync_env->source_zone << dendl;
+ return set_cr_error(-EINVAL);
+ }
+
+ instance.get_profile(bucket_info.bucket, &target);
+ instance.conf.get_target(target, bucket_info, key, &target_bucket_name, &target_obj_name);
+
+ if (bucket_created.find(target_bucket_name) == bucket_created.end()){ // bucket not yet created by this coroutine object
+ yield {
+ ldout(sync_env->cct,0) << "AWS: creating bucket " << target_bucket_name << dendl;
+ bufferlist bl; // empty request body
+ call(new RGWPutRawRESTResourceCR <bufferlist> (sync_env->cct, target->conn.get(),
+ sync_env->http_manager,
+ target_bucket_name, nullptr, bl, &out_bl));
+ }
+ if (retcode < 0 ) { // inspect the error body: only "BucketAlreadyOwnedByYou" is tolerated
+ RGWXMLDecoder::XMLParser parser;
+ if (!parser.init()) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to initialize xml parser for parsing multipart init response from server" << dendl;
+ return set_cr_error(retcode);
+ }
+
+ if (!parser.parse(out_bl.c_str(), out_bl.length(), 1)) {
+ string str(out_bl.c_str(), out_bl.length());
+ ldout(sync_env->cct, 5) << "ERROR: failed to parse xml: " << str << dendl;
+ return set_cr_error(retcode);
+ }
+
+ try {
+ RGWXMLDecoder::decode_xml("Error", result, &parser, true);
+ } catch (RGWXMLDecoder::err& err) {
+ string str(out_bl.c_str(), out_bl.length());
+ ldout(sync_env->cct, 5) << "ERROR: unexpected xml: " << str << dendl;
+ return set_cr_error(retcode);
+ }
+
+ if (result.code != "BucketAlreadyOwnedByYou") {
+ return set_cr_error(retcode);
+ }
+ }
+
+ bucket_created[target_bucket_name] = true;
+ }
+
+ yield {
+ rgw_obj src_obj(bucket_info.bucket, key);
+
+ /* init output */
+ rgw_bucket target_bucket;
+ target_bucket.name = target_bucket_name; /* this is only possible because we only use bucket name for
+ uri resolution */
+ rgw_obj dest_obj(target_bucket, target_obj_name);
+
+
+ rgw_sync_aws_src_obj_properties src_properties;
+ src_properties.mtime = mtime;
+ src_properties.etag = etag;
+ src_properties.zone_short_id = src_zone_short_id;
+ src_properties.pg_ver = src_pg_ver;
+ src_properties.versioned_epoch = versioned_epoch;
+
+ if (size < instance.conf.s3.multipart_sync_threshold) { // below the threshold: single streamed PUT
+ call(new RGWAWSStreamObjToCloudPlainCR(sync_env, source_conn, src_obj,
+ src_properties,
+ target,
+ dest_obj));
+ } else { // at or above the threshold: multipart upload
+ rgw_rest_obj rest_obj; // local object, distinct from the rest_obj member
+ rest_obj.init(key);
+ if (do_decode_rest_obj(sync_env->cct, attrs, headers, &rest_obj)) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to decode rest obj out of headers=" << headers << ", attrs=" << attrs << dendl;
+ return set_cr_error(-EINVAL);
+ }
+ call(new RGWAWSStreamObjToCloudMultipartCR(sync_env, instance.conf, source_conn, src_obj,
+ target, dest_obj, size, src_properties, rest_obj));
+ }
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+
+ return set_cr_done();
+ }
+
+ return 0;
+ }
+};
+
+class RGWAWSHandleRemoteObjCR : public RGWCallStatRemoteObjCR { // RGWCallStatRemoteObjCR specialization whose callback is an RGWAWSHandleRemoteObjCBCR.
+ AWSSyncInstanceEnv& instance;
+ uint64_t versioned_epoch;
+public:
+ RGWAWSHandleRemoteObjCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+ AWSSyncInstanceEnv& _instance, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sync_env, _bucket_info, _key),
+ instance(_instance), versioned_epoch(_versioned_epoch) {
+ }
+
+ ~RGWAWSHandleRemoteObjCR() {}
+
+ RGWStatRemoteObjCBCR *allocate_callback() override { // returns a newly allocated callback built from this object's members
+ return new RGWAWSHandleRemoteObjCBCR(sync_env, bucket_info, key, instance, versioned_epoch);
+ }
+};
+
+class RGWAWSRemoveRemoteObjCBCR : public RGWCoroutine { // Coroutine that deletes the target-side copy of an object with a REST DELETE on its path.
+ RGWDataSyncEnv *sync_env{nullptr};
+ std::shared_ptr<AWSSyncConfig_Profile> target; // filled by instance.get_profile() in operate()
+ RGWBucketInfo bucket_info;
+ rgw_obj_key key;
+ ceph::real_time mtime; // only used in the log line
+ AWSSyncInstanceEnv& instance;
+ int ret{0};
+public:
+ RGWAWSRemoveRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key, const ceph::real_time& _mtime,
+ AWSSyncInstanceEnv& _instance) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ bucket_info(_bucket_info), key(_key),
+ mtime(_mtime), instance(_instance) {}
+ int operate() override { // completes with the DELETE's error code on failure, done otherwise
+ reenter(this) {
+ ldout(sync_env->cct, 0) << ": remove remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime << dendl;
+ yield {
+ instance.get_profile(bucket_info.bucket, &target);
+ string path = instance.conf.get_path(target, bucket_info, key); // path of the object on the target
+ ldout(sync_env->cct, 0) << "AWS: removing aws object at" << path << dendl;
+
+ call(new RGWDeleteRESTResourceCR(sync_env->cct, target->conn.get(),
+ sync_env->http_manager,
+ path, nullptr /* params */));
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+};
+
+
+/*
+ * Data sync module of the AWS sync target: maps object sync events to the
+ * coroutines that send an object to, or remove it from, the remote endpoint.
+ */
+class RGWAWSDataSyncModule: public RGWDataSyncModule {
+  CephContext *cct;
+  AWSSyncInstanceEnv instance;
+
+public:
+  RGWAWSDataSyncModule(CephContext *_cct, AWSSyncConfig& _conf)
+    : cct(_cct), instance(_conf) {}
+
+  ~RGWAWSDataSyncModule() {}
+
+  /* forwards to the instance environment */
+  void init(RGWDataSyncEnv *sync_env, uint64_t instance_id) override {
+    instance.init(sync_env, instance_id);
+  }
+
+  /* returns the coroutine that stats the source object and sends it to the target */
+  RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key,
+                            std::optional<uint64_t> versioned_epoch,
+                            rgw_zone_set *zones_trace) override {
+    const uint64_t epoch = versioned_epoch.value_or(0);
+    ldout(sync_env->cct, 0) << instance.id << ": sync_object: b=" << bucket_info.bucket << " k=" << key << " versioned_epoch=" << epoch << dendl;
+    return new RGWAWSHandleRemoteObjCR(sync_env, bucket_info, key, instance, epoch);
+  }
+
+  /* returns the coroutine that deletes the object on the target */
+  RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key,
+                              real_time& mtime, bool versioned, uint64_t versioned_epoch,
+                              rgw_zone_set *zones_trace) override {
+    ldout(sync_env->cct, 0) <<"rm_object: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+    return new RGWAWSRemoveRemoteObjCBCR(sync_env, bucket_info, key, mtime, instance);
+  }
+
+  /* not implemented for this module: logs the request and returns no coroutine */
+  RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key,
+                                     real_time& mtime, rgw_bucket_entry_owner& owner, bool versioned,
+                                     uint64_t versioned_epoch, rgw_zone_set *zones_trace) override {
+    ldout(sync_env->cct, 0) <<"AWS Not implemented: create_delete_marker: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime
+                            << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+    return nullptr;
+  }
+};
+
+class RGWAWSSyncModuleInstance : public RGWSyncModuleInstance { // Sync module instance that owns an RGWAWSDataSyncModule and exposes it as its data handler.
+ RGWAWSDataSyncModule data_handler;
+public:
+ RGWAWSSyncModuleInstance(CephContext *cct, AWSSyncConfig& _conf) : data_handler(cct, _conf) {}
+ RGWDataSyncModule *get_data_handler() override { // returns a pointer to the owned member
+ return &data_handler;
+ }
+};
+
+int RGWAWSSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance){ // Build an AWS sync module instance from `config`; returns conf.init()'s negative error on failure, 0 otherwise.
+ AWSSyncConfig conf; // NOTE(review): local passed by reference below; confirm the instance copies it rather than keeping the reference
+
+ int r = conf.init(cct, config);
+ if (r < 0) {
+ return r;
+ }
+
+ instance->reset(new RGWAWSSyncModuleInstance(cct, conf));
+ return 0;
+}
diff --git a/src/rgw/rgw_sync_module_aws.h b/src/rgw/rgw_sync_module_aws.h
new file mode 100644
index 00000000..a44202b5
--- /dev/null
+++ b/src/rgw/rgw_sync_module_aws.h
@@ -0,0 +1,111 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_SYNC_MODULE_AWS_H
+#define RGW_SYNC_MODULE_AWS_H
+
+#include "rgw_sync_module.h"
+
+struct rgw_sync_aws_multipart_part_info { // per-part record (number, offset, size, etag) with encode/decode support
+ int part_num{0};
+ uint64_t ofs{0};
+ uint64_t size{0};
+ string etag;
+// encode(): fields are written in declaration order; decode() below reads them back in the same order
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl); // NOTE(review): the two numbers are presumably struct version / compat version — confirm against Ceph's encoding macros
+ encode(part_num, bl);
+ encode(ofs, bl);
+ encode(size, bl);
+ encode(etag, bl);
+ ENCODE_FINISH(bl);
+ }
+// decode(): mirror image of encode()
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(part_num, bl);
+ decode(ofs, bl);
+ decode(size, bl);
+ decode(etag, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_part_info)
+
+struct rgw_sync_aws_src_obj_properties { // properties of the source object (mtime, etag, zone short id, pg version, versioned epoch) with encode/decode support
+ ceph::real_time mtime;
+ string etag;
+ uint32_t zone_short_id{0};
+ uint64_t pg_ver{0};
+ uint64_t versioned_epoch{0};
+// encode(): fields are written in declaration order; decode() below reads them back in the same order
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl); // NOTE(review): the two numbers are presumably struct version / compat version — confirm against Ceph's encoding macros
+ encode(mtime, bl);
+ encode(etag, bl);
+ encode(zone_short_id, bl);
+ encode(pg_ver, bl);
+ encode(versioned_epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+// decode(): mirror image of encode()
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(mtime, bl);
+ decode(etag, bl);
+ decode(zone_short_id, bl);
+ decode(pg_ver, bl);
+ decode(versioned_epoch, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(rgw_sync_aws_src_obj_properties)
+
+struct rgw_sync_aws_multipart_upload_info {  // state of one multipart upload: id, sizes, progress cursor and per-part records
+  string upload_id;
+  uint64_t obj_size{0};  // zero-initialized like the other counters so a default-constructed object encodes a defined value
+  rgw_sync_aws_src_obj_properties src_properties;
+  uint32_t part_size{0};
+  uint32_t num_parts{0};
+
+  int cur_part{0};
+  uint64_t cur_ofs{0};
+
+  std::map<int, rgw_sync_aws_multipart_part_info> parts;  // NOTE(review): presumably keyed by part number — confirm against the uploader
+  // encode(): fields are written in declaration order; decode() below reads them back in the same order
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(upload_id, bl);
+    encode(obj_size, bl);
+    encode(src_properties, bl);
+    encode(part_size, bl);
+    encode(num_parts, bl);
+    encode(cur_part, bl);
+    encode(cur_ofs, bl);
+    encode(parts, bl);
+    ENCODE_FINISH(bl);
+  }
+  // decode(): mirror image of encode()
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(upload_id, bl);
+    decode(obj_size, bl);
+    decode(src_properties, bl);
+    decode(part_size, bl);
+    decode(num_parts, bl);
+    decode(cur_part, bl);
+    decode(cur_ofs, bl);
+    decode(parts, bl);
+    DECODE_FINISH(bl);
+  }
+};
+WRITE_CLASS_ENCODER(rgw_sync_aws_multipart_upload_info)
+
+class RGWAWSSyncModule : public RGWSyncModule { // AWS sync module; instances are produced by create_instance()
+ public:
+ RGWAWSSyncModule() {}
+ bool supports_data_export() override { return false;} // this module reports that it does not support data export
+ int create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; // defined in rgw_sync_module_aws.cc
+};
+
+#endif /* RGW_SYNC_MODULE_AWS_H */
diff --git a/src/rgw/rgw_sync_module_es.cc b/src/rgw/rgw_sync_module_es.cc
new file mode 100644
index 00000000..36b652a1
--- /dev/null
+++ b/src/rgw/rgw_sync_module_es.cc
@@ -0,0 +1,918 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_b64.h"
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+#include "rgw_sync_module.h"
+#include "rgw_data_sync.h"
+#include "rgw_sync_module_es.h"
+#include "rgw_sync_module_es_rest.h"
+#include "rgw_rest_conn.h"
+#include "rgw_cr_rest.h"
+#include "rgw_op.h"
+#include "rgw_es_query.h"
+#include "rgw_zone.h"
+
+#include "services/svc_zone.h"
+
+#include "include/str_list.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+
+/*
+ * whitelist utility. Config string is a list of entries, where an entry is either an item,
+ * a prefix, or a suffix. An item would be the name of the entity that we'd look up,
+ * a prefix would be a string ending with an asterisk, a suffix would be a string starting
+ * with an asterisk. For example:
+ *
+ * bucket1, bucket2, foo*, *bar
+ */
+class ItemList {
+  bool approve_all{false};  // when true, exists() accepts every name
+
+  set<string> entries;   // exact names
+  set<string> prefixes;  // "foo*" entries, stored without the trailing '*'
+  set<string> suffixes;  // "*bar" entries, stored without the leading '*'
+  // split a comma-separated config string; a bare "*" entry approves everything and stops parsing
+  void parse(const string& str) {
+    list<string> l;
+
+    get_str_list(str, ",", l);
+
+    for (auto& entry : l) {
+      entry = rgw_trim_whitespace(entry);
+      if (entry.empty()) {
+        continue;
+      }
+
+      if (entry == "*") {
+        approve_all = true;
+        return;
+      }
+
+      if (entry[0] == '*') {
+        suffixes.insert(entry.substr(1));
+        continue;
+      }
+
+      if (entry.back() == '*') {
+        prefixes.insert(entry.substr(0, entry.size() - 1));
+        continue;
+      }
+
+      entries.insert(entry);
+    }
+  }
+
+public:
+  ItemList() {}
+  void init(const string& str, bool def_val) {  // an empty config string falls back to def_val
+    if (str.empty()) {
+      approve_all = def_val;
+    } else {
+      parse(str);
+    }
+  }
+  // true when entry is approved: approve-all, exact name, any configured prefix, or any configured suffix
+  bool exists(const string& entry) {
+    if (approve_all) {
+      return true;
+    }
+
+    if (entries.find(entry) != entries.end()) {
+      return true;
+    }
+    // Every configured prefix of `entry` sorts <= `entry`, but the closest predecessor
+    // need not be one of them (e.g. {"a", "ab"} vs "ac"), so scan all candidates.
+    const auto last = prefixes.upper_bound(entry);
+    for (auto i = prefixes.begin(); i != last; ++i) {
+      if (boost::algorithm::starts_with(entry, *i)) {
+        return true;
+      }
+    }
+
+    for (auto i = suffixes.begin(); i != suffixes.end(); ++i) {
+      if (boost::algorithm::ends_with(entry, *i)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+};
+
+#define ES_NUM_SHARDS_MIN 5 // lower bound applied to the configured shard count
+// defaults used when the sync-module config omits num_shards / num_replicas
+#define ES_NUM_SHARDS_DEFAULT 16
+#define ES_NUM_REPLICAS_DEFAULT 1
+// Elasticsearch version as a (major, minor) pair; std::pair compares lexicographically
+using ESVersion = std::pair<int,int>;
+static constexpr ESVersion ES_V5{5,0};
+static constexpr ESVersion ES_V7{7,0};
+
+struct ESInfo { // identity and version fields decoded from an Elasticsearch JSON response
+ std::string name;
+ std::string cluster_name;
+ std::string cluster_uuid;
+ ESVersion version; // (major, minor)
+// defined out of line further down in this file
+ void decode_json(JSONObj *obj);
+// formats version as "<major>.<minor>"
+ std::string get_version_str(){
+ return std::to_string(version.first) + "." + std::to_string(version.second);
+ }
+};
+
+// simple wrapper structure to wrap the es version nested type
+struct es_version_decoder {
+  ESVersion version;
+  // parse "<major>[.<minor>...]" into `version`; returns 0 on success, a negative value on failure
+  int parse_version(const std::string& s) {
+    int major = 0, minor = 0;  // minor stays 0 when only a major number is present
+    int ret = sscanf(s.c_str(), "%d.%d", &major, &minor);
+    if (ret < 1) {  // 0: nothing numeric matched; EOF (< 0): empty input
+      return (ret < 0 ? ret : -1);
+    }
+    version = std::make_pair(major,minor);
+    return 0;
+  }
+
+  void decode_json(JSONObj *obj) {  // reads the "number" string of the nested version object; throws JSONDecoder::err if it cannot be parsed
+    std::string s;
+    JSONDecoder::decode_json("number",s,obj);
+    if (parse_version(s) < 0)
+      throw JSONDecoder::err("Failed to parse ElasticVersion");
+  }
+};
+
+
+void ESInfo::decode_json(JSONObj *obj) // fills name, cluster_name, cluster_uuid and version from `obj`
+{
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("cluster_name", cluster_name, obj);
+ JSONDecoder::decode_json("cluster_uuid", cluster_uuid, obj);
+ es_version_decoder esv; // "version" is decoded through the helper, which extracts its "number" string
+ JSONDecoder::decode_json("version", esv, obj);
+ version = std::move(esv.version);
+}
+
+struct ElasticConfig {  // settings and shared connection state for one Elasticsearch sync target
+  uint64_t sync_instance{0};
+  string id;
+  string index_path;
+  std::unique_ptr<RGWRESTConn> conn;
+  bool explicit_custom_meta{true};
+  string override_index_path;
+  ItemList index_buckets;
+  ItemList allow_owners;
+  uint32_t num_shards{0};
+  uint32_t num_replicas{0};
+  std::map <string,string> default_headers = {{ "Content-Type", "application/json" }};
+  ESInfo es_info;
+  // read the sync-module configuration and create the REST connection to the endpoint
+  void init(CephContext *cct, const JSONFormattable& config) {
+    string elastic_endpoint = config["endpoint"];
+    id = string("elastic:") + elastic_endpoint;
+    conn.reset(new RGWRESTConn(cct, nullptr, id, { elastic_endpoint }));
+    explicit_custom_meta = config["explicit_custom_meta"](true);
+    index_buckets.init(config["index_buckets_list"], true); /* approve all buckets by default */
+    allow_owners.init(config["approved_owners_list"], true); /* approve all bucket owners by default */
+    override_index_path = config["override_index_path"];
+    num_shards = config["num_shards"](ES_NUM_SHARDS_DEFAULT);
+    if (num_shards < ES_NUM_SHARDS_MIN) {
+      num_shards = ES_NUM_SHARDS_MIN;
+    }
+    num_replicas = config["num_replicas"](ES_NUM_REPLICAS_DEFAULT);
+    if (string user = config["username"], pw = config["password"];
+        !user.empty() && !pw.empty()) {  // basic auth is only added when both values are set
+      auto auth_string = user + ":" + pw;
+      default_headers.emplace("AUTHORIZATION", "Basic " + rgw::to_base64(auth_string));
+    }
+
+  }
+  // pick the index path: the configured override, or "/rgw-<realm name>-<low 32 bits of instance id in hex>"
+  void init_instance(const RGWRealm& realm, uint64_t instance_id) {
+    sync_instance = instance_id;
+
+    if (!override_index_path.empty()) {
+      index_path = override_index_path;
+      return;
+    }
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "-%08x", (uint32_t)(sync_instance & 0xFFFFFFFF));
+
+    index_path = "/rgw-" + realm.get_name() + buf;
+  }
+
+  string get_index_path() {
+    return index_path;
+  }
+
+  map<string, string>& get_request_headers() {
+    return default_headers;
+  }
+  // REST path of the document for (bucket, key): "<index>/<type segment>/<url-encoded id>"
+  string get_obj_path(const RGWBucketInfo& bucket_info, const rgw_obj_key& key) {
+    // from ES_V7 on the "/_doc/" segment is used, older versions use "/object/"
+    const char *doc_type = (es_info.version >= ES_V7) ? "/_doc/" : "/object/";
+    const string doc_id = bucket_info.bucket.bucket_id + ":" + key.name + ":" +
+                          (key.instance.empty() ? "null" : key.instance);
+    return index_path + doc_type + url_encode(doc_id);
+  }
+
+  bool should_handle_operation(RGWBucketInfo& bucket_info) {  // both the bucket name and its owner must be approved
+    return index_buckets.exists(bucket_info.bucket.name) &&
+           allow_owners.exists(bucket_info.owner.to_str());
+  }
+};
+
+using ElasticConfigRef = std::shared_ptr<ElasticConfig>;
+
+static const char *es_type_to_str(const ESType& t) {  // Elasticsearch mapping type name for an ESType; "<unknown>" for unmapped values
+  switch (t) {
+    case ESType::String: return "string";
+    case ESType::Text: return "text";
+    case ESType::Keyword: return "keyword";
+    case ESType::Long: return "long";
+    case ESType::Integer: return "integer";
+    case ESType::Short: return "short";
+    case ESType::Byte: return "byte";
+    case ESType::Double: return "double";
+    case ESType::Float: return "float";
+    case ESType::Half_Float: return "half_float";
+    case ESType::Scaled_Float: return "scaled_float";
+    case ESType::Date: return "date";
+    case ESType::Boolean: return "boolean";
+    case ESType::Binary: return "binary";
+    case ESType::Integer_Range: return "integer_range";
+    case ESType::Float_Range: return "float_range";
+    case ESType::Long_Range: return "long_range";
+    case ESType::Double_Range: return "double_range";  // was mistakenly mapped to "date_range"
+    case ESType::Date_Range: return "date_range";
+    case ESType::Geo_Point: return "geo_point";
+    case ESType::Ip: return "ip";
+    default: return "<unknown>";
+  }
+}
+
+struct es_type_v2 { // field descriptor used when the server version is below ES_V5 (see RGWElasticInitConfigCBCR)
+ ESType estype;
+ const char *format{nullptr};
+ std::optional<bool> analyzed;
+// implicit conversion from ESType is relied on by es_type<T>
+ es_type_v2(ESType et) : estype(et) {}
+// emits "type", optionally "format", and optionally "index" as "analyzed"/"not_analyzed"
+ void dump(Formatter *f) const {
+ const char *type_str = es_type_to_str(estype);
+ encode_json("type", type_str, f);
+ if (format) {
+ encode_json("format", format, f);
+ }
+// work on a copy so the const member is left untouched
+ auto is_analyzed = analyzed;
+// String fields default to not analyzed when the caller did not say otherwise
+ if (estype == ESType::String &&
+ !is_analyzed) {
+ is_analyzed = false;
+ }
+// non-String fields emit "index" only when analyzed was set explicitly
+ if (is_analyzed) {
+ encode_json("index", (is_analyzed.value() ? "analyzed" : "not_analyzed"), f);
+ }
+ }
+};
+
+struct es_type_v5 { // field descriptor used when the server version is ES_V5 or newer (see RGWElasticInitConfigCBCR)
+ ESType estype;
+ const char *format{nullptr};
+ std::optional<bool> analyzed;
+ std::optional<bool> index; // no setter for this exists in es_type<T>, so it stays unset unless assigned directly
+// implicit conversion from ESType is relied on by es_type<T>
+ es_type_v5(ESType et) : estype(et) {}
+// emits "type" (String is rewritten to text/keyword), optionally "format" and optionally "index"
+ void dump(Formatter *f) const {
+ ESType new_estype;
+ if (estype != ESType::String) {
+ new_estype = estype;
+ } else {
+ bool is_analyzed = analyzed.value_or(false); // unset means not analyzed, i.e. Keyword
+ new_estype = (is_analyzed ? ESType::Text : ESType::Keyword);
+ /* index = true; ... Not setting index=true, because that's the default,
+ * and dumping a boolean value *might* be a problem when backporting this
+ * because value might get quoted
+ */
+ }
+// from here on the output mirrors es_type_v2::dump, except that "index" is a boolean
+ const char *type_str = es_type_to_str(new_estype);
+ encode_json("type", type_str, f);
+ if (format) {
+ encode_json("format", format, f);
+ }
+ if (index) {
+ encode_json("index", index.value(), f);
+ }
+ }
+};
+
+template <class T>
+struct es_type : public T { // adds chainable setters on top of a version-specific descriptor T (es_type_v2 / es_type_v5)
+ es_type(T t) : T(t) {}
+ es_type& set_format(const char *f) { // stores the pointer as-is; returns *this for chaining
+ T::format = f;
+ return *this;
+ }
+// marks the field as analyzed / not analyzed; returns *this for chaining
+ es_type& set_analyzed(bool a) {
+ T::analyzed = a;
+ return *this;
+ }
+};
+
+template <class T>
+struct es_index_mappings { // emits the index "mappings" object; T (es_type_v2 / es_type_v5) decides how each field type is rendered
+ ESVersion es_version;
+ ESType string_type {ESType::String};
+// stores the target Elasticsearch version consulted by dump()
+ es_index_mappings(ESVersion esv):es_version(esv) {
+ }
+// wraps an ESType in the version-specific field descriptor
+ es_type<T> est(ESType t) const {
+ return es_type<T>(t);
+ }
+// emits one "nested" section holding a string "name" and a typed "value"
+ void dump_custom(const char *section, ESType type, const char *format, Formatter *f) const {
+ f->open_object_section(section);
+ ::encode_json("type", "nested", f);
+ f->open_object_section("properties");
+ encode_json("name", est(string_type), f);
+ encode_json("value", est(type).set_format(format), f);
+ f->close_section(); // entry
+ f->close_section(); // custom-string
+ }
+// emits the full field mapping; it is wrapped in an "object" section only when es_version <= ES_V7
+ void dump(Formatter *f) const {
+ if (es_version <= ES_V7) // NOTE(review): this includes 7.0 itself, while ElasticConfig::get_obj_path switches to "/_doc/" at >= ES_V7 — confirm the boundary is intended
+ f->open_object_section("object");
+ f->open_object_section("properties");
+ encode_json("bucket", est(string_type), f);
+ encode_json("name", est(string_type), f);
+ encode_json("instance", est(string_type), f);
+ encode_json("versioned_epoch", est(ESType::Long), f);
+ f->open_object_section("meta");
+ f->open_object_section("properties");
+ encode_json("cache_control", est(string_type), f);
+ encode_json("content_disposition", est(string_type), f);
+ encode_json("content_encoding", est(string_type), f);
+ encode_json("content_language", est(string_type), f);
+ encode_json("content_type", est(string_type), f);
+ encode_json("storage_class", est(string_type), f);
+ encode_json("etag", est(string_type), f);
+ encode_json("expires", est(string_type), f);
+ encode_json("mtime", est(ESType::Date)
+ .set_format("strict_date_optional_time||epoch_millis"), f);
+ encode_json("size", est(ESType::Long), f);
+ dump_custom("custom-string", string_type, nullptr, f);
+ dump_custom("custom-int", ESType::Long, nullptr, f);
+ dump_custom("custom-date", ESType::Date, "strict_date_optional_time||epoch_millis", f);
+ f->close_section(); // properties
+ f->close_section(); // meta
+ f->close_section(); // properties
+// must mirror the condition that opened the "object" section above
+ if (es_version <= ES_V7)
+ f->close_section(); // object
+ }
+};
+
+struct es_index_settings { // replica and shard counts emitted under "settings" by es_index_config
+ uint32_t num_replicas;
+ uint32_t num_shards;
+// argument order is replicas first, then shards
+ es_index_settings(uint32_t _replicas, uint32_t _shards) : num_replicas(_replicas), num_shards(_shards) {}
+// emits number_of_replicas and number_of_shards
+ void dump(Formatter *f) const {
+ encode_json("number_of_replicas", num_replicas, f);
+ encode_json("number_of_shards", num_shards, f);
+ }
+};
+
+struct es_index_config_base { // type-erased interface so either es_index_config<T> can be held through one pointer
+ virtual ~es_index_config_base() {}
+ virtual void dump(Formatter *f) const = 0; // implemented by es_index_config<T>
+};
+
+template <class T>
+struct es_index_config : public es_index_config_base { // index-creation body: "settings" plus version-specific "mappings"
+ es_index_settings settings;
+ es_index_mappings<T> mappings;
+// copies the settings and forwards the server version to the mappings
+ es_index_config(es_index_settings& _s, ESVersion esv) : settings(_s), mappings(esv) {
+ }
+// overrides es_index_config_base::dump (no `override` keyword in the original)
+ void dump(Formatter *f) const {
+ encode_json("settings", settings, f);
+ encode_json("mappings", mappings, f);
+ }
+};
+
+static bool is_sys_attr(const std::string& attr_name){ // true when attr_name equals one of the RGW attribute names listed below
+ static constexpr std::initializer_list<const char*> rgw_sys_attrs =
+ {RGW_ATTR_PG_VER,
+ RGW_ATTR_SOURCE_ZONE,
+ RGW_ATTR_ID_TAG,
+ RGW_ATTR_TEMPURL_KEY1,
+ RGW_ATTR_TEMPURL_KEY2,
+ RGW_ATTR_UNIX1,
+ RGW_ATTR_UNIX_KEY1
+ };
+// linear search; each element is compared against the std::string argument, i.e. by content
+ return std::find(rgw_sys_attrs.begin(), rgw_sys_attrs.end(), attr_name) != rgw_sys_attrs.end();
+}
+
+static size_t attr_len(const bufferlist& val) // length of val without a single trailing NUL byte, if one is present
+{
+ size_t len = val.length();
+ if (len && val[len - 1] == '\0') { // the len check guards the index for an empty buffer
+ --len;
+ }
+// at most one terminator is dropped
+ return len;
+}
+
+struct es_obj_metadata {  // JSON document describing one object, as sent to Elasticsearch by RGWElasticHandleRemoteObjCBCR
+  CephContext *cct;
+  ElasticConfigRef es_conf;
+  RGWBucketInfo bucket_info;
+  rgw_obj_key key;
+  ceph::real_time mtime;
+  uint64_t size;
+  map<string, bufferlist> attrs;
+  uint64_t versioned_epoch;
+
+  // NOTE: _attrs is moved from, so the caller's map is left in a moved-from state
+  es_obj_metadata(CephContext *_cct, ElasticConfigRef _es_conf, const RGWBucketInfo& _bucket_info,
+                  const rgw_obj_key& _key, ceph::real_time& _mtime, uint64_t _size,
+                  map<string, bufferlist>& _attrs, uint64_t _versioned_epoch) : cct(_cct), es_conf(_es_conf), bucket_info(_bucket_info), key(_key),
+                                                     mtime(_mtime), size(_size), attrs(std::move(_attrs)), versioned_epoch(_versioned_epoch) {}
+
+  // Emits bucket/name/instance/versioned_epoch/owner/permissions, a "meta" section
+  // (size, mtime, plain attrs and typed custom metadata) and, when present, "tagging".
+  void dump(Formatter *f) const {
+    map<string, string> out_attrs;
+    map<string, string> custom_meta;
+    RGWAccessControlPolicy policy;
+    set<string> permissions;
+    RGWObjTags obj_tags;
+
+    // Each entry is copied on purpose: dump() is const but the code below needs a
+    // non-const bufferlist (presumably because bufferlist::c_str() is non-const).
+    for (auto attr : attrs) {
+      const string& attr_name = attr.first;
+      bufferlist& val = attr.second;
+
+      if (!boost::algorithm::starts_with(attr_name, RGW_ATTR_PREFIX)) {
+        continue;
+      }
+
+      if (boost::algorithm::starts_with(attr_name, RGW_ATTR_META_PREFIX)) {
+        custom_meta.emplace(attr_name.substr(sizeof(RGW_ATTR_META_PREFIX) - 1),
+                            string(val.c_str(), attr_len(val)));
+        continue;
+      }
+
+      if (boost::algorithm::starts_with(attr_name, RGW_ATTR_CRYPT_PREFIX)) {
+        continue;
+      }
+
+      if (boost::algorithm::starts_with(attr_name, RGW_ATTR_OLH_PREFIX)) {
+        // skip versioned object olh info
+        continue;
+      }
+
+      if (attr_name == RGW_ATTR_ACL) {
+        try {
+          auto acl_bl = val.cbegin();
+          decode(policy, acl_bl);
+        } catch (buffer::error& err) {
+          ldout(cct, 0) << "ERROR: failed to decode acl for " << bucket_info.bucket << "/" << key << dendl;
+          continue;
+        }
+
+        const RGWAccessControlList& acl = policy.get_acl();
+
+        // the owner plus every canonical user holding READ may see the document
+        permissions.insert(policy.get_owner().get_id().to_str());
+        for (const auto& acliter : acl.get_grant_map()) {
+          const ACLGrant& grant = acliter.second;
+          if (grant.get_type().get_type() == ACL_TYPE_CANON_USER &&
+              ((uint32_t)grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) {
+            rgw_user user;
+            if (grant.get_id(user)) {
+              permissions.insert(user.to_str());
+            }
+          }
+        }
+      } else if (attr_name == RGW_ATTR_TAGS) {
+        try {
+          auto tags_bl = val.cbegin();
+          decode(obj_tags, tags_bl);
+        } catch (buffer::error& err) {
+          ldout(cct,0) << "ERROR: failed to decode obj tags for "
+                       << bucket_info.bucket << "/" << key << dendl;
+          continue;
+        }
+      } else if (attr_name == RGW_ATTR_COMPRESSION) {
+        RGWCompressionInfo cs_info;
+        try {
+          auto vals_bl = val.cbegin();
+          decode(cs_info, vals_bl);
+        } catch (buffer::error& err) {
+          ldout(cct,0) << "ERROR: failed to decode compression attr for "
+                       << bucket_info.bucket << "/" << key << dendl;
+          continue;
+        }
+        out_attrs.emplace("compression",std::move(cs_info.compression_type));
+      } else {
+        if (!is_sys_attr(attr_name)) {
+          out_attrs.emplace(attr_name.substr(sizeof(RGW_ATTR_PREFIX) - 1),
+                            std::string(val.c_str(), attr_len(val)));
+        }
+      }
+    }
+    ::encode_json("bucket", bucket_info.bucket.name, f);
+    ::encode_json("name", key.name, f);
+    string instance = key.instance;
+    if (instance.empty())
+      instance = "null";
+    ::encode_json("instance", instance, f);
+    ::encode_json("versioned_epoch", versioned_epoch, f);
+    ::encode_json("owner", policy.get_owner(), f);
+    ::encode_json("permissions", permissions, f);
+    f->open_object_section("meta");
+    ::encode_json("size", size, f);
+
+    string mtime_str;
+    rgw_to_iso8601(mtime, &mtime_str);
+    ::encode_json("mtime", mtime_str, f);
+    for (const auto& i : out_attrs) {
+      ::encode_json(i.first.c_str(), i.second, f);
+    }
+    map<string, string> custom_str;
+    map<string, string> custom_int;
+    map<string, string> custom_date;
+
+    // sort custom metadata into string/int/date buckets according to the bucket's mdsearch config
+    for (const auto& i : custom_meta) {
+      auto config = bucket_info.mdsearch_config.find(i.first);
+      if (config == bucket_info.mdsearch_config.end()) {
+        if (!es_conf->explicit_custom_meta) {
+          /* default custom meta is of type string */
+          custom_str[i.first] = i.second;
+        } else {
+          ldout(cct, 20) << "custom meta entry key=" << i.first << " not found in bucket mdsearch config: " << bucket_info.mdsearch_config << dendl;
+        }
+        continue;
+      }
+      switch (config->second) {
+        case ESEntityTypeMap::ES_ENTITY_DATE:
+          custom_date[i.first] = i.second;
+          break;
+        case ESEntityTypeMap::ES_ENTITY_INT:
+          custom_int[i.first] = i.second;
+          break;
+        default:
+          custom_str[i.first] = i.second;
+      }
+    }
+
+    if (!custom_str.empty()) {
+      f->open_array_section("custom-string");
+      for (const auto& i : custom_str) {
+        f->open_object_section("entity");
+        ::encode_json("name", i.first.c_str(), f);
+        ::encode_json("value", i.second, f);
+        f->close_section();
+      }
+      f->close_section();
+    }
+    if (!custom_int.empty()) {
+      f->open_array_section("custom-int");
+      for (const auto& i : custom_int) {
+        f->open_object_section("entity");
+        ::encode_json("name", i.first.c_str(), f);
+        ::encode_json("value", i.second, f);
+        f->close_section();
+      }
+      f->close_section();
+    }
+    if (!custom_date.empty()) {
+      f->open_array_section("custom-date");
+      for (const auto& i : custom_date) {
+        /*
+         * try to explicitly parse date field, otherwise elasticsearch could reject the whole doc,
+         * which will end up with failed sync
+         */
+        real_time t;
+        int r = parse_time(i.second.c_str(), &t);
+        if (r < 0) {
+          ldout(cct, 20) << __func__ << "(): failed to parse time (" << i.second << "), skipping encoding of custom date attribute" << dendl;
+          continue;
+        }
+
+        string time_str;
+        rgw_to_iso8601(t, &time_str);
+
+        f->open_object_section("entity");
+        ::encode_json("name", i.first.c_str(), f);
+        ::encode_json("value", time_str.c_str(), f);
+        f->close_section();
+      }
+      f->close_section();
+    }
+    f->close_section(); // meta
+    const auto& m = obj_tags.get_tags();
+    if (m.size() > 0){
+      f->open_array_section("tagging");
+      for (const auto &it : m) {
+        f->open_object_section("tag");
+        ::encode_json("key", it.first, f);
+        ::encode_json("value",it.second, f);
+        f->close_section();
+      }
+      f->close_section(); // tagging
+    }
+  }
+};
+
+class RGWElasticInitConfigCBCR : public RGWCoroutine { // coroutine that reads the server info from "/" and then PUTs the index settings and mappings
+ RGWDataSyncEnv *sync_env;
+ ElasticConfigRef conf;
+ ESInfo es_info; // filled by the GET below; separate from conf->es_info
+// shape of the error body decoded from a failed index PUT
+ struct _err_response {
+ struct err_reason {
+ vector<err_reason> root_cause;
+ string type;
+ string reason;
+ string index;
+// decodes root_cause, type, reason and index
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("root_cause", root_cause, obj);
+ JSONDecoder::decode_json("type", type, obj);
+ JSONDecoder::decode_json("reason", reason, obj);
+ JSONDecoder::decode_json("index", index, obj);
+ }
+ } error;
+// the payload of interest sits under the top-level "error" key
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("error", error, obj);
+ }
+ } err_response;
+// err_response is read only after the PUT reports a negative retcode
+public:
+ RGWElasticInitConfigCBCR(RGWDataSyncEnv *_sync_env,
+ ElasticConfigRef _conf) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ conf(_conf) {}
+ int operate() override { // step 1: GET "/" into es_info; step 2: PUT the index config; "already exists" errors are tolerated
+ reenter(this) {
+ ldout(sync_env->cct, 0) << ": init elasticsearch config zone=" << sync_env->source_zone << dendl;
+ yield call(new RGWReadRESTResourceCR<ESInfo> (sync_env->cct,
+ conf->conn.get(),
+ sync_env->http_manager,
+ "/", nullptr /*params*/,
+ &(conf->default_headers),
+ &es_info));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+// the version just fetched selects the v5-style or v2-style mapping below
+ yield {
+ string path = conf->get_index_path();
+ ldout(sync_env->cct, 5) << "got elastic version=" << es_info.get_version_str() << dendl;
+// replicas first, then shards (constructor argument order)
+ es_index_settings settings(conf->num_replicas, conf->num_shards);
+// held through the base type so either template instantiation can be passed to the PUT
+ std::unique_ptr<es_index_config_base> index_conf;
+// ES_V5 and newer use es_type_v5, older servers use es_type_v2
+ if (es_info.version >= ES_V5) {
+ ldout(sync_env->cct, 0) << "elasticsearch: index mapping: version >= 5" << dendl;
+ index_conf.reset(new es_index_config<es_type_v5>(settings, es_info.version));
+ } else {
+ ldout(sync_env->cct, 0) << "elasticsearch: index mapping: version < 5" << dendl;
+ index_conf.reset(new es_index_config<es_type_v2>(settings, es_info.version));
+ }
+ call(new RGWPutRESTResourceCR<es_index_config_base, int, _err_response> (sync_env->cct,
+ conf->conn.get(),
+ sync_env->http_manager,
+ path, nullptr /*params*/,
+ &(conf->default_headers),
+ *index_conf, nullptr, &err_response));
+ }
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "elasticsearch: failed to initialize index: response.type=" << err_response.error.type << " response.reason=" << err_response.error.reason << dendl;
+// only the two "already exists" error types are treated as success
+ if (err_response.error.type != "index_already_exists_exception" &&
+ err_response.error.type != "resource_already_exists_exception") {
+ return set_cr_error(retcode);
+ }
+// fall through to set_cr_done()
+ ldout(sync_env->cct, 0) << "elasticsearch: index already exists, assuming external initialization" << dendl;
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+// no further members
+};
+
+class RGWElasticHandleRemoteObjCBCR : public RGWStatRemoteObjCBCR { // stat callback that PUTs the object's metadata document to Elasticsearch
+ ElasticConfigRef conf;
+ uint64_t versioned_epoch;
+public:
+ RGWElasticHandleRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+ ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWStatRemoteObjCBCR(_sync_env, _bucket_info, _key), conf(_conf),
+ versioned_epoch(_versioned_epoch) {}
+ int operate() override { // uses size, mtime and attrs, which are not declared here (presumably inherited from RGWStatRemoteObjCBCR)
+ reenter(this) {
+ ldout(sync_env->cct, 10) << ": stat of remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key
+ << " size=" << size << " mtime=" << mtime << dendl;
+// build the document and issue the PUT in one yield block
+ yield {
+ string path = conf->get_obj_path(bucket_info, key);
+ es_obj_metadata doc(sync_env->cct, conf, bucket_info, key, mtime, size, attrs, versioned_epoch); // NOTE: the es_obj_metadata constructor moves from attrs
+// NOTE(review): doc is local to this block; presumably RGWPutRESTResourceCR copies or serializes it — confirm
+ call(new RGWPutRESTResourceCR<es_obj_metadata, int>(sync_env->cct, conf->conn.get(),
+ sync_env->http_manager,
+ path, nullptr /* params */,
+ &(conf->default_headers),
+ doc, nullptr /* result */));
+// execution resumes below once the PUT has finished
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+class RGWElasticHandleRemoteObjCR : public RGWCallStatRemoteObjCR { // supplies RGWElasticHandleRemoteObjCBCR as the stat callback
+ ElasticConfigRef conf;
+ uint64_t versioned_epoch;
+public:
+ RGWElasticHandleRemoteObjCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+ ElasticConfigRef _conf, uint64_t _versioned_epoch) : RGWCallStatRemoteObjCR(_sync_env, _bucket_info, _key),
+ conf(_conf), versioned_epoch(_versioned_epoch) {
+ }
+// nothing to release explicitly
+ ~RGWElasticHandleRemoteObjCR() override {}
+// returns a newly allocated callback; NOTE(review): ownership presumably passes to the caller — confirm in RGWCallStatRemoteObjCR
+ RGWStatRemoteObjCBCR *allocate_callback() override {
+ return new RGWElasticHandleRemoteObjCBCR(sync_env, bucket_info, key, conf, versioned_epoch);
+ }
+};
+
+class RGWElasticRemoveRemoteObjCBCR : public RGWCoroutine { // coroutine that DELETEs the object's document from Elasticsearch
+ RGWDataSyncEnv *sync_env;
+ RGWBucketInfo bucket_info; // copied, not referenced
+ rgw_obj_key key;
+ ceph::real_time mtime; // only used for logging in operate()
+ ElasticConfigRef conf;
+public:
+ RGWElasticRemoveRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key, const ceph::real_time& _mtime,
+ ElasticConfigRef _conf) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+ bucket_info(_bucket_info), key(_key),
+ mtime(_mtime), conf(_conf) {}
+ int operate() override { // issues a single DELETE on the document path and reports its retcode
+ reenter(this) {
+ ldout(sync_env->cct, 10) << ": remove remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime << dendl;
+ yield {
+ string path = conf->get_obj_path(bucket_info, key);
+// NOTE(review): unlike the GET/PUT calls in this file, conf->default_headers is not passed here — confirm whether authenticated endpoints need it
+ call(new RGWDeleteRESTResourceCR(sync_env->cct, conf->conn.get(),
+ sync_env->http_manager,
+ path, nullptr /* params */));
+ }
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+// no further members
+};
+
+class RGWElasticDataSyncModule : public RGWDataSyncModule { // data-sync handler that creates the Elasticsearch coroutines defined in this file
+ ElasticConfigRef conf; // shared with every coroutine this module creates
+public:
+ RGWElasticDataSyncModule(CephContext *cct, const JSONFormattable& config) : conf(std::make_shared<ElasticConfig>()) {
+ conf->init(cct, config);
+ }
+ ~RGWElasticDataSyncModule() override {}
+// init(): set the index path, then run a blocking GET "/" to learn the server version
+ void init(RGWDataSyncEnv *sync_env, uint64_t instance_id) override {
+ conf->init_instance(sync_env->store->svc.zone->get_realm(), instance_id);
+ // try to get elastic search version
+ RGWCoroutinesManager crs(sync_env->store->ctx(), sync_env->store->get_cr_registry());
+ RGWHTTPManager http_manager(sync_env->store->ctx(), crs.get_completion_mgr());
+ int ret = http_manager.start();
+ if (ret < 0) {
+ return; // the http manager did not start: conf->es_info is left as it was, nothing is logged
+ }
+ ret = crs.run(new RGWReadRESTResourceCR<ESInfo>(sync_env->cct,
+ conf->conn.get(),
+ &http_manager,
+ "/", nullptr,
+ &(conf->default_headers),
+ &(conf->es_info)));
+ http_manager.stop();
+ if (ret < 0) {
+ ldout(sync_env->cct, 1) << conf->id << ": fetch elastic info failed: " << ret << dendl;
+ } else {
+ ldout(sync_env->cct, 5) << conf->id << ": got elastic version=" << conf->es_info.get_version_str() << dendl;
+ }
+ }
+// init_sync(): returns the coroutine that creates the index
+ RGWCoroutine *init_sync(RGWDataSyncEnv *sync_env) override {
+ ldout(sync_env->cct, 5) << conf->id << ": init" << dendl;
+ return new RGWElasticInitConfigCBCR(sync_env, conf);
+ }
+ RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override { // returns nullptr when the bucket or its owner is not approved
+ ldout(sync_env->cct, 10) << conf->id << ": sync_object: b=" << bucket_info.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
+ if (!conf->should_handle_operation(bucket_info)) {
+ ldout(sync_env->cct, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl;
+ return nullptr;
+ }
+ return new RGWElasticHandleRemoteObjCR(sync_env, bucket_info, key, conf, versioned_epoch.value_or(0)); // an absent epoch is passed on as 0
+ }
+ RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { // returns nullptr when the bucket or its owner is not approved
+ /* versioned and versioned epoch params are useless in the elasticsearch backend case */
+ ldout(sync_env->cct, 10) << conf->id << ": rm_object: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+ if (!conf->should_handle_operation(bucket_info)) {
+ ldout(sync_env->cct, 10) << conf->id << ": skipping operation (bucket not approved)" << dendl;
+ return nullptr;
+ }
+ return new RGWElasticRemoveRemoteObjCBCR(sync_env, bucket_info, key, mtime, conf);
+ }
+ RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime,
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { // not handled: logs and returns no coroutine
+ ldout(sync_env->cct, 10) << conf->id << ": create_delete_marker: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime
+ << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+ ldout(sync_env->cct, 10) << conf->id << ": skipping operation (not handled)" << dendl;
+ return NULL;
+ }
+ RGWRESTConn *get_rest_conn() { // non-owning pointer; the connection is owned by conf
+ return conf->conn.get();
+ }
+// accessors below forward to the shared ElasticConfig
+ string get_index_path() {
+ return conf->get_index_path();
+ }
+// returns a mutable reference to the shared default header map
+ map<string, string>& get_request_headers() {
+ return conf->get_request_headers();
+ }
+};
+
+RGWElasticSyncModuleInstance::RGWElasticSyncModuleInstance(CephContext *cct, const JSONFormattable& config) // creates the owned RGWElasticDataSyncModule from `config`
+{
+ data_handler = std::unique_ptr<RGWElasticDataSyncModule>(new RGWElasticDataSyncModule(cct, config));
+}
+
+RGWDataSyncModule *RGWElasticSyncModuleInstance::get_data_handler() // non-owning pointer to the handler created in the constructor
+{
+ return data_handler.get();
+}
+
+RGWRESTConn *RGWElasticSyncModuleInstance::get_rest_conn() // forwards to the data handler
+{
+ return data_handler->get_rest_conn();
+}
+
+string RGWElasticSyncModuleInstance::get_index_path() { // forwards to the data handler; returns the path by value
+ return data_handler->get_index_path();
+}
+
+map<string, string>& RGWElasticSyncModuleInstance::get_request_headers() { // forwards to the data handler; returns a mutable reference
+ return data_handler->get_request_headers();
+}
+
+RGWRESTMgr *RGWElasticSyncModuleInstance::get_rest_filter(int dialect, RGWRESTMgr *orig) { // for the S3 dialect, replaces `orig` with a new RGWRESTMgr_MDSearch_S3
+ if (dialect != RGW_REST_S3) {
+ return orig; // other dialects keep the manager they were given
+ }
+ delete orig; // `orig` is destroyed here, so the caller must use only the returned manager
+ return new RGWRESTMgr_MDSearch_S3();
+}
+
+int RGWElasticSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) {
+  // "endpoint" is read from `config` by ElasticConfig::init via the instance constructor; no local copy is needed
+  instance->reset(new RGWElasticSyncModuleInstance(cct, config));
+  return 0;  // always succeeds
+}
+
diff --git a/src/rgw/rgw_sync_module_es.h b/src/rgw/rgw_sync_module_es.h
new file mode 100644
index 00000000..cb5c9106
--- /dev/null
+++ b/src/rgw/rgw_sync_module_es.h
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_MODULE_ES_H
+#define CEPH_RGW_SYNC_MODULE_ES_H
+
+#include "rgw_sync_module.h"
+
+enum class ESType { // field types understood by the Elasticsearch sync module
+ /* string datatypes */
+ String, /* Deprecated Since 5.X+ */
+ Text,
+ Keyword,
+// the groups below follow the section comments
+ /* Numeric Types */
+ Long, Integer, Short, Byte, Double, Float, Half_Float, Scaled_Float,
+
+ /* Date Type */
+ Date,
+
+ /* Boolean */
+ Boolean,
+
+ /* Binary; Must Be Base64 Encoded */
+ Binary,
+
+ /* Range Types */
+ Integer_Range, Float_Range, Long_Range, Double_Range, Date_Range,
+
+ /* A Few Specialized Types */
+ Geo_Point,
+ Ip
+};
+
+
+class RGWElasticSyncModule : public RGWSyncModule { // Elasticsearch sync module; instances are produced by create_instance()
+public:
+ RGWElasticSyncModule() {}
+ bool supports_data_export() override { // this module reports that it does not support data export
+ return false;
+ }
+ int create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; // defined in rgw_sync_module_es.cc
+};
+
+class RGWElasticDataSyncModule;
+class RGWRESTConn;
+
+class RGWElasticSyncModuleInstance : public RGWSyncModuleInstance { // owns the Elasticsearch data handler and exposes its connection, index path and headers
+ std::unique_ptr<RGWElasticDataSyncModule> data_handler; // only forward-declared in this header
+public:
+ RGWElasticSyncModuleInstance(CephContext *cct, const JSONFormattable& config);
+ RGWDataSyncModule *get_data_handler() override;
+ RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) override;
+ RGWRESTConn *get_rest_conn();
+ std::string get_index_path();
+ map<string, string>& get_request_headers();
+ bool supports_user_writes() override { // this instance reports that user writes are supported
+ return true;
+ }
+};
+
+#endif
diff --git a/src/rgw/rgw_sync_module_es_rest.cc b/src/rgw/rgw_sync_module_es_rest.cc
new file mode 100644
index 00000000..751d8220
--- /dev/null
+++ b/src/rgw/rgw_sync_module_es_rest.cc
@@ -0,0 +1,423 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_sync_module_es.h"
+#include "rgw_sync_module_es_rest.h"
+#include "rgw_es_query.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+struct es_index_obj_response { // one indexed object document; decoded from the "_source" member of a search hit
+ string bucket;
+ rgw_obj_key key; // only key.name and key.instance are filled by decode_json()
+ uint64_t versioned_epoch{0};
+ ACLOwner owner;
+ set<string> read_permissions; // decoded from the "permissions" field
+
+ struct { // contents of the document's "meta" object
+ uint64_t size{0};
+ ceph::real_time mtime;
+ string etag;
+ string content_type;
+ string storage_class;
+ map<string, string> custom_str; // custom entries keyed by name, one map per value kind
+ map<string, int64_t> custom_int;
+ map<string, string> custom_date; // date values are kept as the strings found in the document
+
+ template <class T>
+ struct _custom_entry { // helper for decoding one {"name": ..., "value": ...} element
+ string name;
+ T value;
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("value", value, obj);
+ }
+ };
+
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("size", size, obj);
+ string mtime_str;
+ JSONDecoder::decode_json("mtime", mtime_str, obj);
+ parse_time(mtime_str.c_str(), &mtime); // return value is not checked here
+ JSONDecoder::decode_json("etag", etag, obj);
+ JSONDecoder::decode_json("content_type", content_type, obj);
+ JSONDecoder::decode_json("storage_class", storage_class, obj);
+ list<_custom_entry<string> > str_entries; // each custom list is decoded, then folded into its map
+ JSONDecoder::decode_json("custom-string", str_entries, obj);
+ for (auto& e : str_entries) {
+ custom_str[e.name] = e.value;
+ }
+ list<_custom_entry<int64_t> > int_entries;
+ JSONDecoder::decode_json("custom-int", int_entries, obj);
+ for (auto& e : int_entries) {
+ custom_int[e.name] = e.value;
+ }
+ list<_custom_entry<string> > date_entries;
+ JSONDecoder::decode_json("custom-date", date_entries, obj);
+ for (auto& e : date_entries) {
+ custom_date[e.name] = e.value;
+ }
+ }
+ } meta;
+
+ void decode_json(JSONObj *obj) { // fills every member from the document's top-level fields
+ JSONDecoder::decode_json("bucket", bucket, obj);
+ JSONDecoder::decode_json("name", key.name, obj);
+ JSONDecoder::decode_json("instance", key.instance, obj);
+ JSONDecoder::decode_json("versioned_epoch", versioned_epoch, obj);
+ JSONDecoder::decode_json("permissions", read_permissions, obj);
+ JSONDecoder::decode_json("owner", owner, obj);
+ JSONDecoder::decode_json("meta", meta, obj);
+ }
+};
+
+struct es_search_response { // decoded body of a "_search" reply; scalar members start zeroed so an undecoded object is well defined
+ uint32_t took{0};
+ bool timed_out{false};
+ struct { // the reply's "_shards" object
+ uint32_t total{0};
+ uint32_t successful{0};
+ uint32_t failed{0};
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("total", total, obj);
+ JSONDecoder::decode_json("successful", successful, obj);
+ JSONDecoder::decode_json("failed", failed, obj);
+ }
+ } shards;
+ struct obj_hit { // one element of the inner "hits" array
+ string index;
+ string type;
+ string id;
+ // double score
+ es_index_obj_response source; // the indexed document itself ("_source")
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("_index", index, obj);
+ JSONDecoder::decode_json("_type", type, obj);
+ JSONDecoder::decode_json("_id", id, obj);
+ JSONDecoder::decode_json("_source", source, obj);
+ }
+ };
+ struct { // the reply's outer "hits" object
+ uint32_t total{0};
+ // double max_score;
+ list<obj_hit> hits;
+ void decode_json(JSONObj *obj) {
+ JSONDecoder::decode_json("total", total, obj);
+ // JSONDecoder::decode_json("max_score", max_score, obj);
+ JSONDecoder::decode_json("hits", hits, obj);
+ }
+ } hits;
+ void decode_json(JSONObj *obj) { // fills all members from the top-level reply object
+ JSONDecoder::decode_json("took", took, obj);
+ JSONDecoder::decode_json("timed_out", timed_out, obj);
+ JSONDecoder::decode_json("_shards", shards, obj);
+ JSONDecoder::decode_json("hits", hits, obj);
+ }
+};
+
+class RGWMetadataSearchOp : public RGWOp { // base op for metadata search; subclasses supply get_params() and the response format
+ RGWSyncModuleInstanceRef sync_module_ref; // keeps the sync module instance referenced while es_module points into it
+ RGWElasticSyncModuleInstance *es_module; // same object as sync_module_ref, cast to the Elastic instance type
+protected:
+ string expression; // query expression to compile
+ string custom_prefix; // handed to the query compiler; set by the subclass
+#define MAX_KEYS_DEFAULT 100
+ uint64_t max_keys{MAX_KEYS_DEFAULT}; // sent as the "size" request parameter
+ string marker_str; // sent verbatim as the "from" request parameter when marker > 0
+ uint64_t marker{0};
+ string next_marker;
+ bool is_truncated{false};
+ string err; // receives the query compiler's error text
+
+ es_search_response response; // decoded search reply, filled by execute()
+
+public:
+ RGWMetadataSearchOp(const RGWSyncModuleInstanceRef& sync_module) : sync_module_ref(sync_module) {
+ es_module = static_cast<RGWElasticSyncModuleInstance *>(sync_module_ref.get()); // unchecked cast: assumes the instance really is an Elastic one -- TODO confirm at call sites
+ }
+
+ int verify_permission() override { // always succeeds here; execute() adds a "permissions" condition for non-system users
+ return 0;
+ }
+ virtual int get_params() = 0; // called first by execute(); a negative return aborts the op
+ void pre_exec() override;
+ void execute() override;
+
+ const char* name() const override { return "metadata_search"; }
+ virtual RGWOpType get_type() override { return RGW_OP_METADATA_SEARCH; }
+ virtual uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+};
+
+void RGWMetadataSearchOp::pre_exec() // delegates entirely to rgw_bucket_object_pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+void RGWMetadataSearchOp::execute() // compiles the query, sends it to <index path>/_search and decodes the reply into `response`; failures are reported through op_ret
+{
+ op_ret = get_params();
+ if (op_ret < 0)
+ return;
+
+ list<pair<string, string> > conds; // extra conditions handed to the query compiler
+
+ if (!s->user->system) {
+ conds.push_back(make_pair("permissions", s->user->user_id.to_str())); // non-system users get a condition on their own user id
+ }
+
+ if (!s->bucket_name.empty()) {
+ conds.push_back(make_pair("bucket", s->bucket_name)); // bucket-scoped request: add a condition on that bucket
+ }
+
+ ESQueryCompiler es_query(expression, &conds, custom_prefix);
+
+ static map<string, string, ltstr_nocase> aliases = { // user-facing field names -> indexed field names
+ { "bucket", "bucket" }, /* forces lowercase */
+ { "name", "name" },
+ { "key", "name" },
+ { "instance", "instance" },
+ { "etag", "meta.etag" },
+ { "size", "meta.size" },
+ { "mtime", "meta.mtime" },
+ { "lastmodified", "meta.mtime" },
+ { "last_modified", "meta.mtime" },
+ { "contenttype", "meta.content_type" },
+ { "content_type", "meta.content_type" },
+ { "storageclass", "meta.storage_class" },
+ { "storage_class", "meta.storage_class" },
+ };
+ es_query.set_field_aliases(&aliases);
+
+ static map<string, ESEntityTypeMap::EntityType> generic_map = { {"bucket", ESEntityTypeMap::ES_ENTITY_STR}, // entity type of each built-in field
+ {"name", ESEntityTypeMap::ES_ENTITY_STR},
+ {"instance", ESEntityTypeMap::ES_ENTITY_STR},
+ {"permissions", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.etag", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.content_type", ESEntityTypeMap::ES_ENTITY_STR},
+ {"meta.mtime", ESEntityTypeMap::ES_ENTITY_DATE},
+ {"meta.size", ESEntityTypeMap::ES_ENTITY_INT},
+ {"meta.storage_class", ESEntityTypeMap::ES_ENTITY_STR} };
+ ESEntityTypeMap gm(generic_map);
+ es_query.set_generic_type_map(&gm);
+
+ static set<string> restricted_fields = { {"permissions"} }; // registered as restricted with the compiler
+ es_query.set_restricted_fields(&restricted_fields);
+
+ map<string, ESEntityTypeMap::EntityType> custom_map; // per-bucket custom fields taken from the bucket's mdsearch_config
+ for (auto& i : s->bucket_info.mdsearch_config) {
+ custom_map[i.first] = (ESEntityTypeMap::EntityType)i.second;
+ }
+
+ ESEntityTypeMap em(custom_map);
+ es_query.set_custom_type_map(&em);
+
+ bool valid = es_query.compile(&err); // err is later copied into the response error message by send_response()
+ if (!valid) {
+ ldout(s->cct, 10) << "invalid query, failed generating request json" << dendl;
+ op_ret = -EINVAL;
+ return;
+ }
+
+ JSONFormatter f;
+ encode_json("root", es_query, &f); // the compiled query becomes the JSON request body
+
+ RGWRESTConn *conn = es_module->get_rest_conn();
+
+ bufferlist in;
+ bufferlist out;
+
+ stringstream ss;
+
+ f.flush(ss);
+ in.append(ss.str());
+
+ string resource = es_module->get_index_path() + "/_search";
+ param_vec_t params;
+ static constexpr int BUFSIZE = 32;
+ char buf[BUFSIZE];
+ snprintf(buf, sizeof(buf), "%lld", (long long)max_keys);
+ params.push_back(param_pair_t("size", buf)); // page size
+ if (marker > 0) {
+ params.push_back(param_pair_t("from", marker_str.c_str())); // offset; sends the original marker text rather than a re-formatted number
+ }
+ ldout(s->cct, 20) << "sending request to elasticsearch, payload=" << string(in.c_str(), in.length()) << dendl;
+ auto& extra_headers = es_module->get_request_headers();
+ op_ret = conn->get_resource(resource, &params, &extra_headers, out, &in);
+ if (op_ret < 0) {
+ ldout(s->cct, 0) << "ERROR: failed to fetch resource (r=" << resource << ", ret=" << op_ret << ")" << dendl;
+ return;
+ }
+
+ ldout(s->cct, 20) << "response: " << string(out.c_str(), out.length()) << dendl;
+
+ JSONParser jparser;
+ if (!jparser.parse(out.c_str(), out.length())) {
+ ldout(s->cct, 0) << "ERROR: failed to parse elasticsearch response" << dendl;
+ op_ret = -EINVAL;
+ return;
+ }
+
+ try {
+ decode_json_obj(response, &jparser);
+ } catch (JSONDecoder::err& e) { // decode errors are reported as -EINVAL as well
+ ldout(s->cct, 0) << "ERROR: failed to decode JSON input: " << e.message << dendl;
+ op_ret = -EINVAL;
+ return;
+ }
+
+}
+
+class RGWMetadataSearch_ObjStore_S3 : public RGWMetadataSearchOp { // S3 flavour of the metadata-search op: reads query args and writes the SearchMetadataResponse
+public:
+ explicit RGWMetadataSearch_ObjStore_S3(const RGWSyncModuleInstanceRef& _sync_module) : RGWMetadataSearchOp(_sync_module) {
+ custom_prefix = "x-amz-meta-";
+ }
+
+ int get_params() override { // reads "query", "max-keys" and "marker"; returns -EINVAL for non-numeric or negative numbers, 0 otherwise
+ expression = s->info.args.get("query");
+ bool exists;
+ string max_keys_str = s->info.args.get("max-keys", &exists);
+#define MAX_KEYS_MAX 10000
+ if (exists) {
+ string err;
+ /* parse as signed first: a negative value stored straight into the unsigned max_keys would wrap and be clamped */
+ const long long requested = strict_strtoll(max_keys_str.c_str(), 10, &err);
+ if (!err.empty() || requested < 0) {
+ return -EINVAL;
+ }
+ max_keys = static_cast<uint64_t>(requested > MAX_KEYS_MAX ? MAX_KEYS_MAX : requested);
+ }
+ marker_str = s->info.args.get("marker", &exists);
+ if (exists) {
+ string err;
+ const long long offset = strict_strtoll(marker_str.c_str(), 10, &err);
+ if (!err.empty() || offset < 0) { /* a negative offset would wrap and corrupt NextMarker */
+ return -EINVAL;
+ }
+ marker = static_cast<uint64_t>(offset);
+ }
+ uint64_t nm = marker + max_keys; // the next page starts right after this one
+ static constexpr int BUFSIZE = 32;
+ char buf[BUFSIZE];
+ snprintf(buf, sizeof(buf), "%lld", (long long)nm);
+ next_marker = buf;
+ return 0;
+ }
+ void send_response() override { // writes headers, then (on success) one "Contents" section per hit
+ if (op_ret) {
+ s->err.message = err; // surfaces the query compiler's error text, if any
+ set_req_state_err(s, op_ret);
+ }
+ dump_errno(s);
+ end_header(s, this, "application/xml");
+
+ if (op_ret < 0) {
+ return;
+ }
+
+ is_truncated = (response.hits.hits.size() >= max_keys); // a full page is reported as truncated
+
+ s->formatter->open_object_section("SearchMetadataResponse");
+ s->formatter->dump_string("Marker", marker_str);
+ s->formatter->dump_string("IsTruncated", (is_truncated ? "true" : "false"));
+ if (is_truncated) {
+ s->formatter->dump_string("NextMarker", next_marker);
+ }
+ if (s->format == RGW_FORMAT_JSON) {
+ s->formatter->open_array_section("Objects"); // the enclosing "Objects" array is only opened for JSON output
+ }
+ for (auto& i : response.hits.hits) {
+ s->formatter->open_object_section("Contents");
+ es_index_obj_response& e = i.source;
+ s->formatter->dump_string("Bucket", e.bucket);
+ s->formatter->dump_string("Key", e.key.name);
+ string instance = (!e.key.instance.empty() ? e.key.instance : "null"); // an empty instance is reported as "null"
+ s->formatter->dump_string("Instance", instance.c_str());
+ s->formatter->dump_int("VersionedEpoch", e.versioned_epoch);
+ dump_time(s, "LastModified", &e.meta.mtime);
+ s->formatter->dump_int("Size", e.meta.size);
+ s->formatter->dump_format("ETag", "\"%s\"", e.meta.etag.c_str());
+ s->formatter->dump_string("ContentType", e.meta.content_type.c_str());
+ s->formatter->dump_string("StorageClass", e.meta.storage_class.c_str());
+ dump_owner(s, e.owner.get_id(), e.owner.get_display_name());
+ s->formatter->open_array_section("CustomMetadata"); // string, int and date entries share one array
+ for (auto& m : e.meta.custom_str) {
+ s->formatter->open_object_section("Entry");
+ s->formatter->dump_string("Name", m.first.c_str());
+ s->formatter->dump_string("Value", m.second);
+ s->formatter->close_section();
+ }
+ for (auto& m : e.meta.custom_int) {
+ s->formatter->open_object_section("Entry");
+ s->formatter->dump_string("Name", m.first.c_str());
+ s->formatter->dump_int("Value", m.second);
+ s->formatter->close_section();
+ }
+ for (auto& m : e.meta.custom_date) {
+ s->formatter->open_object_section("Entry");
+ s->formatter->dump_string("Name", m.first.c_str());
+ s->formatter->dump_string("Value", m.second);
+ s->formatter->close_section();
+ }
+ s->formatter->close_section();
+ rgw_flush_formatter(s, s->formatter); // output is flushed once per hit
+ s->formatter->close_section();
+ }
+ if (s->format == RGW_FORMAT_JSON) {
+ s->formatter->close_section();
+ }
+ s->formatter->close_section();
+ rgw_flush_formatter_and_reset(s, s->formatter);
+ }
+};
+
+class RGWHandler_REST_MDSearch_S3 : public RGWHandler_REST_S3 { // S3 handler that only serves metadata-search GET requests
+protected:
+ RGWOp *op_get() override { // "query" arg -> search op; bucket in URL plus "mdsearch" arg -> bucket mdsearch config op; otherwise no op
+ if (s->info.args.exists("query")) {
+ return new RGWMetadataSearch_ObjStore_S3(store->get_sync_module());
+ }
+ if (!s->init_state.url_bucket.empty() &&
+ s->info.args.exists("mdsearch")) {
+ return new RGWGetBucketMetaSearch_ObjStore_S3;
+ }
+ return nullptr;
+ }
+ RGWOp *op_head() override { // HEAD yields no op
+ return nullptr;
+ }
+ RGWOp *op_post() override { // POST yields no op
+ return nullptr;
+ }
+public:
+ explicit RGWHandler_REST_MDSearch_S3(const rgw::auth::StrategyRegistry& auth_registry) : RGWHandler_REST_S3(auth_registry) {}
+ virtual ~RGWHandler_REST_MDSearch_S3() {}
+};
+
+
+RGWHandler_REST* RGWRESTMgr_MDSearch_S3::get_handler(struct req_state* const s,
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string& frontend_prefix)
+{
+ /*
+  * Produce the metadata-search REST handler for this request. The S3 request
+  * state is initialised from the header first; nullptr is returned when that
+  * fails or when the request names an object. frontend_prefix is not used.
+  */
+ const int init_rc = RGWHandler_REST_S3::init_from_header(s, RGW_FORMAT_XML, true);
+ if (init_rc < 0 || !s->object.empty()) {
+ return nullptr;
+ }
+
+ RGWHandler_REST* const mdsearch_handler =
+ new RGWHandler_REST_MDSearch_S3(auth_registry);
+
+ ldout(s->cct, 20) << __func__ << " handler="
+ << typeid(*mdsearch_handler).name() << dendl;
+ return mdsearch_handler;
+}
+
diff --git a/src/rgw/rgw_sync_module_es_rest.h b/src/rgw/rgw_sync_module_es_rest.h
new file mode 100644
index 00000000..b31b8e2c
--- /dev/null
+++ b/src/rgw/rgw_sync_module_es_rest.h
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_MODULE_ES_REST_H
+#define CEPH_RGW_SYNC_MODULE_ES_REST_H
+
+#include "rgw_rest.h"
+
+class RGWElasticSyncModuleInstance;
+
+class RGWRESTMgr_MDSearch_S3 : public RGWRESTMgr { // REST manager that hands out the S3 metadata-search handler
+public:
+ explicit RGWRESTMgr_MDSearch_S3() {}
+
+ RGWHandler_REST *get_handler(struct req_state* s, // returns a newly allocated handler, or nullptr when the request is not served here
+ const rgw::auth::StrategyRegistry& auth_registry,
+ const std::string& frontend_prefix) override;
+};
+
+#endif
diff --git a/src/rgw/rgw_sync_module_log.cc b/src/rgw/rgw_sync_module_log.cc
new file mode 100644
index 00000000..2b893aad
--- /dev/null
+++ b/src/rgw/rgw_sync_module_log.cc
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+#include "rgw_cr_rados.h"
+#include "rgw_sync_module.h"
+#include "rgw_data_sync.h"
+#include "rgw_sync_module_log.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+class RGWLogStatRemoteObjCBCR : public RGWStatRemoteObjCBCR { // stat callback that only logs what was learned about the remote object
+public:
+ RGWLogStatRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key) : RGWStatRemoteObjCBCR(_sync_env, _bucket_info, _key) {}
+ int operate() override { // logs source zone, bucket, key, size, mtime and attrs at level 0, then completes
+ ldout(sync_env->cct, 0) << "SYNC_LOG: stat of remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket_info.bucket << " k=" << key << " size=" << size << " mtime=" << mtime
+ << " attrs=" << attrs << dendl;
+ return set_cr_done();
+ }
+
+};
+
+class RGWLogStatRemoteObjCR : public RGWCallStatRemoteObjCR { // stat-remote-object coroutine wired to the logging callback
+public:
+ RGWLogStatRemoteObjCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key) : RGWCallStatRemoteObjCR(_sync_env, _bucket_info, _key) {
+ }
+
+ ~RGWLogStatRemoteObjCR() override {}
+
+ RGWStatRemoteObjCBCR *allocate_callback() override { // returns a newly allocated RGWLogStatRemoteObjCBCR for the same env/bucket/key
+ return new RGWLogStatRemoteObjCBCR(sync_env, bucket_info, key);
+ }
+};
+
+class RGWLogDataSyncModule : public RGWDataSyncModule { // data-sync handler that logs every sync event at level 0 instead of copying data
+ string prefix; // prepended to every log line
+public:
+ explicit RGWLogDataSyncModule(const string& _prefix) : prefix(_prefix) {}
+
+ RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, std::optional<uint64_t> versioned_epoch, rgw_zone_set *zones_trace) override { // logs the event, then returns a coroutine that stats the remote object and logs the result
+ ldout(sync_env->cct, 0) << prefix << ": SYNC_LOG: sync_object: b=" << bucket_info.bucket << " k=" << key << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
+ return new RGWLogStatRemoteObjCR(sync_env, bucket_info, key);
+ }
+ RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override { // log only
+ ldout(sync_env->cct, 0) << prefix << ": SYNC_LOG: rm_object: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+ return nullptr; // no coroutine is returned for removals
+ }
+ RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info, rgw_obj_key& key, real_time& mtime, // log only
+ rgw_bucket_entry_owner& owner, bool versioned, uint64_t versioned_epoch, rgw_zone_set *zones_trace) override {
+ ldout(sync_env->cct, 0) << prefix << ": SYNC_LOG: create_delete_marker: b=" << bucket_info.bucket << " k=" << key << " mtime=" << mtime
+ << " versioned=" << versioned << " versioned_epoch=" << versioned_epoch << dendl;
+ return nullptr; // no coroutine is returned for delete markers
+ }
+};
+
+class RGWLogSyncModuleInstance : public RGWSyncModuleInstance { // instance of the log sync module; owns its data handler by value
+ RGWLogDataSyncModule data_handler;
+public:
+ explicit RGWLogSyncModuleInstance(const string& prefix) : data_handler(prefix) {}
+ RGWDataSyncModule *get_data_handler() override { // pointer into this object; valid only while the instance lives
+ return &data_handler;
+ }
+};
+
+int RGWLogSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) { // always returns 0; cct is unused
+ string prefix = config["prefix"]; // log-line prefix taken from the module config
+ instance->reset(new RGWLogSyncModuleInstance(prefix));
+ return 0;
+}
+
diff --git a/src/rgw/rgw_sync_module_log.h b/src/rgw/rgw_sync_module_log.h
new file mode 100644
index 00000000..d0059e32
--- /dev/null
+++ b/src/rgw/rgw_sync_module_log.h
@@ -0,0 +1,18 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_MODULE_LOG_H
+#define CEPH_RGW_SYNC_MODULE_LOG_H
+
+#include "rgw_sync_module.h"
+
+class RGWLogSyncModule : public RGWSyncModule { // sync module whose instances only log sync events
+public:
+ RGWLogSyncModule() {}
+ bool supports_data_export() override { // always reports false
+ return false;
+ }
+ int create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) override; // reads "prefix" from config and stores a new instance in *instance
+};
+
+#endif
diff --git a/src/rgw/rgw_sync_module_pubsub.cc b/src/rgw/rgw_sync_module_pubsub.cc
new file mode 100644
index 00000000..fd514b81
--- /dev/null
+++ b/src/rgw/rgw_sync_module_pubsub.cc
@@ -0,0 +1,1578 @@
+#include "rgw_common.h"
+#include "rgw_coroutine.h"
+#include "rgw_sync_module.h"
+#include "rgw_data_sync.h"
+#include "rgw_sync_module_pubsub.h"
+#include "rgw_sync_module_pubsub_rest.h"
+#include "rgw_rest_conn.h"
+#include "rgw_cr_rados.h"
+#include "rgw_cr_rest.h"
+#include "rgw_cr_tools.h"
+#include "rgw_op.h"
+#include "rgw_pubsub.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_notify_event_type.h"
+#include "rgw_perf_counters.h"
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+#include "rgw_amqp.h"
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+#include "rgw_kafka.h"
+#endif
+
+#include <boost/algorithm/hex.hpp>
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+
+#define PUBSUB_EVENTS_RETENTION_DEFAULT 7
+
+/*
+
+config:
+
+{
+ "tenant": <tenant>, # default: <empty>
+ "uid": <uid>, # default: "pubsub"
+ "data_bucket_prefix": <prefix> # default: "pubsub-"
+ "data_oid_prefix": <prefix> #
+ "events_retention_days": <int> # default: 7
+ "start_with_full_sync": <bool> # default: false
+
+ # non-dynamic config
+ "notifications": [
+ {
+ "path": <notification-path>, # this can be either an explicit path: <bucket>, or <bucket>/<object>,
+ # or a prefix if it ends with a wildcard
+ "topic": <topic-name>
+ },
+ ...
+ ],
+ "subscriptions": [
+ {
+ "name": <subscription-name>,
+ "topic": <topic>,
+ "push_endpoint": <endpoint>,
+ "push_endpoint_args": <arg list>, # any push endpoint specific args (include all args)
+ "data_bucket": <bucket>, # override name of bucket where subscription data will be stored
+ "data_oid_prefix": <prefix> # set prefix for subscription data object ids
+ "s3_id": <id> # in case of S3 compatible notifications, the notification ID will be set here
+ },
+ ...
+ ]
+}
+
+*/
+
+// utility function to convert the args list from string format
+// (ampersand separated with equal sign) to parsed structure
+RGWHTTPArgs string_to_args(const std::string& str_args) {
+ RGWHTTPArgs parsed_args; // receives the raw argument string, then parses it in place
+ parsed_args.set(str_args);
+ parsed_args.parse();
+ return parsed_args;
+}
+
+struct PSSubConfig { // configuration of one subscription, plus its push endpoint object when one is configured
+ std::string name;
+ std::string topic;
+ std::string push_endpoint_name;
+ std::string push_endpoint_args; // raw args string; converted with string_to_args() when the endpoint is created
+ std::string data_bucket_name;
+ std::string data_oid_prefix;
+ std::string s3_id;
+ std::string arn_topic;
+ RGWPubSubEndpoint::Ptr push_endpoint; // only assigned when push_endpoint_name is non-empty and creation succeeds
+
+ void from_user_conf(CephContext *cct, const rgw_pubsub_sub_config& uc) { // populate from a user-level rgw_pubsub_sub_config
+ name = uc.name;
+ topic = uc.topic;
+ push_endpoint_name = uc.dest.push_endpoint;
+ data_bucket_name = uc.dest.bucket_name;
+ data_oid_prefix = uc.dest.oid_prefix;
+ s3_id = uc.s3_id;
+ arn_topic = uc.dest.arn_topic;
+ if (!push_endpoint_name.empty()) {
+ push_endpoint_args = uc.dest.push_endpoint_args;
+ try {
+ push_endpoint = RGWPubSubEndpoint::create(push_endpoint_name, arn_topic, string_to_args(push_endpoint_args), cct);
+ ldout(cct, 20) << "push endpoint created: " << push_endpoint->to_str() << dendl;
+ } catch (const RGWPubSubEndpoint::configuration_error& e) { // a configuration error is only logged; push_endpoint is not assigned
+ ldout(cct, 1) << "ERROR: failed to create push endpoint: "
+ << push_endpoint_name << " due to: " << e.what() << dendl;
+ }
+ }
+ }
+
+ void dump(Formatter *f) const { // note: arn_topic and the endpoint object itself are not emitted
+ encode_json("name", name, f);
+ encode_json("topic", topic, f);
+ encode_json("push_endpoint", push_endpoint_name, f);
+ encode_json("push_endpoint_args", push_endpoint_args, f);
+ encode_json("data_bucket_name", data_bucket_name, f);
+ encode_json("data_oid_prefix", data_oid_prefix, f);
+ encode_json("s3_id", s3_id, f);
+ }
+
+ void init(CephContext *cct, const JSONFormattable& config, // populate from one entry of the module config's "subscriptions" list
+ const string& data_bucket_prefix,
+ const string& default_oid_prefix) {
+ name = config["name"];
+ topic = config["topic"];
+ push_endpoint_name = config["push_endpoint"];
+ string default_bucket_name = data_bucket_prefix + name; // used when "data_bucket" is not given
+ data_bucket_name = config["data_bucket"](default_bucket_name.c_str());
+ data_oid_prefix = config["data_oid_prefix"](default_oid_prefix.c_str());
+ s3_id = config["s3_id"];
+ arn_topic = config["arn_topic"];
+ if (!push_endpoint_name.empty()) {
+ push_endpoint_args = config["push_endpoint_args"];
+ try {
+ push_endpoint = RGWPubSubEndpoint::create(push_endpoint_name, arn_topic, string_to_args(push_endpoint_args), cct);
+ ldout(cct, 20) << "push endpoint created: " << push_endpoint->to_str() << dendl;
+ } catch (const RGWPubSubEndpoint::configuration_error& e) { // same handling as from_user_conf(): log and continue
+ ldout(cct, 1) << "ERROR: failed to create push endpoint: "
+ << push_endpoint_name << " due to: " << e.what() << dendl;
+ }
+ }
+ }
+};
+
+using PSSubConfigRef = std::shared_ptr<PSSubConfig>;
+
+struct PSTopicConfig { // a topic and the names of the subscriptions attached to it
+ std::string name;
+ std::set<std::string> subs; // subscription names
+ std::string opaque_data; // dumped under the key "opaque"
+
+ void dump(Formatter *f) const {
+ encode_json("name", name, f);
+ encode_json("subs", subs, f);
+ encode_json("opaque", opaque_data, f);
+ }
+};
+
+struct PSNotificationConfig {
+ uint64_t id{0};
+ string path; /* exact path, or the prefix part of a wildcard path, that triggers the event */
+ string topic;
+ bool is_prefix{false}; /* set when the configured path ended with '*' */
+
+
+ void dump(Formatter *f) const {
+ encode_json("id", id, f);
+ encode_json("path", path, f);
+ encode_json("topic", topic, f);
+ encode_json("is_prefix", is_prefix, f);
+ }
+
+ void init(CephContext *cct, const JSONFormattable& config) {
+ path = config["path"];
+ if (!path.empty() && path.back() == '*') {
+ path.pop_back(); /* keep only the prefix, without the trailing wildcard */
+ is_prefix = true;
+ }
+ topic = config["topic"];
+ }
+};
+
+template<class T>
+static string json_str(const char *name, const T& obj, bool pretty = false)
+{
+ // Encode obj under the given name with a JSONFormatter and return the resulting text.
+ JSONFormatter formatter(pretty);
+ encode_json(name, obj, &formatter);
+
+ stringstream out;
+ formatter.flush(out);
+ return out.str();
+}
+
+using PSTopicConfigRef = std::shared_ptr<PSTopicConfig>;
+using TopicsRef = std::shared_ptr<std::vector<PSTopicConfigRef>>;
+
+struct PSConfig { // parsed pubsub module configuration: user, prefixes, subscriptions, topics and notifications
+ const std::string id{"pubsub"};
+ rgw_user user;
+ std::string data_bucket_prefix;
+ std::string data_oid_prefix;
+
+ int events_retention_days{0};
+
+ uint64_t sync_instance{0};
+ uint64_t max_id{0}; // last notification id handed out by init()
+
+ /* FIXME: no hard coded buckets, we'll have configurable topics */
+ std::map<std::string, PSSubConfigRef> subs; // keyed by subscription name
+ std::map<std::string, PSTopicConfigRef> topics; // keyed by topic name
+ std::multimap<std::string, PSNotificationConfig> notifications; // keyed by notification path (wildcard already stripped)
+
+ bool start_with_full_sync{false};
+
+ void dump(Formatter *f) const {
+ encode_json("id", id, f);
+ encode_json("user", user, f);
+ encode_json("data_bucket_prefix", data_bucket_prefix, f);
+ encode_json("data_oid_prefix", data_oid_prefix, f);
+ encode_json("events_retention_days", events_retention_days, f);
+ encode_json("sync_instance", sync_instance, f);
+ encode_json("max_id", max_id, f);
+ {
+ Formatter::ArraySection section(*f, "subs");
+ for (auto& sub : subs) {
+ encode_json("sub", *sub.second, f);
+ }
+ }
+ {
+ Formatter::ArraySection section(*f, "topics");
+ for (auto& topic : topics) {
+ encode_json("topic", *topic.second, f);
+ }
+ }
+ {
+ Formatter::ObjectSection section(*f, "notifications"); // one array per distinct path, named after the path
+ std::string last;
+ for (auto& notif : notifications) {
+ const string& n = notif.first;
+ if (n != last) { // key changed: close the previous path's array (if one was opened) and open a new one
+ if (!last.empty()) {
+ f->close_section();
+ }
+ f->open_array_section(n.c_str());
+ }
+ last = n;
+ encode_json("notifications", notif.second, f);
+ }
+ if (!last.empty()) {
+ f->close_section();
+ }
+ }
+ encode_json("start_with_full_sync", start_with_full_sync, f);
+ }
+
+ void init(CephContext *cct, const JSONFormattable& config) { // fills this object from the module config and logs the parsed result
+ string uid = config["uid"]("pubsub");
+ user = rgw_user(config["tenant"], uid);
+ data_bucket_prefix = config["data_bucket_prefix"]("pubsub-");
+ data_oid_prefix = config["data_oid_prefix"];
+ events_retention_days = config["events_retention_days"](PUBSUB_EVENTS_RETENTION_DEFAULT);
+
+ for (auto& c : config["notifications"].array()) {
+ PSNotificationConfig nc;
+ nc.id = ++max_id; // ids are assigned sequentially, starting at 1
+ nc.init(cct, c);
+ notifications.insert(std::make_pair(nc.path, nc));
+
+ PSTopicConfig topic_config = { .name = nc.topic }; // designated initializer: a compiler extension before C++20
+ topics[nc.topic] = make_shared<PSTopicConfig>(topic_config); // every notification (re)creates its topic entry
+ }
+ for (auto& c : config["subscriptions"].array()) {
+ auto sc = std::make_shared<PSSubConfig>();
+ sc->init(cct, c, data_bucket_prefix, data_oid_prefix);
+ subs[sc->name] = sc;
+ auto iter = topics.find(sc->topic);
+ if (iter != topics.end()) { // attached only when a notification already created the topic
+ iter->second->subs.insert(sc->name);
+ }
+ }
+ start_with_full_sync = config["start_with_full_sync"](false);
+
+ ldout(cct, 5) << "pubsub: module config (parsed representation):\n" << json_str("config", *this, true) << dendl;
+ }
+
+ void init_instance(const RGWRealm& realm, uint64_t instance_id) { // only records the instance id; realm is unused
+ sync_instance = instance_id;
+ }
+
+ void get_topics(CephContext *cct, const rgw_bucket& bucket, const rgw_obj_key& key, TopicsRef *result) { // appends to **result every topic whose notification path matches "<bucket>/<key>"
+ const std::string path = bucket.name + "/" + key.name;
+
+ auto iter = notifications.upper_bound(path); // first entry ordered after path; candidates lie before it
+ if (iter == notifications.begin()) {
+ return;
+ }
+
+ do { // walk backwards from the entry just before upper_bound
+ --iter;
+ if (iter->first.size() > path.size()) {
+ break;
+ }
+ if (path.compare(0, iter->first.size(), iter->first) != 0) { // stop at the first entry that is not a prefix of path
+ break;
+ }
+
+ PSNotificationConfig& target = iter->second;
+
+ if (!target.is_prefix &&
+ path.size() != iter->first.size()) { // non-wildcard entries must match the whole path
+ continue;
+ }
+
+ auto topic = topics.find(target.topic);
+ if (topic == topics.end()) {
+ continue;
+ }
+
+ ldout(cct, 20) << ": found topic for path=" << bucket << "/" << key << ": id=" << target.id <<
+ " target_path=" << target.path << ", topic=" << target.topic << dendl;
+ (*result)->push_back(topic->second); // *result must already point at a vector
+ } while (iter != notifications.begin());
+ }
+
+ bool find_sub(const string& name, PSSubConfigRef *ref) { // true and *ref set when a subscription with that name exists
+ auto iter = subs.find(name);
+ if (iter != subs.end()) {
+ *ref = iter->second;
+ return true;
+ }
+ return false;
+ }
+};
+
+using PSConfigRef = std::shared_ptr<PSConfig>;
+template<typename EventType>
+using EventRef = std::shared_ptr<EventType>;
+
+struct objstore_event { // view of one object event; holds references, so the referenced bucket/key/mtime must outlive it
+ string id;
+ const rgw_bucket& bucket;
+ const rgw_obj_key& key;
+ const ceph::real_time& mtime;
+ const std::vector<std::pair<std::string, std::string> > *attrs; // may be null; dump() then emits an empty "attrs" section
+
+ objstore_event(const rgw_bucket& _bucket,
+ const rgw_obj_key& _key,
+ const ceph::real_time& _mtime,
+ const std::vector<std::pair<std::string, std::string> > *_attrs) : bucket(_bucket),
+ key(_key),
+ mtime(_mtime),
+ attrs(_attrs) {}
+
+ string get_hash() { // first 8 characters of the RGWMD5Etag result over bucket id, key name and key instance
+ string etag;
+ RGWMD5Etag hash;
+ hash.update(bucket.bucket_id);
+ hash.update(key.name);
+ hash.update(key.instance);
+ hash.finish(&etag);
+
+ assert(etag.size() > 8);
+
+ return etag.substr(0, 8);
+ }
+
+ void dump(Formatter *f) const { // emits "bucket", "key", "mtime" and "attrs"
+ {
+ Formatter::ObjectSection s(*f, "bucket");
+ encode_json("name", bucket.name, f);
+ encode_json("tenant", bucket.tenant, f);
+ encode_json("bucket_id", bucket.bucket_id, f);
+ }
+ {
+ Formatter::ObjectSection s(*f, "key");
+ encode_json("name", key.name, f);
+ encode_json("instance", key.instance, f);
+ }
+ utime_t mt(mtime);
+ encode_json("mtime", mt, f);
+ Formatter::ObjectSection s(*f, "attrs");
+ if (attrs) {
+ for (auto& attr : *attrs) { // each attribute becomes a name -> value string entry
+ encode_json(attr.first.c_str(), attr.second.c_str(), f);
+ }
+ }
+ }
+};
+
+static void make_event_ref(CephContext *cct, const rgw_bucket& bucket, // allocates a new rgw_pubsub_event into *event and fills it; cct is unused
+ const rgw_obj_key& key,
+ const ceph::real_time& mtime,
+ const std::vector<std::pair<std::string, std::string> > *attrs,
+ rgw::notify::EventType event_type,
+ EventRef<rgw_pubsub_event> *event) {
+ *event = std::make_shared<rgw_pubsub_event>();
+
+ EventRef<rgw_pubsub_event>& e = *event;
+ e->event_name = rgw::notify::to_ceph_string(event_type);
+ e->source = bucket.name + "/" + key.name;
+ e->timestamp = real_clock::now(); // the event is stamped with the current time, not the object's mtime
+
+ objstore_event oevent(bucket, key, mtime, attrs);
+
+ const utime_t ts(e->timestamp);
+ set_event_id(e->id, oevent.get_hash(), ts); // id is derived from the object hash and the timestamp
+
+ encode_json("info", oevent, &e->info); // bucket/key/mtime/attrs details go into the event's info
+}
+
+static void make_s3_record_ref(CephContext *cct, const rgw_bucket& bucket, // allocates a new rgw_pubsub_s3_record into *record and fills it; cct is unused
+ const rgw_user& owner,
+ const rgw_obj_key& key,
+ const ceph::real_time& mtime,
+ const std::vector<std::pair<std::string, std::string> > *attrs,
+ rgw::notify::EventType event_type,
+ EventRef<rgw_pubsub_s3_record> *record) {
+ *record = std::make_shared<rgw_pubsub_s3_record>();
+
+ EventRef<rgw_pubsub_s3_record>& r = *record;
+ r->eventTime = mtime; // unlike make_event_ref(), the record's time is the object's mtime
+ r->eventName = rgw::notify::to_string(event_type);
+ // userIdentity: not supported in sync module
+ // x_amz_request_id: not supported in sync module
+ // x_amz_id_2: not supported in sync module
+ // configurationId is filled from subscription configuration
+ r->bucket_name = bucket.name;
+ r->bucket_ownerIdentity = owner.to_str();
+ r->bucket_arn = to_string(rgw::ARN(bucket));
+ r->bucket_id = bucket.bucket_id; // rgw extension
+ r->object_key = key.name;
+ // object_size not supported in sync module
+ objstore_event oevent(bucket, key, mtime, attrs);
+ r->object_etag = oevent.get_hash(); // the 8-character object hash stands in for the etag
+ r->object_versionId = key.instance;
+
+ // use timestamp as per key sequence id (hex encoded)
+ const utime_t ts(real_clock::now());
+ boost::algorithm::hex((const char*)&ts, (const char*)&ts + sizeof(utime_t), // hex of the raw in-memory bytes of ts
+ std::back_inserter(r->object_sequencer));
+
+ set_event_id(r->id, r->object_etag, ts);
+}
+
+class PSManager;
+using PSManagerRef = std::shared_ptr<PSManager>;
+
+struct PSEnv { // bundle of the shared pubsub state: parsed config, data user info and the manager
+ PSConfigRef conf;
+ shared_ptr<RGWUserInfo> data_user_info;
+ PSManagerRef manager; // not set by the constructor
+
+ PSEnv() : conf(make_shared<PSConfig>()),
+ data_user_info(make_shared<RGWUserInfo>()) {}
+
+ void init(CephContext *cct, const JSONFormattable& config) { // forwards to PSConfig::init()
+ conf->init(cct, config);
+ }
+
+ void init_instance(const RGWRealm& realm, uint64_t instance_id, PSManagerRef& mgr); // defined outside this struct
+};
+
+using PSEnvRef = std::shared_ptr<PSEnv>;
+
+template<typename EventType>
+class PSEvent { // thin wrapper around a shared event that offers JSON formatting, encoding and id access
+ const EventRef<EventType> event;
+
+public:
+ PSEvent(const EventRef<EventType>& _event) : event(_event) {}
+
+ void format(bufferlist *bl) const { // appends the event's JSON text (empty section name) to *bl
+ bl->append(json_str("", *event));
+ }
+
+ void encode_event(bufferlist& bl) const { // appends the event's encode() output to bl
+ encode(*event, bl);
+ }
+
+ const string& id() const {
+ return event->id;
+ }
+};
+
+template <class T>
+class RGWSingletonCR : public RGWCoroutine { // coroutine started by its first execute() caller; callers arriving while it runs are parked and woken with its result
+ friend class WrapperCR;
+
+ boost::asio::coroutine wrapper_state; // resume point for operate_wrapper(), separate from the subclass's operate() state
+ bool started{false}; // set by the first execute() call
+ int operate_ret{0};
+
+ struct WaiterInfo { // a parked caller and where it wants the result stored
+ RGWCoroutine *cr{nullptr};
+ T *result;
+ };
+ using WaiterInfoRef = std::shared_ptr<WaiterInfo>;
+
+ deque<WaiterInfoRef> waiters; // woken in registration order
+
+ void add_waiter(RGWCoroutine *cr, T *result) {
+ auto waiter = std::make_shared<WaiterInfo>();
+ waiter->cr = cr;
+ waiter->result = result;
+ waiters.push_back(waiter);
+ };
+
+ bool get_next_waiter(WaiterInfoRef *waiter) { // pops the oldest waiter into *waiter; returns false (and resets *waiter) when none are left
+ if (waiters.empty()) {
+ waiter->reset();
+ return false;
+ }
+
+ *waiter = waiters.front();
+ waiters.pop_front();
+ return true;
+ }
+
+ int operate_wrapper() override { // drives operate() until is_done(), then wakes every parked waiter
+ reenter(&wrapper_state) {
+ while (!is_done()) {
+ ldout(cct, 20) << __func__ << "(): operate_wrapper() -> operate()" << dendl;
+ operate_ret = operate();
+ if (operate_ret < 0) {
+ ldout(cct, 20) << *this << ": operate() returned r=" << operate_ret << dendl;
+ }
+ if (!is_done()) {
+ yield;
+ }
+ }
+
+ ldout(cct, 20) << __func__ << "(): RGWSingletonCR: operate_wrapper() done, need to wake up " << waiters.size() << " waiters" << dendl;
+ /* we're done, can't yield anymore */
+
+ WaiterInfoRef waiter;
+ while (get_next_waiter(&waiter)) {
+ ldout(cct, 20) << __func__ << "(): RGWSingletonCR: waking up waiter" << dendl;
+ waiter->cr->set_retcode(retcode); // each waiter receives this coroutine's retcode
+ waiter->cr->set_sleeping(false);
+ return_result(waiter->result);
+ put(); // pairs with the get() taken in execute() when the waiter registered
+ }
+
+ return retcode;
+ }
+ return 0;
+ }
+
+ virtual void return_result(T *result) {} // hook for subclasses to copy their result into *result; default does nothing
+
+public:
+ RGWSingletonCR(CephContext *_cct)
+ : RGWCoroutine(_cct) {}
+
+ int execute(RGWCoroutine *caller, T *result = nullptr) { // entry point for callers; returns 0 when started or parked, otherwise the finished retcode
+ if (!started) {
+ ldout(cct, 20) << __func__ << "(): singleton not started, starting" << dendl;
+ started = true;
+ caller->call(this); // the first caller runs this coroutine; its `result` is not recorded on this path
+ return 0;
+ } else if (!is_done()) {
+ ldout(cct, 20) << __func__ << "(): singleton not done yet, registering as waiter" << dendl;
+ get(); // reference held on behalf of the waiter until it is woken
+ add_waiter(caller, result);
+ caller->set_sleeping(true);
+ return 0;
+ }
+
+ ldout(cct, 20) << __func__ << "(): singleton done, returning retcode=" << retcode << dendl;
+ caller->set_retcode(retcode); // already finished: hand over the result immediately
+ return_result(result);
+ return retcode;
+ }
+};
+
+
+class PSSubscription;
+using PSSubscriptionRef = std::shared_ptr<PSSubscription>;
+
+// Runtime state of one pubsub subscription: its configuration, the data bucket
+// events are stored in, and the coroutines that initialize the bucket, store
+// events into it and push events to the subscription's endpoint.
+//
+// Instances are expected to be created through get_shared(), which also
+// creates the one-time initialization coroutine (init_cr).
+class PSSubscription {
+ class InitCR;
+ friend class InitCR;
+ friend class RGWPSHandleObjEventCR;
+
+ RGWDataSyncEnv *sync_env;
+ PSEnvRef env;
+ PSSubConfigRef sub_conf;
+ // filled in by InitCR; bucket_info points into this result object
+ std::shared_ptr<rgw_get_bucket_info_result> get_bucket_info_result;
+ RGWBucketInfo *bucket_info{nullptr};
+ RGWDataAccessRef data_access;
+ // handle to the data bucket, set by InitCR
+ RGWDataAccess::BucketRef bucket;
+
+ // one-time initialization coroutine; a reference is taken in get_shared()
+ // and released in the destructor
+ InitCR *init_cr{nullptr};
+
+ // Makes sure the data bucket has an enabled, prefix-less expiration rule of
+ // conf->events_retention_days days.
+ class InitBucketLifecycleCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+ PSConfigRef& conf;
+ LCRule rule;
+
+ int retention_days;
+
+ rgw_bucket_lifecycle_config_params lc_config;
+
+ public:
+ // copies the bucket info and attrs into the lifecycle request parameters
+ InitBucketLifecycleCR(RGWDataSyncEnv *_sync_env,
+ PSConfigRef& _conf,
+ RGWBucketInfo& _bucket_info,
+ std::map<string, bufferlist>& _bucket_attrs) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ conf(_conf) {
+ lc_config.bucket_info = _bucket_info;
+ lc_config.bucket_attrs = _bucket_attrs;
+ retention_days = conf->events_retention_days;
+ }
+
+ int operate() override {
+ reenter(this) {
+
+ rule.init_simple_days_rule("Pubsub Expiration", "" /* all objects in bucket */, retention_days);
+
+ {
+ /* maybe we already have it configured? */
+ RGWLifecycleConfiguration old_config;
+ auto aiter = lc_config.bucket_attrs.find(RGW_ATTR_LC);
+ if (aiter != lc_config.bucket_attrs.end()) {
+ bufferlist::const_iterator iter{&aiter->second};
+ try {
+ old_config.decode(iter);
+ } catch (const buffer::error& e) {
+ // a decode failure is only logged; the rule check below then runs on
+ // whatever old_config holds
+ ldout(cct, 0) << __func__ << "(): decode life cycle config failed" << dendl;
+ }
+ }
+
+ auto old_rules = old_config.get_rule_map();
+ for (auto ori : old_rules) {
+ auto& old_rule = ori.second;
+
+ // an existing rule is good enough if it covers all objects (empty
+ // prefix), has the same number of days and is enabled
+ if (old_rule.get_prefix().empty() &&
+ old_rule.get_expiration().get_days() == retention_days &&
+ old_rule.is_enabled()) {
+ ldout(sync_env->cct, 20) << "no need to set lifecycle rule on bucketi, existing rule matches config" << dendl;
+ return set_cr_done();
+ }
+ }
+ }
+
+ // NOTE(review): only the new rule is added to lc_config.config; rules
+ // decoded into old_config are not carried over here - confirm whether
+ // RGWBucketLifecycleConfigCR preserves existing rules
+ lc_config.config.add_rule(rule);
+ yield call(new RGWBucketLifecycleConfigCR(sync_env->async_rados,
+ sync_env->store,
+ lc_config));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to set lifecycle on bucket: ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ // One-time initialization of the subscription: look up the data bucket,
+ // create it if it does not exist, then set up its lifecycle rule.
+ // NOTE(review): this coroutine holds a PSSubscriptionRef while the
+ // subscription holds a reference on this coroutine until its destructor
+ // runs; that looks like an ownership cycle - confirm how it is broken.
+ class InitCR : public RGWSingletonCR<bool> {
+ RGWDataSyncEnv *sync_env;
+ PSSubscriptionRef sub;
+ rgw_get_bucket_info_params get_bucket_info;
+ rgw_bucket_create_local_params create_bucket;
+ PSConfigRef& conf;
+ PSSubConfigRef& sub_conf;
+ // loop counter; a member because it must survive across yields
+ int i;
+
+ public:
+ InitCR(RGWDataSyncEnv *_sync_env,
+ PSSubscriptionRef& _sub) : RGWSingletonCR<bool>(_sync_env->cct),
+ sync_env(_sync_env),
+ sub(_sub), conf(sub->env->conf),
+ sub_conf(sub->sub_conf) {
+ }
+
+ int operate() override {
+ reenter(this) {
+ get_bucket_info.tenant = conf->user.tenant;
+ get_bucket_info.bucket_name = sub_conf->data_bucket_name;
+ sub->get_bucket_info_result = make_shared<rgw_get_bucket_info_result>();
+
+ // at most two rounds: the first may find that the bucket is missing and
+ // create it, the second is then expected to find it
+ for (i = 0; i < 2; ++i) {
+ yield call(new RGWGetBucketInfoCR(sync_env->async_rados,
+ sync_env->store,
+ get_bucket_info,
+ sub->get_bucket_info_result));
+ // NOTE(review): errors other than -ENOENT are only logged; execution
+ // continues with the bucket creation below - confirm this is intended
+ if (retcode < 0 && retcode != -ENOENT) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to geting bucket info: " << "tenant="
+ << get_bucket_info.tenant << " name=" << get_bucket_info.bucket_name << ": ret=" << retcode << dendl;
+ }
+ if (retcode == 0) {
+ {
+ auto& result = sub->get_bucket_info_result;
+ sub->bucket_info = &result->bucket_info;
+
+ int ret = sub->data_access->get_bucket(result->bucket_info, result->attrs, &sub->bucket);
+ if (ret < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: data_access.get_bucket() bucket=" << result->bucket_info.bucket << " failed, ret=" << ret << dendl;
+ return set_cr_error(ret);
+ }
+ }
+
+ yield call(new InitBucketLifecycleCR(sync_env, conf,
+ sub->get_bucket_info_result->bucket_info,
+ sub->get_bucket_info_result->attrs));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to init lifecycle on bucket (bucket=" << sub_conf->data_bucket_name << ") ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+
+ return set_cr_done();
+ }
+
+ // bucket info could not be read: create the bucket on behalf of the
+ // pubsub data user
+ create_bucket.user_info = sub->env->data_user_info;
+ create_bucket.bucket_name = sub_conf->data_bucket_name;
+ ldout(sync_env->cct, 20) << "pubsub: bucket create: using user info: " << json_str("obj", *sub->env->data_user_info, true) << dendl;
+ yield call(new RGWBucketCreateLocalCR(sync_env->async_rados,
+ sync_env->store,
+ create_bucket));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 0) << "ERROR: failed to create bucket: " << "tenant="
+ << get_bucket_info.tenant << " name=" << get_bucket_info.bucket_name << ": ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+
+ /* second iteration: we got -ENOENT and created a bucket */
+ }
+
+ /* failed twice on -ENOENT, unexpected */
+ ldout(sync_env->cct, 0) << "ERROR: failed to create bucket " << "tenant=" << get_bucket_info.tenant
+ << " name=" << get_bucket_info.bucket_name << dendl;
+ return set_cr_error(-EIO);
+ }
+ return 0;
+ }
+ };
+
+ // Stores one event as an object in the subscription's data bucket. The
+ // object key is the configured oid prefix followed by the event id, the
+ // object data is the JSON form of the event, and the base64 of the encoded
+ // event goes into the put request's user_data.
+ template<typename EventType>
+ class StoreEventCR : public RGWCoroutine {
+ RGWDataSyncEnv* const sync_env;
+ const PSSubscriptionRef sub;
+ const PSEvent<EventType> pse;
+ const string oid_prefix;
+
+ public:
+ StoreEventCR(RGWDataSyncEnv* const _sync_env,
+ const PSSubscriptionRef& _sub,
+ const EventRef<EventType>& _event) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ sub(_sub),
+ pse(_event),
+ oid_prefix(sub->sub_conf->data_oid_prefix) {
+ }
+
+ int operate() override {
+ // NOTE(review): put_obj is a local, so it is re-created on every entry
+ // into operate(); the log lines after the yield therefore see a fresh
+ // object rather than the values assigned before it - confirm
+ rgw_object_simple_put_params put_obj;
+ reenter(this) {
+
+ put_obj.bucket = sub->bucket;
+ put_obj.key = rgw_obj_key(oid_prefix + pse.id());
+
+ pse.format(&put_obj.data);
+
+ {
+ bufferlist bl;
+ pse.encode_event(bl);
+ bufferlist bl64;
+ bl.encode_base64(bl64);
+ put_obj.user_data = bl64.to_str();
+ }
+
+ yield call(new RGWObjectSimplePutCR(sync_env->async_rados,
+ sync_env->store,
+ put_obj));
+ if (retcode < 0) {
+ ldpp_dout(sync_env->dpp, 10) << "failed to store event: " << put_obj.bucket << "/" << put_obj.key << " ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ } else {
+ ldpp_dout(sync_env->dpp, 20) << "event stored: " << put_obj.bucket << "/" << put_obj.key << dendl;
+ }
+
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+ // Pushes one event to the subscription's push endpoint and waits for the
+ // send to complete. The subscription must have a push endpoint (asserted).
+ template<typename EventType>
+ class PushEventCR : public RGWCoroutine {
+ RGWDataSyncEnv* const sync_env;
+ const EventRef<EventType> event;
+ // reference into the PSSubscription passed to the constructor; that
+ // subscription has to outlive this coroutine
+ const PSSubConfigRef& sub_conf;
+
+ public:
+ PushEventCR(RGWDataSyncEnv* const _sync_env,
+ const PSSubscriptionRef& _sub,
+ const EventRef<EventType>& _event) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ event(_event),
+ sub_conf(_sub->sub_conf) {
+ }
+
+ int operate() override {
+ reenter(this) {
+ ceph_assert(sub_conf->push_endpoint);
+ yield call(sub_conf->push_endpoint->send_to_completion_async(*event.get(), sync_env));
+
+ if (retcode < 0) {
+ ldout(sync_env->cct, 10) << "failed to push event: " << event->id <<
+ " to endpoint: " << sub_conf->push_endpoint_name << " ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+
+ ldout(sync_env->cct, 20) << "event: " << event->id <<
+ " pushed to endpoint: " << sub_conf->push_endpoint_name << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+ };
+
+public:
+ // construct from an already built subscription configuration
+ PSSubscription(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env,
+ PSSubConfigRef& _sub_conf) : sync_env(_sync_env),
+ env(_env),
+ sub_conf(_sub_conf),
+ data_access(std::make_shared<RGWDataAccess>(sync_env->store)) {}
+
+ // construct from a user-level subscription configuration, which is converted
+ // into a fresh PSSubConfig
+ PSSubscription(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env,
+ rgw_pubsub_sub_config& user_sub_conf) : sync_env(_sync_env),
+ env(_env),
+ sub_conf(std::make_shared<PSSubConfig>()),
+ data_access(std::make_shared<RGWDataAccess>(sync_env->store)) {
+ sub_conf->from_user_conf(sync_env->cct, user_sub_conf);
+ }
+ // releases the reference on init_cr taken in get_shared()
+ virtual ~PSSubscription() {
+ if (init_cr) {
+ init_cr->put();
+ }
+ }
+
+ // Factory: builds the subscription from either kind of configuration (C is
+ // PSSubConfigRef or rgw_pubsub_sub_config, matching the two constructors)
+ // and attaches its initialization coroutine.
+ template <class C>
+ static PSSubscriptionRef get_shared(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env,
+ C& _sub_conf) {
+ auto sub = std::make_shared<PSSubscription>(_sync_env, _env, _sub_conf);
+ sub->init_cr = new InitCR(_sync_env, sub);
+ sub->init_cr->get();
+ return sub;
+ }
+
+ // run (or wait for, or fetch the outcome of) the one-time initialization on
+ // behalf of caller; see RGWSingletonCR::execute()
+ int call_init_cr(RGWCoroutine *caller) {
+ return init_cr->execute(caller);
+ }
+
+ // allocate a coroutine that stores event in sub's data bucket
+ template<typename EventType>
+ static RGWCoroutine *store_event_cr(RGWDataSyncEnv* const sync_env, const PSSubscriptionRef& sub, const EventRef<EventType>& event) {
+ return new StoreEventCR<EventType>(sync_env, sub, event);
+ }
+
+ // allocate a coroutine that pushes event to sub's push endpoint
+ template<typename EventType>
+ static RGWCoroutine *push_event_cr(RGWDataSyncEnv* const sync_env, const PSSubscriptionRef& sub, const EventRef<EventType>& event) {
+ return new PushEventCR<EventType>(sync_env, sub, event);
+ }
+ friend class InitCR;
+};
+
+// Keeps track of subscription instances and of the in-flight coroutines that
+// resolve them, so that concurrent requests for the same subscription share a
+// single GetSubCR.
+class PSManager
+{
+ RGWDataSyncEnv *sync_env;
+ PSEnvRef env;
+
+ // initialized subscriptions; GetSubCR only adds entries for subscriptions
+ // without an owner, keyed by subscription name
+ std::map<string, PSSubscriptionRef> subs;
+
+ // Resolves one subscription: finds its configuration (module configuration
+ // when owner is empty, the user's subscription metadata object otherwise),
+ // creates the PSSubscription and runs its initialization.
+ class GetSubCR : public RGWSingletonCR<PSSubscriptionRef> {
+ RGWDataSyncEnv *sync_env;
+ PSManagerRef mgr;
+ rgw_user owner;
+ string sub_name;
+ // not referenced by the code of this class
+ string sub_id;
+ // output location supplied by the caller that created this coroutine
+ PSSubscriptionRef *ref;
+
+ PSConfigRef conf;
+
+ PSSubConfigRef sub_conf;
+ rgw_pubsub_sub_config user_sub_conf;
+
+ public:
+ GetSubCR(RGWDataSyncEnv *_sync_env,
+ PSManagerRef& _mgr,
+ const rgw_user& _owner,
+ const string& _sub_name,
+ PSSubscriptionRef *_ref) : RGWSingletonCR<PSSubscriptionRef>(_sync_env->cct),
+ sync_env(_sync_env),
+ mgr(_mgr),
+ owner(_owner),
+ sub_name(_sub_name),
+ ref(_ref),
+ conf(mgr->env->conf) {
+ }
+ ~GetSubCR() { }
+
+ // Every exit path removes this coroutine from the manager's get_subs map.
+ int operate() override {
+ reenter(this) {
+ if (owner.empty()) {
+ // no owner: the subscription has to be part of the module configuration
+ if (!conf->find_sub(sub_name, &sub_conf)) {
+ ldout(sync_env->cct, 10) << "failed to find subscription config: name=" << sub_name << dendl;
+ mgr->remove_get_sub(owner, sub_name);
+ return set_cr_error(-ENOENT);
+ }
+
+ *ref = PSSubscription::get_shared(sync_env, mgr->env, sub_conf);
+ } else {
+ // owned subscription: read its configuration from the owner's
+ // subscription metadata object
+ using ReadInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_sub_config>;
+ yield {
+ RGWUserPubSub ups(sync_env->store, owner);
+ rgw_raw_obj obj;
+ ups.get_sub_meta_obj(sub_name, &obj);
+ bool empty_on_enoent = false;
+ call(new ReadInfoCR(sync_env->async_rados, sync_env->store->svc.sysobj,
+ obj,
+ &user_sub_conf, empty_on_enoent));
+ }
+ if (retcode < 0) {
+ mgr->remove_get_sub(owner, sub_name);
+ return set_cr_error(retcode);
+ }
+
+ *ref = PSSubscription::get_shared(sync_env, mgr->env, user_sub_conf);
+ }
+
+ yield (*ref)->call_init_cr(this);
+ if (retcode < 0) {
+ ldout(sync_env->cct, 10) << "failed to init subscription" << dendl;
+ mgr->remove_get_sub(owner, sub_name);
+ return set_cr_error(retcode);
+ }
+
+ // only owner-less subscriptions are remembered in mgr->subs
+ if (owner.empty()) {
+ mgr->subs[sub_name] = *ref;
+ }
+ mgr->remove_get_sub(owner, sub_name);
+
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+ // copy the resolved subscription to a waiter's output location, on success
+ // only; result is dereferenced without a null check
+ void return_result(PSSubscriptionRef *result) override {
+ ldout(cct, 20) << __func__ << "(): returning result: retcode=" << retcode << " resultp=" << (void *)result << dendl;
+ if (retcode >= 0) {
+ *result = *ref;
+ }
+ }
+ };
+
+ // map key for a subscription: "<owner>/<sub_name>", or just the name when
+ // there is no owner
+ string sub_id(const rgw_user& owner, const string& sub_name) {
+ string owner_prefix;
+ if (!owner.empty()) {
+ owner_prefix = owner.to_str() + "/";
+ }
+
+ return owner_prefix + sub_name;
+ }
+
+ // in-flight GetSubCR coroutines keyed by sub_id(); raw pointers, entries are
+ // erased by the coroutine itself through remove_get_sub()
+ std::map<std::string, GetSubCR *> get_subs;
+
+ // returns a reference to the map slot, creating a null entry if there is none
+ GetSubCR *& get_get_subs(const rgw_user& owner, const string& name) {
+ return get_subs[sub_id(owner, name)];
+ }
+
+ void remove_get_sub(const rgw_user& owner, const string& name) {
+ get_subs.erase(sub_id(owner, name));
+ }
+
+ // look up an already initialized subscription; on a hit stores it in *sub
+ bool find_sub_instance(const rgw_user& owner, const string& sub_name, PSSubscriptionRef *sub) {
+ auto iter = subs.find(sub_id(owner, sub_name));
+ if (iter != subs.end()) {
+ *sub = iter->second;
+ return true;
+ }
+ return false;
+ }
+
+ // private: instances are created through get_shared()
+ PSManager(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env) : sync_env(_sync_env),
+ env(_env) {}
+
+public:
+ static PSManagerRef get_shared(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env) {
+ return std::shared_ptr<PSManager>(new PSManager(_sync_env, _env));
+ }
+
+ // Resolve subscription sub_name of owner on behalf of caller; the result is
+ // stored in *ref. Returns what RGWSingletonCR::execute() returns.
+ static int call_get_subscription_cr(RGWDataSyncEnv *sync_env, PSManagerRef& mgr,
+ RGWCoroutine *caller, const rgw_user& owner, const string& sub_name, PSSubscriptionRef *ref) {
+ // NOTE(review): despite the comment below, a cache hit does not return
+ // here; execution falls through and a GetSubCR is created and executed
+ // anyway, so the cached instance in *ref is replaced. A plain early return
+ // would also have to set the caller's retcode - confirm intended behavior.
+ if (mgr->find_sub_instance(owner, sub_name, ref)) {
+ /* found it! nothing to execute */
+ ldout(sync_env->cct, 20) << __func__ << "(): found sub instance" << dendl;
+ }
+ auto& gs = mgr->get_get_subs(owner, sub_name);
+ if (!gs) {
+ ldout(sync_env->cct, 20) << __func__ << "(): first get subs" << dendl;
+ gs = new GetSubCR(sync_env, mgr, owner, sub_name, ref);
+ }
+ ldout(sync_env->cct, 20) << __func__ << "(): executing get subs" << dendl;
+ return gs->execute(caller, ref);
+ }
+
+ friend class GetSubCR;
+};
+
+// Attach the subscription manager to the environment and let the
+// configuration finish its per-instance initialization.
+void PSEnv::init_instance(const RGWRealm& realm, uint64_t instance_id, PSManagerRef& mgr) {
+ manager = mgr;
+ conf->init_instance(realm, instance_id);
+}
+
+// Coroutine run when sync starts for the pubsub module: creates the pubsub
+// user named in the configuration, then reads that user's info into
+// env->data_user_info.
+class RGWPSInitEnvCBCR : public RGWCoroutine {
+  RGWDataSyncEnv *sync_env;
+  PSEnvRef env;
+  PSConfigRef& conf;  // refers to env->conf; env is held by value above
+
+  rgw_user_create_params create_user;
+  rgw_get_user_info_params get_user_info;
+public:
+  RGWPSInitEnvCBCR(RGWDataSyncEnv *_sync_env,
+                   PSEnvRef& _env) : RGWCoroutine(_sync_env->cct),
+                                     sync_env(_sync_env),
+                                     env(_env), conf(env->conf) {}
+  int operate() override {
+    reenter(this) {
+      ldout(sync_env->cct, 0) << ": init pubsub config zone=" << sync_env->source_zone << dendl;
+
+      /* step 1: create the pubsub user (no access key is generated) */
+      create_user.user = conf->user;
+      create_user.max_buckets = 0; /* unlimited */
+      create_user.display_name = "pubsub";
+      create_user.generate_key = false;
+      yield call(new RGWUserCreateCR(sync_env->async_rados, sync_env->store, create_user));
+      if (retcode < 0) {
+        ldout(sync_env->store->ctx(), 0) << "ERROR: failed to create rgw user: ret=" << retcode << dendl;
+        return set_cr_error(retcode);
+      }
+
+      /* step 2: read the user info back into the shared environment */
+      get_user_info.user = conf->user;
+      yield call(new RGWGetUserInfoCR(sync_env->async_rados, sync_env->store, get_user_info, env->data_user_info));
+      if (retcode < 0) {
+        // this is the info lookup failing, not the creation above
+        ldout(sync_env->store->ctx(), 0) << "ERROR: failed to get rgw user info: ret=" << retcode << dendl;
+        return set_cr_error(retcode);
+      }
+
+      ldout(sync_env->cct, 20) << "pubsub: get user info cr returned: " << json_str("obj", *env->data_user_info, true) << dendl;
+
+      return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+// A topic filter matches an object event when the event type passes the
+// filter's event list and the object key passes its key filter.
+bool match(const rgw_pubsub_topic_filter& filter, const std::string& key_name, rgw::notify::EventType event_type) {
+  // the event type is tested first; the key filter is consulted only if it passed
+  return match(filter.events, event_type) &&
+         match(filter.s3_filter.key_filter, key_name);
+}
+
+// Collects the topics that apply to an event on bucket/key: the bucket's
+// notification topics whose filter matches the key and event type, followed by
+// whatever the module configuration's get_topics() adds. The result is written
+// to *topics, which the constructor resets to a new empty vector.
+class RGWPSFindBucketTopicsCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+ PSEnvRef env;
+ rgw_user owner;
+ rgw_bucket bucket;
+ rgw_obj_key key;
+ rgw::notify::EventType event_type;
+
+ RGWUserPubSub ups;
+
+ rgw_raw_obj bucket_obj;
+ rgw_raw_obj user_obj;
+ rgw_pubsub_bucket_topics bucket_topics;
+ rgw_pubsub_user_topics user_topics;
+ // output location owned by the caller
+ TopicsRef *topics;
+public:
+ RGWPSFindBucketTopicsCR(RGWDataSyncEnv *_sync_env,
+ PSEnvRef& _env,
+ const rgw_user& _owner,
+ const rgw_bucket& _bucket,
+ const rgw_obj_key& _key,
+ rgw::notify::EventType _event_type,
+ TopicsRef *_topics) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ env(_env),
+ owner(_owner),
+ bucket(_bucket),
+ key(_key),
+ event_type(_event_type),
+ ups(_sync_env->store, owner),
+ topics(_topics) {
+ *topics = std::make_shared<vector<PSTopicConfigRef> >();
+ }
+ int operate() override {
+ reenter(this) {
+ ups.get_bucket_meta_obj(bucket, &bucket_obj);
+ ups.get_user_meta_obj(&user_obj);
+
+ // read the bucket's topics; a missing object is not treated as an error
+ using ReadInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_bucket_topics>;
+ yield {
+ bool empty_on_enoent = true;
+ call(new ReadInfoCR(sync_env->async_rados, sync_env->store->svc.sysobj,
+ bucket_obj,
+ &bucket_topics, empty_on_enoent));
+ }
+ if (retcode < 0 && retcode != -ENOENT) {
+ return set_cr_error(retcode);
+ }
+
+ ldout(sync_env->cct, 20) << "RGWPSFindBucketTopicsCR(): found " << bucket_topics.topics.size() << " topics for bucket " << bucket << dendl;
+
+ // the user's topics are only needed (for the subscription lists) when the
+ // bucket has topics at all
+ if (!bucket_topics.topics.empty()) {
+ using ReadUserTopicsInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_user_topics>;
+ yield {
+ bool empty_on_enoent = true;
+ call(new ReadUserTopicsInfoCR(sync_env->async_rados, sync_env->store->svc.sysobj,
+ user_obj,
+ &user_topics, empty_on_enoent));
+ }
+ if (retcode < 0 && retcode != -ENOENT) {
+ return set_cr_error(retcode);
+ }
+ }
+
+ for (auto& titer : bucket_topics.topics) {
+ auto& topic_filter = titer.second;
+ auto& info = topic_filter.topic;
+ if (!match(topic_filter, key.name, event_type)) {
+ continue;
+ }
+ std::shared_ptr<PSTopicConfig> tc = std::make_shared<PSTopicConfig>();
+ tc->name = info.name;
+ // operator[] on user_topics.topics: a topic unknown to the user yields a
+ // default-constructed entry, and its subs are copied from that
+ tc->subs = user_topics.topics[info.name].subs;
+ tc->opaque_data = info.opaque_data;
+ (*topics)->push_back(tc);
+ }
+
+ // let the module configuration contribute its own topics for this object
+ env->conf->get_topics(sync_env->cct, bucket, key, topics);
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// Delivers one object event to every subscription of every topic it matched.
+// For each subscription the configuration is looked up first for the bucket
+// owner and then without an owner; for every lookup that succeeds the event
+// (or the S3 record, when the subscription has an s3_id) is stored and, if the
+// subscription has a push endpoint, pushed.
+class RGWPSHandleObjEventCR : public RGWCoroutine {
+ RGWDataSyncEnv* const sync_env;
+ const PSEnvRef env;
+ // stored by reference: the caller's rgw_user has to outlive this coroutine
+ const rgw_user& owner;
+ const EventRef<rgw_pubsub_event> event;
+ const EventRef<rgw_pubsub_s3_record> record;
+ const TopicsRef topics;
+ // lookup order for subscription configuration: bucket owner, then no owner
+ const std::array<rgw_user, 2> owners;
+ bool has_subscriptions;
+ bool event_handled;
+ // not set by the constructor; assigned at the start of every subscription
+ // iteration before it is read
+ bool sub_conf_found;
+ PSSubscriptionRef sub;
+ // loop iterators are members because the loops span yields
+ std::array<rgw_user, 2>::const_iterator oiter;
+ std::vector<PSTopicConfigRef>::const_iterator titer;
+ std::set<std::string>::const_iterator siter;
+ // not set by the constructor; written on every failed lookup and read only
+ // when all lookups for a subscription failed
+ int last_sub_conf_error;
+
+public:
+ RGWPSHandleObjEventCR(RGWDataSyncEnv* const _sync_env,
+ const PSEnvRef _env,
+ const rgw_user& _owner,
+ const EventRef<rgw_pubsub_event>& _event,
+ const EventRef<rgw_pubsub_s3_record>& _record,
+ const TopicsRef& _topics) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ env(_env),
+ owner(_owner),
+ event(_event),
+ record(_record),
+ topics(_topics),
+ owners({owner, rgw_user{}}),
+ has_subscriptions(false),
+ event_handled(false) {}
+
+ int operate() override {
+ reenter(this) {
+ ldout(sync_env->cct, 20) << ": handle event: obj: z=" << sync_env->source_zone
+ << " event=" << json_str("event", *event, false)
+ << " owner=" << owner << dendl;
+
+ ldout(sync_env->cct, 20) << "pubsub: " << topics->size() << " topics found for path" << dendl;
+
+ // outside caller should check that
+ ceph_assert(!topics->empty());
+
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_event_triggered);
+
+ // loop over all topics related to the bucket/object
+ for (titer = topics->begin(); titer != topics->end(); ++titer) {
+ ldout(sync_env->cct, 20) << ": notification for " << event->source << ": topic=" <<
+ (*titer)->name << ", has " << (*titer)->subs.size() << " subscriptions" << dendl;
+ // loop over all subscriptions of the topic
+ for (siter = (*titer)->subs.begin(); siter != (*titer)->subs.end(); ++siter) {
+ ldout(sync_env->cct, 20) << ": subscription: " << *siter << dendl;
+ has_subscriptions = true;
+ sub_conf_found = false;
+ // try to read subscription configuration from global/user conf
+ // configuration is considered missing only if does not exist in either
+ for (oiter = owners.begin(); oiter != owners.end(); ++oiter) {
+ yield PSManager::call_get_subscription_cr(sync_env, env->manager, this, *oiter, *siter, &sub);
+ if (retcode < 0) {
+ if (sub_conf_found) {
+ // not a real issue, sub conf already found
+ retcode = 0;
+ }
+ last_sub_conf_error = retcode;
+ continue;
+ }
+ sub_conf_found = true;
+ if (sub->sub_conf->s3_id.empty()) {
+ // subscription was not made by S3 compatible API
+ ldout(sync_env->cct, 20) << "storing event for subscription=" << *siter << " owner=" << *oiter << " ret=" << retcode << dendl;
+ yield call(PSSubscription::store_event_cr(sync_env, sub, event));
+ // a store failure is counted and logged but does not stop the push
+ if (retcode < 0) {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_store_fail);
+ ldout(sync_env->cct, 1) << "ERROR: failed to store event for subscription=" << *siter << " ret=" << retcode << dendl;
+ } else {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_store_ok);
+ event_handled = true;
+ }
+ if (sub->sub_conf->push_endpoint) {
+ ldout(sync_env->cct, 20) << "push event for subscription=" << *siter << " owner=" << *oiter << " ret=" << retcode << dendl;
+ yield call(PSSubscription::push_event_cr(sync_env, sub, event));
+ if (retcode < 0) {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed);
+ ldout(sync_env->cct, 1) << "ERROR: failed to push event for subscription=" << *siter << " ret=" << retcode << dendl;
+ } else {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok);
+ event_handled = true;
+ }
+ }
+ } else {
+ // subscription was made by S3 compatible API
+ ldout(sync_env->cct, 20) << "storing record for subscription=" << *siter << " owner=" << *oiter << " ret=" << retcode << dendl;
+ // the shared record is updated in place for the current subscription/topic
+ record->configurationId = sub->sub_conf->s3_id;
+ record->opaque_data = (*titer)->opaque_data;
+ yield call(PSSubscription::store_event_cr(sync_env, sub, record));
+ if (retcode < 0) {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_store_fail);
+ ldout(sync_env->cct, 1) << "ERROR: failed to store record for subscription=" << *siter << " ret=" << retcode << dendl;
+ } else {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_store_ok);
+ event_handled = true;
+ }
+ if (sub->sub_conf->push_endpoint) {
+ ldout(sync_env->cct, 20) << "push record for subscription=" << *siter << " owner=" << *oiter << " ret=" << retcode << dendl;
+ yield call(PSSubscription::push_event_cr(sync_env, sub, record));
+ if (retcode < 0) {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed);
+ ldout(sync_env->cct, 1) << "ERROR: failed to push record for subscription=" << *siter << " ret=" << retcode << dendl;
+ } else {
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok);
+ event_handled = true;
+ }
+ }
+ }
+ }
+ if (!sub_conf_found) {
+ // could not find conf for subscription at user or global levels
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_missing_conf);
+ ldout(sync_env->cct, 1) << "ERROR: failed to find subscription config for subscription=" << *siter
+ << " ret=" << last_sub_conf_error << dendl;
+ if (retcode == -ENOENT) {
+ // missing subscription info should be reflected back as invalid argument
+ // and not as missing object
+ retcode = -EINVAL;
+ }
+ }
+ }
+ }
+ if (has_subscriptions && !event_handled) {
+ // event is considered "lost" if it has subscriptions on any of its topics
+ // but it was not stored in, or pushed to, any of them
+ if (perfcounter) perfcounter->inc(l_rgw_pubsub_event_lost);
+ }
+ // retcode at this point is whatever the last completed step above left in it
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// coroutine invoked on remote object creation
+//
+// Callback of the remote-object stat: builds both the ceph-style event and the
+// S3-style record for an ObjectCreated notification out of the stat results
+// (bucket_info, key, mtime, attrs are inherited from RGWStatRemoteObjCBCR) and
+// hands them to RGWPSHandleObjEventCR for delivery to the matched topics.
+class RGWPSHandleRemoteObjCBCR : public RGWStatRemoteObjCBCR {
+  RGWDataSyncEnv *sync_env;
+  PSEnvRef env;
+  std::optional<uint64_t> versioned_epoch;
+  EventRef<rgw_pubsub_event> event;
+  EventRef<rgw_pubsub_s3_record> record;
+  TopicsRef topics;
+public:
+  RGWPSHandleRemoteObjCBCR(RGWDataSyncEnv *_sync_env,
+                           RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+                           PSEnvRef _env, std::optional<uint64_t> _versioned_epoch,
+                           TopicsRef& _topics) : RGWStatRemoteObjCBCR(_sync_env, _bucket_info, _key),
+                                                 sync_env(_sync_env),
+                                                 env(_env),
+                                                 versioned_epoch(_versioned_epoch),
+                                                 topics(_topics) {
+  }
+  int operate() override {
+    reenter(this) {
+      ldout(sync_env->cct, 20) << ": stat of remote obj: z=" << sync_env->source_zone
+                               << " b=" << bucket_info.bucket << " k=" << key << " size=" << size << " mtime=" << mtime
+                               << " attrs=" << attrs << dendl;
+      {
+        // Convert the inherited object attributes into the name/value string
+        // pairs the event builders expect, dropping the RGW attribute prefix
+        // from the names. The local must not be called `attrs`: that would
+        // shadow the inherited member and leave the list empty.
+        // NOTE(review): assumes the inherited attrs maps attribute names to
+        // bufferlists; to_str() copies the raw bytes, including a trailing NUL
+        // if the stored value has one - confirm against RGWStatRemoteObjCBCR.
+        std::vector<std::pair<std::string, std::string> > event_attrs;
+        for (const auto& attr : attrs) {
+          std::string k = attr.first;
+          if (boost::algorithm::starts_with(k, RGW_ATTR_PREFIX)) {
+            k = k.substr(sizeof(RGW_ATTR_PREFIX) - 1);
+          }
+          event_attrs.emplace_back(k, attr.second.to_str());
+        }
+        // at this point we don't know whether we need the ceph event or S3 record
+        // this is why both are created here, once we have information about the
+        // subscription, we will store/push only the relevant ones
+        make_event_ref(sync_env->cct,
+                       bucket_info.bucket, key,
+                       mtime, &event_attrs,
+                       rgw::notify::ObjectCreated, &event);
+        make_s3_record_ref(sync_env->cct,
+                           bucket_info.bucket, bucket_info.owner, key,
+                           mtime, &event_attrs,
+                           rgw::notify::ObjectCreated, &record);
+      }
+
+      yield call(new RGWPSHandleObjEventCR(sync_env, env, bucket_info.owner, event, record, topics));
+      if (retcode < 0) {
+        return set_cr_error(retcode);
+      }
+      return set_cr_done();
+    }
+    return 0;
+  }
+};
+
+// Plugs RGWPSHandleRemoteObjCBCR in as the callback coroutine of
+// RGWCallStatRemoteObjCR, forwarding the pubsub environment, the versioned
+// epoch and the matched topics to it.
+class RGWPSHandleRemoteObjCR : public RGWCallStatRemoteObjCR {
+  PSEnvRef env;
+  std::optional<uint64_t> versioned_epoch;
+  TopicsRef topics;
+
+public:
+  RGWPSHandleRemoteObjCR(RGWDataSyncEnv *_sync_env,
+                         RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+                         PSEnvRef _env, std::optional<uint64_t> _versioned_epoch,
+                         TopicsRef& _topics)
+    : RGWCallStatRemoteObjCR(_sync_env, _bucket_info, _key),
+      env(_env),
+      versioned_epoch(_versioned_epoch),
+      topics(_topics)
+  {}
+
+  ~RGWPSHandleRemoteObjCR() override = default;
+
+  // the returned coroutine is allocated with new and handed to the caller
+  RGWStatRemoteObjCBCR *allocate_callback() override {
+    auto *cb = new RGWPSHandleRemoteObjCBCR(sync_env, bucket_info, key, env, versioned_epoch, topics);
+    return cb;
+  }
+};
+
+// Entry coroutine for a synced object creation: finds the topics that match
+// the object for an ObjectCreated event and, if there are any, runs
+// RGWPSHandleRemoteObjCR to generate and deliver the notifications. Completes
+// successfully without doing anything else when no topic matches.
+class RGWPSHandleObjCreateCR : public RGWCoroutine {
+
+ RGWDataSyncEnv *sync_env;
+ // bucket info and key are copied, so they stay valid across yields
+ RGWBucketInfo bucket_info;
+ rgw_obj_key key;
+ PSEnvRef env;
+ std::optional<uint64_t> versioned_epoch;
+ // filled in by RGWPSFindBucketTopicsCR
+ TopicsRef topics;
+public:
+ RGWPSHandleObjCreateCR(RGWDataSyncEnv *_sync_env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key,
+ PSEnvRef _env, std::optional<uint64_t> _versioned_epoch) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ bucket_info(_bucket_info),
+ key(_key),
+ env(_env),
+ versioned_epoch(_versioned_epoch) {
+ }
+
+ ~RGWPSHandleObjCreateCR() override {}
+
+ int operate() override {
+ reenter(this) {
+ yield call(new RGWPSFindBucketTopicsCR(sync_env, env, bucket_info.owner,
+ bucket_info.bucket, key,
+ rgw::notify::ObjectCreated,
+ &topics));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 1) << "ERROR: RGWPSFindBucketTopicsCR returned ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ if (topics->empty()) {
+ ldout(sync_env->cct, 20) << "no topics found for " << bucket_info.bucket << "/" << key << dendl;
+ return set_cr_done();
+ }
+ yield call(new RGWPSHandleRemoteObjCR(sync_env, bucket_info, key, env, versioned_epoch, topics));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+// coroutine invoked for object events that need no stat of the remote object;
+// in this file it is created for object removal and for delete marker creation
+//
+// Finds the topics matching bucket/key for the given event type, builds the
+// event and the S3 record without object attributes, and hands them to
+// RGWPSHandleObjEventCR. Completes successfully when no topic matches.
+class RGWPSGenericObjEventCBCR : public RGWCoroutine {
+ RGWDataSyncEnv *sync_env;
+ PSEnvRef env;
+ // owner, bucket and key are copied out of the caller's arguments
+ rgw_user owner;
+ rgw_bucket bucket;
+ rgw_obj_key key;
+ ceph::real_time mtime;
+ rgw::notify::EventType event_type;
+ EventRef<rgw_pubsub_event> event;
+ EventRef<rgw_pubsub_s3_record> record;
+ // filled in by RGWPSFindBucketTopicsCR
+ TopicsRef topics;
+public:
+ RGWPSGenericObjEventCBCR(RGWDataSyncEnv *_sync_env,
+ PSEnvRef _env,
+ RGWBucketInfo& _bucket_info, rgw_obj_key& _key, const ceph::real_time& _mtime,
+ rgw::notify::EventType _event_type) : RGWCoroutine(_sync_env->cct),
+ sync_env(_sync_env),
+ env(_env),
+ owner(_bucket_info.owner),
+ bucket(_bucket_info.bucket),
+ key(_key),
+ mtime(_mtime), event_type(_event_type) {}
+ int operate() override {
+ reenter(this) {
+ // the log text says "remove" for every event type handled here
+ ldout(sync_env->cct, 20) << ": remove remote obj: z=" << sync_env->source_zone
+ << " b=" << bucket << " k=" << key << " mtime=" << mtime << dendl;
+ yield call(new RGWPSFindBucketTopicsCR(sync_env, env, owner, bucket, key, event_type, &topics));
+ if (retcode < 0) {
+ ldout(sync_env->cct, 1) << "ERROR: RGWPSFindBucketTopicsCR returned ret=" << retcode << dendl;
+ return set_cr_error(retcode);
+ }
+ if (topics->empty()) {
+ ldout(sync_env->cct, 20) << "no topics found for " << bucket << "/" << key << dendl;
+ return set_cr_done();
+ }
+ // at this point we don't know whether we need the ceph event or S3 record
+ // this is why both are created here, once we have information about the
+ // subscription, we will store/push only the relevant ones
+ // (nullptr: no object attributes are passed for these events)
+ make_event_ref(sync_env->cct,
+ bucket, key,
+ mtime, nullptr,
+ event_type, &event);
+ make_s3_record_ref(sync_env->cct,
+ bucket, owner, key,
+ mtime, nullptr,
+ event_type, &record);
+ yield call(new RGWPSHandleObjEventCR(sync_env, env, owner, event, record, topics));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+
+};
+
+// Data sync module of the pubsub zone: instead of copying objects it turns
+// sync operations into notification coroutines.
+class RGWPSDataSyncModule : public RGWDataSyncModule {
+  PSEnvRef env;
+  PSConfigRef& conf;  // alias of env->conf
+
+public:
+  RGWPSDataSyncModule(CephContext *cct, const JSONFormattable& config)
+    : env(std::make_shared<PSEnv>()),
+      conf(env->conf)
+  {
+    env->init(cct, config);
+  }
+
+  ~RGWPSDataSyncModule() override = default;
+
+  // create the subscription manager and finish per-instance initialization
+  void init(RGWDataSyncEnv *sync_env, uint64_t instance_id) override {
+    auto mgr = PSManager::get_shared(sync_env, env);
+    const auto& realm = sync_env->store->svc.zone->get_realm();
+    env->init_instance(realm, instance_id, mgr);
+  }
+
+  // sync start: set up the pubsub user
+  RGWCoroutine *start_sync(RGWDataSyncEnv *sync_env) override {
+    ldout(sync_env->cct, 5) << conf->id << ": start" << dendl;
+    RGWCoroutine *cr = new RGWPSInitEnvCBCR(sync_env, env);
+    return cr;
+  }
+
+  // object created/updated on the source zone
+  RGWCoroutine *sync_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info,
+                            rgw_obj_key& key, std::optional<uint64_t> versioned_epoch,
+                            rgw_zone_set *zones_trace) override {
+    ldout(sync_env->cct, 10) << conf->id << ": sync_object: b=" << bucket_info.bucket
+                             << " k=" << key
+                             << " versioned_epoch=" << versioned_epoch.value_or(0) << dendl;
+    RGWCoroutine *cr = new RGWPSHandleObjCreateCR(sync_env, bucket_info, key, env, versioned_epoch);
+    return cr;
+  }
+
+  // object removed on the source zone
+  RGWCoroutine *remove_object(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info,
+                              rgw_obj_key& key, real_time& mtime, bool versioned,
+                              uint64_t versioned_epoch, rgw_zone_set *zones_trace) override {
+    ldout(sync_env->cct, 10) << conf->id << ": rm_object: b=" << bucket_info.bucket
+                             << " k=" << key << " mtime=" << mtime
+                             << " versioned=" << versioned
+                             << " versioned_epoch=" << versioned_epoch << dendl;
+    const auto event_type = rgw::notify::ObjectRemovedDelete;
+    return new RGWPSGenericObjEventCBCR(sync_env, env, bucket_info, key, mtime, event_type);
+  }
+
+  // delete marker created on the source zone
+  RGWCoroutine *create_delete_marker(RGWDataSyncEnv *sync_env, RGWBucketInfo& bucket_info,
+                                     rgw_obj_key& key, real_time& mtime,
+                                     rgw_bucket_entry_owner& owner, bool versioned,
+                                     uint64_t versioned_epoch, rgw_zone_set *zones_trace) override {
+    ldout(sync_env->cct, 10) << conf->id << ": create_delete_marker: b=" << bucket_info.bucket
+                             << " k=" << key << " mtime=" << mtime
+                             << " versioned=" << versioned
+                             << " versioned_epoch=" << versioned_epoch << dendl;
+    const auto event_type = rgw::notify::ObjectRemovedDeleteMarkerCreated;
+    return new RGWPSGenericObjEventCBCR(sync_env, env, bucket_info, key, mtime, event_type);
+  }
+
+  PSConfigRef& get_conf() { return conf; }
+};
+
+// Builds the data handler from the given configuration and derives the
+// effective configuration by serializing the handler's config to JSON and
+// parsing it back; if that parse fails, the configuration passed in is used
+// as is. Also initializes the AMQP/Kafka managers when built with them.
+RGWPSSyncModuleInstance::RGWPSSyncModuleInstance(CephContext *cct, const JSONFormattable& config)
+{
+  data_handler.reset(new RGWPSDataSyncModule(cct, config));
+
+  const std::string serialized_conf = json_str("conf", *data_handler->get_conf());
+  JSONParser parser;
+  if (parser.parse(serialized_conf.c_str(), serialized_conf.size())) {
+    effective_conf.decode_json(&parser);
+  } else {
+    ldout(cct, 1) << "ERROR: failed to parse sync module effective conf: " << serialized_conf << dendl;
+    effective_conf = config;
+  }
+
+  // endpoint manager failures are logged only; construction still completes
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+  if (!rgw::amqp::init(cct)) {
+    ldout(cct, 1) << "ERROR: failed to initialize AMQP manager in pubsub sync module" << dendl;
+  }
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+  if (!rgw::kafka::init(cct)) {
+    ldout(cct, 1) << "ERROR: failed to initialize Kafka manager in pubsub sync module" << dendl;
+  }
+#endif
+}
+
+// Shuts down the endpoint managers that the constructor initialized (only the
+// ones compiled in).
+RGWPSSyncModuleInstance::~RGWPSSyncModuleInstance() {
+#ifdef WITH_RADOSGW_AMQP_ENDPOINT
+ rgw::amqp::shutdown();
+#endif
+#ifdef WITH_RADOSGW_KAFKA_ENDPOINT
+ rgw::kafka::shutdown();
+#endif
+}
+
+// Non-owning pointer to the data handler; ownership stays with this instance.
+RGWDataSyncModule *RGWPSSyncModuleInstance::get_data_handler()
+{
+ return data_handler.get();
+}
+
+// For the S3 dialect a newly allocated pubsub REST manager is returned; every
+// other dialect keeps the manager that was passed in.
+RGWRESTMgr *RGWPSSyncModuleInstance::get_rest_filter(int dialect, RGWRESTMgr *orig) {
+  if (dialect == RGW_REST_S3) {
+    return new RGWRESTMgr_PubSub();
+  }
+  return orig;
+}
+
+// Whether sync starts with a full sync, as given by the start_with_full_sync
+// setting of the module configuration.
+bool RGWPSSyncModuleInstance::should_full_sync() const {
+ return data_handler->get_conf()->start_with_full_sync;
+}
+
+// Creates a pubsub sync module instance for the given configuration and
+// stores it in *instance. Always returns 0.
+int RGWPSSyncModule::create_instance(CephContext *cct, const JSONFormattable& config, RGWSyncModuleInstanceRef *instance) {
+ instance->reset(new RGWPSSyncModuleInstance(cct, config));
+ return 0;
+}
+
+
diff --git a/src/rgw/rgw_sync_module_pubsub.h b/src/rgw/rgw_sync_module_pubsub.h
new file mode 100644
index 00000000..68d39786
--- /dev/null
+++ b/src/rgw/rgw_sync_module_pubsub.h
@@ -0,0 +1,40 @@
+#ifndef CEPH_RGW_SYNC_MODULE_PUBSUB_H
+#define CEPH_RGW_SYNC_MODULE_PUBSUB_H
+
+#include "rgw_sync_module.h"
+
+// Sync module type for pubsub: reports no data export support, reports write
+// support, and creates RGWPSSyncModuleInstance objects.
+class RGWPSSyncModule : public RGWSyncModule {
+public:
+  RGWPSSyncModule() {}
+
+  bool supports_data_export() override { return false; }
+  bool supports_writes() override { return true; }
+
+  // implemented in rgw_sync_module_pubsub.cc
+  int create_instance(CephContext *cct,
+                      const JSONFormattable& config,
+                      RGWSyncModuleInstanceRef *instance) override;
+};
+
+class RGWPSDataSyncModule;
+class RGWRESTConn;
+
+// A configured pubsub sync module: owns the data handler and keeps the
+// effective configuration that was derived from it at construction.
+class RGWPSSyncModuleInstance : public RGWSyncModuleInstance {
+  std::unique_ptr<RGWPSDataSyncModule> data_handler;
+  JSONFormattable effective_conf;
+
+public:
+  RGWPSSyncModuleInstance(CephContext *cct, const JSONFormattable& config);
+  ~RGWPSSyncModuleInstance();
+
+  RGWDataSyncModule *get_data_handler() override;
+  RGWRESTMgr *get_rest_filter(int dialect, RGWRESTMgr *orig) override;
+
+  bool supports_user_writes() override { return true; }
+
+  const JSONFormattable& get_effective_conf() { return effective_conf; }
+
+  // start with full sync based on configuration
+  // default to incremental only
+  bool should_full_sync() const override;
+};
+
+#endif
diff --git a/src/rgw/rgw_sync_module_pubsub_rest.cc b/src/rgw/rgw_sync_module_pubsub_rest.cc
new file mode 100644
index 00000000..aec5a346
--- /dev/null
+++ b/src/rgw/rgw_sync_module_pubsub_rest.cc
@@ -0,0 +1,526 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include <algorithm>
+#include "rgw_rest_pubsub_common.h"
+#include "rgw_rest_pubsub.h"
+#include "rgw_sync_module_pubsub.h"
+#include "rgw_pubsub_push.h"
+#include "rgw_sync_module_pubsub_rest.h"
+#include "rgw_pubsub.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_s3.h"
+#include "rgw_arn.h"
+#include "rgw_zone.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+// command: PUT /topics/<topic-name>[&push-endpoint=<endpoint>[&<arg1>=<value1>]]
+// Fills the topic parameters from the request and replies with the topic ARN.
+class RGWPSCreateTopic_ObjStore : public RGWPSCreateTopicOp {
+public:
+  // Reads the topic name, opaque data and push endpoint from the request.
+  // Returns -EINVAL when the endpoint secret validation helper rejects the
+  // request, 0 otherwise.
+  int get_params() override {
+    const auto& args = s->info.args;
+
+    topic_name = s->object.name;
+    opaque_data = args.get("OpaqueData");
+    dest.push_endpoint = args.get("push-endpoint");
+
+    const bool endpoint_ok = validate_and_update_endpoint_secret(dest, s->cct, *(s->info.env));
+    if (!endpoint_ok) {
+      return -EINVAL;
+    }
+
+    dest.push_endpoint_args = args.get_str();
+    // dest object only stores endpoint info
+    // bucket to store events/records will be set only when subscription is created
+    dest.bucket_name = "";
+    dest.oid_prefix = "";
+    dest.arn_topic = topic_name;
+
+    // the topic ARN will be sent in the reply
+    topic_arn = rgw::ARN(rgw::Partition::aws, rgw::Service::sns,
+                         store->svc.zone->get_zonegroup().get_name(),
+                         s->user->user_id.tenant, topic_name).to_string();
+    return 0;
+  }
+
+  // Sends the JSON reply: headers always, and the "result" object holding the
+  // ARN unless the operation failed with a negative op_ret.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    if (op_ret >= 0) {
+      {
+        // scoped so the section is closed before the formatter is flushed
+        Formatter::ObjectSection section(*s->formatter, "result");
+        encode_json("arn", topic_arn, s->formatter);
+      }
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+};
+
+// command: GET /topics
+// Replies with the list result encoded as JSON under "result".
+class RGWPSListTopics_ObjStore : public RGWPSListTopicsOp {
+public:
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    // no body on failure
+    if (op_ret >= 0) {
+      encode_json("result", result, s->formatter);
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+};
+
+// command: GET /topics/<topic-name>
+// Takes the topic name from the request object and replies with the lookup
+// result encoded as JSON under "result".
+class RGWPSGetTopic_ObjStore : public RGWPSGetTopicOp {
+public:
+  // The topic name is the object part of the request; always returns 0.
+  int get_params() override {
+    topic_name = s->object.name;
+    return 0;
+  }
+
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    // no body on failure
+    if (op_ret >= 0) {
+      encode_json("result", result, s->formatter);
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+};
+
+// command: DELETE /topics/<topic-name>
+class RGWPSDeleteTopic_ObjStore : public RGWPSDeleteTopicOp {
+public:
+  // The topic to delete is named by the object part of the request;
+  // always returns 0.
+  int get_params() override
+  {
+    topic_name = s->object.name;
+    return 0;
+  }
+};
+
+// ceph specific topics handler factory
+// Maps GET/PUT/DELETE requests under /topics to the matching operation.
+class RGWHandler_REST_PSTopic : public RGWHandler_REST_S3 {
+protected:
+  // permission hooks are no-ops for this handler
+  int init_permissions(RGWOp* op) override { return 0; }
+  int read_permissions(RGWOp* op) override { return 0; }
+
+  bool supports_quota() override { return false; }
+
+  // GET /topics lists all topics, GET /topics/<name> fetches a single one
+  RGWOp *op_get() override {
+    if (s->init_state.url_bucket.empty()) {
+      return nullptr;
+    }
+    if (!s->object.empty()) {
+      return new RGWPSGetTopic_ObjStore();
+    }
+    return new RGWPSListTopics_ObjStore();
+  }
+
+  // PUT requires a topic name
+  RGWOp *op_put() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    return new RGWPSCreateTopic_ObjStore();
+  }
+
+  // DELETE requires a topic name
+  RGWOp *op_delete() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    return new RGWPSDeleteTopic_ObjStore();
+  }
+
+public:
+  explicit RGWHandler_REST_PSTopic(const rgw::auth::StrategyRegistry& auth_registry)
+    : RGWHandler_REST_S3(auth_registry) {}
+  virtual ~RGWHandler_REST_PSTopic() = default;
+};
+
+// command: PUT /subscriptions/<sub-name>?topic=<topic-name>[&push-endpoint=<endpoint>[&<arg1>=<value1>]]...
+class RGWPSCreateSub_ObjStore : public RGWPSCreateSubOp {
+public:
+  // Reads the subscription name, the mandatory 'topic' parameter and the push
+  // endpoint, then derives the destination bucket name and oid prefix from
+  // the sync module's effective configuration.
+  // Returns -EINVAL when 'topic' is missing or the endpoint secret validation
+  // helper rejects the request, 0 otherwise.
+  int get_params() override {
+    const auto& args = s->info.args;
+    sub_name = s->object.name;
+
+    bool has_topic;
+    topic_name = args.get("topic", &has_topic);
+    if (!has_topic) {
+      ldout(s->cct, 1) << "missing required param 'topic'" << dendl;
+      return -EINVAL;
+    }
+
+    // NOTE(review): the sync module is assumed to be a pubsub instance here;
+    // the cast is unchecked
+    const auto psmodule = static_cast<RGWPSSyncModuleInstance*>(store->get_sync_module().get());
+    const auto& conf = psmodule->get_effective_conf();
+
+    dest.push_endpoint = args.get("push-endpoint");
+    const bool endpoint_ok = validate_and_update_endpoint_secret(dest, s->cct, *(s->info.env));
+    if (!endpoint_ok) {
+      return -EINVAL;
+    }
+    dest.push_endpoint_args = args.get_str();
+
+    // <data_bucket_prefix><owner id>-<topic name>
+    dest.bucket_name = string(conf["data_bucket_prefix"]) + s->owner.get_id().to_str() + "-" + topic_name;
+    // <data_oid_prefix><subscription name>/
+    dest.oid_prefix = string(conf["data_oid_prefix"]) + sub_name + "/";
+    dest.arn_topic = topic_name;
+
+    return 0;
+  }
+};
+
+// command: GET /subscriptions/<sub-name>
+// Takes the subscription name from the request object and replies with the
+// lookup result encoded as JSON under "result".
+class RGWPSGetSub_ObjStore : public RGWPSGetSubOp {
+public:
+  // The subscription name is the object part of the request; always returns 0.
+  int get_params() override {
+    sub_name = s->object.name;
+    return 0;
+  }
+
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    // no body on failure
+    if (op_ret >= 0) {
+      encode_json("result", result, s->formatter);
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+};
+
+// command: DELETE /subscriptions/<sub-name>
+class RGWPSDeleteSub_ObjStore : public RGWPSDeleteSubOp {
+public:
+  // Reads the subscription name from the request object and the 'topic'
+  // query parameter; a missing 'topic' is not treated as an error here.
+  // Always returns 0.
+  int get_params() override
+  {
+    topic_name = s->info.args.get("topic");
+    sub_name = s->object.name;
+    return 0;
+  }
+};
+
+// command: POST /subscriptions/<sub-name>?ack&event-id=<event-id>
+class RGWPSAckSubEvent_ObjStore : public RGWPSAckSubEventOp {
+public:
+  explicit RGWPSAckSubEvent_ObjStore() {}
+
+  // Reads the subscription name and the mandatory 'event-id' parameter.
+  // Returns -EINVAL when 'event-id' is missing, 0 otherwise.
+  int get_params() override {
+    sub_name = s->object.name;
+
+    bool has_event_id;
+    event_id = s->info.args.get("event-id", &has_event_id);
+    if (has_event_id) {
+      return 0;
+    }
+
+    ldout(s->cct, 1) << "missing required param 'event-id'" << dendl;
+    return -EINVAL;
+  }
+};
+
+// command: GET /subscriptions/<sub-name>?events[&max-entries=<max-entries>][&marker=<marker>]
+class RGWPSPullSubEvents_ObjStore : public RGWPSPullSubEventsOp {
+public:
+  // Reads the subscription name, the optional 'marker' and the optional
+  // 'max-entries' (defaulting to RGWUserPubSub::Sub::DEFAULT_MAX_EVENTS).
+  // Returns -EINVAL when 'max-entries' cannot be parsed, 0 otherwise.
+  int get_params() override {
+    const auto& args = s->info.args;
+
+    sub_name = s->object.name;
+    marker = args.get("marker");
+
+    const int parse_ret = args.get_int("max-entries", &max_entries,
+                                       RGWUserPubSub::Sub::DEFAULT_MAX_EVENTS);
+    if (parse_ret >= 0) {
+      return 0;
+    }
+
+    ldout(s->cct, 1) << "failed to parse 'max-entries' param" << dendl;
+    return -EINVAL;
+  }
+
+  // Sends the JSON reply; the subscription object is encoded under "result"
+  // unless the operation failed with a negative op_ret.
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    if (op_ret >= 0) {
+      encode_json("result", *sub, s->formatter);
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+};
+
+// subscriptions handler factory
+// Maps GET/PUT/DELETE/POST requests under /subscriptions to the matching
+// operation. Every operation needs a subscription name, so each factory
+// returns nullptr when the object part of the request is empty.
+class RGWHandler_REST_PSSub : public RGWHandler_REST_S3 {
+protected:
+  // permission hooks are no-ops for this handler
+  int init_permissions(RGWOp* op) override {
+    return 0;
+  }
+
+  int read_permissions(RGWOp* op) override {
+    return 0;
+  }
+  bool supports_quota() override {
+    return false;
+  }
+  // GET /subscriptions/<sub-name>[?events]: pull events when 'events' is
+  // given, otherwise fetch the subscription itself
+  RGWOp *op_get() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    if (s->info.args.exists("events")) {
+      return new RGWPSPullSubEvents_ObjStore();
+    }
+    return new RGWPSGetSub_ObjStore();
+  }
+  // PUT /subscriptions/<sub-name>: create a subscription
+  RGWOp *op_put() override {
+    if (!s->object.empty()) {
+      return new RGWPSCreateSub_ObjStore();
+    }
+    return nullptr;
+  }
+  // DELETE /subscriptions/<sub-name>: remove a subscription
+  RGWOp *op_delete() override {
+    if (!s->object.empty()) {
+      return new RGWPSDeleteSub_ObjStore();
+    }
+    return nullptr;
+  }
+  // POST /subscriptions/<sub-name>?ack: acknowledge an event.
+  // Like the other verbs this needs a subscription name; without the check
+  // the ack operation would be created with an empty subscription name.
+  RGWOp *op_post() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    if (s->info.args.exists("ack")) {
+      return new RGWPSAckSubEvent_ObjStore();
+    }
+    return nullptr;
+  }
+public:
+  explicit RGWHandler_REST_PSSub(const rgw::auth::StrategyRegistry& auth_registry) : RGWHandler_REST_S3(auth_registry) {}
+  virtual ~RGWHandler_REST_PSSub() = default;
+};
+
+namespace {
+// Extracts the bucket name from the object part of a ceph specific
+// notification command. The request has the form
+//   /notifications/bucket/<bucket-name>
+// so 'path' is expected to look like "bucket/<bucket-name>".
+// On success everything after the first '/' is stored in 'bucket_name' and 0
+// is returned. Returns -EINVAL, leaving 'bucket_name' untouched, when 'path'
+// is empty, contains no '/', does not start with "bucket/", or has nothing
+// after the '/'.
+int notif_bucket_path(const std::string& path, std::string& bucket_name) {
+  const size_t pos = path.find('/');
+  if (pos == std::string::npos) {
+    // no separator at all; this also covers the empty path
+    return -EINVAL;
+  }
+  if (pos + 1 >= path.size()) {
+    // nothing follows the separator, so no bucket name was given
+    return -EINVAL;
+  }
+
+  // the part before the separator must be exactly "bucket"
+  if (path.compare(0, pos, "bucket") != 0) {
+    return -EINVAL;
+  }
+
+  bucket_name = path.substr(pos + 1);
+  return 0;
+}
+}
+
+// command (ceph specific): PUT /notifications/bucket/<bucket name>?topic=<topic name>[&events=<event list>]
+class RGWPSCreateNotif_ObjStore : public RGWPSCreateNotifOp {
+private:
+  std::string topic_name;
+  rgw::notify::EventTypeList events;
+
+  // Reads the mandatory 'topic' parameter, the optional 'events' list and the
+  // bucket name from the request path.
+  // Returns -EINVAL when 'topic' is missing or the list holds an unknown event
+  // type; otherwise returns whatever notif_bucket_path() reports.
+  int get_params() override {
+    const auto& args = s->info.args;
+
+    bool has_topic = false;
+    topic_name = args.get("topic", &has_topic);
+    if (!has_topic) {
+      ldout(s->cct, 1) << "missing required param 'topic'" << dendl;
+      return -EINVAL;
+    }
+
+    bool has_events = false;
+    std::string events_str = args.get("events", &has_events);
+    if (!has_events) {
+      // if no events are provided, we notify on all of them
+      events_str = "OBJECT_CREATE,OBJECT_DELETE,DELETE_MARKER_CREATE";
+    }
+
+    rgw::notify::from_string_list(events_str, events);
+    const auto unknown = std::find(events.begin(), events.end(), rgw::notify::UnknownEvent);
+    if (unknown != events.end()) {
+      ldout(s->cct, 1) << "invalid event type in list: " << events_str << dendl;
+      return -EINVAL;
+    }
+
+    return notif_bucket_path(s->object.name, bucket_name);
+  }
+
+public:
+  const char* name() const override { return "pubsub_notification_create"; }
+  void execute() override;
+};
+
+// Creates the notification for 'topic_name' on the request's bucket, acting
+// as the request owner. The outcome is left in op_ret.
+void RGWPSCreateNotif_ObjStore::execute()
+{
+  ups.emplace(store, s->owner.get_id());
+  auto bucket_ps = ups->get_bucket(bucket_info.bucket);
+
+  op_ret = bucket_ps->create_notification(topic_name, events);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully created notification for topic '" << topic_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to create notification for topic '" << topic_name << "', ret=" << op_ret << dendl;
+}
+
+// command: DELETE /notifications/bucket/<bucket>?topic=<topic-name>
+class RGWPSDeleteNotif_ObjStore : public RGWPSDeleteNotifOp {
+private:
+  std::string topic_name;
+
+  // Reads the mandatory 'topic' parameter and the bucket name from the
+  // request path. Returns -EINVAL when 'topic' is missing; otherwise returns
+  // whatever notif_bucket_path() reports.
+  int get_params() override {
+    bool has_topic = false;
+    topic_name = s->info.args.get("topic", &has_topic);
+    if (has_topic) {
+      return notif_bucket_path(s->object.name, bucket_name);
+    }
+
+    ldout(s->cct, 1) << "missing required param 'topic'" << dendl;
+    return -EINVAL;
+  }
+
+public:
+  void execute() override;
+  const char* name() const override { return "pubsub_notification_delete"; }
+};
+
+// Parses the request parameters, then removes the notification for
+// 'topic_name' from the request's bucket, acting as the request owner.
+// The outcome is left in op_ret.
+void RGWPSDeleteNotif_ObjStore::execute()
+{
+  op_ret = get_params();
+  if (op_ret < 0) {
+    return;
+  }
+
+  ups.emplace(store, s->owner.get_id());
+  auto bucket_ps = ups->get_bucket(bucket_info.bucket);
+
+  op_ret = bucket_ps->remove_notification(topic_name);
+  if (op_ret >= 0) {
+    ldout(s->cct, 20) << "successfully removed notification from topic '" << topic_name << "'" << dendl;
+    return;
+  }
+  ldout(s->cct, 1) << "failed to remove notification from topic '" << topic_name << "', ret=" << op_ret << dendl;
+}
+
+// command: GET /notifications/bucket/<bucket>
+// Replies with the bucket's topics encoded as JSON under "result".
+class RGWPSListNotifs_ObjStore : public RGWPSListNotifsOp {
+private:
+  rgw_pubsub_bucket_topics result;
+
+  // Only the bucket name, taken from the request path, is needed.
+  int get_params() override {
+    return notif_bucket_path(s->object.name, bucket_name);
+  }
+
+public:
+  void execute() override;
+
+  void send_response() override {
+    if (op_ret != 0) {
+      set_req_state_err(s, op_ret);
+    }
+    dump_errno(s);
+    end_header(s, this, "application/json");
+
+    // no body on failure
+    if (op_ret >= 0) {
+      encode_json("result", result, s->formatter);
+      rgw_flush_formatter_and_reset(s, s->formatter);
+    }
+  }
+
+  const char* name() const override { return "pubsub_notifications_list"; }
+};
+
+// Fetches the topics of the request's bucket into 'result', acting as the
+// request owner. The outcome is left in op_ret.
+void RGWPSListNotifs_ObjStore::execute()
+{
+  ups.emplace(store, s->owner.get_id());
+  auto bucket_ps = ups->get_bucket(bucket_info.bucket);
+
+  op_ret = bucket_ps->get_topics(&result);
+  if (op_ret >= 0) {
+    return;
+  }
+  ldout(s->cct, 1) << "failed to get topics, ret=" << op_ret << dendl;
+}
+
+// ceph specific notification handler factory
+// Maps GET/PUT/DELETE requests under /notifications to the matching
+// operation; every verb requires a non-empty object part in the request.
+class RGWHandler_REST_PSNotifs : public RGWHandler_REST_S3 {
+protected:
+  // permission hooks are no-ops for this handler
+  int init_permissions(RGWOp* op) override { return 0; }
+  int read_permissions(RGWOp* op) override { return 0; }
+
+  bool supports_quota() override { return false; }
+
+  RGWOp *op_get() override {
+    if (!s->object.empty()) {
+      return new RGWPSListNotifs_ObjStore();
+    }
+    return nullptr;
+  }
+
+  RGWOp *op_put() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    return new RGWPSCreateNotif_ObjStore();
+  }
+
+  RGWOp *op_delete() override {
+    if (s->object.empty()) {
+      return nullptr;
+    }
+    return new RGWPSDeleteNotif_ObjStore();
+  }
+
+public:
+  explicit RGWHandler_REST_PSNotifs(const rgw::auth::StrategyRegistry& auth_registry)
+    : RGWHandler_REST_S3(auth_registry) {}
+  virtual ~RGWHandler_REST_PSNotifs() = default;
+};
+
+// factory for ceph specific PubSub REST handlers
+// Returns a newly allocated handler chosen by the first path element of the
+// request, or nullptr when the request cannot be initialized or matches no
+// pubsub API.
+RGWHandler_REST* RGWRESTMgr_PubSub::get_handler(struct req_state* const s,
+                                                const rgw::auth::StrategyRegistry& auth_registry,
+                                                const std::string& frontend_prefix)
+{
+  const int init_ret = RGWHandler_REST_S3::init_from_header(s, RGW_FORMAT_JSON, true);
+  if (init_ret < 0) {
+    return nullptr;
+  }
+
+  RGWHandler_REST* handler = nullptr;
+  const auto& url_bucket = s->init_state.url_bucket;
+
+  // ceph specific PubSub API: topics/subscriptions/notifications are reserved bucket names
+  // this API is available only on RGW that belong to a pubsub zone
+  if (url_bucket == "topics") {
+    handler = new RGWHandler_REST_PSTopic(auth_registry);
+  } else if (url_bucket == "subscriptions") {
+    handler = new RGWHandler_REST_PSSub(auth_registry);
+  } else if (url_bucket == "notifications") {
+    handler = new RGWHandler_REST_PSNotifs(auth_registry);
+  } else if (s->info.args.exists("notification")) {
+    // requests carrying a 'notification' argument get an XML formatter and
+    // the S3 notification handler
+    if (RGWHandler_REST::allocate_formatter(s, RGW_FORMAT_XML, true) == 0) {
+      handler = new RGWHandler_REST_PSNotifs_S3(auth_registry);
+    }
+  }
+
+  ldout(s->cct, 20) << __func__ << " handler=" << (handler ? typeid(*handler).name() : "<null>") << dendl;
+
+  return handler;
+}
+
diff --git a/src/rgw/rgw_sync_module_pubsub_rest.h b/src/rgw/rgw_sync_module_pubsub_rest.h
new file mode 100644
index 00000000..92fd8fe7
--- /dev/null
+++ b/src/rgw/rgw_sync_module_pubsub_rest.h
@@ -0,0 +1,13 @@
+#ifndef CEPH_RGW_SYNC_MODULE_PUBSUB_REST_H
+#define CEPH_RGW_SYNC_MODULE_PUBSUB_REST_H
+
+#include "rgw_rest.h"
+
+// REST manager for the ceph specific PubSub API; get_handler() is
+// implemented in rgw_sync_module_pubsub_rest.cc.
+class RGWRESTMgr_PubSub : public RGWRESTMgr {
+public:
+  RGWHandler_REST* get_handler(struct req_state* s,
+                               const rgw::auth::StrategyRegistry& auth_registry,
+                               const std::string& frontend_prefix) override;
+};
+
+#endif
diff --git a/src/rgw/rgw_sync_trace.cc b/src/rgw/rgw_sync_trace.cc
new file mode 100644
index 00000000..34aa00e9
--- /dev/null
+++ b/src/rgw/rgw_sync_trace.cc
@@ -0,0 +1,288 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_TRACE_H
+#define CEPH_RGW_SYNC_TRACE_H
+
+#include <regex>
+
+#include "common/debug.h"
+#include "common/ceph_json.h"
+
+#include "rgw_sync_trace.h"
+#include "rgw_rados.h"
+
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw_sync
+
+// Builds a trace node and its log prefix. The prefix starts with the parent's
+// prefix (when there is a parent) followed by "<type>:" or "<type>[<id>]:";
+// nothing is appended when 'type' is empty.
+RGWSyncTraceNode::RGWSyncTraceNode(CephContext *_cct, uint64_t _handle,
+                                   const RGWSyncTraceNodeRef& _parent,
+                                   const string& _type, const string& _id)
+  : cct(_cct),
+    parent(_parent),
+    type(_type),
+    id(_id),
+    handle(_handle),
+    history(cct->_conf->rgw_sync_trace_per_node_log_size)
+{
+  if (parent) {
+    prefix = parent->get_prefix();
+  }
+
+  if (type.empty()) {
+    return;
+  }
+
+  prefix += type;
+  if (!id.empty()) {
+    prefix += "[" + id + "]";
+  }
+  prefix += ":";
+}
+
+// Records 's' as the node's current status, appends it to the history and
+// writes it to the log at 'level'.
+void RGWSyncTraceNode::log(int level, const string& s)
+{
+  status = s;
+  history.push_back(status);
+
+  /* dump output on either rgw_sync, or rgw -- but only once */
+  const bool gather_on_sync = cct->_conf->subsys.should_gather(ceph_subsys_rgw_sync, level);
+  if (!gather_on_sync) {
+    lsubdout(cct, rgw,
+             ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl;
+    return;
+  }
+  lsubdout(cct, rgw_sync,
+           ceph::dout::need_dynamic(level)) << "RGW-SYNC:" << to_str() << dendl;
+}
+
+
+// Worker thread named "sync-trace" whose process() pushes the trace manager's
+// active names into the service map.
+class RGWSyncTraceServiceMapThread : public RGWRadosThread {
+  RGWRados *store;
+  RGWSyncTraceManager *manager;
+
+  // the configured update interval multiplied by 1000
+  uint64_t interval_msec() override {
+    const auto configured = cct->_conf->rgw_sync_trace_servicemap_update_interval;
+    return configured * 1000;
+  }
+
+public:
+  RGWSyncTraceServiceMapThread(RGWRados *_store, RGWSyncTraceManager *_manager)
+    : RGWRadosThread(_store, "sync-trace"),
+      store(_store),
+      manager(_manager) {}
+
+  int process() override;
+};
+
+// Publishes the manager's active names under "current_sync" in the service
+// map. A failed update is only logged; the function always returns 0.
+int RGWSyncTraceServiceMapThread::process()
+{
+  map<string, string> status;
+  status["current_sync"] = manager->get_active_names();
+
+  const int ret = store->update_service_map(std::move(status));
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldout(store->ctx(), 0) << "ERROR: update_service_map() returned ret=" << ret << dendl;
+  return 0;
+}
+
+// Allocates a handle, creates a node for it in 'nodes' under the exclusive
+// lock and returns a shared_ptr to that node with a custom deleter.
+RGWSyncTraceNodeRef RGWSyncTraceManager::add_node(const RGWSyncTraceNodeRef& parent,
+                                                  const std::string& type,
+                                                  const std::string& id)
+{
+  shunique_lock wl(lock, ceph::acquire_unique);
+
+  const auto handle = alloc_handle();
+  RGWSyncTraceNodeRef& slot = nodes[handle];
+  slot.reset(new RGWSyncTraceNode(cct, handle, parent, type, id));
+
+  // return a separate shared_ptr that calls finish_node() on the node instead
+  // of deleting it. the lambda capture holds a copy of the owning pointer
+  // stored in 'slot'
+  auto on_release = [slot, this] (RGWSyncTraceNode *node) { finish_node(node); };
+  return RGWSyncTraceNodeRef(slot.get(), on_release);
+}
+
+// Returns true when the regular expression 'search_term' matches the node's
+// prefix or current status, or - only when 'search_history' is set - any
+// entry of the node's history. An invalid expression is logged and treated
+// as no match.
+bool RGWSyncTraceNode::match(const string& search_term, bool search_history)
+{
+  try {
+    std::regex expr(search_term);
+    std::smatch m;
+
+    if (regex_search(prefix, m, expr)) {
+      return true;
+    }
+    if (regex_search(status, m,expr)) {
+      return true;
+    }
+    if (!search_history) {
+      return false;
+    }
+
+    // iterate by reference: copying every history string is wasted work
+    for (const auto& h : history) {
+      if (regex_search(h, m, expr)) {
+        return true;
+      }
+    }
+  } catch (const std::regex_error& e) {
+    ldout(cct, 5) << "NOTICE: sync trace: bad expression: bad regex search term" << dendl;
+  }
+
+  return false;
+}
+
+// Creates and starts the service map update thread.
+// NOTE(review): a second call would overwrite the pointer to the first
+// thread - presumably init() is called once; confirm against callers.
+void RGWSyncTraceManager::init(RGWRados *store)
+{
+  auto* const thread = new RGWSyncTraceServiceMapThread(store, this);
+  service_map_thread = thread;
+  service_map_thread->start();
+}
+
+// Unregisters the admin socket commands, stops and frees the service map
+// thread (when one was started) and drops all live nodes.
+RGWSyncTraceManager::~RGWSyncTraceManager()
+{
+  cct->get_admin_socket()->unregister_commands(this);
+
+  // service_map_thread stays nullptr unless init() was called, so guard the
+  // dereference
+  if (service_map_thread) {
+    service_map_thread->stop();
+    delete service_map_thread;
+    service_map_thread = nullptr;
+  }
+
+  nodes.clear();
+}
+
+// Registers the "sync trace ..." commands on the admin socket with this
+// object as the hook. Returns 0 on success, or the first negative result of
+// register_command(); commands registered before the failure stay registered.
+int RGWSyncTraceManager::hook_to_admin_command()
+{
+  AdminSocket *admin_socket = cct->get_admin_socket();
+
+  // each entry is { command, command descriptor, help text }
+  admin_commands = { { "sync trace show", "sync trace show name=search,type=CephString,req=false", "sync trace show [filter_str]: show current multisite tracing information" },
+                     { "sync trace history", "sync trace history name=search,type=CephString,req=false", "sync trace history [filter_str]: show history of multisite tracing information" },
+                     { "sync trace active", "sync trace active name=search,type=CephString,req=false", "show active multisite sync entities information" },
+                     { "sync trace active_short", "sync trace active_short name=search,type=CephString,req=false", "show active multisite sync entities entries" } };
+  // iterate by reference: each element is an array of three strings
+  for (const auto& cmd : admin_commands) {
+    int r = admin_socket->register_command(cmd[0], cmd[1], this,
+                                           cmd[2]);
+    if (r < 0) {
+      lderr(cct) << "ERROR: fail to register admin socket command (r=" << r << ")" << dendl;
+      return r;
+    }
+  }
+  return 0;
+}
+
+// Writes one node to 'f' as an "entry" object holding its status string and,
+// when 'show_history' is set, a "history" array with one "entry" per logged
+// line.
+static void dump_node(RGWSyncTraceNode *entry, bool show_history, JSONFormatter& f)
+{
+  f.open_object_section("entry");
+  ::encode_json("status", entry->to_str(), &f);
+  if (show_history) {
+    f.open_array_section("history");
+    // iterate by reference: no need to copy each history string
+    for (const auto& h : entry->get_history()) {
+      ::encode_json("entry", h, &f);
+    }
+    f.close_section();
+  }
+  f.close_section();
+}
+
+// Returns a JSON array (section name "result") with the non-empty resource
+// names of all nodes that have RGW_SNS_FLAG_ACTIVE set. Runs under the shared
+// lock.
+string RGWSyncTraceManager::get_active_names()
+{
+  shunique_lock rl(lock, ceph::acquire_shared);
+
+  stringstream ss;
+  JSONFormatter f;
+
+  f.open_array_section("result");
+  // iterate by reference: copying the pair would copy a shared_ptr per node
+  for (const auto& n : nodes) {
+    const auto& entry = n.second;
+
+    if (!entry->test_flags(RGW_SNS_FLAG_ACTIVE)) {
+      continue;
+    }
+    const string& name = entry->get_resource_name();
+    if (!name.empty()) {
+      ::encode_json("entry", name, &f);
+    }
+    f.flush(ss);
+  }
+  f.close_section();
+  f.flush(ss);
+
+  return ss.str();
+}
+
+// Admin socket hook for the "sync trace ..." commands. Writes a JSON object
+// "result" with two arrays to 'out': "running" (live nodes) and "complete"
+// (finished nodes).
+//   - "sync trace history" also dumps each node's history and lets the
+//     optional "search" regex match against it
+//   - "sync trace active" and "sync trace active_short" keep only nodes with
+//     RGW_SNS_FLAG_ACTIVE set
+//   - "sync trace active_short" prints only resource names for running nodes
+// Always returns true.
+bool RGWSyncTraceManager::call(std::string_view command, const cmdmap_t& cmdmap,
+                               std::string_view format, bufferlist& out) {
+
+  bool show_history = (command == "sync trace history");
+  bool show_short = (command == "sync trace active_short");
+  bool show_active = (command == "sync trace active") || show_short;
+
+  string search;
+
+  auto si = cmdmap.find("search");
+  if (si != cmdmap.end()) {
+    search = boost::get<string>(si->second);
+  }
+
+  shunique_lock rl(lock, ceph::acquire_shared);
+
+  stringstream ss;
+  JSONFormatter f(true);
+
+  f.open_object_section("result");
+  f.open_array_section("running");
+  // iterate by reference: copying the pair would copy a shared_ptr per node
+  for (const auto& n : nodes) {
+    const auto& entry = n.second;
+
+    if (!search.empty() && !entry->match(search, show_history)) {
+      continue;
+    }
+    if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) {
+      continue;
+    }
+    if (show_short) {
+      const string& name = entry->get_resource_name();
+      if (!name.empty()) {
+        ::encode_json("entry", name, &f);
+      }
+    } else {
+      dump_node(entry.get(), show_history, f);
+    }
+    f.flush(ss);
+  }
+  f.close_section();
+
+  f.open_array_section("complete");
+  for (auto& entry : complete_nodes) {
+    if (!search.empty() && !entry->match(search, show_history)) {
+      continue;
+    }
+    if (show_active && !entry->test_flags(RGW_SNS_FLAG_ACTIVE)) {
+      continue;
+    }
+    dump_node(entry.get(), show_history, f);
+    f.flush(ss);
+  }
+  f.close_section();
+
+  f.close_section();
+  f.flush(ss);
+  out.append(ss);
+
+  return true;
+}
+
+// Moves a node from the live map to the ring of completed nodes. Does nothing
+// for a null node or a node whose handle is no longer in the live map.
+void RGWSyncTraceManager::finish_node(RGWSyncTraceNode *node)
+{
+  if (!node) {
+    // nothing to do, and no reason to take the exclusive lock
+    return;
+  }
+
+  RGWSyncTraceNodeRef old_node;
+
+  {
+    shunique_lock wl(lock, ceph::acquire_unique);
+    auto iter = nodes.find(node->handle);
+    if (iter == nodes.end()) {
+      /* not found, already finished */
+      return;
+    }
+
+    if (complete_nodes.full()) {
+      /* take a reference to the entry that is going to be evicted,
+       * can't let it get evicted under lock held, otherwise
+       * it's a deadlock as it will call finish_node()
+       */
+      old_node = complete_nodes.front();
+    }
+
+    complete_nodes.push_back(iter->second);
+    nodes.erase(iter);
+  }
+  // old_node goes out of scope here, after the lock has been released
+}
+
+#endif
+
diff --git a/src/rgw/rgw_sync_trace.h b/src/rgw/rgw_sync_trace.h
new file mode 100644
index 00000000..d2925cf1
--- /dev/null
+++ b/src/rgw/rgw_sync_trace.h
@@ -0,0 +1,142 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_SYNC_LOG_H
+#define CEPH_RGW_SYNC_LOG_H
+
+#include <atomic>
+
+#include "common/Mutex.h"
+#include "common/shunique_lock.h"
+#include "common/admin_socket.h"
+
+#include <set>
+#include <ostream>
+#include <string>
+#include <shared_mutex>
+#include <boost/circular_buffer.hpp>
+
+// Streams the expression 'o' into a temporary std::stringstream and yields
+// the resulting string. The ({ ... }) form is a statement expression, which
+// is a compiler extension rather than standard C++.
+#define SSTR(o) ({ \
+ std::stringstream ss; \
+ ss << o; \
+ ss.str(); \
+})
+
+#define RGW_SNS_FLAG_ACTIVE 1
+#define RGW_SNS_FLAG_ERROR 2
+
+class RGWRados;
+class RGWSyncTraceManager;
+class RGWSyncTraceNode;
+class RGWSyncTraceServiceMapThread;
+
+using RGWSyncTraceNodeRef = std::shared_ptr<RGWSyncTraceNode>;
+
+// One node in the sync trace tree. It keeps a status string, a bounded
+// history of logged lines, a set of RGW_SNS_FLAG_* bits and a log prefix that
+// is built from the parent's prefix plus this node's type and id.
+// Nodes are created through RGWSyncTraceManager::add_node().
+class RGWSyncTraceNode final {
+  friend class RGWSyncTraceManager;
+
+  CephContext *cct;
+  RGWSyncTraceNodeRef parent;
+
+  uint16_t state{0};   // bitmask of RGW_SNS_FLAG_* values
+  std::string status;  // most recently logged line
+
+  Mutex lock{"RGWSyncTraceNode::lock"};
+
+  std::string type;
+  std::string id;
+
+  std::string prefix;
+
+  std::string resource_name;
+
+  uint64_t handle;
+
+  boost::circular_buffer<std::string> history;
+
+  // private constructor, create with RGWSyncTraceManager::add_node()
+  RGWSyncTraceNode(CephContext *_cct, uint64_t _handle,
+                   const RGWSyncTraceNodeRef& _parent,
+                   const std::string& _type, const std::string& _id);
+
+ public:
+  void set_resource_name(const std::string& s) {
+    resource_name = s;
+  }
+
+  const std::string& get_resource_name() const {
+    return resource_name;
+  }
+
+  void set_flag(uint16_t s) {
+    state |= s;
+  }
+  void unset_flag(uint16_t s) {
+    state &= ~s;
+  }
+  // true only when every bit of 'f' is set
+  bool test_flags(uint16_t f) const {
+    return (state & f) == f;
+  }
+
+  // records 's' as the current status, appends it to the history and logs it
+  void log(int level, const std::string& s);
+
+  // "<prefix> <status>"
+  std::string to_str() const {
+    return prefix + " " + status;
+  }
+
+  const std::string& get_prefix() const {
+    return prefix;
+  }
+
+  // NOTE: as a member this is invoked as 'node << os', not 'os << node'
+  std::ostream& operator<<(std::ostream& os) const {
+    os << to_str();
+    return os;
+  }
+
+  boost::circular_buffer<std::string>& get_history() {
+    return history;
+  }
+
+  // regex match against prefix, status and (optionally) the history
+  bool match(const std::string& search_term, bool search_history);
+};
+
+// Owns the sync trace nodes: live nodes are kept in a map keyed by handle,
+// finished nodes in a bounded ring of 'max_lru' entries. It also serves the
+// "sync trace ..." admin socket commands.
+class RGWSyncTraceManager : public AdminSocketHook {
+  friend class RGWSyncTraceNode;
+
+  // guards 'nodes' and 'complete_nodes'
+  mutable std::shared_timed_mutex lock;
+  using shunique_lock = ceph::shunique_lock<decltype(lock)>;
+
+  CephContext *cct;
+  // created by init(); stays nullptr until then
+  RGWSyncTraceServiceMapThread *service_map_thread{nullptr};
+
+  // live nodes, keyed by handle
+  std::map<uint64_t, RGWSyncTraceNodeRef> nodes;
+  // finished nodes
+  boost::circular_buffer<RGWSyncTraceNodeRef> complete_nodes;
+
+  // source of node handles, see alloc_handle()
+  std::atomic<uint64_t> count = { 0 };
+
+  // { command, command descriptor, help text }
+  std::list<std::array<string, 3> > admin_commands;
+
+  uint64_t alloc_handle() { return ++count; }
+
+  void finish_node(RGWSyncTraceNode *node);
+
+public:
+  RGWSyncTraceManager(CephContext *_cct, int max_lru)
+    : cct(_cct), complete_nodes(max_lru) {}
+  ~RGWSyncTraceManager();
+
+  void init(RGWRados *store);
+
+  const RGWSyncTraceNodeRef root_node;
+
+  RGWSyncTraceNodeRef add_node(const RGWSyncTraceNodeRef& parent,
+                               const std::string& type,
+                               const std::string& id = "");
+
+  int hook_to_admin_command();
+  bool call(std::string_view command, const cmdmap_t& cmdmap,
+            std::string_view format, bufferlist& out) override;
+  string get_active_names();
+};
+
+
+#endif
diff --git a/src/rgw/rgw_tag.cc b/src/rgw/rgw_tag.cc
new file mode 100644
index 00000000..05c48bb1
--- /dev/null
+++ b/src/rgw/rgw_tag.cc
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <map>
+#include <string>
+
+#include <common/errno.h>
+#include <boost/algorithm/string.hpp>
+
+#include "rgw_tag.h"
+#include "rgw_common.h"
+
+static constexpr uint32_t MAX_OBJ_TAGS=10;
+static constexpr uint32_t MAX_TAG_KEY_SIZE=128;
+static constexpr uint32_t MAX_TAG_VAL_SIZE=256;
+
+// Inserts key/val without any limit checks; returns false when the key is
+// already present (the existing value is kept).
+bool RGWObjTags::add_tag(const string& key, const string& val)
+{
+  const auto inserted = tag_map.emplace(key, val);
+  return inserted.second;
+}
+
+// Move-inserting variant of add_tag(); returns false when the key is already
+// present.
+bool RGWObjTags::emplace_tag(std::string&& key, std::string&& val)
+{
+  const auto inserted = tag_map.emplace(std::move(key), std::move(val));
+  return inserted.second;
+}
+
+// Adds key/val after enforcing the tag limits.
+// Returns -ERR_INVALID_TAG when the set already holds MAX_OBJ_TAGS (or more)
+// tags, the key is empty, or the key/value exceed MAX_TAG_KEY_SIZE /
+// MAX_TAG_VAL_SIZE; returns -EINVAL when the key already exists; returns 0
+// on success.
+int RGWObjTags::check_and_add_tag(const string&key, const string& val){
+  // '>=' rather than '==': add_tag(), emplace_tag() and decode() can grow the
+  // map without checks, and a map already above the limit must not slip past
+  if (tag_map.size() >= MAX_OBJ_TAGS ||
+      key.size() > MAX_TAG_KEY_SIZE ||
+      val.size() > MAX_TAG_VAL_SIZE ||
+      key.size() == 0){
+    return -ERR_INVALID_TAG;
+  }
+
+  // if we get a conflicting key, either the XML is malformed or the user
+  // supplied an invalid string
+  if (!add_tag(key,val))
+    return -EINVAL;
+
+  return 0;
+}
+
+// Parses a query-string style tag list ("k1=v1&k2=v2&k3") and adds every
+// entry through check_and_add_tag(). Keys and values are URL-decoded; an
+// entry without '=' is added with an empty value.
+// Returns 0 on success or the first negative result of check_and_add_tag();
+// tags added before the failing entry stay in the set.
+int RGWObjTags::set_from_string(const string& input){
+  int ret=0;
+  std::vector<std::string> kvs;
+  boost::split(kvs, input, boost::is_any_of("&"));
+  for (const auto& kv: kvs){
+    const auto p = kv.find('=');
+    if (p != std::string::npos) {
+      ret = check_and_add_tag(url_decode(kv.substr(0,p)),
+                              url_decode(kv.substr(p+1)));
+    } else {
+      ret = check_and_add_tag(url_decode(kv));
+    }
+
+    if (ret < 0)
+      return ret;
+  }
+  return ret;
+}
diff --git a/src/rgw/rgw_tag.h b/src/rgw/rgw_tag.h
new file mode 100644
index 00000000..80a18ae3
--- /dev/null
+++ b/src/rgw/rgw_tag.h
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_TAG_H
+#define RGW_TAG_H
+
+#include <string>
+#include <include/types.h>
+#include <boost/container/flat_map.hpp>
+
+// A set of object tags stored as a flat map from key to value, with
+// encode/decode support and limit-checked insertion.
+class RGWObjTags
+{
+public:
+  using tag_map_t = boost::container::flat_map <std::string, std::string>;
+
+protected:
+  tag_map_t tag_map;
+
+public:
+  RGWObjTags() = default;
+  ~RGWObjTags() = default;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1,1,bl);
+    encode(tag_map, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator &bl) {
+    DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, bl);
+    decode(tag_map,bl);
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+
+  // unchecked insertion; false when the key already exists
+  bool add_tag(const std::string& key, const std::string& val="");
+  bool emplace_tag(std::string&& key, std::string&& val);
+  // limit-checked insertion; 0 on success, negative error otherwise
+  int check_and_add_tag(const std::string& key, const std::string& val="");
+  // parses "k1=v1&k2=v2" style input
+  int set_from_string(const std::string& input);
+
+  size_t count() const { return tag_map.size(); }
+  bool empty() const noexcept { return tag_map.empty(); }
+  void clear() { tag_map.clear(); }
+  const tag_map_t& get_tags() const { return tag_map; }
+};
+WRITE_CLASS_ENCODER(RGWObjTags)
+
+#endif /* RGW_TAG_H */
diff --git a/src/rgw/rgw_tag_s3.cc b/src/rgw/rgw_tag_s3.cc
new file mode 100644
index 00000000..c5ad87ca
--- /dev/null
+++ b/src/rgw/rgw_tag_s3.cc
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <map>
+#include <string>
+#include <iostream>
+
+#include "include/types.h"
+
+#include "rgw_tag_s3.h"
+
+// Reads the "Key" and "Value" children of 'obj'; both are requested as
+// mandatory from the decoder.
+void RGWObjTagEntry_S3::decode_xml(XMLObj *obj)
+{
+  constexpr bool mandatory = true;
+  RGWXMLDecoder::decode_xml("Key", key, obj, mandatory);
+  RGWXMLDecoder::decode_xml("Value", val, obj, mandatory);
+}
+
+// Writes the entry as "Key" and "Value" elements.
+// Throws RGWXMLDecoder::err when the key or the value is empty. The checks
+// run before anything is written, so a rejected entry leaves no partial
+// output in the formatter.
+void RGWObjTagEntry_S3::dump_xml(Formatter *f) const {
+  if (key.empty()) {
+    throw RGWXMLDecoder::err("empty key");
+  }
+
+  if (val.empty()) {
+    throw RGWXMLDecoder::err("empty val");
+  }
+
+  encode_xml("Key", key, f);
+  encode_xml("Value", val, f);
+}
+
+// Decodes the "Tag" children of 'obj' and adds each one to this tag set.
+// Throws RGWXMLDecoder::err when a tag cannot be added (add_tag() returns
+// false for a key that is already present).
+void RGWObjTagSet_S3::decode_xml(XMLObj *obj)
+{
+  vector<RGWObjTagEntry_S3> parsed;
+  RGWXMLDecoder::decode_xml("Tag", parsed, obj, true);
+
+  for (auto& tag : parsed) {
+    const bool added = add_tag(tag.get_key(), tag.get_val());
+    if (!added) {
+      throw RGWXMLDecoder::err("failed to add tag");
+    }
+  }
+}
+
+// Copies every tag of this set into 'dest' through the limit-checked
+// check_and_add_tag(). Returns 0 on success or the first negative result;
+// tags copied before the failure stay in 'dest'.
+int RGWObjTagSet_S3::rebuild(RGWObjTags& dest)
+{
+  for (const auto& kv : tag_map) {
+    const int ret = dest.check_and_add_tag(kv.first, kv.second);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+  return 0;
+}
+
+// Reads the "TagSet" child of 'obj', requested as mandatory from the decoder.
+void RGWObjTagging_S3::decode_xml(XMLObj *obj)
+{
+  constexpr bool mandatory = true;
+  RGWXMLDecoder::decode_xml("TagSet", tagset, obj, mandatory);
+}
+
+// Writes one "Tag" section per stored tag, each holding a "Key" and a
+// "Value" element.
+void RGWObjTagSet_S3::dump_xml(Formatter *f) const
+{
+  for (const auto& kv : tag_map) {
+    Formatter::ObjectSection tag_section(*f, "Tag");
+    const auto& tag_key = kv.first;
+    const auto& tag_val = kv.second;
+    encode_xml("Key", tag_key, f);
+    encode_xml("Value", tag_val, f);
+  }
+}
+
diff --git a/src/rgw/rgw_tag_s3.h b/src/rgw/rgw_tag_s3.h
new file mode 100644
index 00000000..7ed02277
--- /dev/null
+++ b/src/rgw/rgw_tag_s3.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RGW_TAG_S3_H
+#define RGW_TAG_S3_H
+
+#include <map>
+#include <string>
+#include <iostream>
+#include <include/types.h>
+#include <common/Formatter.h>
+#include <expat.h>
+
+#include "rgw_tag.h"
+#include "rgw_xml.h"
+
+class RGWObjTagEntry_S3
+{
+  std::string key;  // tag name
+  std::string val;  // tag value
+public:
+  RGWObjTagEntry_S3() = default;
+  RGWObjTagEntry_S3(const std::string &k, const std::string &v) : key(k), val(v) {}
+  ~RGWObjTagEntry_S3() = default;
+
+  const std::string& get_key() const { return key; }
+  const std::string& get_val() const { return val; }
+
+  void dump_xml(Formatter *f) const;  // XML writer, defined out of line
+  void decode_xml(XMLObj *obj);       // XML reader, defined out of line
+};
+
+class RGWObjTagSet_S3: public RGWObjTags // RGWObjTags extended with XML dump/decode hooks and rebuild()
+{
+public:
+ int rebuild(RGWObjTags& dest); // copies the held tags into dest; returns an int status
+
+ void dump_xml(Formatter *f) const;
+ void decode_xml(XMLObj *obj);
+};
+
+class RGWObjTagging_S3 // thin wrapper owning a single RGWObjTagSet_S3
+{
+ RGWObjTagSet_S3 tagset;
+public:
+ void decode_xml(XMLObj *obj);
+ int rebuild(RGWObjTags& dest) { // forwards to the owned tag set
+ return tagset.rebuild(dest);
+ }
+};
+
+
+#endif /* RGW_TAG_S3_H */
diff --git a/src/rgw/rgw_tar.h b/src/rgw/rgw_tar.h
new file mode 100644
index 00000000..b322a291
--- /dev/null
+++ b/src/rgw/rgw_tar.h
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_TAR_H
+#define CEPH_RGW_TAR_H
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <tuple>
+#include <utility>
+
+#include <boost/optional.hpp>
+#include <boost/range/adaptor/reversed.hpp>
+#include <boost/utility/string_ref.hpp>
+
+namespace rgw {
+namespace tar {
+
+static constexpr size_t BLOCK_SIZE = 512;
+
+
+static inline std::pair<class StatusIndicator,
+ boost::optional<class HeaderView>>
+interpret_block(const StatusIndicator& status, ceph::bufferlist& bl);
+
+
+class StatusIndicator {
+  friend std::pair<class StatusIndicator,
+                   boost::optional<class HeaderView>>
+  interpret_block(const StatusIndicator& status, ceph::bufferlist& bl);
+
+  bool is_empty = false;  /* the block this status describes was flagged empty */
+  bool is_eof = false;    /* this block and the one before it were both empty */
+
+  /* Initial state, reachable from outside only through create(). */
+  StatusIndicator() = default;
+
+  /* State after one more block: EOF needs the previous status to have been
+   * empty as well. */
+  StatusIndicator(const StatusIndicator& prev_status,
+                  const bool is_empty)
+    : is_empty(is_empty),
+      is_eof(is_empty && prev_status.empty()) {
+  }
+
+public:
+  bool empty() const {
+    return is_empty;
+  }
+
+  bool eof() const {
+    return is_eof;
+  }
+
+  static StatusIndicator create() {
+    return StatusIndicator();
+  }
+} /* class StatusIndicator */;
+
+
+enum class FileType : char { /* values are compared against the header's one-byte filetype field */
+ UNKNOWN = '\0', /* returned by HeaderView::get_filetype() for every other byte value */
+
+ /* The tar format uses ASCII encoding. */
+ NORMAL_FILE = '0',
+ DIRECTORY = '5'
+}; /* enum class FileType */
+
+class HeaderView {
+protected:
+  /* Everything is char here (ASCII encoding), so we don't need to worry about
+   * the struct padding. */
+  const struct header_t {
+    char filename[100];
+    char __filemode[8];
+    char __owner_id[8];
+    char __group_id[8];
+    char filesize[12];
+    char lastmod[12];
+    char checksum[8];
+    char filetype;
+    char __padding[355];
+  } *header;
+
+  static_assert(sizeof(*header) == BLOCK_SIZE,
+                "The TAR header must be exactly BLOCK_SIZE length");
+
+  /* Turns the index of a character into the length of the prefix ending at it. */
+  static size_t pos2len(const size_t pos) {
+    return pos + 1;
+  }
+
+public:
+  explicit HeaderView(const char (&header)[BLOCK_SIZE])
+    : header(reinterpret_cast<const header_t*>(header)) {
+  }
+
+  FileType get_filetype() const {
+    switch (header->filetype) {
+    case static_cast<char>(FileType::NORMAL_FILE):
+      return FileType::NORMAL_FILE;
+    case static_cast<char>(FileType::DIRECTORY):
+      return FileType::DIRECTORY;
+    default:
+      return FileType::UNKNOWN;
+    }
+  }
+
+  boost::string_ref get_filename() const {
+    const auto& name = header->filename; /* holds no NUL when all 100 bytes are used */
+    const char* const nul = std::find(name, name + sizeof(name), '\0');
+    return boost::string_ref(name, nul - name);
+  }
+
+  size_t get_filesize() const {
+    /* The string_ref is pretty suitable here because tar encodes its
+     * metadata in ASCII. */
+    const boost::string_ref raw(header->filesize, sizeof(header->filesize));
+
+    /* The field may end in any mix of NULs and spaces, so both are stripped
+     * as one set; a field holding nothing but padding yields size 0. */
+    const auto pad_ends_at = raw.find_last_not_of(boost::string_ref("\0 ", 2));
+    const auto trimmed = raw.substr(0,
+      pad_ends_at == boost::string_ref::npos ? 0 : pos2len(pad_ends_at));
+
+    size_t sum = 0, mul = 1;
+    for (const char c : boost::adaptors::reverse(trimmed)) {
+      if (c < '0' || c > '7') break; /* leading pad or junk: not an octal digit */
+      sum += (c - '0') * mul;
+      mul *= 8;
+    }
+
+    return sum;
+  }
+}; /* class HeaderView */
+
+
+static inline std::pair<StatusIndicator,
+                        boost::optional<HeaderView>>
+interpret_block(const StatusIndicator& status, ceph::bufferlist& bl) {
+  /* NOTE(review): BLOCK_SIZE bytes are read through bl.c_str() with no length check here. */
+  static constexpr std::array<char, BLOCK_SIZE> zero_block = {0, };
+  const char (&block)[BLOCK_SIZE] =
+    reinterpret_cast<const char (&)[BLOCK_SIZE]>(*bl.c_str());
+  const bool all_zeros = std::memcmp(zero_block.data(), block, BLOCK_SIZE) == 0;
+  if (all_zeros) {
+    return std::make_pair(StatusIndicator(status, true), boost::none);
+  }
+  return std::make_pair(StatusIndicator(status, false), HeaderView(block));
+}
+
+} /* namespace tar */
+} /* namespace rgw */
+
+#endif /* CEPH_RGW_TAR_H */
diff --git a/src/rgw/rgw_token.cc b/src/rgw/rgw_token.cc
new file mode 100644
index 00000000..a5c6f76e
--- /dev/null
+++ b/src/rgw/rgw_token.cc
@@ -0,0 +1,143 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include "common/config.h"
+#include "common/ceph_argparse.h"
+#include "common/debug.h"
+#include "global/global_init.h"
+#include "include/ceph_assert.h"
+#include "include/str_list.h"
+
+#include "rgw_token.h"
+#include "rgw_b64.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace {
+
+ using namespace rgw;
+ using std::get;
+ using std::string;
+
+ RGWToken::token_type type{RGWToken::TOKEN_NONE}; // set from --ttype in main(); TOKEN_NONE means no accepted type was given
+ string access_key{""}; // from RGW_ACCESS_KEY_ID, overridden by --access
+ string secret_key{""}; // from RGW_SECRET_ACCESS_KEY, overridden by --secret
+
+ Formatter* formatter{nullptr}; // allocated in main() once the arguments are accepted
+
+ bool verbose {false}; // --verbose
+ bool do_encode {false}; // --encode
+ bool do_decode {false}; // --decode
+
+}
+
+void usage() // prints the command synopsis followed by the generic client options
+{
+ cout << "usage: radosgw-token --encode --ttype=<token type> [options...]" << std::endl;
+ cout << "\t(maybe exporting RGW_ACCESS_KEY_ID and RGW_SECRET_ACCESS_KEY)"
+ << std::endl;
+ cout << "\t <token type> := ad | ldap" << std::endl;
+ cout << "\n"; // NOTE(review): --access, --secret, --decode and --verbose are parsed in main() but not listed here
+ generic_client_usage();
+}
+
+int main(int argc, char **argv)
+{
+  std::string val;
+  vector<const char*> args;
+  argv_to_vec(argc, (const char **)argv, args);
+  if (args.empty()) {
+    cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    exit(1);
+  }
+  if (ceph_argparse_need_usage(args)) {
+    usage();
+    exit(0);
+  }
+
+  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+
+  /* The environment supplies defaults; --access/--secret below override them. */
+  char *v = getenv("RGW_ACCESS_KEY_ID");
+  if (v) {
+    access_key = v;
+  }
+
+  v = getenv("RGW_SECRET_ACCESS_KEY");
+  if (v) {
+    secret_key = v;
+  }
+
+  for (auto arg_iter = args.begin(); arg_iter != args.end();) {
+    if (ceph_argparse_witharg(args, arg_iter, &val, "--access",
+                              (char*) nullptr)) {
+      access_key = val;
+    } else if (ceph_argparse_witharg(args, arg_iter, &val, "--secret",
+                                     (char*) nullptr)) {
+      secret_key = val;
+    } else if (ceph_argparse_witharg(args, arg_iter, &val, "--ttype",
+                                     (char*) nullptr)) {
+      for (const auto& ttype : {"ad", "ldap"}) {
+        if (boost::iequals(val, ttype)) {
+          type = RGWToken::to_type(val);
+          break;
+        }
+      }
+    } else if (ceph_argparse_flag(args, arg_iter, "--encode",
+                                  (char*) nullptr)) {
+      do_encode = true;
+    } else if (ceph_argparse_flag(args, arg_iter, "--decode",
+                                  (char*) nullptr)) {
+      do_decode = true;
+    } else if (ceph_argparse_flag(args, arg_iter, "--verbose",
+                                  (char*) nullptr)) {
+      verbose = true;
+    } else {
+      ++arg_iter;
+    }
+  }
+
+  if ((! do_encode) ||
+      (type == RGWToken::TOKEN_NONE)) {
+    cerr << argv[0] << ": --encode and --ttype=<ad|ldap> are required" << std::endl;
+    return 1; /* a negative value would be truncated to an arbitrary exit status */
+  }
+
+  formatter = new JSONFormatter(true /* pretty */);
+
+  RGWToken token(type, access_key, secret_key);
+  token.encode_json(formatter); /* do_encode is known to be set at this point */
+  std::ostringstream os;
+  formatter->flush(os);
+  string token_str = os.str();
+  if (verbose) {
+    std::cout << "expanded token: " << token_str << std::endl;
+    if (do_decode) {
+      RGWToken token2(token_str);
+      std::cout << "decoded expanded token: " << token2 << std::endl;
+    }
+  }
+  std::cout << to_base64(token_str) << std::endl;
+
+  delete formatter; /* previously leaked */
+  return 0;
+}
diff --git a/src/rgw/rgw_token.h b/src/rgw/rgw_token.h
new file mode 100644
index 00000000..8f50133d
--- /dev/null
+++ b/src/rgw/rgw_token.h
@@ -0,0 +1,169 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_TOKEN_H
+#define RGW_TOKEN_H
+
+#include <stdint.h>
+#include <boost/algorithm/string.hpp>
+#include <sstream>
+
+#include "common/ceph_json.h"
+#include "common/Formatter.h"
+#include "rgw/rgw_b64.h"
+
+namespace rgw {
+
+ using std::string;
+
+  class RGWToken {
+  public:
+    static constexpr auto type_name = "RGW_TOKEN"; /* section/field name used by the JSON form */
+
+    enum token_type : uint32_t {
+      TOKEN_NONE,
+      TOKEN_AD,
+      TOKEN_KEYSTONE,
+      TOKEN_LDAP,
+    };
+
+    static enum token_type to_type(const string& s) { /* case-insensitive; unknown names map to TOKEN_NONE */
+      if (boost::iequals(s, "ad"))
+        return TOKEN_AD;
+      if (boost::iequals(s, "ldap"))
+        return TOKEN_LDAP;
+      if (boost::iequals(s, "keystone"))
+        return TOKEN_KEYSTONE;
+      return TOKEN_NONE;
+    }
+
+    static const char* from_type(enum token_type type) { /* inverse of to_type(); "none" for anything else */
+      switch (type) {
+      case TOKEN_AD:
+        return "ad";
+        break;
+      case TOKEN_LDAP:
+        return "ldap";
+        break;
+      case TOKEN_KEYSTONE:
+        return "keystone";
+        break;
+      default:
+        return "none";
+      };
+    }
+
+    token_type type;
+    string id;
+    string key;
+
+    virtual uint32_t version() const { return 1; };
+
+    bool valid() const{ /* a usable token has a known type and non-empty id and key */
+      return ((type != TOKEN_NONE) &&
+              (! id.empty()) &&
+              (! key.empty()));
+    }
+
+    RGWToken()
+      : type(TOKEN_NONE) {};
+
+    RGWToken(enum token_type _type, const std::string& _id,
+             const std::string& _key)
+      : type(_type), id(_id), key(_key) {};
+
+    RGWToken(const string& json) : type(TOKEN_NONE) { /* type starts determinate even if decoding assigns nothing */
+      JSONParser p;
+      p.parse(json.c_str(), json.length());
+      JSONDecoder::decode_json(RGWToken::type_name, *this, &p);
+    }
+
+    void encode(bufferlist& bl) const {
+      uint32_t ver = version();
+      string typestr{from_type(type)}; /* the type travels as its string name */
+      ENCODE_START(1, 1, bl);
+      encode(type_name, bl);
+      encode(ver, bl);
+      encode(typestr, bl);
+      encode(id, bl);
+      encode(key, bl);
+      ENCODE_FINISH(bl);
+    }
+
+    void decode(bufferlist::const_iterator& bl) {
+      string name;
+      string typestr;
+      uint32_t version;
+      DECODE_START(1, bl);
+      decode(name, bl);
+      decode(version, bl);
+      decode(typestr, bl);
+      type = to_type(typestr);
+      decode(id, bl);
+      decode(key, bl);
+      DECODE_FINISH(bl);
+    }
+
+    void dump(Formatter* f) const {
+      ::encode_json("version", uint32_t(version()), f);
+      ::encode_json("type", from_type(type), f);
+      ::encode_json("id", id, f);
+      ::encode_json("key", key, f);
+    }
+
+    void encode_json(Formatter* f) {
+      RGWToken& token = *this;
+      f->open_object_section(type_name);
+      ::encode_json(type_name, token, f);
+      f->close_section();
+    }
+
+    void decode_json(JSONObj* obj) {
+      uint32_t version;
+      /* "version" is read but not acted upon here */
+      string typestr;
+      JSONDecoder::decode_json("version", version, obj);
+      JSONDecoder::decode_json("type", typestr, obj);
+      type = to_type(typestr);
+      JSONDecoder::decode_json("id", id, obj);
+      JSONDecoder::decode_json("key", key, obj);
+    }
+
+    std::string encode_json_base64(Formatter* f) { /* JSON form flushed from f, then base64-encoded */
+      encode_json(f);
+      std::ostringstream os;
+      f->flush(os);
+      return to_base64(std::move(os.str()));
+    }
+
+    friend inline ostream& operator<<(ostream& os, const RGWToken& token);
+
+    virtual ~RGWToken() {};
+  };
+ WRITE_CLASS_ENCODER(RGWToken)
+
+ inline ostream& operator<<(ostream& os, const RGWToken& token) // human-readable one-line rendering of a token
+ {
+ os << "<<RGWToken"
+ << " type=" << RGWToken::from_type(token.type)
+ << " id=" << token.id
+ << " key=" << token.key // NOTE(review): writes the key field in clear text - confirm this is acceptable wherever tokens are streamed
+ << ">>";
+ return os;
+ }
+
+} /* namespace rgw */
+
+#endif /* RGW_TOKEN_H */
diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc
new file mode 100644
index 00000000..057535e4
--- /dev/null
+++ b/src/rgw/rgw_tools.cc
@@ -0,0 +1,527 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "librados/librados_asio.h"
+#include "common/async/yield_context.h"
+
+#include "include/types.h"
+#include "include/stringify.h"
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_tools.h"
+#include "rgw_acl_s3.h"
+#include "rgw_op.h"
+#include "rgw_putobj_processor.h"
+#include "rgw_aio_throttle.h"
+#include "rgw_compression.h"
+#include "rgw_zone.h"
+#include "osd/osd_types.h"
+
+#include "services/svc_sys_obj.h"
+#include "services/svc_zone_utils.h"
+
+#define dout_subsys ceph_subsys_rgw
+#define dout_context g_ceph_context
+
+#define READ_CHUNK_LEN (512 * 1024)
+
+static std::map<std::string, std::string>* ext_mime_map;
+
+int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool,
+                   librados::IoCtx& ioctx, bool create, bool mostly_omap)
+{
+  // Opens ioctx on pool; with `create`, a missing pool is created, tagged for rgw and (if mostly_omap) tuned.
+  int r = rados->ioctx_create(pool.name.c_str(), ioctx);
+  if (r == -ENOENT && create) {
+    r = rados->pool_create(pool.name.c_str());
+    if (r == -ERANGE) {
+      dout(0)
+        << __func__
+        << " ERROR: librados::Rados::pool_create returned " << cpp_strerror(-r)
+        << " (this can be due to a pool or placement group misconfiguration, e.g."
+        << " pg_num < pgp_num or mon_max_pg_per_osd exceeded)"
+        << dendl;
+    }
+    if (r < 0 && r != -EEXIST) {  // -EEXIST is tolerated and falls through to the re-open
+      return r;
+    }
+
+    r = rados->ioctx_create(pool.name.c_str(), ioctx);
+    if (r < 0) {
+      return r;
+    }
+
+    r = ioctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false);
+    if (r < 0 && r != -EOPNOTSUPP) {
+      return r;
+    }
+
+    if (mostly_omap) {
+      // set pg_autoscale_bias (best effort: a failure is only logged)
+      bufferlist inbl;
+      float bias = g_conf().get_val<double>("rgw_rados_pool_autoscale_bias");
+      int set_r = rados->mon_command(
+        "{\"prefix\": \"osd pool set\", \"pool\": \"" +
+        pool.name + "\", \"var\": \"pg_autoscale_bias\", \"val\": \"" +
+        stringify(bias) + "\"}",
+        inbl, NULL, NULL);
+      if (set_r < 0) {
+        dout(10) << __func__ << " warning: failed to set pg_autoscale_bias on "
+                 << pool.name << dendl;
+      }
+      // set pg_num_min (best effort as well)
+      int min = g_conf().get_val<uint64_t>("rgw_rados_pool_pg_num_min");
+      set_r = rados->mon_command(
+        "{\"prefix\": \"osd pool set\", \"pool\": \"" +
+        pool.name + "\", \"var\": \"pg_num_min\", \"val\": \"" +
+        stringify(min) + "\"}",
+        inbl, NULL, NULL);
+      if (set_r < 0) {
+        dout(10) << __func__ << " warning: failed to set pg_num_min on "
+                 << pool.name << dendl;
+      }
+    }
+  } else if (r < 0) {
+    return r;
+  }
+  if (!pool.ns.empty()) {
+    ioctx.set_namespace(pool.ns);
+  }
+  return 0;
+}
+
+int rgw_put_system_obj(RGWRados *rgwstore, const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive,
+                       RGWObjVersionTracker *objv_tracker, real_time set_mtime, map<string, bufferlist> *pattrs)
+{
+  // With no attrs supplied, an empty map stands in so set_attrs() always gets one.
+  map<string,bufferlist> no_attrs;
+  map<string,bufferlist>& attrs_to_write = pattrs ? *pattrs : no_attrs;
+
+  rgw_raw_obj obj(pool, oid);
+  auto obj_ctx = rgwstore->svc.sysobj->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+
+  // A single write attempt carrying the caller's tracker, exclusivity, mtime
+  // and attrs; used for the first try and for the retry below.
+  auto write_once = [&] {
+    return sysobj.wop()
+                 .set_objv_tracker(objv_tracker)
+                 .set_exclusive(exclusive)
+                 .set_mtime(set_mtime)
+                 .set_attrs(attrs_to_write)
+                 .write(data);
+  };
+
+  int ret = write_once();
+  if (ret == -ENOENT) {
+    // -ENOENT from the write: create the pool, then retry exactly once.
+    ret = rgwstore->create_pool(pool);
+    if (ret >= 0) {
+      ret = write_once();
+    }
+  }
+
+  return ret;
+}
+
+int rgw_get_system_obj(RGWRados *rgwstore, RGWSysObjectCtx& obj_ctx, const rgw_pool& pool, const string& key, bufferlist& bl,
+ RGWObjVersionTracker *objv_tracker, real_time *pmtime, map<string, bufferlist> *pattrs,
+ rgw_cache_entry_info *cache_info, boost::optional<obj_version> refresh_version)
+{
+ bufferlist::iterator iter; // NOTE(review): not referenced anywhere in this function
+ int request_len = READ_CHUNK_LEN;
+ rgw_raw_obj obj(pool, key);
+
+ obj_version original_readv; // stays empty unless the caller's tracker already carried a read version
+ if (objv_tracker && !objv_tracker->read_version.empty()) {
+ original_readv = objv_tracker->read_version;
+ }
+
+ do {
+ auto sysobj = obj_ctx.get_obj(obj);
+ auto rop = sysobj.rop();
+
+ int ret = rop.set_attrs(pattrs) // stat first; it receives the attrs, mtime and version-tracker pointers
+ .set_last_mod(pmtime)
+ .set_objv_tracker(objv_tracker)
+ .stat();
+ if (ret < 0)
+ return ret;
+
+ ret = rop.set_cache_info(cache_info)
+ .set_refresh_version(refresh_version)
+ .read(&bl);
+ if (ret == -ECANCELED) {
+ /* raced, restart */
+ if (!original_readv.empty()) {
+ /* we were asked to read a specific obj_version, failed */
+ return ret;
+ }
+ if (objv_tracker) {
+ objv_tracker->read_version.clear();
+ }
+ sysobj.invalidate();
+ continue; // loop condition is `true`, so this starts a fresh stat + read
+ }
+ if (ret < 0)
+ return ret;
+
+ if (ret < request_len) // a result shorter than request_len ends the loop
+ break;
+ bl.clear(); // otherwise discard the data, double request_len and go around again
+ request_len *= 2;
+ } while (true);
+
+ return 0;
+}
+
+int rgw_delete_system_obj(RGWRados *rgwstore, const rgw_pool& pool, const string& oid,
+                          RGWObjVersionTracker *objv_tracker)
+{
+  // Removes the raw object pool/oid, handing objv_tracker to the write op.
+  auto obj_ctx = rgwstore->svc.sysobj->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(rgw_raw_obj{pool, oid});
+  return sysobj.wop()
+               .set_objv_tracker(objv_tracker)
+               .remove();
+}
+
+thread_local bool is_asio_thread = false;
+
+int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
+                      librados::ObjectReadOperation *op, bufferlist* pbl,
+                      optional_yield y)
+{
+#ifdef HAVE_BOOST_CONTEXT
+  // given a yield_context, call async_operate() to yield the coroutine instead
+  // of blocking
+  if (y) {
+    auto& context = y.get_io_context();
+    auto& yield = y.get_yield_context();
+    boost::system::error_code ec;
+    auto bl = librados::async_operate(context, ioctx, oid, op, 0, yield[ec]);
+    if (pbl) {
+      *pbl = std::move(bl);
+    }
+    return -ec.value();
+  }
+  // work on asio threads should be asynchronous, so warn when they block
+  if (is_asio_thread) {
+    dout(20) << "WARNING: blocking librados call" << dendl;
+  }
+#endif
+  return ioctx.operate(oid, op, pbl); // pass pbl through so the blocking path fills it like the async path does
+}
+
+int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
+ librados::ObjectWriteOperation *op, optional_yield y)
+{
+#ifdef HAVE_BOOST_CONTEXT
+ if (y) { // a yield context was supplied: run the op through async_operate()
+ auto& context = y.get_io_context();
+ auto& yield = y.get_yield_context();
+ boost::system::error_code ec;
+ librados::async_operate(context, ioctx, oid, op, 0, yield[ec]);
+ return -ec.value(); // error_code value negated, matching the negative-int convention of the blocking call
+ }
+ if (is_asio_thread) { // no yield context but on an asio thread: log before blocking
+ dout(20) << "WARNING: blocking librados call" << dendl;
+ }
+#endif
+ return ioctx.operate(oid, op); // blocking fallback
+}
+
+void parse_mime_map_line(const char *start, const char *end)
+{
+  // Parses one line of the form "<mime> <ext> <ext> ..." from [start, end) into ext_mime_map.
+  std::string line(start, end);  // private, NUL-terminated copy (strsep() writes into it); replaces a VLA
+  char *l = &line[0];
+#define DELIMS " \t\n\r"
+
+  while (isspace(static_cast<unsigned char>(*l)))  // isspace() on a negative plain char is undefined
+    l++;
+
+  char *mime = strsep(&l, DELIMS);
+  if (!mime)
+    return;
+
+  char *ext;
+  do {
+    ext = strsep(&l, DELIMS);
+    if (ext && *ext) {  // runs of delimiters produce empty tokens; skip them
+      (*ext_mime_map)[ext] = mime;
+    }
+  } while (ext);
+#undef DELIMS
+}
+
+
+void parse_mime_map(const char *buf)
+{
+  const char *start = buf, *end = buf;  // [start, end) delimits the current line of the NUL-terminated buf
+  while (*end) {
+    while (*end && *end != '\n') {
+      end++;
+    }
+    parse_mime_map_line(start, end);
+    if (*end) end++;  // step over the newline, but never past the final NUL of an unterminated last line
+    start = end;
+  }
+}
+
+static int ext_mime_map_init(CephContext *cct, const char *ext_map)
+{
+  int fd = open(ext_map, O_RDONLY);
+  char *buf = NULL;
+  int ret;
+  if (fd < 0) {
+    ret = -errno;
+    ldout(cct, 0) << __func__ << " failed to open file=" << ext_map
+                  << " : " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    ret = -errno;
+    ldout(cct, 0) << __func__ << " failed to stat file=" << ext_map
+                  << " : " << cpp_strerror(-ret) << dendl;
+    goto done;
+  }
+
+  buf = (char *)malloc(st.st_size + 1);
+  if (!buf) {
+    ret = -ENOMEM;
+    ldout(cct, 0) << __func__ << " failed to allocate buf" << dendl;
+    goto done;
+  }
+
+  ret = safe_read(fd, buf, st.st_size + 1);  // one byte more than stat'ed, so growth shows up as a size mismatch
+  if (ret < 0) {  // hard read error (e.g. ext_map is a directory): a retry would recurse without bound
+    ldout(cct, 0) << __func__ << " failed to read file=" << ext_map << " : " << cpp_strerror(-ret) << dendl;
+    goto done;
+  } else if (ret != st.st_size) {  // huh? file size has changed?
+    ldout(cct, 0) << __func__ << " raced! will retry.." << dendl;
+    free(buf);
+    close(fd);
+    return ext_mime_map_init(cct, ext_map);
+  }
+  buf[st.st_size] = '\0';
+  parse_mime_map(buf);
+  ret = 0;
+done:
+  free(buf);
+  close(fd);
+  return ret;
+}
+
+const char *rgw_find_mime_by_ext(string& ext)
+{
+  // Exact-match lookup in ext_mime_map; NULL when the extension has no entry.
+  const auto found = ext_mime_map->find(ext);
+  if (found != ext_mime_map->end())
+    return found->second.c_str();
+  return NULL;
+}
+
+void rgw_filter_attrset(map<string, bufferlist>& unfiltered_attrset, const string& check_prefix,
+                        map<string, bufferlist> *attrset)
+{
+  // Keys sharing check_prefix sit together in the ordered map: start at lower_bound, stop at the first miss.
+  attrset->clear();
+  auto it = unfiltered_attrset.lower_bound(check_prefix);
+  while (it != unfiltered_attrset.end() &&
+         boost::algorithm::starts_with(it->first, check_prefix)) {
+    (*attrset)[it->first] = it->second;
+    ++it;
+  }
+}
+
+RGWDataAccess::RGWDataAccess(RGWRados *_store) : store(_store) // keeps the store pointer as given
+{
+ sysobj_ctx = std::make_unique<RGWSysObjectCtx>(store->svc.sysobj->init_obj_ctx()); // heap-held context built from the sysobj service's init_obj_ctx()
+}
+
+
+int RGWDataAccess::Bucket::finish_init()
+{
+  // Decodes the RGW_ATTR_ACL attribute into `policy`; attrs without one are accepted as-is.
+  const auto acl_attr = attrs.find(RGW_ATTR_ACL);
+  if (acl_attr == attrs.end())
+    return 0;
+
+  try {
+    bufferlist::const_iterator pos = acl_attr->second.begin();
+    policy.decode(pos);
+  } catch (buffer::error& err) {
+    return -EIO;
+  }
+
+  return 0;
+}
+
+int RGWDataAccess::Bucket::init()
+{
+  // Loads bucket_info, mtime and attrs for tenant/name from the store, then
+  // lets finish_init() decode the ACL out of the fetched attrs.
+  const int rc = sd->store->get_bucket_info(*sd->sysobj_ctx,
+                                            tenant, name,
+                                            bucket_info,
+                                            &mtime,
+                                            &attrs);
+  if (rc < 0)
+    return rc;
+  return finish_init();
+}
+
+int RGWDataAccess::Bucket::init(const RGWBucketInfo& _bucket_info,
+ const map<string, bufferlist>& _attrs)
+{
+ bucket_info = _bucket_info; // copies the caller's info and attrs; no store call is made on this path
+ attrs = _attrs;
+
+ return finish_init(); // decodes the ACL from the copied attrs
+}
+
+int RGWDataAccess::Bucket::get_object(const rgw_obj_key& key,
+ ObjectRef *obj) {
+ obj->reset(new Object(sd, shared_from_this(), key)); // the new Object holds a shared reference to this bucket
+ return 0; // always 0 in the visible code
+}
+
+int RGWDataAccess::Object::put(bufferlist& data,
+ map<string, bufferlist>& attrs)
+{
+ RGWRados *store = sd->store;
+ CephContext *cct = store->ctx();
+
+ string tag;
+ append_rand_alpha(cct, tag, tag, 32); // NOTE(review): `tag` is not read again anywhere in this function
+
+ RGWBucketInfo& bucket_info = bucket->bucket_info;
+
+ using namespace rgw::putobj;
+ rgw::AioThrottle aio(store->ctx()->_conf->rgw_put_obj_min_window_size);
+
+ RGWObjectCtx obj_ctx(store);
+ rgw_obj obj(bucket_info.bucket, key);
+
+ auto& owner = bucket->policy.get_owner();
+
+ string req_id = store->svc.zone_utils->unique_id(store->get_new_req_id());
+
+ AtomicObjectProcessor processor(&aio, store, bucket_info,
+ nullptr,
+ owner.get_id(),
+ obj_ctx, obj, olh_epoch, req_id);
+
+ int ret = processor.prepare();
+ if (ret < 0)
+ return ret;
+
+ using namespace rgw::putobj;
+
+ DataProcessor *filter = &processor; // head of the processing chain; replaced below when a compressor is inserted
+
+ CompressorRef plugin;
+ boost::optional<RGWPutObj_Compress> compressor;
+
+ const auto& compression_type = store->svc.zone->get_zone_params().get_compression_type(bucket_info.placement_rule);
+ if (compression_type != "none") {
+ plugin = Compressor::create(store->ctx(), compression_type);
+ if (!plugin) { // a plugin that fails to load is only logged; the write proceeds without compression
+ ldout(store->ctx(), 1) << "Cannot load plugin for compression type "
+ << compression_type << dendl;
+ } else {
+ compressor.emplace(store->ctx(), plugin, filter);
+ filter = &*compressor;
+ }
+ }
+
+ off_t ofs = 0;
+ auto obj_size = data.length(); // taken before the loop below drains `data`
+
+ RGWMD5Etag etag_calc;
+
+ do { // do/while: runs once even when `data` is empty
+ size_t read_len = std::min(data.length(), (unsigned int)cct->_conf->rgw_max_chunk_size);
+
+ bufferlist bl;
+
+ data.splice(0, read_len, &bl); // moves the first read_len bytes out of `data` into bl
+ etag_calc.update(bl);
+
+ ret = filter->process(std::move(bl), ofs);
+ if (ret < 0)
+ return ret;
+
+ ofs += read_len;
+ } while (data.length() > 0);
+
+ ret = filter->process({}, ofs); // final call with an empty buffer at the end offset
+ if (ret < 0) {
+ return ret;
+ }
+ bool has_etag_attr = false;
+ auto iter = attrs.find(RGW_ATTR_ETAG);
+ if (iter != attrs.end()) { // an etag supplied through attrs overrides the member etag
+ bufferlist& bl = iter->second;
+ etag = bl.to_str();
+ has_etag_attr = true;
+ }
+
+ if (!aclbl) { // no ACL was set on this object: encode one built by create_canned() from the bucket owner
+ RGWAccessControlPolicy_S3 policy(cct);
+
+ policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */
+
+ policy.encode(aclbl.emplace());
+ }
+
+ if (etag.empty()) { // nothing provided by set_etag() or attrs: use the hash of the data
+ etag_calc.finish(&etag);
+ }
+
+ if (!has_etag_attr) {
+ bufferlist etagbl;
+ etagbl.append(etag);
+ attrs[RGW_ATTR_ETAG] = etagbl;
+ }
+ attrs[RGW_ATTR_ACL] = *aclbl;
+
+ string *puser_data = nullptr; // stays null unless user_data was set
+ if (user_data) {
+ puser_data = &(*user_data);
+ }
+
+ return processor.complete(obj_size, etag,
+ &mtime, mtime,
+ attrs, delete_at,
+ nullptr, nullptr,
+ puser_data,
+ nullptr, nullptr);
+}
+
+void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy)
+{
+ policy.encode(aclbl.emplace()); // emplace() gives a fresh bufferlist in aclbl, which receives the encoded policy
+}
+
+int rgw_tools_init(CephContext *cct)
+{
+ ext_mime_map = new std::map<std::string, std::string>; // released by rgw_tools_cleanup()
+ ext_mime_map_init(cct, cct->_conf->rgw_mime_types_file.c_str()); // result deliberately discarded, see below
+ // ignore errors; missing mime.types is not fatal
+ return 0;
+}
+
+void rgw_tools_cleanup() // frees the table allocated by rgw_tools_init()
+{
+ delete ext_mime_map;
+ ext_mime_map = nullptr; // reset so a second call deletes nullptr, which is a no-op
+}
diff --git a/src/rgw/rgw_tools.h b/src/rgw/rgw_tools.h
new file mode 100644
index 00000000..0e8b1621
--- /dev/null
+++ b/src/rgw/rgw_tools.h
@@ -0,0 +1,202 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_TOOLS_H
+#define CEPH_RGW_TOOLS_H
+
+#include <string>
+
+#include "include/types.h"
+#include "common/ceph_time.h"
+#include "rgw_common.h"
+
+class RGWRados;
+class RGWSysObjectCtx;
+struct RGWObjVersionTracker;
+class optional_yield;
+
+struct obj_version;
+
+int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool,
+ librados::IoCtx& ioctx,
+ bool create = false,
+ bool mostly_omap = false);
+
+int rgw_put_system_obj(RGWRados *rgwstore, const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive,
+ RGWObjVersionTracker *objv_tracker, real_time set_mtime, map<string, bufferlist> *pattrs = NULL);
+int rgw_get_system_obj(RGWRados *rgwstore, RGWSysObjectCtx& obj_ctx, const rgw_pool& pool, const string& key, bufferlist& bl,
+ RGWObjVersionTracker *objv_tracker, real_time *pmtime, map<string, bufferlist> *pattrs = NULL,
+ rgw_cache_entry_info *cache_info = NULL,
+ boost::optional<obj_version> refresh_version = boost::none);
+int rgw_delete_system_obj(RGWRados *rgwstore, const rgw_pool& pool, const string& oid,
+ RGWObjVersionTracker *objv_tracker);
+
+const char *rgw_find_mime_by_ext(string& ext);
+
+void rgw_filter_attrset(map<string, bufferlist>& unfiltered_attrset, const string& check_prefix,
+ map<string, bufferlist> *attrset);
+
+/// indicates whether the current thread is in boost::asio::io_context::run(),
+/// used to log warnings if synchronous librados calls are made
+extern thread_local bool is_asio_thread;
+
+/// perform the rados operation, using the yield context when given
+int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
+ librados::ObjectReadOperation *op, bufferlist* pbl,
+ optional_yield y);
+int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
+ librados::ObjectWriteOperation *op, optional_yield y);
+
+int rgw_tools_init(CephContext *cct);
+void rgw_tools_cleanup();
+
+template<class H, size_t S> // H: hash type providing Update()/Final(); S: digest size in bytes
+class RGWEtag
+{
+ H hash;
+
+public:
+ RGWEtag() {}
+
+ void update(const char *buf, size_t len) { // feeds len raw bytes into the running hash
+ hash.Update((const unsigned char *)buf, len);
+ }
+
+ void update(bufferlist& bl) { // empty buffers are skipped
+ if (bl.length() > 0) {
+ update(bl.c_str(), bl.length());
+ }
+ }
+
+ void update(const string& s) { // empty strings are skipped
+ if (!s.empty()) {
+ update(s.c_str(), s.size());
+ }
+ }
+ void finish(string *etag) { // finalizes the hash and stores its hex rendering in *etag
+ char etag_buf[S];
+ char etag_buf_str[S * 2 + 16]; // two hex characters per digest byte plus 16 spare bytes
+
+ hash.Final((unsigned char *)etag_buf);
+ buf_to_hex((const unsigned char *)etag_buf, S,
+ etag_buf_str);
+
+ *etag = etag_buf_str; // assigned as a C string
+ }
+};
+
+using RGWMD5Etag = RGWEtag<MD5, CEPH_CRYPTO_MD5_DIGESTSIZE>;
+
+class RGWDataAccess
+{
+  RGWRados *store;
+  std::unique_ptr<RGWSysObjectCtx> sysobj_ctx;
+
+public:
+  RGWDataAccess(RGWRados *_store);
+
+  class Object;
+  class Bucket;
+
+  using BucketRef = std::shared_ptr<Bucket>;
+  using ObjectRef = std::shared_ptr<Object>;
+
+  class Bucket : public enable_shared_from_this<Bucket> {
+    friend class RGWDataAccess;
+    friend class Object;
+
+    RGWDataAccess *sd{nullptr};
+    RGWBucketInfo bucket_info;
+    string tenant;
+    string name;
+    string bucket_id;
+    ceph::real_time mtime;
+    map<std::string, bufferlist> attrs;
+
+    RGWAccessControlPolicy policy;  // decoded from attrs by finish_init()
+    int finish_init();
+
+    Bucket(RGWDataAccess *_sd,
+           const string& _tenant,
+           const string& _name,
+           const string& _bucket_id) : sd(_sd),
+                                       tenant(_tenant),
+                                       name(_name),
+                                       bucket_id(_bucket_id) {}
+    Bucket(RGWDataAccess *_sd) : sd(_sd) {}
+    int init();
+    int init(const RGWBucketInfo& _bucket_info, const map<string, bufferlist>& _attrs);
+  public:
+    int get_object(const rgw_obj_key& key,
+                   ObjectRef *obj);
+
+  };
+
+
+  class Object {
+    RGWDataAccess *sd{nullptr};
+    BucketRef bucket;
+    rgw_obj_key key;
+
+    ceph::real_time mtime;
+    string etag;
+    std::optional<uint64_t> olh_epoch;
+    ceph::real_time delete_at;
+    std::optional<string> user_data;
+
+    std::optional<bufferlist> aclbl;  // encoded ACL; filled by set_policy()
+
+    Object(RGWDataAccess *_sd,
+           BucketRef&& _bucket,
+           const rgw_obj_key& _key) : sd(_sd),
+                                      bucket(std::move(_bucket)),  // take over the reference instead of copying it
+                                      key(_key) {}
+  public:
+    int put(bufferlist& data, map<string, bufferlist>& attrs); /* might modify attrs */
+
+    void set_mtime(const ceph::real_time& _mtime) {
+      mtime = _mtime;
+    }
+
+    void set_etag(const string& _etag) {
+      etag = _etag;
+    }
+
+    void set_olh_epoch(uint64_t epoch) {
+      olh_epoch = epoch;
+    }
+
+    void set_delete_at(ceph::real_time _delete_at) {
+      delete_at = _delete_at;
+    }
+
+    void set_user_data(const string& _user_data) {
+      user_data = _user_data;
+    }
+
+    void set_policy(const RGWAccessControlPolicy& policy);
+
+    friend class Bucket;
+  };
+
+  int get_bucket(const string& tenant,
+                 const string& name,       // by reference: the Bucket constructor copies it anyway
+                 const string& bucket_id,
+                 BucketRef *bucket) {
+    bucket->reset(new Bucket(this, tenant, name, bucket_id));
+    return (*bucket)->init();
+  }
+
+  int get_bucket(const RGWBucketInfo& bucket_info,
+                 const map<string, bufferlist>& attrs,
+                 BucketRef *bucket) {
+    bucket->reset(new Bucket(this));
+    return (*bucket)->init(bucket_info, attrs);
+  }
+  friend class Bucket;
+  friend class Object;
+};
+
+using RGWDataAccessRef = std::shared_ptr<RGWDataAccess>;
+
+#endif
diff --git a/src/rgw/rgw_torrent.cc b/src/rgw/rgw_torrent.cc
new file mode 100644
index 00000000..3fca9ba9
--- /dev/null
+++ b/src/rgw/rgw_torrent.cc
@@ -0,0 +1,266 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include <sstream>
+
+#include "rgw_torrent.h"
+#include "include/str_list.h"
+#include "include/rados/librados.hpp"
+
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using ceph::crypto::MD5;
+using namespace librados;
+using namespace boost;
+using ceph::crypto::SHA1;
+
+/* Start with an empty, non-torrent seed: no data counted, no piece size. */
+seed::seed()
+  : sha_len(0),
+    is_torrent(false)
+{
+  info.piece_length = 0;
+  info.len = 0;
+}
+
+/* Drop the buffered data and forget the (non-owned) request/store pointers. */
+seed::~seed()
+{
+  info.sha1_bl.clear();
+  bl.clear();
+  s = nullptr;
+  store = nullptr;
+}
+
+/* Remember the request state and the store; both are only borrowed. */
+void seed::init(struct req_state *p_req, RGWRados *p_store)
+{
+  this->s = p_req;
+  this->store = p_store;
+}
+
+/*
+ * Assemble the torrent document for obj.
+ *
+ * The config-driven fields (announce, comment, created by, encoding) are
+ * encoded first, then the part stored under the RGW_OBJ_TORRENT omap key
+ * of the head object is appended.
+ *
+ * On success bl_data receives the document, total_len its length, and 0 is
+ * returned. Returns the omap read error, or -EINVAL when the omap key is
+ * missing.
+ */
+int seed::get_torrent_file(RGWRados::Object::Read &read_op,
+                           uint64_t &total_len,
+                           ceph::bufferlist &bl_data,
+                           rgw_obj &obj)
+{
+  /* optional fields are only emitted when configured */
+  dencode.bencode_dict(bl);
+  set_announce();
+  if (!comment.empty()) {
+    dencode.bencode(COMMENT, comment, bl);
+  }
+  if (!create_by.empty()) {
+    dencode.bencode(CREATED_BY, create_by, bl);
+  }
+  if (!encoding.empty()) {
+    dencode.bencode(ENCODING, encoding, bl);
+  }
+
+  string head_oid, loc_key;
+  get_obj_bucket_and_oid_loc(obj, head_oid, loc_key);
+  ldout(s->cct, 20) << "NOTICE: head obj oid= " << head_oid << dendl;
+
+  const set<string> wanted_keys{RGW_OBJ_TORRENT};
+  map<string, bufferlist> found;
+  const int rc = read_op.state.cur_ioctx->omap_get_vals_by_keys(head_oid, wanted_keys, &found);
+  if (rc < 0) {
+    ldout(s->cct, 0) << "ERROR: omap_get_vals_by_keys failed: " << rc << dendl;
+    return rc;
+  }
+  if (found.size() != 1) {
+    ldout(s->cct, 0) << "ERROR: omap key " RGW_OBJ_TORRENT " not found" << dendl;
+    return -EINVAL;
+  }
+
+  /* stored part goes last, then the outer dictionary is closed */
+  bl.append(std::move(found.begin()->second));
+  dencode.bencode_end(bl);
+
+  bl_data = bl;
+  total_len = bl.length();
+  return 0;
+}
+
+/* Returns is_torrent, which get_params() sets to true. */
+bool seed::get_flag()
+{
+ return is_torrent;
+}
+
+/*
+ * Account for one more chunk of object data: add its length to the total
+ * and hash it into the piece list. Does nothing unless the torrent flag
+ * is set.
+ */
+void seed::update(bufferlist &bl)
+{
+  if (is_torrent) {
+    info.len += bl.length();
+    sha1(&h, bl, bl.length());
+  }
+}
+
+/*
+ * Finish torrent generation: compute the total size of the piece hashes,
+ * encode the torrent data and save it through save_torrent_file().
+ *
+ * Returns 0 on success, -EINVAL when the piece length is not positive
+ * (it is used as a divisor below), or the error from save_torrent_file().
+ */
+int seed::complete()
+{
+  /* piece_length comes from configuration; dividing by zero would be
+   * undefined behaviour, so refuse instead */
+  if (info.piece_length <= 0)
+  {
+    ldout(s->cct, 0) << "ERROR: invalid torrent piece length " << info.piece_length << dendl;
+    return -EINVAL;
+  }
+
+  /* one digest per full piece, plus one for a trailing partial piece */
+  uint64_t remain = info.len%info.piece_length;
+  uint8_t remain_len = ((remain > 0)? 1 : 0);
+  sha_len = (info.len/info.piece_length + remain_len)*CEPH_CRYPTO_SHA1_DIGESTSIZE;
+
+  /* produce torrent data */
+  do_encode();
+
+  /* save torrent data into OMAP */
+  const int ret = save_torrent_file();
+  if (0 != ret)
+  {
+    ldout(s->cct, 0) << "ERROR: failed to save_torrent_file() ret= "<< ret << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/* Returns info.len, the running byte total that update() accumulates. */
+off_t seed::get_data_len()
+{
+ return info.len;
+}
+
+/* Store the whole-seconds part of value as the torrent creation date. */
+void seed::set_create_date(ceph::real_time& value)
+{
+  const utime_t stamp(ceph::real_clock::to_timespec(value));
+  create_date = stamp.sec();
+}
+
+/*
+ * Append one digest to the piece list. Exactly
+ * CEPH_CRYPTO_SHA1_DIGESTSIZE bytes are read from buff.
+ */
+void seed::set_info_pieces(char *buff)
+{
+ info.sha1_bl.append(buff, CEPH_CRYPTO_SHA1_DIGESTSIZE);
+}
+
+/* Set the name that do_encode() writes into the info dictionary. */
+void seed::set_info_name(const string& value)
+{
+ info.name = value;
+}
+
+/*
+ * Hash bl in units of info.piece_length and append one digest per unit to
+ * the piece list; a trailing partial unit gets its own digest.
+ *
+ * h      hash object to use
+ * bl     data to hash
+ * bl_len number of bytes of bl to hash
+ *
+ * Nothing is hashed when the piece length is not positive, because it is
+ * used as a divisor.
+ */
+void seed::sha1(SHA1 *h, bufferlist &bl, off_t bl_len)
+{
+  /* piece_length comes from configuration; guard the division below */
+  if (info.piece_length <= 0)
+  {
+    ldout(s->cct, 5) << "NOTICE: invalid torrent piece length " << info.piece_length
+                     << ", skip hashing" << dendl;
+    return;
+  }
+
+  const off_t num = bl_len/info.piece_length;
+  const off_t remain = bl_len%info.piece_length;
+
+  char *pstr = bl.c_str();
+  char sha[25];
+
+  /* get sha1 */
+  for (off_t i = 0; i < num; i++)
+  {
+    // FIPS zeroization audit 20191116: this memset is not intended to
+    // wipe out a secret after use.
+    memset(sha, 0x00, sizeof(sha));
+    h->Update((unsigned char *)pstr, info.piece_length);
+    h->Final((unsigned char *)sha);
+    set_info_pieces(sha);
+    pstr += info.piece_length;
+  }
+
+  /* process remain */
+  if (0 != remain)
+  {
+    // FIPS zeroization audit 20191116: this memset is not intended to
+    // wipe out a secret after use.
+    memset(sha, 0x00, sizeof(sha));
+    h->Update((unsigned char *)pstr, remain);
+    h->Final((unsigned char *)sha);
+    set_info_pieces(sha);
+  }
+  ::ceph::crypto::zeroize_for_security(sha, sizeof(sha));
+}
+
+/*
+ * Turn torrent mode on and load the torrent settings from configuration.
+ * When no tracker is configured, the origin (if any) is used as the
+ * announce value. Always returns 0.
+ */
+int seed::get_params()
+{
+  is_torrent = true;
+
+  announce = g_conf()->rgw_torrent_tracker;
+  origin = g_conf()->rgw_torrent_origin;
+  comment = g_conf()->rgw_torrent_comment;
+  encoding = g_conf()->rgw_torrent_encoding;
+  create_by = g_conf()->rgw_torrent_createby;
+  info.piece_length = g_conf()->rgw_torrent_sha_unit;
+
+  /* no tracker configured: fall back to the origin */
+  if (announce.empty()) {
+    if (!origin.empty()) {
+      announce = origin;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Encode the tracker information into bl: the first entry of the
+ * comma-separated announce setting becomes "announce", and every entry
+ * (the first one included) becomes a one-element list inside
+ * "announce-list". Nothing is written when the setting yields no entries.
+ */
+void seed::set_announce()
+{
+  list<string> trackers;  // entries parsed from the announce setting
+  get_str_list(announce, ",", trackers);
+
+  if (trackers.empty()) {
+    ldout(s->cct, 5) << "NOTICE: announce_list is empty " << dendl;
+    return;
+  }
+
+  dencode.bencode_key(ANNOUNCE, bl);
+  dencode.bencode_key(trackers.front(), bl);
+
+  dencode.bencode_key(ANNOUNCE_LIST, bl);
+  dencode.bencode_list(bl);
+  for (const string& tracker : trackers) {
+    dencode.bencode_list(bl);
+    dencode.bencode_key(tracker, bl);
+    dencode.bencode_end(bl);
+  }
+  dencode.bencode_end(bl);
+}
+
+/*
+ * Encode the part of the torrent that is stored with the object: the
+ * creation date and the info dictionary (length, name, piece length and
+ * the piece digests). The remaining fields are added from configuration
+ * when the torrent is fetched.
+ */
+void seed::do_encode()
+{
+  dencode.bencode(CREATION_DATE, create_date, bl);
+
+  dencode.bencode_key(INFO_PIECES, bl);
+  dencode.bencode_dict(bl);
+  dencode.bencode(LENGTH, info.len, bl);
+  dencode.bencode(NAME, info.name, bl);
+  dencode.bencode(PIECE_LENGTH, info.piece_length, bl);
+
+  /* "pieces" is written by hand as <decimal length>:<raw digests> */
+  const string sha_len_str = std::to_string(sha_len);
+  dencode.bencode_key(PIECES, bl);
+  bl.append(sha_len_str.c_str(), sha_len_str.length());
+  bl.append(':');
+  bl.append(info.sha1_bl.c_str(), sha_len);
+  dencode.bencode_end(bl);
+}
+
+/*
+ * Store the encoded torrent data (bl) under the RGW_OBJ_TORRENT omap key
+ * of the request's object. Returns the result of the omap set.
+ */
+int seed::save_torrent_file()
+{
+  string omap_key = RGW_OBJ_TORRENT;
+  rgw_obj head(s->bucket, s->object.name);
+
+  rgw_raw_obj raw_head;
+  store->obj_to_raw(s->bucket_info.placement_rule, head, &raw_head);
+
+  auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(raw_head);
+
+  const int op_ret = sysobj.omap().set(omap_key, bl);
+  if (op_ret < 0) {
+    ldout(s->cct, 0) << "ERROR: failed to omap_set() op_ret = " << op_ret << dendl;
+  }
+  return op_ret;
+}
diff --git a/src/rgw/rgw_torrent.h b/src/rgw/rgw_torrent.h
new file mode 100644
index 00000000..c135323d
--- /dev/null
+++ b/src/rgw/rgw_torrent.h
@@ -0,0 +1,142 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_TORRENT_H
+#define CEPH_RGW_TORRENT_H
+
+#include <string>
+#include <list>
+#include <map>
+#include <set>
+
+#include "common/ceph_time.h"
+
+#include "rgw_rados.h"
+#include "rgw_common.h"
+
+using ceph::crypto::SHA1;
+
+struct req_state;
+
+// omap key under which the stored part of the torrent is kept
+#define RGW_OBJ_TORRENT "rgw.torrent"
+
+// key strings passed to TorrentBencode when building the torrent document
+#define ANNOUNCE "announce"
+#define ANNOUNCE_LIST "announce-list"
+#define COMMENT "comment"
+#define CREATED_BY "created by"
+#define CREATION_DATE "creation date"
+#define ENCODING "encoding"
+#define LENGTH "length"
+#define NAME "name"
+#define PIECE_LENGTH "piece length"
+#define PIECES "pieces"
+// note: despite its name this is the key of the whole "info" dictionary
+#define INFO_PIECES "info"
+#define GET_TORRENT "torrent"
+
+/*
+ * Minimal bencode writer: every method appends the encoded form of its
+ * argument(s) to the given bufferlist.
+ *
+ * Integers are taken as int64_t so that values wider than int (file
+ * lengths held in off_t, timestamps held in time_t) are encoded without
+ * being narrowed; callers passing int are unaffected.
+ */
+class TorrentBencode
+{
+public:
+  TorrentBencode() {}
+  ~TorrentBencode() {}
+
+  //control characters
+  void bencode_dict(bufferlist& bl) { bl.append('d'); }
+  void bencode_list(bufferlist& bl) { bl.append('l'); }
+  void bencode_end(bufferlist& bl) { bl.append('e'); }
+
+  //single integer: i<decimal>e
+  void bencode(int64_t value, bufferlist& bl)
+  {
+    const std::string digits = std::to_string(value);
+    bl.append('i');
+    bl.append(digits.c_str(), digits.length());
+    bencode_end(bl);
+  }
+
+  //single string: <length>:<bytes>
+  void bencode(const std::string& str, bufferlist& bl)
+  {
+    bencode_key(str, bl);
+  }
+
+  //dictionary element with an integer value
+  void bencode(const std::string& key, int64_t value, bufferlist& bl)
+  {
+    bencode_key(key, bl);
+    bencode(value, bl);
+  }
+
+  //dictionary element with a string value
+  void bencode(const std::string& key, const std::string& value, bufferlist& bl)
+  {
+    bencode_key(key, bl);
+    bencode(value, bl);
+  }
+
+  //length-prefixed string: <length>:<bytes>
+  void bencode_key(const std::string& key, bufferlist& bl)
+  {
+    const std::string prefix = std::to_string(key.length()) + ":";
+    bl.append(prefix.c_str(), prefix.length());
+    bl.append(key.c_str(), key.length());
+  }
+};
+
+/*
+ * Builds the torrent document for one object: update() collects the piece
+ * hashes, complete() encodes and stores them, and get_torrent_file()
+ * assembles the document that is returned to the client.
+ */
+class seed
+{
+private:
+ struct
+ {
+ int piece_length; // length of each piece, in bytes
+ bufferlist sha1_bl; // concatenated SHA1 digests of the pieces
+ string name; // file name
+ off_t len; // total number of data bytes seen so far
+ }info;
+
+ string announce; // tracker(s), comma separated
+ string origin; // origin, used as announce when no tracker is set
+ time_t create_date{0}; // creation time, in seconds
+ string comment; // comment
+ string create_by; // app name and version
+ string encoding; // value written under the "encoding" key when not empty
+ uint64_t sha_len; // total length in bytes of the piece digests
+ bool is_torrent; // true once get_params() has been called
+ bufferlist bl; // encoded torrent data
+
+ struct req_state *s{nullptr}; // not owned, set by init()
+ RGWRados *store{nullptr}; // not owned, set by init()
+ SHA1 h;
+
+ TorrentBencode dencode;
+public:
+ seed();
+ ~seed();
+
+ int get_params();
+ void init(struct req_state *p_req, RGWRados *p_store);
+ int get_torrent_file(RGWRados::Object::Read &read_op,
+ uint64_t &total_len,
+ ceph::bufferlist &bl_data,
+ rgw_obj &obj);
+
+ off_t get_data_len();
+ bool get_flag();
+
+ void set_create_date(ceph::real_time& value);
+ void set_info_name(const string& value);
+ void update(bufferlist &bl);
+ int complete();
+
+private:
+ void do_encode ();
+ void set_announce();
+ // NOTE(review): no definition of set_exist() is visible in rgw_torrent.cc - confirm it is still needed
+ void set_exist(bool exist);
+ void set_info_pieces(char *buff);
+ void sha1(SHA1 *h, bufferlist &bl, off_t bl_len);
+ int save_torrent_file();
+};
+#endif /* CEPH_RGW_TORRENT_H */
diff --git a/src/rgw/rgw_url.cc b/src/rgw/rgw_url.cc
new file mode 100644
index 00000000..58f7b549
--- /dev/null
+++ b/src/rgw/rgw_url.cc
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <regex>
+
+namespace rgw {
+
+namespace {
+ // capture group numbers in the concatenated expression
+ // schema_re + user_pass_re + host_port_re + path_re:
+ // 1 = schema, 2 = whole "user:password@" part, 3 = user, 4 = password,
+ // 5 = host[:port], 6 = path
+ const auto USER_GROUP_IDX = 3;
+ const auto PASSWORD_GROUP_IDX = 4;
+ const auto HOST_GROUP_IDX = 5;
+
+ // letters followed by "://"
+ const std::string schema_re = "([[:alpha:]]+:\\/\\/)";
+ // optional "user:password@"
+ const std::string user_pass_re = "(([^:\\s]+):([^@\\s]+)@)?";
+ // alphanumerics, '.', ':' and '-'
+ const std::string host_port_re = "([[:alnum:].:-]+)";
+ // optional path starting with '/'
+ const std::string path_re = "(/[[:print:]]+)?";
+}
+
+// Split url into host (including an optional ":port"), user and password.
+// Returns false, leaving the outputs untouched, when url does not match
+// the expected form. user and password are set to empty strings when the
+// url carries no "user:password@" part.
+bool parse_url_authority(const std::string& url, std::string& host, std::string& user, std::string& password) {
+  // the expression never changes, so compile it once instead of on every
+  // call (initialization of a function-local static is thread-safe)
+  static const std::regex url_regex(schema_re + user_pass_re + host_port_re + path_re,
+                                    std::regex::icase);
+  std::smatch url_match_result;
+
+  if (!std::regex_match(url, url_match_result, url_regex)) {
+    return false;
+  }
+
+  host = url_match_result[HOST_GROUP_IDX];
+  user = url_match_result[USER_GROUP_IDX];
+  password = url_match_result[PASSWORD_GROUP_IDX];
+  return true;
+}
+
+// Extract user and password from url. Returns false, leaving the outputs
+// untouched, when url does not match the expected form. Both outputs are
+// set to empty strings when the url carries no "user:password@" part.
+bool parse_url_userinfo(const std::string& url, std::string& user, std::string& password) {
+  // the expression never changes, so compile it once instead of on every
+  // call (initialization of a function-local static is thread-safe)
+  static const std::regex url_regex(schema_re + user_pass_re + host_port_re + path_re);
+  std::smatch url_match_result;
+
+  if (!std::regex_match(url, url_match_result, url_regex)) {
+    return false;
+  }
+
+  user = url_match_result[USER_GROUP_IDX];
+  password = url_match_result[PASSWORD_GROUP_IDX];
+  return true;
+}
+}
+
diff --git a/src/rgw/rgw_url.h b/src/rgw/rgw_url.h
new file mode 100644
index 00000000..089401a4
--- /dev/null
+++ b/src/rgw/rgw_url.h
@@ -0,0 +1,12 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string>
+namespace rgw {
+// parse a URL of the form: http|https|amqp|amqps|kafka://[user:password@]<host>[:port]
+// both functions return true when the URL matches that form and false otherwise;
+// host receives "<host>[:port]"
+bool parse_url_authority(const std::string& url, std::string& host, std::string& user, std::string& password);
+// same as above, but only extracts the user and password
+bool parse_url_userinfo(const std::string& url, std::string& user, std::string& password);
+}
+
diff --git a/src/rgw/rgw_usage.cc b/src/rgw/rgw_usage.cc
new file mode 100644
index 00000000..a82bc66c
--- /dev/null
+++ b/src/rgw/rgw_usage.cc
@@ -0,0 +1,151 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <map>
+
+#include "rgw_rados.h"
+#include "rgw_usage.h"
+#include "rgw_formats.h"
+
+
+
+/*
+ * Dump the per-category counters of entry as a "categories" array.
+ * A category is skipped only when categories is non-null, non-empty and
+ * does not contain it.
+ */
+static void dump_usage_categories_info(Formatter *formatter, const rgw_usage_log_entry& entry, map<string, bool> *categories)
+{
+  const bool filtering = categories && !categories->empty();
+
+  formatter->open_array_section("categories");
+  for (const auto& kv : entry.usage_map) {
+    if (filtering && !categories->count(kv.first)) {
+      continue;
+    }
+    const rgw_usage_data& counters = kv.second;
+    formatter->open_object_section("entry");
+    formatter->dump_string("category", kv.first);
+    formatter->dump_int("bytes_sent", counters.bytes_sent);
+    formatter->dump_int("bytes_received", counters.bytes_received);
+    formatter->dump_int("ops", counters.ops);
+    formatter->dump_int("successful_ops", counters.successful_ops);
+    formatter->close_section(); // entry
+  }
+  formatter->close_section(); // categories
+}
+
+/*
+ * Read the usage log for uid / bucket_name between start_epoch and
+ * end_epoch and dump it through flusher's formatter.
+ *
+ * show_log_entries  emit an "entries" array, grouped per user and bucket
+ * show_log_sum      emit a "summary" array with per-user totals
+ * categories        optional category filter; may be null
+ *
+ * Returns 0 on success or the negative error from read_usage(); -ENOENT
+ * from read_usage() is treated as "no more entries".
+ */
+int RGWUsage::show(RGWRados *store, const rgw_user& uid, const string& bucket_name, uint64_t start_epoch,
+                   uint64_t end_epoch, bool show_log_entries, bool show_log_sum, map<string, bool> *categories,
+                   RGWFormatterFlusher& flusher)
+{
+  uint32_t max_entries = 1000;
+
+  bool is_truncated = true;
+
+  RGWUsageIter usage_iter;
+  Formatter *formatter = flusher.get_formatter();
+
+  map<rgw_user_bucket, rgw_usage_log_entry> usage;
+
+  /* entry.sum() takes the filter by reference, so a null categories
+   * pointer must not be dereferenced; fall back to an empty map.
+   * dump_usage_categories_info() treats an empty filter as "no
+   * filtering"; presumably sum() does the same - TODO confirm */
+  map<string, bool> no_categories;
+  map<string, bool>& sum_categories = categories ? *categories : no_categories;
+
+  flusher.start(0);
+
+  formatter->open_object_section("usage");
+  if (show_log_entries) {
+    formatter->open_array_section("entries");
+  }
+  string last_owner;
+  bool user_section_open = false;
+  map<string, rgw_usage_log_entry> summary_map;
+  while (is_truncated) {
+    int ret = store->read_usage(uid, bucket_name, start_epoch, end_epoch, max_entries,
+                                &is_truncated, usage_iter, usage);
+
+    if (ret == -ENOENT) {
+      ret = 0;
+      is_truncated = false;
+    }
+
+    if (ret < 0) {
+      return ret;
+    }
+
+    for (const auto& kv : usage) {
+      const rgw_user_bucket& ub = kv.first;
+      const rgw_usage_log_entry& entry = kv.second;
+
+      if (show_log_entries) {
+        /* a change of user closes the previous user's sections */
+        if (ub.user.compare(last_owner) != 0) {
+          if (user_section_open) {
+            formatter->close_section(); // buckets
+            formatter->close_section(); // user
+          }
+          formatter->open_object_section("user");
+          formatter->dump_string("user", ub.user);
+          formatter->open_array_section("buckets");
+          user_section_open = true;
+          last_owner = ub.user;
+        }
+        formatter->open_object_section("bucket");
+        formatter->dump_string("bucket", ub.bucket);
+        utime_t ut(entry.epoch, 0);
+        ut.gmtime(formatter->dump_stream("time"));
+        formatter->dump_int("epoch", entry.epoch);
+        string owner = entry.owner.to_str();
+        string payer = entry.payer.to_str();
+        formatter->dump_string("owner", owner);
+        if (!payer.empty() && payer != owner) {
+          formatter->dump_string("payer", payer);
+        }
+        dump_usage_categories_info(formatter, entry, categories);
+        formatter->close_section(); // bucket
+        flusher.flush();
+      }
+
+      summary_map[ub.user].aggregate(entry, categories);
+    }
+  }
+  if (show_log_entries) {
+    if (user_section_open) {
+      formatter->close_section(); // buckets
+      formatter->close_section(); //user
+    }
+    formatter->close_section(); // entries
+  }
+
+  if (show_log_sum) {
+    formatter->open_array_section("summary");
+    for (const auto& kv : summary_map) {
+      const rgw_usage_log_entry& entry = kv.second;
+      formatter->open_object_section("user");
+      formatter->dump_string("user", kv.first);
+      dump_usage_categories_info(formatter, entry, categories);
+      rgw_usage_data total_usage;
+      entry.sum(total_usage, sum_categories);
+      formatter->open_object_section("total");
+      formatter->dump_int("bytes_sent", total_usage.bytes_sent);
+      formatter->dump_int("bytes_received", total_usage.bytes_received);
+      formatter->dump_int("ops", total_usage.ops);
+      formatter->dump_int("successful_ops", total_usage.successful_ops);
+      formatter->close_section(); // total
+
+      formatter->close_section(); // user
+
+      flusher.flush();
+    }
+
+    formatter->close_section(); // summary
+  }
+
+  formatter->close_section(); // usage
+  flusher.flush();
+
+  return 0;
+}
+
+/* Forwards to store->trim_usage() with the same arguments and returns its result. */
+int RGWUsage::trim(RGWRados *store, const rgw_user& uid, const string& bucket_name, uint64_t start_epoch,
+ uint64_t end_epoch)
+{
+ return store->trim_usage(uid, bucket_name, start_epoch, end_epoch);
+}
+
+/* Forwards to store->clear_usage() and returns its result. */
+int RGWUsage::clear(RGWRados *store)
+{
+ return store->clear_usage();
+}
diff --git a/src/rgw/rgw_usage.h b/src/rgw/rgw_usage.h
new file mode 100644
index 00000000..da39e596
--- /dev/null
+++ b/src/rgw/rgw_usage.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_USAGE_H
+#define CEPH_RGW_USAGE_H
+
+#include <string>
+#include <map>
+
+#include "common/Formatter.h"
+#include "rgw_formats.h"
+
+class RGWRados;
+
+
+/* Static helpers for showing, trimming and clearing the usage log. */
+class RGWUsage
+{
+public:
+ /* dump usage entries and/or per-user summaries through flusher */
+ static int show(RGWRados *store, const rgw_user& uid, const string& bucket_name, uint64_t start_epoch,
+ uint64_t end_epoch, bool show_log_entries, bool show_log_sum,
+ std::map<std::string, bool> *categories, RGWFormatterFlusher& flusher);
+
+ /* forwards to RGWRados::trim_usage() */
+ static int trim(RGWRados *store, const rgw_user& uid, const string& bucket_name, uint64_t start_epoch,
+ uint64_t end_epoch);
+
+ /* forwards to RGWRados::clear_usage() */
+ static int clear(RGWRados *store);
+};
+
+
+#endif
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
new file mode 100644
index 00000000..d70ec879
--- /dev/null
+++ b/src/rgw/rgw_user.cc
@@ -0,0 +1,2958 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+
+#include <string>
+#include <map>
+#include <boost/algorithm/string.hpp>
+
+#include "common/errno.h"
+#include "common/Formatter.h"
+#include "common/ceph_json.h"
+#include "common/RWLock.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_acl.h"
+
+#include "include/types.h"
+#include "rgw_user.h"
+#include "rgw_string.h"
+
+// until everything is moved from rgw_common
+#include "rgw_common.h"
+
+#include "rgw_bucket.h"
+#include "rgw_quota.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+#include "services/svc_sys_obj_cache.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+
+// metadata handler passed to meta_mgr->put_entry() / remove_entry() for
+// user entries; starts out NULL
+static RGWMetadataHandler *user_meta_handler = NULL;
+// defined in another translation unit
+extern void op_type_to_str(uint32_t mask, char *buf, int len);
+
+/**
+ * Get the anonymous (ie, unauthenticated) user info.
+ * Sets the user id to RGW_USER_ANON_ID and clears the display name and
+ * the access keys; other fields of info are left as they are.
+ */
+void rgw_get_anon_user(RGWUserInfo& info)
+{
+ info.user_id = RGW_USER_ANON_ID;
+ info.display_name.clear();
+ info.access_keys.clear();
+}
+
+/**
+ * Sync the stats of every bucket owned by user_id, then complete the
+ * user stats sync.
+ * Buckets are listed in chunks of rgw_list_buckets_max_chunk entries.
+ * returns: 0 on success, -ERR# on failure
+ */
+int rgw_user_sync_all_stats(RGWRados *store, const rgw_user& user_id)
+{
+ CephContext *cct = store->ctx();
+ size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk;
+ bool is_truncated = false;
+ string marker;
+ int ret;
+ RGWSysObjectCtx obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ do {
+ RGWUserBuckets user_buckets;
+ ret = rgw_read_user_buckets(store, user_id, user_buckets, marker,
+ string(), max_entries, false, &is_truncated);
+ if (ret < 0) {
+ ldout(cct, 0) << "failed to read user buckets: ret=" << ret << dendl;
+ return ret;
+ }
+ map<string, RGWBucketEnt>& buckets = user_buckets.get_buckets();
+ for (map<string, RGWBucketEnt>::iterator i = buckets.begin();
+ i != buckets.end();
+ ++i) {
+ // the next listing resumes after the last bucket seen
+ marker = i->first;
+
+ RGWBucketEnt& bucket_ent = i->second;
+ RGWBucketInfo bucket_info;
+
+ ret = store->get_bucket_info(obj_ctx, user_id.tenant, bucket_ent.bucket.name,
+ bucket_info, nullptr, nullptr);
+ if (ret < 0) {
+ // a bucket whose info cannot be read is skipped, not fatal
+ ldout(cct, 0) << "ERROR: could not read bucket info: bucket=" << bucket_ent.bucket << " ret=" << ret << dendl;
+ continue;
+ }
+ ret = rgw_bucket_sync_user_stats(store, user_id, bucket_info);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: could not sync bucket stats: ret=" << ret << dendl;
+ return ret;
+ }
+ RGWQuotaInfo bucket_quota;
+ ret = store->check_bucket_shards(bucket_info, bucket_info.bucket, bucket_quota);
+ if (ret < 0) {
+ // logged only; the loop carries on with the next bucket
+ ldout(cct, 0) << "ERROR in check_bucket_shards: " << cpp_strerror(-ret)<< dendl;
+ }
+ }
+ } while (is_truncated);
+
+ ret = store->complete_sync_user_stats(user_id);
+ if (ret < 0) {
+ // NOTE(review): this error goes to cerr while the others above use ldout
+ cerr << "ERROR: failed to complete syncing user stats: ret=" << ret << std::endl;
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * Collect the stats entry of every bucket owned by user_id into
+ * buckets_usage_map, keyed by bucket name.
+ * returns: 0 on success, -ERR# on failure
+ */
+int rgw_user_get_all_buckets_stats(RGWRados *store, const rgw_user& user_id, map<string, cls_user_bucket_entry>&buckets_usage_map)
+{
+ CephContext *cct = store->ctx();
+ size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk;
+ bool done;
+ // passed to rgw_read_user_buckets() but not read here; the loop ends on
+ // a short listing instead (see 'done' below)
+ bool is_truncated;
+ string marker;
+ int ret;
+
+ do {
+ RGWUserBuckets user_buckets;
+ ret = rgw_read_user_buckets(store, user_id, user_buckets, marker,
+ string(), max_entries, false, &is_truncated);
+ if (ret < 0) {
+ ldout(cct, 0) << "failed to read user buckets: ret=" << ret << dendl;
+ return ret;
+ }
+ map<string, RGWBucketEnt>& buckets = user_buckets.get_buckets();
+ for (const auto& i : buckets) {
+ // the next listing resumes after the last bucket seen
+ marker = i.first;
+
+ const RGWBucketEnt& bucket_ent = i.second;
+ cls_user_bucket_entry entry;
+ ret = store->cls_user_get_bucket_stats(bucket_ent.bucket, entry);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: could not get bucket stats: ret=" << ret << dendl;
+ return ret;
+ }
+ // emplace() keeps an already present entry for the same name
+ buckets_usage_map.emplace(bucket_ent.bucket.name, entry);
+ }
+ // fewer entries than requested means this was the last chunk
+ done = (buckets.size() < max_entries);
+ } while (!done);
+
+ return 0;
+}
+
+/**
+ * Save the given user information to storage.
+ *
+ * Before anything is written, every swift key and access key that is not
+ * already present in old_info is checked against the existing indexes;
+ * if one of them is mapped to a different user, -EEXIST is returned.
+ * After the user entry itself is stored, index objects are written for
+ * the email (when it changed), the new access keys and the new swift keys.
+ *
+ * old_info may be NULL, in which case every key and the email are treated
+ * as new. objv_tracker may be NULL; it is only read, a local copy is used
+ * for the write.
+ *
+ * Returns: 0 on success, -ERR# on failure.
+ */
+int rgw_store_user_info(RGWRados *store,
+                        RGWUserInfo& info,
+                        RGWUserInfo *old_info,
+                        RGWObjVersionTracker *objv_tracker,
+                        real_time mtime,
+                        bool exclusive,
+                        map<string, bufferlist> *pattrs)
+{
+  int ret;
+  RGWObjVersionTracker ot;
+
+  if (objv_tracker) {
+    ot = *objv_tracker;
+  }
+
+  /* no write version requested: derive one from the read version, or
+   * generate a fresh one when nothing was read */
+  if (ot.write_version.tag.empty()) {
+    if (ot.read_version.tag.empty()) {
+      ot.generate_new_write_ver(store->ctx());
+    } else {
+      ot.write_version = ot.read_version;
+      ot.write_version.ver++;
+    }
+  }
+
+  for (auto& kv : info.swift_keys) {
+    if (old_info && old_info->swift_keys.count(kv.first) != 0)
+      continue;
+    RGWAccessKey& k = kv.second;
+    /* check if swift mapping exists */
+    RGWUserInfo inf;
+    int r = rgw_get_user_info_by_swift(store, k.id, inf);
+    if (r >= 0 && inf.user_id.compare(info.user_id) != 0) {
+      /* report the user that currently owns the mapping (inf), not the
+       * one being stored */
+      ldout(store->ctx(), 0) << "WARNING: can't store user info, swift id (" << k.id
+        << ") already mapped to another user (" << inf.user_id << ")" << dendl;
+      return -EEXIST;
+    }
+  }
+
+  if (!info.access_keys.empty()) {
+    /* check if access keys already exist */
+    RGWUserInfo inf;
+    for (auto& kv : info.access_keys) {
+      RGWAccessKey& k = kv.second;
+      if (old_info && old_info->access_keys.count(kv.first) != 0)
+        continue;
+      int r = rgw_get_user_info_by_access_key(store, k.id, inf);
+      if (r >= 0 && inf.user_id.compare(info.user_id) != 0) {
+        ldout(store->ctx(), 0) << "WARNING: can't store user info, access key already mapped to another user" << dendl;
+        return -EEXIST;
+      }
+    }
+  }
+
+  RGWUID ui;
+  ui.user_id = info.user_id;
+
+  /* index objects only hold the uid; the user entry holds uid + info */
+  bufferlist link_bl;
+  encode(ui, link_bl);
+
+  bufferlist data_bl;
+  encode(ui, data_bl);
+  encode(info, data_bl);
+
+  string key;
+  info.user_id.to_str(key);
+
+  ret = store->meta_mgr->put_entry(user_meta_handler, key, data_bl, exclusive, &ot, mtime, pattrs);
+  if (ret < 0)
+    return ret;
+
+  if (!info.user_email.empty()) {
+    if (!old_info ||
+        old_info->user_email.compare(info.user_email) != 0) { /* only if new index changed */
+      ret = rgw_put_system_obj(store, store->svc.zone->get_zone_params().user_email_pool, info.user_email,
+                               link_bl, exclusive, NULL, real_time());
+      if (ret < 0)
+        return ret;
+    }
+  }
+
+  if (!info.access_keys.empty()) {
+    for (auto& kv : info.access_keys) {
+      RGWAccessKey& k = kv.second;
+      if (old_info && old_info->access_keys.count(kv.first) != 0)
+        continue;
+
+      ret = rgw_put_system_obj(store, store->svc.zone->get_zone_params().user_keys_pool, k.id,
+                               link_bl, exclusive, NULL, real_time());
+      if (ret < 0)
+        return ret;
+    }
+  }
+
+  for (auto& kv : info.swift_keys) {
+    RGWAccessKey& k = kv.second;
+    if (old_info && old_info->swift_keys.count(kv.first) != 0)
+      continue;
+
+    ret = rgw_put_system_obj(store, store->svc.zone->get_zone_params().user_swift_pool, k.id,
+                             link_bl, exclusive, NULL, real_time());
+    if (ret < 0)
+      return ret;
+  }
+
+  return ret;
+}
+
+/* value type of uinfo_cache: a user info plus its version tracker and mtime */
+struct user_info_entry {
+ RGWUserInfo info;
+ RGWObjVersionTracker objv_tracker;
+ real_time mtime;
+};
+
+/* cache consulted and filled by rgw_get_user_info_from_index(), keyed by the index key */
+static RGWChainedCacheImpl<user_info_entry> uinfo_cache;
+
+/**
+ * Look up a user through an index object (key in pool) that holds the uid.
+ * The result is served from uinfo_cache when present there; otherwise the
+ * index object is read, the uid decoded from it, the user loaded by uid
+ * and the result put into the cache.
+ * objv_tracker and pmtime are optional outputs.
+ * returns: 0 on success, -ERR# on failure (-EIO when decoding fails)
+ */
+int rgw_get_user_info_from_index(RGWRados * const store,
+ const string& key,
+ const rgw_pool& pool,
+ RGWUserInfo& info,
+ RGWObjVersionTracker * const objv_tracker,
+ real_time * const pmtime)
+{
+ if (auto e = uinfo_cache.find(key)) {
+ info = e->info;
+ if (objv_tracker)
+ *objv_tracker = e->objv_tracker;
+ if (pmtime)
+ *pmtime = e->mtime;
+ return 0;
+ }
+
+ user_info_entry e;
+ bufferlist bl;
+ RGWUID uid;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+
+ // e.mtime is taken from the index object, not from the user object
+ int ret = rgw_get_system_obj(store, obj_ctx, pool, key, bl, NULL, &e.mtime);
+ if (ret < 0)
+ return ret;
+
+ rgw_cache_entry_info cache_info;
+
+ auto iter = bl.cbegin();
+ try {
+ decode(uid, iter);
+ // note: this 'ret' shadows the outer one
+ int ret = rgw_get_user_info_by_uid(store, uid.user_id, e.info, &e.objv_tracker, NULL, &cache_info);
+ if (ret < 0) {
+ return ret;
+ }
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl;
+ return -EIO;
+ }
+
+ uinfo_cache.put(store->svc.cache, key, &e, { &cache_info });
+
+ info = e.info;
+ if (objv_tracker)
+ *objv_tracker = e.objv_tracker;
+ if (pmtime)
+ *pmtime = e.mtime;
+
+ return 0;
+}
+
+/**
+ * Given a uid, finds the user info associated with it.
+ * The object named after the uid is read from the user uid pool; it
+ * starts with an encoded RGWUID, which must match uid, optionally
+ * followed by the encoded user info.
+ * returns: 0 on success, -ERR# on failure (including nonexistence);
+ * -EIO on a uid mismatch or a decode error
+ */
+int rgw_get_user_info_by_uid(RGWRados *store,
+ const rgw_user& uid,
+ RGWUserInfo& info,
+ RGWObjVersionTracker * const objv_tracker,
+ real_time * const pmtime,
+ rgw_cache_entry_info * const cache_info,
+ map<string, bufferlist> * const pattrs)
+{
+ bufferlist bl;
+ RGWUID user_id;
+
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ string oid = uid.to_str();
+ int ret = rgw_get_system_obj(store, obj_ctx, store->svc.zone->get_zone_params().user_uid_pool, oid, bl, objv_tracker, pmtime, pattrs, cache_info);
+ if (ret < 0) {
+ return ret;
+ }
+
+ auto iter = bl.cbegin();
+ try {
+ decode(user_id, iter);
+ if (user_id.user_id.compare(uid) != 0) {
+ lderr(store->ctx()) << "ERROR: rgw_get_user_info_by_uid(): user id mismatch: " << user_id.user_id << " != " << uid << dendl;
+ return -EIO;
+ }
+ // when nothing follows the uid, info is left untouched
+ if (!iter.end()) {
+ decode(info, iter);
+ }
+ } catch (buffer::error& err) {
+ ldout(store->ctx(), 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * Given an email, finds the user info associated with it.
+ * The lookup goes through the index in the user email pool.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+int rgw_get_user_info_by_email(RGWRados *store, string& email, RGWUserInfo& info,
+ RGWObjVersionTracker *objv_tracker, real_time *pmtime)
+{
+ return rgw_get_user_info_from_index(store, email, store->svc.zone->get_zone_params().user_email_pool, info, objv_tracker, pmtime);
+}
+
+/**
+ * Given a swift username, finds the user info associated with it.
+ * The lookup goes through the index in the user swift pool.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_swift(RGWRados * const store,
+ const string& swift_name,
+ RGWUserInfo& info, /* out */
+ RGWObjVersionTracker * const objv_tracker,
+ real_time * const pmtime)
+{
+ return rgw_get_user_info_from_index(store, swift_name,
+ store->svc.zone->get_zone_params().user_swift_pool,
+ info, objv_tracker, pmtime);
+}
+
+/**
+ * Given an access key, finds the user info associated with it.
+ * The lookup goes through the index in the user keys pool.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_access_key(RGWRados* store,
+ const std::string& access_key,
+ RGWUserInfo& info,
+ RGWObjVersionTracker* objv_tracker,
+ real_time *pmtime)
+{
+ return rgw_get_user_info_from_index(store, access_key,
+ store->svc.zone->get_zone_params().user_keys_pool,
+ info, objv_tracker, pmtime);
+}
+
+/**
+ * Given a uid, stat the user object in the user uid pool, with attrs and
+ * objv_tracker registered on the read op as its outputs.
+ * returns: the result of the stat operation
+ */
+int rgw_get_user_attrs_by_uid(RGWRados *store,
+ const rgw_user& user_id,
+ map<string, bufferlist>& attrs,
+ RGWObjVersionTracker *objv_tracker)
+{
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ rgw_raw_obj obj(store->svc.zone->get_zone_params().user_uid_pool, user_id.to_str());
+ auto src = obj_ctx.get_obj(obj);
+
+ return src.rop()
+ .set_attrs(&attrs)
+ .set_objv_tracker(objv_tracker)
+ .stat();
+}
+
+/**
+ * Remove the index object of access_key from the user keys pool.
+ * returns: the result of the remove operation
+ */
+int rgw_remove_key_index(RGWRados *store, RGWAccessKey& access_key)
+{
+  rgw_raw_obj key_index(store->svc.zone->get_zone_params().user_keys_pool, access_key.id);
+  auto ctx = store->svc.sysobj->init_obj_ctx();
+  auto index_obj = ctx.get_obj(key_index);
+  return index_obj.wop().remove();
+}
+
+/**
+ * Remove the metadata entry of uid. The user is read first so that the
+ * removal is done with the version that was just read.
+ * returns: 0 on success, -ERR# on failure
+ */
+int rgw_remove_uid_index(RGWRados *store, rgw_user& uid)
+{
+  RGWObjVersionTracker tracker;
+  RGWUserInfo loaded;
+  int rc = rgw_get_user_info_by_uid(store, uid, loaded, &tracker, NULL);
+  if (rc >= 0) {
+    string oid = uid.to_str();
+    rc = store->meta_mgr->remove_entry(user_meta_handler, oid, &tracker);
+  }
+
+  return (rc < 0) ? rc : 0;
+}
+
+/**
+ * Remove the index object of email from the user email pool.
+ * An empty email is a no-op.
+ * returns: 0 for an empty email, otherwise the result of the remove operation
+ */
+int rgw_remove_email_index(RGWRados *store, string& email)
+{
+  if (!email.empty()) {
+    rgw_raw_obj email_index(store->svc.zone->get_zone_params().user_email_pool, email);
+    auto ctx = store->svc.sysobj->init_obj_ctx();
+    auto index_obj = ctx.get_obj(email_index);
+    return index_obj.wop().remove();
+  }
+  return 0;
+}
+
+/**
+ * Remove the index object of swift_name from the user swift pool.
+ * returns: the result of the remove operation
+ */
+int rgw_remove_swift_name_index(RGWRados *store, string& swift_name)
+{
+  rgw_raw_obj swift_index(store->svc.zone->get_zone_params().user_swift_pool, swift_name);
+  auto ctx = store->svc.sysobj->init_obj_ctx();
+  auto index_obj = ctx.get_obj(swift_index);
+  return index_obj.wop().remove();
+}
+
+/**
+ * delete a user's presence from the RGW system.
+ * Removes, in this order: the access key index objects, the swift name
+ * index objects, the email index object, the user's buckets index object
+ * and finally the user metadata entry.
+ * A missing object (-ENOENT) is not treated as an error at any step, and
+ * -ECANCELED is also tolerated for the final metadata removal. Any other
+ * error stops the deletion at that step and is returned.
+ * returns: 0 on success, -ERR# on failure
+ */
+int rgw_delete_user(RGWRados *store, RGWUserInfo& info, RGWObjVersionTracker& objv_tracker) {
+ int ret;
+
+ map<string, RGWAccessKey>::iterator kiter = info.access_keys.begin();
+ for (; kiter != info.access_keys.end(); ++kiter) {
+ ldout(store->ctx(), 10) << "removing key index: " << kiter->first << dendl;
+ ret = rgw_remove_key_index(store, kiter->second);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove " << kiter->first << " (access key object), should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+ }
+
+ map<string, RGWAccessKey>::iterator siter = info.swift_keys.begin();
+ for (; siter != info.swift_keys.end(); ++siter) {
+ RGWAccessKey& k = siter->second;
+ ldout(store->ctx(), 10) << "removing swift subuser index: " << k.id << dendl;
+ /* remove the swift name index object */
+ ret = rgw_remove_swift_name_index(store, k.id);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove " << k.id << " (swift name object), should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+ }
+
+ ldout(store->ctx(), 10) << "removing email index: " << info.user_email << dendl;
+ ret = rgw_remove_email_index(store, info.user_email);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove email index object for "
+ << info.user_email << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+
+ /* the buckets index object lives in the user uid pool */
+ string buckets_obj_id;
+ rgw_get_buckets_obj(info.user_id, buckets_obj_id);
+ rgw_raw_obj uid_bucks(store->svc.zone->get_zone_params().user_uid_pool, buckets_obj_id);
+ ldout(store->ctx(), 10) << "removing user buckets index" << dendl;
+ auto obj_ctx = store->svc.sysobj->init_obj_ctx();
+ auto sysobj = obj_ctx.get_obj(uid_bucks);
+ ret = sysobj.wop().remove();
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove " << info.user_id << ":" << uid_bucks << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+
+ string key;
+ info.user_id.to_str(key);
+
+ /* uid_obj is only used for the error message below */
+ rgw_raw_obj uid_obj(store->svc.zone->get_zone_params().user_uid_pool, key);
+ ldout(store->ctx(), 10) << "removing user index: " << info.user_id << dendl;
+ ret = store->meta_mgr->remove_entry(user_meta_handler, key, &objv_tracker);
+ if (ret < 0 && ret != -ENOENT && ret != -ECANCELED) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove " << info.user_id << ":" << uid_obj << ", should be fixed (err=" << ret << ")" << dendl;
+ return ret;
+ }
+
+ return 0;
+}
+
/**
 * Tell whether a character may appear unescaped in a URL.
 *
 * Accepts alphanumerics plus '-', '.', '_' and '~', which in the default
 * "C" locale is the RFC 3986 "unreserved" set.
 *
 * @param c character to classify
 * @return true when @p c is an unreserved character
 */
static bool char_is_unreserved_url(char c)
{
  // isalnum() requires a value representable as unsigned char (or EOF);
  // a negative plain char (any byte >= 0x80 where char is signed) would
  // be undefined behaviour, so convert first.
  if (isalnum(static_cast<unsigned char>(c)))
    return true;

  switch (c) {
  case '-':
  case '.':
  case '_':
  case '~':
    return true;
  default:
    return false;
  }
}
+
+struct rgw_flags_desc {
+ uint32_t mask;
+ const char *str;
+};
+
+static struct rgw_flags_desc rgw_perms[] = {
+ { RGW_PERM_FULL_CONTROL, "full-control" },
+ { RGW_PERM_READ | RGW_PERM_WRITE, "read-write" },
+ { RGW_PERM_READ, "read" },
+ { RGW_PERM_WRITE, "write" },
+ { RGW_PERM_READ_ACP, "read-acp" },
+ { RGW_PERM_WRITE_ACP, "write-acp" },
+ { 0, NULL }
+};
+
+void rgw_perm_to_str(uint32_t mask, char *buf, int len)
+{
+ const char *sep = "";
+ int pos = 0;
+ if (!mask) {
+ snprintf(buf, len, "<none>");
+ return;
+ }
+ while (mask) {
+ uint32_t orig_mask = mask;
+ for (int i = 0; rgw_perms[i].mask; i++) {
+ struct rgw_flags_desc *desc = &rgw_perms[i];
+ if ((mask & desc->mask) == desc->mask) {
+ pos += snprintf(buf + pos, len - pos, "%s%s", sep, desc->str);
+ if (pos == len)
+ return;
+ sep = ", ";
+ mask &= ~desc->mask;
+ if (!mask)
+ return;
+ }
+ }
+ if (mask == orig_mask) // no change
+ break;
+ }
+}
+
+uint32_t rgw_str_to_perm(const char *str)
+{
+ if (strcasecmp(str, "") == 0)
+ return RGW_PERM_NONE;
+ else if (strcasecmp(str, "read") == 0)
+ return RGW_PERM_READ;
+ else if (strcasecmp(str, "write") == 0)
+ return RGW_PERM_WRITE;
+ else if (strcasecmp(str, "readwrite") == 0)
+ return RGW_PERM_READ | RGW_PERM_WRITE;
+ else if (strcasecmp(str, "full") == 0)
+ return RGW_PERM_FULL_CONTROL;
+
+ return RGW_PERM_INVALID;
+}
+
+int rgw_validate_tenant_name(const string& t)
+{
+ struct tench {
+ static bool is_good(char ch) {
+ return isalnum(ch) || ch == '_';
+ }
+ };
+ std::string::const_iterator it =
+ std::find_if_not(t.begin(), t.end(), tench::is_good);
+ return (it == t.end())? 0: -ERR_INVALID_TENANT_NAME;
+}
+
+static bool validate_access_key(string& key)
+{
+ const char *p = key.c_str();
+ while (*p) {
+ if (!char_is_unreserved_url(*p))
+ return false;
+ p++;
+ }
+ return true;
+}
+
/**
 * Store an error message in an optional output string.
 *
 * Does nothing when @p sink is null or @p msg is empty, so an existing
 * message is never replaced by an empty one.
 *
 * @param sink destination string, may be null
 * @param msg  message to store
 */
static void set_err_msg(std::string *sink, std::string msg)
{
  if (sink == nullptr || msg.empty())
    return;

  *sink = msg;
}
+
+static bool remove_old_indexes(RGWRados *store,
+ RGWUserInfo& old_info, RGWUserInfo& new_info, std::string *err_msg)
+{
+ int ret;
+ bool success = true;
+
+ if (!old_info.user_id.empty() &&
+ old_info.user_id.compare(new_info.user_id) != 0) {
+ if (old_info.user_id.tenant != new_info.user_id.tenant) {
+ ldout(store->ctx(), 0) << "ERROR: tenant mismatch: " << old_info.user_id.tenant << " != " << new_info.user_id.tenant << dendl;
+ return false;
+ }
+ ret = rgw_remove_uid_index(store, old_info.user_id);
+ if (ret < 0 && ret != -ENOENT) {
+ set_err_msg(err_msg, "ERROR: could not remove index for uid " + old_info.user_id.to_str());
+ success = false;
+ }
+ }
+
+ if (!old_info.user_email.empty() &&
+ old_info.user_email.compare(new_info.user_email) != 0) {
+ ret = rgw_remove_email_index(store, old_info.user_email);
+ if (ret < 0 && ret != -ENOENT) {
+ set_err_msg(err_msg, "ERROR: could not remove index for email " + old_info.user_email);
+ success = false;
+ }
+ }
+
+ map<string, RGWAccessKey>::iterator old_iter;
+ for (old_iter = old_info.swift_keys.begin(); old_iter != old_info.swift_keys.end(); ++old_iter) {
+ RGWAccessKey& swift_key = old_iter->second;
+ map<string, RGWAccessKey>::iterator new_iter = new_info.swift_keys.find(swift_key.id);
+ if (new_iter == new_info.swift_keys.end()) {
+ ret = rgw_remove_swift_name_index(store, swift_key.id);
+ if (ret < 0 && ret != -ENOENT) {
+ set_err_msg(err_msg, "ERROR: could not remove index for swift_name " + swift_key.id);
+ success = false;
+ }
+ }
+ }
+
+ return success;
+}
+
+/*
+ * Dump either the full user info or a subset to a formatter.
+ *
+ * NOTE: It is the caller's responsibility to ensure that the
+ * formatter is flushed at the correct time.
+ */
+
+static void dump_subusers_info(Formatter *f, RGWUserInfo &info)
+{
+ map<string, RGWSubUser>::iterator uiter;
+
+ f->open_array_section("subusers");
+ for (uiter = info.subusers.begin(); uiter != info.subusers.end(); ++uiter) {
+ RGWSubUser& u = uiter->second;
+ f->open_object_section("user");
+ string s;
+ info.user_id.to_str(s);
+ f->dump_format("id", "%s:%s", s.c_str(), u.name.c_str());
+ char buf[256];
+ rgw_perm_to_str(u.perm_mask, buf, sizeof(buf));
+ f->dump_string("permissions", buf);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+static void dump_access_keys_info(Formatter *f, RGWUserInfo &info)
+{
+ map<string, RGWAccessKey>::iterator kiter;
+ f->open_array_section("keys");
+ for (kiter = info.access_keys.begin(); kiter != info.access_keys.end(); ++kiter) {
+ RGWAccessKey& k = kiter->second;
+ const char *sep = (k.subuser.empty() ? "" : ":");
+ const char *subuser = (k.subuser.empty() ? "" : k.subuser.c_str());
+ f->open_object_section("key");
+ string s;
+ info.user_id.to_str(s);
+ f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser);
+ f->dump_string("access_key", k.id);
+ f->dump_string("secret_key", k.key);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+static void dump_swift_keys_info(Formatter *f, RGWUserInfo &info)
+{
+ map<string, RGWAccessKey>::iterator kiter;
+ f->open_array_section("swift_keys");
+ for (kiter = info.swift_keys.begin(); kiter != info.swift_keys.end(); ++kiter) {
+ RGWAccessKey& k = kiter->second;
+ const char *sep = (k.subuser.empty() ? "" : ":");
+ const char *subuser = (k.subuser.empty() ? "" : k.subuser.c_str());
+ f->open_object_section("key");
+ string s;
+ info.user_id.to_str(s);
+ f->dump_format("user", "%s%s%s", s.c_str(), sep, subuser);
+ f->dump_string("secret_key", k.key);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+static void dump_user_info(Formatter *f, RGWUserInfo &info,
+ RGWStorageStats *stats = NULL)
+{
+ f->open_object_section("user_info");
+ encode_json("tenant", info.user_id.tenant, f);
+ encode_json("user_id", info.user_id.id, f);
+ encode_json("display_name", info.display_name, f);
+ encode_json("email", info.user_email, f);
+ encode_json("suspended", (int)info.suspended, f);
+ encode_json("max_buckets", (int)info.max_buckets, f);
+
+ dump_subusers_info(f, info);
+ dump_access_keys_info(f, info);
+ dump_swift_keys_info(f, info);
+
+ encode_json("caps", info.caps, f);
+
+ char buf[256];
+ op_type_to_str(info.op_mask, buf, sizeof(buf));
+ encode_json("op_mask", (const char *)buf, f);
+ encode_json("system", (bool)info.system, f);
+ encode_json("admin", (bool)info.admin, f);
+ encode_json("default_placement", info.default_placement.name, f);
+ encode_json("default_storage_class", info.default_placement.storage_class, f);
+ encode_json("placement_tags", info.placement_tags, f);
+ encode_json("bucket_quota", info.bucket_quota, f);
+ encode_json("user_quota", info.user_quota, f);
+ encode_json("temp_url_keys", info.temp_url_keys, f);
+
+ string user_source_type;
+ switch ((RGWIdentityType)info.type) {
+ case TYPE_RGW:
+ user_source_type = "rgw";
+ break;
+ case TYPE_KEYSTONE:
+ user_source_type = "keystone";
+ break;
+ case TYPE_LDAP:
+ user_source_type = "ldap";
+ break;
+ case TYPE_NONE:
+ user_source_type = "none";
+ break;
+ default:
+ user_source_type = "none";
+ break;
+ }
+ encode_json("type", user_source_type, f);
+ encode_json("mfa_ids", info.mfa_ids, f);
+ if (stats) {
+ encode_json("stats", *stats, f);
+ }
+ f->close_section();
+}
+
+
+RGWAccessKeyPool::RGWAccessKeyPool(RGWUser* usr)
+{
+ user = usr;
+ swift_keys = NULL;
+ access_keys = NULL;
+
+ if (!user) {
+ keys_allowed = false;
+ store = NULL;
+ return;
+ }
+
+ keys_allowed = true;
+
+ store = user->get_store();
+}
+
+RGWAccessKeyPool::~RGWAccessKeyPool()
+{
+
+}
+
+int RGWAccessKeyPool::init(RGWUserAdminOpState& op_state)
+{
+ if (!op_state.is_initialized()) {
+ keys_allowed = false;
+ return -EINVAL;
+ }
+
+ rgw_user& uid = op_state.get_user_id();
+ if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ keys_allowed = false;
+ return -EACCES;
+ }
+
+ swift_keys = op_state.get_swift_keys();
+ access_keys = op_state.get_access_keys();
+
+ keys_allowed = true;
+
+ return 0;
+}
+
+/*
+ * Do a fairly exhaustive search for an existing key matching the parameters
+ * given. Also handles the case where no key type was specified and updates
+ * the operation state if needed.
+ */
+
+bool RGWAccessKeyPool::check_existing_key(RGWUserAdminOpState& op_state)
+{
+ bool existing_key = false;
+
+ int key_type = op_state.get_key_type();
+ std::string kid = op_state.get_access_key();
+ std::map<std::string, RGWAccessKey>::iterator kiter;
+ std::string swift_kid = op_state.build_default_swift_kid();
+
+ RGWUserInfo dup_info;
+
+ if (kid.empty() && swift_kid.empty())
+ return false;
+
+ switch (key_type) {
+ case KEY_TYPE_SWIFT:
+ kiter = swift_keys->find(swift_kid);
+
+ existing_key = (kiter != swift_keys->end());
+ if (existing_key)
+ op_state.set_access_key(swift_kid);
+
+ break;
+ case KEY_TYPE_S3:
+ kiter = access_keys->find(kid);
+ existing_key = (kiter != access_keys->end());
+
+ break;
+ default:
+ kiter = access_keys->find(kid);
+
+ existing_key = (kiter != access_keys->end());
+ if (existing_key) {
+ op_state.set_key_type(KEY_TYPE_S3);
+ break;
+ }
+
+ kiter = swift_keys->find(kid);
+
+ existing_key = (kiter != swift_keys->end());
+ if (existing_key) {
+ op_state.set_key_type(KEY_TYPE_SWIFT);
+ break;
+ }
+
+ // handle the case where the access key was not provided in user:key format
+ if (swift_kid.empty())
+ return false;
+
+ kiter = swift_keys->find(swift_kid);
+
+ existing_key = (kiter != swift_keys->end());
+ if (existing_key) {
+ op_state.set_access_key(swift_kid);
+ op_state.set_key_type(KEY_TYPE_SWIFT);
+ }
+ }
+
+ op_state.set_existing_key(existing_key);
+
+ return existing_key;
+}
+
+int RGWAccessKeyPool::check_op(RGWUserAdminOpState& op_state,
+ std::string *err_msg)
+{
+ RGWUserInfo dup_info;
+
+ if (!op_state.is_populated()) {
+ set_err_msg(err_msg, "user info was not populated");
+ return -EINVAL;
+ }
+
+ if (!keys_allowed) {
+ set_err_msg(err_msg, "keys not allowed for this user");
+ return -EACCES;
+ }
+
+ int32_t key_type = op_state.get_key_type();
+
+ // if a key type wasn't specified
+ if (key_type < 0) {
+ if (op_state.has_subuser()) {
+ key_type = KEY_TYPE_SWIFT;
+ } else {
+ key_type = KEY_TYPE_S3;
+ }
+ }
+
+ op_state.set_key_type(key_type);
+
+ /* see if the access key was specified */
+ if (key_type == KEY_TYPE_S3 && !op_state.will_gen_access() &&
+ op_state.get_access_key().empty()) {
+ set_err_msg(err_msg, "empty access key");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ // don't check for secret key because we may be doing a removal
+
+ check_existing_key(op_state);
+
+ return 0;
+}
+
+// Generate a new random key
+int RGWAccessKeyPool::generate_key(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ std::string id;
+ std::string key;
+
+ std::pair<std::string, RGWAccessKey> key_pair;
+ RGWAccessKey new_key;
+ RGWUserInfo duplicate_check;
+
+ int key_type = op_state.get_key_type();
+ bool gen_access = op_state.will_gen_access();
+ bool gen_secret = op_state.will_gen_secret();
+
+ if (!keys_allowed) {
+ set_err_msg(err_msg, "access keys not allowed for this user");
+ return -EACCES;
+ }
+
+ if (op_state.has_existing_key()) {
+ set_err_msg(err_msg, "cannot create existing key");
+ return -ERR_KEY_EXIST;
+ }
+
+ if (!gen_access) {
+ id = op_state.get_access_key();
+ }
+
+ if (!id.empty()) {
+ switch (key_type) {
+ case KEY_TYPE_SWIFT:
+ if (rgw_get_user_info_by_swift(store, id, duplicate_check) >= 0) {
+ set_err_msg(err_msg, "existing swift key in RGW system:" + id);
+ return -ERR_KEY_EXIST;
+ }
+ break;
+ case KEY_TYPE_S3:
+ if (rgw_get_user_info_by_access_key(store, id, duplicate_check) >= 0) {
+ set_err_msg(err_msg, "existing S3 key in RGW system:" + id);
+ return -ERR_KEY_EXIST;
+ }
+ }
+ }
+
+ //key's subuser
+ if (op_state.has_subuser()) {
+ //create user and subuser at the same time, user's s3 key should not be set this
+ if (!op_state.key_type_setbycontext || (key_type == KEY_TYPE_SWIFT)) {
+ new_key.subuser = op_state.get_subuser();
+ }
+ }
+
+ //Secret key
+ if (!gen_secret) {
+ if (op_state.get_secret_key().empty()) {
+ set_err_msg(err_msg, "empty secret key");
+ return -ERR_INVALID_SECRET_KEY;
+ }
+
+ key = op_state.get_secret_key();
+ } else {
+ char secret_key_buf[SECRET_KEY_LEN + 1];
+ gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, sizeof(secret_key_buf));
+ key = secret_key_buf;
+ }
+
+ // Generate the access key
+ if (key_type == KEY_TYPE_S3 && gen_access) {
+ char public_id_buf[PUBLIC_ID_LEN + 1];
+
+ do {
+ int id_buf_size = sizeof(public_id_buf);
+ gen_rand_alphanumeric_upper(g_ceph_context, public_id_buf, id_buf_size);
+ id = public_id_buf;
+ if (!validate_access_key(id))
+ continue;
+
+ } while (!rgw_get_user_info_by_access_key(store, id, duplicate_check));
+ }
+
+ if (key_type == KEY_TYPE_SWIFT) {
+ id = op_state.build_default_swift_kid();
+ if (id.empty()) {
+ set_err_msg(err_msg, "empty swift access key");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ // check that the access key doesn't exist
+ if (rgw_get_user_info_by_swift(store, id, duplicate_check) >= 0) {
+ set_err_msg(err_msg, "cannot create existing swift key");
+ return -ERR_KEY_EXIST;
+ }
+ }
+
+ // finally create the new key
+ new_key.id = id;
+ new_key.key = key;
+
+ key_pair.first = id;
+ key_pair.second = new_key;
+
+ if (key_type == KEY_TYPE_S3) {
+ access_keys->insert(key_pair);
+ } else if (key_type == KEY_TYPE_SWIFT) {
+ swift_keys->insert(key_pair);
+ }
+
+ return 0;
+}
+
+// modify an existing key
+int RGWAccessKeyPool::modify_key(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ std::string id;
+ std::string key = op_state.get_secret_key();
+ int key_type = op_state.get_key_type();
+
+ RGWAccessKey modify_key;
+
+ pair<string, RGWAccessKey> key_pair;
+ map<std::string, RGWAccessKey>::iterator kiter;
+
+ switch (key_type) {
+ case KEY_TYPE_S3:
+ id = op_state.get_access_key();
+ if (id.empty()) {
+ set_err_msg(err_msg, "no access key specified");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+ break;
+ case KEY_TYPE_SWIFT:
+ id = op_state.build_default_swift_kid();
+ if (id.empty()) {
+ set_err_msg(err_msg, "no subuser specified");
+ return -EINVAL;
+ }
+ break;
+ default:
+ set_err_msg(err_msg, "invalid key type");
+ return -ERR_INVALID_KEY_TYPE;
+ }
+
+ if (!op_state.has_existing_key()) {
+ set_err_msg(err_msg, "key does not exist");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ key_pair.first = id;
+
+ if (key_type == KEY_TYPE_SWIFT) {
+ modify_key.id = id;
+ modify_key.subuser = op_state.get_subuser();
+ } else if (key_type == KEY_TYPE_S3) {
+ kiter = access_keys->find(id);
+ if (kiter != access_keys->end()) {
+ modify_key = kiter->second;
+ }
+ }
+
+ if (op_state.will_gen_secret()) {
+ char secret_key_buf[SECRET_KEY_LEN + 1];
+ int key_buf_size = sizeof(secret_key_buf);
+ gen_rand_alphanumeric_plain(g_ceph_context, secret_key_buf, key_buf_size);
+ key = secret_key_buf;
+ }
+
+ if (key.empty()) {
+ set_err_msg(err_msg, "empty secret key");
+ return -ERR_INVALID_SECRET_KEY;
+ }
+
+ // update the access key with the new secret key
+ modify_key.key = key;
+
+ key_pair.second = modify_key;
+
+
+ if (key_type == KEY_TYPE_S3) {
+ (*access_keys)[id] = modify_key;
+ } else if (key_type == KEY_TYPE_SWIFT) {
+ (*swift_keys)[id] = modify_key;
+ }
+
+ return 0;
+}
+
+int RGWAccessKeyPool::execute_add(RGWUserAdminOpState& op_state,
+ std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+
+ std::string subprocess_msg;
+ int key_op = GENERATE_KEY;
+
+ // set the op
+ if (op_state.has_existing_key())
+ key_op = MODIFY_KEY;
+
+ switch (key_op) {
+ case GENERATE_KEY:
+ ret = generate_key(op_state, &subprocess_msg);
+ break;
+ case MODIFY_KEY:
+ ret = modify_key(op_state, &subprocess_msg);
+ break;
+ }
+
+ if (ret < 0) {
+ set_err_msg(err_msg, subprocess_msg);
+ return ret;
+ }
+
+ // store the updated info
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWAccessKeyPool::add(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return add(op_state, err_msg, false);
+}
+
+int RGWAccessKeyPool::add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ int ret;
+ std::string subprocess_msg;
+
+ ret = check_op(op_state, &subprocess_msg);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to parse request, " + subprocess_msg);
+ return ret;
+ }
+
+ ret = execute_add(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to add access key, " + subprocess_msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+int RGWAccessKeyPool::execute_remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+
+ int key_type = op_state.get_key_type();
+ std::string id = op_state.get_access_key();
+ map<std::string, RGWAccessKey>::iterator kiter;
+ map<std::string, RGWAccessKey> *keys_map;
+
+ if (!op_state.has_existing_key()) {
+ set_err_msg(err_msg, "unable to find access key");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ if (key_type == KEY_TYPE_S3) {
+ keys_map = access_keys;
+ } else if (key_type == KEY_TYPE_SWIFT) {
+ keys_map = swift_keys;
+ } else {
+ keys_map = NULL;
+ set_err_msg(err_msg, "invalid access key");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ kiter = keys_map->find(id);
+ if (kiter == keys_map->end()) {
+ set_err_msg(err_msg, "key not found");
+ return -ERR_INVALID_ACCESS_KEY;
+ }
+
+ rgw_remove_key_index(store, kiter->second);
+ keys_map->erase(kiter);
+
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWAccessKeyPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return remove(op_state, err_msg, false);
+}
+
+int RGWAccessKeyPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ int ret;
+
+ std::string subprocess_msg;
+
+ ret = check_op(op_state, &subprocess_msg);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to parse request, " + subprocess_msg);
+ return ret;
+ }
+
+ ret = execute_remove(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to remove access key, " + subprocess_msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+// remove all keys associated with a subuser
+int RGWAccessKeyPool::remove_subuser_keys(RGWUserAdminOpState& op_state,
+ std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+
+ if (!op_state.is_populated()) {
+ set_err_msg(err_msg, "user info was not populated");
+ return -EINVAL;
+ }
+
+ if (!op_state.has_subuser()) {
+ set_err_msg(err_msg, "no subuser specified");
+ return -EINVAL;
+ }
+
+ std::string swift_kid = op_state.build_default_swift_kid();
+ if (swift_kid.empty()) {
+ set_err_msg(err_msg, "empty swift access key");
+ return -EINVAL;
+ }
+
+ map<std::string, RGWAccessKey>::iterator kiter;
+ map<std::string, RGWAccessKey> *keys_map;
+
+ // a subuser can have at most one swift key
+ keys_map = swift_keys;
+ kiter = keys_map->find(swift_kid);
+ if (kiter != keys_map->end()) {
+ rgw_remove_key_index(store, kiter->second);
+ keys_map->erase(kiter);
+ }
+
+ // a subuser may have multiple s3 key pairs
+ std::string subuser_str = op_state.get_subuser();
+ keys_map = access_keys;
+ RGWUserInfo user_info = op_state.get_user_info();
+ map<std::string, RGWAccessKey>::iterator user_kiter = user_info.access_keys.begin();
+ for (; user_kiter != user_info.access_keys.end(); ++user_kiter) {
+ if (user_kiter->second.subuser == subuser_str) {
+ kiter = keys_map->find(user_kiter->first);
+ if (kiter != keys_map->end()) {
+ rgw_remove_key_index(store, kiter->second);
+ keys_map->erase(kiter);
+ }
+ }
+ }
+
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+RGWSubUserPool::RGWSubUserPool(RGWUser *usr)
+{
+ subusers_allowed = (usr != NULL);
+ if (usr)
+ store = usr->get_store();
+ else
+ store = NULL;
+ user = usr;
+ subuser_map = NULL;
+}
+
+RGWSubUserPool::~RGWSubUserPool()
+{
+
+}
+
+int RGWSubUserPool::init(RGWUserAdminOpState& op_state)
+{
+ if (!op_state.is_initialized()) {
+ subusers_allowed = false;
+ return -EINVAL;
+ }
+
+ rgw_user& uid = op_state.get_user_id();
+ if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ subusers_allowed = false;
+ return -EACCES;
+ }
+
+ subuser_map = op_state.get_subusers();
+ if (subuser_map == NULL) {
+ subusers_allowed = false;
+ return -EINVAL;
+ }
+
+ subusers_allowed = true;
+
+ return 0;
+}
+
+bool RGWSubUserPool::exists(std::string subuser)
+{
+ if (subuser.empty())
+ return false;
+
+ if (!subuser_map)
+ return false;
+
+ if (subuser_map->count(subuser))
+ return true;
+
+ return false;
+}
+
+int RGWSubUserPool::check_op(RGWUserAdminOpState& op_state,
+ std::string *err_msg)
+{
+ bool existing = false;
+ std::string subuser = op_state.get_subuser();
+
+ if (!op_state.is_populated()) {
+ set_err_msg(err_msg, "user info was not populated");
+ return -EINVAL;
+ }
+
+ if (!subusers_allowed) {
+ set_err_msg(err_msg, "subusers not allowed for this user");
+ return -EACCES;
+ }
+
+ if (subuser.empty() && !op_state.will_gen_subuser()) {
+ set_err_msg(err_msg, "empty subuser name");
+ return -EINVAL;
+ }
+
+ if (op_state.get_subuser_perm() == RGW_PERM_INVALID) {
+ set_err_msg(err_msg, "invaild subuser access");
+ return -EINVAL;
+ }
+
+ //set key type when it not set or set by context
+ if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) {
+ op_state.set_key_type(KEY_TYPE_SWIFT);
+ op_state.key_type_setbycontext = true;
+ }
+
+ // check if the subuser exists
+ if (!subuser.empty())
+ existing = exists(subuser);
+
+ op_state.set_existing_subuser(existing);
+
+ return 0;
+}
+
+int RGWSubUserPool::execute_add(RGWUserAdminOpState& op_state,
+ std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+ std::string subprocess_msg;
+
+ RGWSubUser subuser;
+ std::pair<std::string, RGWSubUser> subuser_pair;
+ std::string subuser_str = op_state.get_subuser();
+
+ subuser_pair.first = subuser_str;
+
+ // assumes key should be created
+ if (op_state.has_key_op()) {
+ ret = user->keys.add(op_state, &subprocess_msg, true);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to create subuser key, " + subprocess_msg);
+ return ret;
+ }
+ }
+
+ // create the subuser
+ subuser.name = subuser_str;
+
+ if (op_state.has_subuser_perm())
+ subuser.perm_mask = op_state.get_subuser_perm();
+
+ // insert the subuser into user info
+ subuser_pair.second = subuser;
+ subuser_map->insert(subuser_pair);
+
+ // attempt to save the subuser
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWSubUserPool::add(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return add(op_state, err_msg, false);
+}
+
+int RGWSubUserPool::add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ std::string subprocess_msg;
+ int ret;
+ int32_t key_type = op_state.get_key_type();
+
+ ret = check_op(op_state, &subprocess_msg);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to parse request, " + subprocess_msg);
+ return ret;
+ }
+
+ if (key_type == KEY_TYPE_S3 && op_state.get_access_key().empty()) {
+ op_state.set_gen_access();
+ }
+
+ if (op_state.get_secret_key().empty()) {
+ op_state.set_gen_secret();
+ }
+
+ ret = execute_add(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to create subuser, " + subprocess_msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+int RGWSubUserPool::execute_remove(RGWUserAdminOpState& op_state,
+ std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+ std::string subprocess_msg;
+
+ std::string subuser_str = op_state.get_subuser();
+
+ map<std::string, RGWSubUser>::iterator siter;
+ siter = subuser_map->find(subuser_str);
+ if (siter == subuser_map->end()){
+ set_err_msg(err_msg, "subuser not found: " + subuser_str);
+ return -ERR_NO_SUCH_SUBUSER;
+ }
+ if (!op_state.has_existing_subuser()) {
+ set_err_msg(err_msg, "subuser not found: " + subuser_str);
+ return -ERR_NO_SUCH_SUBUSER;
+ }
+
+ // always purge all associate keys
+ user->keys.remove_subuser_keys(op_state, &subprocess_msg, true);
+
+ // remove the subuser from the user info
+ subuser_map->erase(siter);
+
+ // attempt to save the subuser
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWSubUserPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return remove(op_state, err_msg, false);
+}
+
+int RGWSubUserPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ std::string subprocess_msg;
+ int ret;
+
+ ret = check_op(op_state, &subprocess_msg);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to parse request, " + subprocess_msg);
+ return ret;
+ }
+
+ ret = execute_remove(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to remove subuser, " + subprocess_msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+int RGWSubUserPool::execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ int ret = 0;
+ std::string subprocess_msg;
+ std::map<std::string, RGWSubUser>::iterator siter;
+ std::pair<std::string, RGWSubUser> subuser_pair;
+
+ std::string subuser_str = op_state.get_subuser();
+ RGWSubUser subuser;
+
+ if (!op_state.has_existing_subuser()) {
+ set_err_msg(err_msg, "subuser does not exist");
+ return -ERR_NO_SUCH_SUBUSER;
+ }
+
+ subuser_pair.first = subuser_str;
+
+ siter = subuser_map->find(subuser_str);
+ subuser = siter->second;
+
+ if (op_state.has_key_op()) {
+ ret = user->keys.add(op_state, &subprocess_msg, true);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to create subuser keys, " + subprocess_msg);
+ return ret;
+ }
+ }
+
+ if (op_state.has_subuser_perm())
+ subuser.perm_mask = op_state.get_subuser_perm();
+
+ subuser_pair.second = subuser;
+
+ subuser_map->erase(siter);
+ subuser_map->insert(subuser_pair);
+
+ // attempt to save the subuser
+ if (!defer_user_update)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWSubUserPool::modify(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return RGWSubUserPool::modify(op_state, err_msg, false);
+}
+
+int RGWSubUserPool::modify(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_user_update)
+{
+ std::string subprocess_msg;
+ int ret;
+
+ RGWSubUser subuser;
+
+ ret = check_op(op_state, &subprocess_msg);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to parse request, " + subprocess_msg);
+ return ret;
+ }
+
+ ret = execute_modify(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to modify subuser, " + subprocess_msg);
+ return ret;
+ }
+
+ return 0;
+}
+
+RGWUserCapPool::RGWUserCapPool(RGWUser *usr)
+{
+ user = usr;
+ caps = NULL;
+ caps_allowed = (user != NULL);
+}
+
+RGWUserCapPool::~RGWUserCapPool()
+{
+
+}
+
+int RGWUserCapPool::init(RGWUserAdminOpState& op_state)
+{
+ if (!op_state.is_initialized()) {
+ caps_allowed = false;
+ return -EINVAL;
+ }
+
+ rgw_user& uid = op_state.get_user_id();
+ if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ caps_allowed = false;
+ return -EACCES;
+ }
+
+ caps = op_state.get_caps_obj();
+ if (!caps) {
+ caps_allowed = false;
+ return -ERR_INVALID_CAP;
+ }
+
+ caps_allowed = true;
+
+ return 0;
+}
+
+int RGWUserCapPool::add(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return add(op_state, err_msg, false);
+}
+
+int RGWUserCapPool::add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save)
+{
+ int ret = 0;
+ std::string caps_str = op_state.get_caps();
+
+ if (!op_state.is_populated()) {
+ set_err_msg(err_msg, "user info was not populated");
+ return -EINVAL;
+ }
+
+ if (!caps_allowed) {
+ set_err_msg(err_msg, "caps not allowed for this user");
+ return -EACCES;
+ }
+
+ if (caps_str.empty()) {
+ set_err_msg(err_msg, "empty user caps");
+ return -ERR_INVALID_CAP;
+ }
+
+ int r = caps->add_from_string(caps_str);
+ if (r < 0) {
+ set_err_msg(err_msg, "unable to add caps: " + caps_str);
+ return r;
+ }
+
+ if (!defer_save)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int RGWUserCapPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+ return remove(op_state, err_msg, false);
+}
+
+int RGWUserCapPool::remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save)
+{
+ int ret = 0;
+
+ std::string caps_str = op_state.get_caps();
+
+ if (!op_state.is_populated()) {
+ set_err_msg(err_msg, "user info was not populated");
+ return -EINVAL;
+ }
+
+ if (!caps_allowed) {
+ set_err_msg(err_msg, "caps not allowed for this user");
+ return -EACCES;
+ }
+
+ if (caps_str.empty()) {
+ set_err_msg(err_msg, "empty user caps");
+ return -ERR_INVALID_CAP;
+ }
+
+ int r = caps->remove_from_string(caps_str);
+ if (r < 0) {
+ set_err_msg(err_msg, "unable to remove caps: " + caps_str);
+ return r;
+ }
+
+ if (!defer_save)
+ ret = user->update(op_state, err_msg);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+RGWUser::RGWUser() : store(NULL), info_stored(false), caps(this), keys(this), subusers(this)
+{
+ init_default();
+}
+
+int RGWUser::init(RGWRados *storage, RGWUserAdminOpState& op_state)
+{
+ init_default();
+ int ret = init_storage(storage);
+ if (ret < 0)
+ return ret;
+
+ ret = init(op_state);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+RGWUser::~RGWUser()
+{
+}
+
+void RGWUser::init_default()
+{
+ // use anonymous user info as a placeholder
+ rgw_get_anon_user(old_info);
+ user_id = RGW_USER_ANON_ID;
+
+ clear_populated();
+}
+
+int RGWUser::init_storage(RGWRados *storage)
+{
+ if (!storage) {
+ return -EINVAL;
+ }
+
+ store = storage;
+
+ clear_populated();
+
+ /* API wrappers */
+ keys = RGWAccessKeyPool(this);
+ caps = RGWUserCapPool(this);
+ subusers = RGWSubUserPool(this);
+
+ return 0;
+}
+
+// Try to locate an existing user for the request in op_state and record the
+// outcome both in op_state and in this object. The lookups are attempted in
+// a fixed order (uid, email, swift name, access key) and stop at the first
+// hit. Not finding a user is not an error here; callers inspect
+// op_state.has_existing_user().
+int RGWUser::init(RGWUserAdminOpState& op_state)
+{
+  bool located = false;
+  std::string swift_name;
+  user_id = op_state.get_user_id();
+  std::string email = op_state.get_user_email();
+  std::string s3_key = op_state.get_access_key();
+  std::string subuser = op_state.get_subuser();
+
+  // for swift keys the "access key" field carries the swift user name
+  const int key_type = op_state.get_key_type();
+  if (key_type == KEY_TYPE_SWIFT) {
+    swift_name = op_state.get_access_key();
+    s3_key.clear();
+  }
+
+  RGWUserInfo loaded;
+
+  clear_populated();
+
+  // no uid given: derive it from the "<uid>:<subuser>" form if present
+  if (user_id.empty() && !subuser.empty()) {
+    const size_t colon = subuser.find(':');
+    if (colon != string::npos) {
+      user_id = subuser.substr(0, colon);
+      op_state.set_user_id(user_id);
+    }
+  }
+
+  if (!user_id.empty() && user_id.compare(RGW_USER_ANON_ID) != 0) {
+    located = (rgw_get_user_info_by_uid(store, user_id, loaded, &op_state.objv) >= 0);
+    op_state.found_by_uid = located;
+  }
+
+  // the email lookup is only consulted when emails are configured as unique
+  if (store->ctx()->_conf.get_val<bool>("rgw_user_unique_email")) {
+    if (!located && !email.empty()) {
+      located = (rgw_get_user_info_by_email(store, email, loaded, &op_state.objv) >= 0);
+      op_state.found_by_email = located;
+    }
+  }
+
+  if (!located && !swift_name.empty()) {
+    located = (rgw_get_user_info_by_swift(store, swift_name, loaded, &op_state.objv) >= 0);
+    op_state.found_by_key = located;
+  }
+
+  if (!located && !s3_key.empty()) {
+    located = (rgw_get_user_info_by_access_key(store, s3_key, loaded, &op_state.objv) >= 0);
+    op_state.found_by_key = located;
+  }
+
+  op_state.set_existing_user(located);
+  if (located) {
+    op_state.set_user_info(loaded);
+    op_state.set_populated();
+
+    old_info = loaded;
+    set_populated();
+  }
+
+  if (user_id.empty()) {
+    user_id = loaded.user_id;
+  }
+  op_state.set_initialized();
+
+  // this may have been called by a helper object
+  const int members_rc = init_members(op_state);
+  return (members_rc < 0) ? members_rc : 0;
+}
+
+// Initialise the helper pools from op_state in the order keys, subusers,
+// caps, stopping at the first one that reports an error.
+int RGWUser::init_members(RGWUserAdminOpState& op_state)
+{
+  int rc = keys.init(op_state);
+
+  if (rc >= 0) {
+    rc = subusers.init(op_state);
+  }
+
+  if (rc >= 0) {
+    rc = caps.init(op_state);
+  }
+
+  return (rc < 0) ? rc : 0;
+}
+
+// Persist the user info held in op_state. When this object already holds a
+// populated record, that record is handed to the store call as the previous
+// state and its stale indexes are removed afterwards. On success the stored
+// info becomes the new old_info and the object is marked populated.
+int RGWUser::update(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  std::string subprocess_msg;
+  RGWUserInfo user_info = op_state.get_user_info();
+
+  if (!store) {
+    set_err_msg(err_msg, "couldn't initialize storage");
+    return -EINVAL;
+  }
+
+  const bool had_previous = is_populated();
+  RGWUserInfo *previous = had_previous ? &old_info : nullptr;
+
+  int rc = rgw_store_user_info(store, user_info, previous, &op_state.objv, real_time(), false);
+  if (rc < 0) {
+    set_err_msg(err_msg, "unable to store user info");
+    return rc;
+  }
+
+  if (had_previous) {
+    rc = remove_old_indexes(store, old_info, user_info, &subprocess_msg);
+    if (rc < 0) {
+      set_err_msg(err_msg, "unable to remove old user info, " + subprocess_msg);
+      return rc;
+    }
+  }
+
+  old_info = user_info;
+  set_populated();
+
+  return 0;
+}
+
+// Validate an admin operation against the user currently held by this
+// object.
+//
+// Rejects (with -EINVAL) operations that target the anonymous user or whose
+// user id differs from an already populated user, and returns the error from
+// rgw_validate_tenant_name() for an invalid tenant. As a side effect the key
+// type in op_state is forced to S3 when it is unset or was only set by
+// context.
+//
+// err_msg, when non-NULL, receives a description of the failure.
+// Returns 0 when the operation may proceed.
+int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  rgw_user& op_id = op_state.get_user_id();
+
+  const bool same_id = (user_id.compare(op_id) == 0);
+  const bool populated = is_populated();
+
+  if (op_id.compare(RGW_USER_ANON_ID) == 0) {
+    set_err_msg(err_msg, "unable to perform operations on the anonymous user");
+    return -EINVAL;
+  }
+
+  if (populated && !same_id) {
+    set_err_msg(err_msg, "user id mismatch, operation id: " + op_id.to_str()
+                + " does not match: " + user_id.to_str());
+
+    return -EINVAL;
+  }
+
+  int ret = rgw_validate_tenant_name(op_id.tenant);
+  if (ret) {
+    set_err_msg(err_msg,
+                "invalid tenant only alphanumeric and _ characters are allowed");
+    return ret;
+  }
+
+  // set the key type when it is not set, or was only set by context
+  if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) {
+    op_state.set_key_type(KEY_TYPE_S3);
+    op_state.key_type_setbycontext = true;
+  }
+
+  return 0;
+}
+
+int RGWUser::execute_add(RGWUserAdminOpState& op_state, std::string *err_msg)
+{ // Builds a new RGWUserInfo from op_state, adds any requested keys/caps, then persists it with update(). Returns 0 or a negative error code; err_msg (if non-NULL) gets the reason.
+ std::string subprocess_msg; // detail text filled in by keys.add() / caps.add()
+ int ret = 0;
+ bool defer_user_update = true; // handed to keys.add()/caps.add(); update() is called once at the end of this function
+
+ RGWUserInfo user_info;
+
+ rgw_user& uid = op_state.get_user_id();
+ std::string user_email = op_state.get_user_email();
+ std::string display_name = op_state.get_display_name();
+
+ // an existing user is an error, unless the request is non-exclusive and its email (if given) and display name match the stored ones -- then it is handled as a modify
+ if (op_state.has_existing_user()) {
+ if (!op_state.exclusive &&
+ (user_email.empty() ||
+ boost::iequals(user_email, old_info.user_email)) &&
+ old_info.display_name == display_name) {
+ return execute_modify(op_state, err_msg);
+ }
+ // otherwise report which lookup found the conflicting user
+ if (op_state.found_by_email) {
+ set_err_msg(err_msg, "email: " + user_email +
+ " is the email address an existing user");
+ ret = -ERR_EMAIL_EXIST;
+ } else if (op_state.found_by_key) {
+ set_err_msg(err_msg, "duplicate key provided");
+ ret = -ERR_KEY_EXIST;
+ } else {
+ set_err_msg(err_msg, "user: " + op_state.user_id.to_str() + " exists");
+ ret = -EEXIST;
+ }
+ return ret;
+ }
+
+ // fail if the user_info has already been populated
+ if (op_state.is_populated()) {
+ set_err_msg(err_msg, "cannot overwrite already populated user");
+ return -EEXIST;
+ }
+
+ // fail if the display name was not included
+ if (display_name.empty()) {
+ set_err_msg(err_msg, "no display name specified");
+ return -EINVAL;
+ }
+
+
+ // set the user info
+ user_id = uid;
+ user_info.user_id = user_id;
+ user_info.display_name = display_name;
+ user_info.type = TYPE_RGW;
+
+ if (!user_email.empty())
+ user_info.user_email = user_email;
+ // max_buckets: explicit request value, else the rgw_user_max_buckets config option
+ CephContext *cct = store->ctx();
+ if (op_state.max_buckets_specified) {
+ user_info.max_buckets = op_state.get_max_buckets();
+ } else {
+ user_info.max_buckets =
+ cct->_conf.get_val<int64_t>("rgw_user_max_buckets");
+ }
+
+ user_info.suspended = op_state.get_suspension_status();
+ user_info.admin = op_state.admin;
+ user_info.system = op_state.system;
+
+ if (op_state.op_mask_specified)
+ user_info.op_mask = op_state.get_op_mask();
+ // bucket quota: explicit request value, else the configured default
+ if (op_state.has_bucket_quota()) {
+ user_info.bucket_quota = op_state.get_bucket_quota();
+ } else {
+ rgw_apply_default_bucket_quota(user_info.bucket_quota, cct->_conf);
+ }
+ // copy every temp URL key supplied with the request
+ if (op_state.temp_url_key_specified) {
+ map<int, string>::iterator iter;
+ for (iter = op_state.temp_url_keys.begin();
+ iter != op_state.temp_url_keys.end(); ++iter) {
+ user_info.temp_url_keys[iter->first] = iter->second;
+ }
+ }
+ // user quota: explicit request value, else the configured default
+ if (op_state.has_user_quota()) {
+ user_info.user_quota = op_state.get_user_quota();
+ } else {
+ rgw_apply_default_user_quota(user_info.user_quota, cct->_conf);
+ }
+
+ // update the request
+ op_state.set_user_info(user_info);
+ op_state.set_populated();
+
+ // update the helper objects
+ ret = init_members(op_state);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to initialize user");
+ return ret;
+ }
+
+ // see if we need to add an access key
+ if (op_state.has_key_op()) {
+ ret = keys.add(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to create access key, " + subprocess_msg);
+ return ret;
+ }
+ }
+
+ // see if we need to add some caps
+ if (op_state.has_caps_op()) {
+ ret = caps.add(op_state, &subprocess_msg, defer_user_update);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to add user capabilities, " + subprocess_msg);
+ return ret;
+ }
+ }
+ // persist the assembled record; update() sets err_msg itself on failure
+ ret = update(op_state, err_msg);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+// Public entry point for user creation: validate the request with
+// check_op(), then run execute_add(). The helper's message is appended to a
+// stage-specific prefix in err_msg.
+int RGWUser::add(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  std::string detail;
+
+  const int check_rc = check_op(op_state, &detail);
+  if (check_rc < 0) {
+    set_err_msg(err_msg, "unable to parse parameters, " + detail);
+    return check_rc;
+  }
+
+  const int add_rc = execute_add(op_state, &detail);
+  if (add_rc < 0) {
+    set_err_msg(err_msg, "unable to create user, " + detail);
+    return add_rc;
+  }
+
+  return 0;
+}
+
+// Remove an existing user. The user's buckets are listed chunk by chunk; if
+// any exist and purge_data was not requested the call fails with -EEXIST,
+// otherwise each bucket is removed before the user record itself is deleted.
+int RGWUser::execute_remove(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  int rc;
+
+  bool purge_data = op_state.will_purge_data();
+  rgw_user& uid = op_state.get_user_id();
+  RGWUserInfo user_info = op_state.get_user_info();
+
+  if (!op_state.has_existing_user()) {
+    set_err_msg(err_msg, "user does not exist");
+    return -ENOENT;
+  }
+
+  bool more = false;
+  string marker;
+  CephContext *cct = store->ctx();
+  size_t chunk_size = cct->_conf->rgw_list_buckets_max_chunk;
+  do {
+    RGWUserBuckets chunk;
+    rc = rgw_read_user_buckets(store, uid, chunk, marker, string(),
+                               chunk_size, false, &more);
+    if (rc < 0) {
+      set_err_msg(err_msg, "unable to read user bucket info");
+      return rc;
+    }
+
+    map<std::string, RGWBucketEnt>& owned = chunk.get_buckets();
+    if (!purge_data && !owned.empty()) {
+      set_err_msg(err_msg, "must specify purge data to remove user with buckets");
+      return -EEXIST; // change to code that maps to 409: conflict
+    }
+
+    for (auto& entry : owned) {
+      rc = rgw_remove_bucket(store, entry.second.bucket, true);
+      if (rc < 0) {
+        set_err_msg(err_msg, "unable to delete user data");
+        return rc;
+      }
+
+      // resume the next listing after the last bucket handled
+      marker = entry.first;
+    }
+
+  } while (more);
+
+  rc = rgw_delete_user(store, user_info, op_state.objv);
+  if (rc < 0) {
+    set_err_msg(err_msg, "unable to remove user from RADOS");
+    return rc;
+  }
+
+  op_state.clear_populated();
+  clear_populated();
+
+  return 0;
+}
+
+// Public entry point for user removal: validate the request with check_op(),
+// then run execute_remove(). The helper's message is appended to a
+// stage-specific prefix in err_msg.
+int RGWUser::remove(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  std::string detail;
+
+  const int check_rc = check_op(op_state, &detail);
+  if (check_rc < 0) {
+    set_err_msg(err_msg, "unable to parse parameters, " + detail);
+    return check_rc;
+  }
+
+  const int remove_rc = execute_remove(op_state, &detail);
+  if (remove_rc < 0) {
+    set_err_msg(err_msg, "unable to remove user, " + detail);
+    return remove_rc;
+  }
+
+  return 0;
+}
+
+int RGWUser::execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg)
+{ // Applies the fields marked as specified in op_state on top of the stored user (old_info), then persists the result with update(). Returns 0 or a negative error code.
+ bool populated = op_state.is_populated();
+ int ret = 0;
+ std::string subprocess_msg; // detail text filled in by keys.add()
+ std::string op_email = op_state.get_user_email();
+ std::string display_name = op_state.get_display_name();
+
+ RGWUserInfo user_info; // working copy that receives the modifications
+ RGWUserInfo duplicate_check; // result of the lookup by the requested email
+
+ // ensure that the user info has been populated or is populate-able
+ if (!op_state.has_existing_user() && !populated) {
+ set_err_msg(err_msg, "user not found");
+ return -ENOENT;
+ }
+
+ // if the user hasn't already been populated...attempt to
+ if (!populated) {
+ ret = init(op_state);
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to retrieve user info");
+ return ret;
+ }
+ }
+
+ // ensure that we can modify the user's attributes
+ if (user_id.compare(RGW_USER_ANON_ID) == 0) {
+ set_err_msg(err_msg, "unable to modify anonymous user's info");
+ return -EACCES;
+ }
+ // start from the stored record and overlay the requested changes
+ user_info = old_info;
+
+ std::string old_email = old_info.user_email;
+ if (!op_email.empty()) {
+ // make sure we are not adding a duplicate email
+ if (old_email.compare(op_email) != 0) { // NOTE(review): exact, case-sensitive comparison, while execute_add() compares emails with boost::iequals -- confirm which is intended
+ ret = rgw_get_user_info_by_email(store, op_email, duplicate_check);
+ if (ret >= 0 && duplicate_check.user_id.compare(user_id) != 0) {
+ set_err_msg(err_msg, "cannot add duplicate email");
+ return -ERR_EMAIL_EXIST;
+ }
+ }
+ user_info.user_email = op_email;
+ } else if (op_email.empty() && op_state.user_email_specified) {
+ // an empty email was explicitly specified: drop the email index and clear the field
+ ldout(store->ctx(), 10) << "removing email index: " << user_info.user_email << dendl;
+ ret = rgw_remove_email_index(store, user_info.user_email);
+ if (ret < 0 && ret != -ENOENT) {
+ ldout(store->ctx(), 0) << "ERROR: could not remove " << user_info.user_id << " index (err=" << ret << ")" << dendl;
+ return ret; // NOTE(review): err_msg is not set on this path
+ }
+ user_info.user_email = "";
+ }
+
+ // update the remaining user info
+ if (!display_name.empty())
+ user_info.display_name = display_name;
+
+ if (op_state.max_buckets_specified)
+ user_info.max_buckets = op_state.get_max_buckets();
+
+ if (op_state.admin_specified)
+ user_info.admin = op_state.admin;
+
+ if (op_state.system_specified)
+ user_info.system = op_state.system;
+ // supplied temp URL keys overwrite or add entries; existing ones are not removed
+ if (op_state.temp_url_key_specified) {
+ map<int, string>::iterator iter;
+ for (iter = op_state.temp_url_keys.begin();
+ iter != op_state.temp_url_keys.end(); ++iter) {
+ user_info.temp_url_keys[iter->first] = iter->second;
+ }
+ }
+
+ if (op_state.op_mask_specified)
+ user_info.op_mask = op_state.get_op_mask();
+
+ if (op_state.has_bucket_quota())
+ user_info.bucket_quota = op_state.get_bucket_quota();
+
+ if (op_state.has_user_quota())
+ user_info.user_quota = op_state.get_user_quota();
+
+ if (op_state.has_suspension_op()) {
+ __u8 suspended = op_state.get_suspension_status();
+ user_info.suspended = suspended;
+ // the new state is also pushed to the user's buckets, one listing chunk at a time
+ RGWUserBuckets buckets;
+
+ if (user_id.empty()) {
+ set_err_msg(err_msg, "empty user id passed...aborting");
+ return -EINVAL;
+ }
+
+ bool is_truncated = false;
+ string marker;
+ CephContext *cct = store->ctx();
+ size_t max_buckets = cct->_conf->rgw_list_buckets_max_chunk;
+ do {
+ ret = rgw_read_user_buckets(store, user_id, buckets, marker, string(),
+ max_buckets, false, &is_truncated);
+ if (ret < 0) {
+ set_err_msg(err_msg, "could not get buckets for uid: " + user_id.to_str());
+ return ret;
+ }
+
+ map<string, RGWBucketEnt>& m = buckets.get_buckets();
+ map<string, RGWBucketEnt>::iterator iter;
+
+ vector<rgw_bucket> bucket_names;
+ for (iter = m.begin(); iter != m.end(); ++iter) {
+ RGWBucketEnt obj = iter->second;
+ bucket_names.push_back(obj.bucket);
+ // remember the last bucket seen so the next listing resumes after it
+ marker = iter->first;
+ }
+
+ ret = store->set_buckets_enabled(bucket_names, !suspended); // enabled exactly when the user is not suspended
+ if (ret < 0) {
+ set_err_msg(err_msg, "failed to modify bucket");
+ return ret;
+ }
+
+ } while (is_truncated);
+ }
+
+ if (op_state.mfa_ids_specified) {
+ user_info.mfa_ids = op_state.mfa_ids;
+ }
+ op_state.set_user_info(user_info); // publish the modified record so keys.add() and update() below see it
+
+ // if we're supposed to modify keys, do so
+ if (op_state.has_key_op()) {
+ ret = keys.add(op_state, &subprocess_msg, true); // third argument is what execute_add() passes as defer_user_update
+ if (ret < 0) {
+ set_err_msg(err_msg, "unable to create or modify keys, " + subprocess_msg);
+ return ret;
+ }
+ }
+ // persist; update() sets err_msg itself on failure
+ ret = update(op_state, err_msg);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+// Public entry point for user modification: validate the request with
+// check_op(), then run execute_modify(). A check failure caused by a user
+// id mismatch against an already populated user gets its own message.
+int RGWUser::modify(RGWUserAdminOpState& op_state, std::string *err_msg)
+{
+  std::string detail;
+
+  const int check_rc = check_op(op_state, &detail);
+  if (check_rc < 0) {
+    const bool id_mismatch =
+      is_populated() && (user_id.compare(op_state.get_user_id()) != 0);
+
+    if (!id_mismatch) {
+      set_err_msg(err_msg, "unable to parse parameters, " + detail);
+    } else {
+      set_err_msg(err_msg, "unable to create user " + user_id.to_str()
+                  + " because user id " + op_state.get_user_id().to_str()
+                  + " already exists with email "
+                  + op_state.get_user_email());
+    }
+    return check_rc;
+  }
+
+  const int modify_rc = execute_modify(op_state, &detail);
+  if (modify_rc < 0) {
+    set_err_msg(err_msg, "unable to modify user, " + detail);
+    return modify_rc;
+  }
+
+  return 0;
+}
+
+// Re-initialise from op_state, then hand back the user info that op_state
+// holds afterwards.
+int RGWUser::info(RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, std::string *err_msg)
+{
+  const int init_rc = init(op_state);
+  if (init_rc < 0) {
+    set_err_msg(err_msg, "unable to fetch user info");
+    return init_rc;
+  }
+
+  fetched_info = op_state.get_user_info();
+  return 0;
+}
+
+// Hand back the user info cached in this object; fails with -EINVAL when
+// nothing has been populated yet.
+int RGWUser::info(RGWUserInfo& fetched_info, std::string *err_msg)
+{
+  if (is_populated()) {
+    fetched_info = old_info;
+    return 0;
+  }
+
+  set_err_msg(err_msg, "no user info saved");
+  return -EINVAL;
+}
+
+// List user metadata keys through the metadata manager and write them to
+// the flusher's formatter as
+//   result { keys [ key... ], truncated, count, marker (only if truncated) }
+//
+// op_state.max_entries is capped at 1000 (and written back to op_state);
+// op_state.marker selects where the listing starts.
+// Returns 0 on success or the negative error code from the metadata manager.
+int RGWUser::list(RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher)
+{
+  Formatter *formatter = flusher.get_formatter();
+  void *handle = nullptr;
+  std::string metadata_key = "user";
+  if (op_state.max_entries > 1000) {
+    op_state.max_entries = 1000;
+  }
+
+  int ret = store->meta_mgr->list_keys_init(metadata_key, op_state.marker, &handle);
+  if (ret < 0) {
+    return ret;
+  }
+
+  bool truncated = false;
+  uint64_t count = 0;
+  uint64_t left = 0;
+  flusher.start(0);
+
+  // open the result object section
+  formatter->open_object_section("result");
+
+  // open the user id list array section
+  formatter->open_array_section("keys");
+  do {
+    std::list<std::string> keys;
+    left = op_state.max_entries - count;
+    ret = store->meta_mgr->list_keys_next(handle, left, keys, &truncated);
+    if (ret < 0 && ret != -ENOENT) {
+      // release the listing handle before bailing out; the early return
+      // would otherwise skip list_keys_complete() and leak it
+      store->meta_mgr->list_keys_complete(handle);
+      return ret;
+    }
+    if (ret != -ENOENT) {
+      for (const auto& key : keys) {
+        formatter->dump_string("key", key);
+        ++count;
+      }
+    }
+  } while (truncated && left > 0);
+  // close user id list section
+  formatter->close_section();
+
+  formatter->dump_bool("truncated", truncated);
+  formatter->dump_int("count", count);
+  if (truncated) {
+    // the marker has to be read while the handle is still alive
+    formatter->dump_string("marker", store->meta_mgr->get_marker(handle));
+  }
+
+  // close result object section
+  formatter->close_section();
+
+  store->meta_mgr->list_keys_complete(handle);
+
+  flusher.flush();
+  return 0;
+}
+
+// Admin op: list users. Only the store is attached to the temporary RGWUser;
+// no particular user is loaded.
+int RGWUserAdminOp_User::list(RGWRados *store, RGWUserAdminOpState& op_state,
+                              RGWFormatterFlusher& flusher)
+{
+  RGWUser user;
+
+  const int storage_rc = user.init_storage(store);
+  if (storage_rc < 0) {
+    return storage_rc;
+  }
+
+  const int list_rc = user.list(op_state, flusher);
+  return (list_rc < 0) ? list_rc : 0;
+}
+
+// Admin op: dump an existing user's info. Stats are synced first when
+// op_state.sync_stats is set, and included in the dump when
+// op_state.fetch_stats is set. Nothing is written without a formatter.
+int RGWUserAdminOp_User::info(RGWRados *store, RGWUserAdminOpState& op_state,
+                              RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (op_state.sync_stats) {
+    rc = rgw_user_sync_all_stats(store, info.user_id);
+    if (rc < 0) {
+      return rc;
+    }
+  }
+
+  RGWStorageStats stats;
+  RGWStorageStats *stats_to_dump = NULL;
+  if (op_state.fetch_stats) {
+    const int stats_rc = store->get_user_stats(info.user_id, stats);
+    if (stats_rc < 0 && stats_rc != -ENOENT) {
+      return stats_rc;
+    }
+
+    // a missing stats record (-ENOENT) still dumps the stats object as is
+    stats_to_dump = &stats;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  dump_user_info(out, info, stats_to_dump);
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: create a user and dump the resulting info. -EEXIST from the add
+// step is reported as -ERR_USER_EXIST.
+int RGWUserAdminOp_User::create(RGWRados *store, RGWUserAdminOpState& op_state,
+                                RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.add(op_state, NULL);
+  if (rc < 0) {
+    return (rc == -EEXIST) ? -ERR_USER_EXIST : rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  dump_user_info(out, info);
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: modify a user and dump the resulting info. -ENOENT from the
+// modify step is reported as -ERR_NO_SUCH_USER.
+int RGWUserAdminOp_User::modify(RGWRados *store, RGWUserAdminOpState& op_state,
+                                RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.modify(op_state, NULL);
+  if (rc < 0) {
+    return (rc == -ENOENT) ? -ERR_NO_SUCH_USER : rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  dump_user_info(out, info);
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: remove a user.
+//
+// store/op_state identify the backing store and the user to remove. The
+// flusher is part of the common admin-op signature; this operation writes no
+// output to it.
+// Returns the result of RGWUser::remove(), with -ENOENT reported as
+// -ERR_NO_SUCH_USER, or the error from initialising the user.
+int RGWUserAdminOp_User::remove(RGWRados *store, RGWUserAdminOpState& op_state,
+                                RGWFormatterFlusher& flusher)
+{
+  RGWUser user;
+  int ret = user.init(store, op_state);
+  if (ret < 0)
+    return ret;
+
+  ret = user.remove(op_state, NULL);
+
+  if (ret == -ENOENT)
+    ret = -ERR_NO_SUCH_USER;
+  return ret;
+}
+
+// Admin op: add a subuser to an existing user and dump the user's subusers.
+int RGWUserAdminOp_Subuser::create(RGWRados *store, RGWUserAdminOpState& op_state,
+                                   RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.subusers.add(op_state, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  dump_subusers_info(out, info);
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: modify a subuser of an existing user and dump the user's
+// subusers.
+int RGWUserAdminOp_Subuser::modify(RGWRados *store, RGWUserAdminOpState& op_state,
+                                   RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.subusers.modify(op_state, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  dump_subusers_info(out, info);
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: remove a subuser from an existing user.
+//
+// The flusher is part of the common admin-op signature; this operation
+// writes no output to it.
+// Returns -ERR_NO_SUCH_USER when the user was not found, otherwise 0 or the
+// negative error code from initialisation / the subuser pool.
+int RGWUserAdminOp_Subuser::remove(RGWRados *store, RGWUserAdminOpState& op_state,
+                                   RGWFormatterFlusher& flusher)
+{
+  RGWUser user;
+  int ret = user.init(store, op_state);
+  if (ret < 0)
+    return ret;
+
+  if (!op_state.has_existing_user())
+    return -ERR_NO_SUCH_USER;
+
+  ret = user.subusers.remove(op_state, NULL);
+  if (ret < 0)
+    return ret;
+
+  return 0;
+}
+
+// Admin op: add a key to an existing user, then dump the user's keys of the
+// requested type (swift or S3). Other key types produce no dump.
+int RGWUserAdminOp_Key::create(RGWRados *store, RGWUserAdminOpState& op_state,
+                               RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.keys.add(op_state, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+
+  const int requested_type = op_state.get_key_type();
+  if (requested_type == KEY_TYPE_SWIFT) {
+    dump_swift_keys_info(out, info);
+  } else if (requested_type == KEY_TYPE_S3) {
+    dump_access_keys_info(out, info);
+  }
+
+  flusher.flush();
+
+  return 0;
+}
+
+// Admin op: remove a key from an existing user.
+//
+// The flusher is part of the common admin-op signature; this operation
+// writes no output to it.
+// Returns -ERR_NO_SUCH_USER when the user was not found, otherwise 0 or the
+// negative error code from initialisation / the key pool.
+int RGWUserAdminOp_Key::remove(RGWRados *store, RGWUserAdminOpState& op_state,
+                               RGWFormatterFlusher& flusher)
+{
+  RGWUser user;
+  int ret = user.init(store, op_state);
+  if (ret < 0)
+    return ret;
+
+  if (!op_state.has_existing_user())
+    return -ERR_NO_SUCH_USER;
+
+  ret = user.keys.remove(op_state, NULL);
+  if (ret < 0)
+    return ret;
+
+  return 0;
+}
+
+// Admin op: add capabilities to an existing user and dump the resulting
+// caps.
+int RGWUserAdminOp_Caps::add(RGWRados *store, RGWUserAdminOpState& op_state,
+                             RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.caps.add(op_state, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  info.caps.dump(out);
+  flusher.flush();
+
+  return 0;
+}
+
+
+// Admin op: remove capabilities from an existing user and dump the caps
+// that remain.
+int RGWUserAdminOp_Caps::remove(RGWRados *store, RGWUserAdminOpState& op_state,
+                                RGWFormatterFlusher& flusher)
+{
+  RGWUserInfo info;
+  RGWUser user;
+
+  int rc = user.init(store, op_state);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!op_state.has_existing_user()) {
+    return -ERR_NO_SUCH_USER;
+  }
+
+  Formatter *out = flusher.get_formatter();
+
+  rc = user.caps.remove(op_state, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  rc = user.info(info, NULL);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (!out) {
+    return 0;
+  }
+
+  flusher.start(0);
+  info.caps.dump(out);
+  flusher.flush();
+
+  return 0;
+}
+
+// User info together with the raw attrs stored alongside it, as exchanged
+// through the metadata interface. has_attrs records whether an "attrs"
+// field was present when the object was decoded from JSON.
+struct RGWUserCompleteInfo {
+  RGWUserInfo info;
+  map<string, bufferlist> attrs;
+  bool has_attrs;
+
+  RGWUserCompleteInfo() : has_attrs{false} {}
+
+  // Emit the user info fields followed by an "attrs" entry.
+  void dump(Formatter * const formatter) const {
+    info.dump(formatter);
+    encode_json("attrs", attrs, formatter);
+  }
+
+  // Inverse of dump(); remembers whether "attrs" was actually supplied.
+  void decode_json(JSONObj *json) {
+    decode_json_obj(info, json);
+    has_attrs = JSONDecoder::decode_json("attrs", attrs, json);
+  }
+};
+
+// Metadata object wrapping a complete user record plus the version and
+// modification time it was read with.
+class RGWUserMetadataObject : public RGWMetadataObject {
+  RGWUserCompleteInfo uci;
+
+public:
+  RGWUserMetadataObject(const RGWUserCompleteInfo& complete_info,
+                        obj_version& version, real_time modified)
+    : uci(complete_info)
+  {
+    // objv and mtime are not declared in this class, so they are assigned
+    // in the body rather than in the initialiser list
+    objv = version;
+    mtime = modified;
+  }
+
+  void dump(Formatter *formatter) const override {
+    uci.dump(formatter);
+  }
+};
+
+class RGWUserMetadataHandler : public RGWMetadataHandler { // metadata handler for the "user" section; an instance is registered with the metadata manager in rgw_user_init()
+public:
+ string get_type() override { return "user"; }
+ // get(): load the user named by entry (info + attrs) and return it through *obj as a newly allocated RGWUserMetadataObject
+ int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override {
+ RGWUserCompleteInfo uci;
+ RGWObjVersionTracker objv_tracker;
+ real_time mtime;
+
+ rgw_user uid(entry);
+
+ int ret = rgw_get_user_info_by_uid(store, uid, uci.info, &objv_tracker,
+ &mtime, NULL, &uci.attrs);
+ if (ret < 0) {
+ return ret;
+ }
+ // allocated with new; presumably the caller takes ownership of *obj -- TODO confirm
+ RGWUserMetadataObject *mdo = new RGWUserMetadataObject(uci, objv_tracker.read_version, mtime);
+ *obj = mdo;
+
+ return 0;
+ }
+ // put(): decode a user record from JSON and store it, unless check_versions() rejects the write; returns STATUS_APPLIED, STATUS_NO_APPLY or a negative error
+ int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+ real_time mtime, JSONObj *obj, sync_type_t sync_mode) override {
+ RGWUserCompleteInfo uci;
+
+ try {
+ decode_json_obj(uci, obj);
+ } catch (JSONDecoder::err& e) {
+ return -EINVAL;
+ }
+ // attrs are only passed on to the store when the JSON actually carried an "attrs" field
+ map<string, bufferlist> *pattrs = NULL;
+ if (uci.has_attrs) {
+ pattrs = &uci.attrs;
+ }
+
+ rgw_user uid(entry);
+
+ RGWUserInfo old_info;
+ real_time orig_mtime;
+ int ret = rgw_get_user_info_by_uid(store, uid, old_info, &objv_tracker, &orig_mtime); // on -ENOENT old_info stays default-constructed and is still passed to the store call below
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+
+ // are we actually going to perform this put, or is it too old?
+ if (ret != -ENOENT &&
+ !check_versions(objv_tracker.read_version, orig_mtime,
+ objv_tracker.write_version, mtime, sync_mode)) {
+ return STATUS_NO_APPLY;
+ }
+
+ ret = rgw_store_user_info(store, uci.info, &old_info, &objv_tracker, mtime, false, pattrs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return STATUS_APPLIED;
+ }
+ // state carried behind the opaque void* handle used by the list_keys_* methods
+ struct list_keys_info {
+ RGWRados *store;
+ RGWListRawObjsCtx ctx;
+ };
+ // remove(): look the user up by uid, then delete it
+ int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override {
+ RGWUserInfo info;
+
+ rgw_user uid(entry);
+
+ int ret = rgw_get_user_info_by_uid(store, uid, info, &objv_tracker);
+ if (ret < 0)
+ return ret;
+
+ return rgw_delete_user(store, info, objv_tracker);
+ }
+ // get_pool_and_oid(): the metadata key is used unchanged as the oid inside the zone's user_uid_pool
+ void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {
+ oid = key;
+ pool = store->svc.zone->get_zone_params().user_uid_pool;
+ }
+ // list_keys_init(): start a raw object listing of user_uid_pool at marker; on success *phandle owns a list_keys_info that list_keys_complete() deletes
+ int list_keys_init(RGWRados *store, const string& marker, void **phandle) override
+ {
+ auto info = std::make_unique<list_keys_info>(); // unique_ptr frees the state if the init call below fails
+
+ info->store = store;
+
+ int ret = store->list_raw_objects_init(store->svc.zone->get_zone_params().user_uid_pool, marker,
+ &info->ctx);
+ if (ret < 0) {
+ return ret;
+ }
+ // ownership moves to the caller-held handle
+ *phandle = (void *)info.release();
+
+ return 0;
+ }
+ // list_keys_next(): fetch up to max raw object names and return those that do not contain ".buckets"; -ENOENT is reported as an empty, non-truncated result
+ int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+
+ string no_filter;
+
+ keys.clear();
+
+ RGWRados *store = info->store;
+
+ list<string> unfiltered_keys;
+
+ int ret = store->list_raw_objects_next(no_filter, max, info->ctx,
+ unfiltered_keys, truncated);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+ if (ret == -ENOENT) {
+ if (truncated)
+ *truncated = false;
+ return 0;
+ }
+
+ // now filter out the buckets entries
+ list<string>::iterator iter;
+ for (iter = unfiltered_keys.begin(); iter != unfiltered_keys.end(); ++iter) {
+ string& k = *iter;
+ // NOTE(review): substring match anywhere in the name, not a suffix test -- a uid containing ".buckets" would be hidden as well; confirm intended
+ if (k.find(".buckets") == string::npos) {
+ keys.push_back(k);
+ }
+ }
+
+ return 0;
+ }
+ // list_keys_complete(): free the state allocated by list_keys_init()
+ void list_keys_complete(void *handle) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+ delete info;
+ }
+ // get_marker(): cursor of the underlying raw listing
+ string get_marker(void *handle) override {
+ list_keys_info *info = static_cast<list_keys_info *>(handle);
+ return info->store->list_raw_objs_get_cursor(info->ctx);
+ }
+};
+
+// Module start-up hook: wire the user info cache to the store's cache
+// service and register the "user" metadata handler with the metadata
+// manager.
+void rgw_user_init(RGWRados *store)
+{
+  uinfo_cache.init(store->svc.cache);
+
+  user_meta_handler = new RGWUserMetadataHandler();
+  store->meta_mgr->register_handler(user_meta_handler);
+}
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
new file mode 100644
index 00000000..942648b5
--- /dev/null
+++ b/src/rgw/rgw_user.h
@@ -0,0 +1,774 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_USER_H
+#define CEPH_RGW_USER_H
+
+#include <string>
+#include <boost/algorithm/string.hpp>
+#include "include/ceph_assert.h"
+
+#include "include/types.h"
+#include "rgw_common.h"
+#include "rgw_tools.h"
+
+#include "rgw_rados.h"
+
+#include "rgw_string.h"
+
+#include "common/Formatter.h"
+#include "rgw_formats.h"
+
+#define RGW_USER_ANON_ID "anonymous"
+
+#define SECRET_KEY_LEN 40
+#define PUBLIC_ID_LEN 20
+#define RAND_SUBUSER_LEN 5
+
+#define XMLNS_AWS_S3 "http://s3.amazonaws.com/doc/2006-03-01/"
+
+/**
+ * A string wrapper that includes encode/decode functions
+ * for easily accessing a UID in all forms
+ */
+struct RGWUID
+{
+ rgw_user user_id;
+ // Serialize user_id as the string produced by user_id.to_str().
+ void encode(bufferlist& bl) const {
+ string s;
+ user_id.to_str(s);
+ using ceph::encode;
+ encode(s, bl);
+ }
+ // Read one string from bl and parse it back into user_id via from_str().
+ void decode(bufferlist::const_iterator& bl) {
+ string s;
+ using ceph::decode;
+ decode(s, bl);
+ user_id.from_str(s);
+ }
+};
+WRITE_CLASS_ENCODER(RGWUID)
+
+extern int rgw_user_sync_all_stats(RGWRados *store, const rgw_user& user_id);
+extern int rgw_user_get_all_buckets_stats(RGWRados *store, const rgw_user& user_id, map<string, cls_user_bucket_entry>&buckets_usage_map);
+
+/**
+ * Get the anonymous (ie, unauthenticated) user info.
+ */
+extern void rgw_get_anon_user(RGWUserInfo& info);
+
+/**
+ * Save the given user information to storage.
+ * Returns: 0 on success, -ERR# on failure.
+ */
+extern int rgw_store_user_info(RGWRados *store,
+ RGWUserInfo& info,
+ RGWUserInfo *old_info,
+ RGWObjVersionTracker *objv_tracker,
+ real_time mtime,
+ bool exclusive,
+ map<string, bufferlist> *pattrs = NULL);
+
+/**
+ * Given a user_id, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_uid(RGWRados *store,
+ const rgw_user& user_id,
+ RGWUserInfo& info,
+ RGWObjVersionTracker *objv_tracker = NULL,
+ real_time *pmtime = NULL,
+ rgw_cache_entry_info *cache_info = NULL,
+ map<string, bufferlist> *pattrs = NULL);
+/**
+ * Given an email, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_email(RGWRados *store, string& email, RGWUserInfo& info,
+ RGWObjVersionTracker *objv_tracker = NULL, real_time *pmtime = NULL);
+/**
+ * Given a swift username, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_swift(RGWRados *store,
+ const string& swift_name,
+ RGWUserInfo& info, /* out */
+ RGWObjVersionTracker *objv_tracker = nullptr,
+ real_time *pmtime = nullptr);
+/**
+ * Given an access key, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+extern int rgw_get_user_info_by_access_key(RGWRados* store,
+ const std::string& access_key,
+ RGWUserInfo& info,
+ RGWObjVersionTracker* objv_tracker = nullptr,
+ real_time* pmtime = nullptr);
+/**
+ * Get all the custom metadata stored for user specified in @user_id
+ * and put it into @attrs.
+ * Returns: 0 on success, -ERR# on failure.
+ */
+extern int rgw_get_user_attrs_by_uid(RGWRados *store,
+ const rgw_user& user_id,
+ map<string, bufferlist>& attrs,
+ RGWObjVersionTracker *objv_tracker = NULL);
+/**
+ * Given an RGWUserInfo, deletes the user and its bucket ACLs.
+ */
+extern int rgw_delete_user(RGWRados *store, RGWUserInfo& user, RGWObjVersionTracker& objv_tracker);
+
+/*
+ * remove the different indexes
+ */
+extern int rgw_remove_key_index(RGWRados *store, RGWAccessKey& access_key);
+extern int rgw_remove_uid_index(RGWRados *store, rgw_user& uid);
+extern int rgw_remove_email_index(RGWRados *store, string& email);
+extern int rgw_remove_swift_name_index(RGWRados *store, string& swift_name);
+
+extern void rgw_perm_to_str(uint32_t mask, char *buf, int len);
+extern uint32_t rgw_str_to_perm(const char *str);
+
+extern int rgw_validate_tenant_name(const string& t);
+
+// Kind of key an operation refers to.
+// NOTE(review): presumably the values stored in
+// RGWUserAdminOpState::key_type -- confirm against its users.
+enum ObjectKeyType {
+ KEY_TYPE_SWIFT,
+ KEY_TYPE_S3,
+ KEY_TYPE_UNDEFINED
+};
+
+// NOTE(review): looks like a selector between generating a new key and
+// modifying an existing one in RGWAccessKeyPool -- confirm with callers.
+enum RGWKeyPoolOp {
+ GENERATE_KEY,
+ MODIFY_KEY
+};
+
+// Kinds of identifier a user can be referred to by; the names mirror the
+// rgw_get_user_info_by_*() lookups declared in this header.
+enum RGWUserId {
+ RGW_USER_ID,
+ RGW_SWIFT_USERNAME,
+ RGW_USER_EMAIL,
+ RGW_ACCESS_KEY,
+};
+
+/*
+ * An RGWUser class along with supporting classes created
+ * to support the creation of a RESTful administrative API
+ */
+struct RGWUserAdminOpState {
+ // user attributes
+ RGWUserInfo info;
+ rgw_user user_id;
+ std::string user_email;
+ std::string display_name;
+ int32_t max_buckets;
+ // byte-sized flags, written through the matching set_*() methods below
+ __u8 suspended;
+ __u8 admin;
+ __u8 system;
+ __u8 exclusive;
+ __u8 fetch_stats;
+ __u8 sync_stats;
+ std::string caps;
+ RGWObjVersionTracker objv;
+ uint32_t op_mask;
+ // filled by set_temp_url_key(key, index): index -> key
+ map<int, string> temp_url_keys;
+
+ // subuser attributes
+ std::string subuser;
+ uint32_t perm_mask;
+
+ // key_attributes
+ std::string id; // access key
+ std::string key; // secret key
+ int32_t key_type;
+
+ std::set<string> mfa_ids;
+
+ // operation attributes
+ // ("*_specified" flags are raised by the matching set_*() method)
+ bool existing_user;
+ bool existing_key;
+ bool existing_subuser;
+ bool existing_email;
+ bool subuser_specified;
+ bool gen_secret;
+ bool gen_access;
+ bool gen_subuser;
+ bool id_specified;
+ bool key_specified;
+ bool type_specified;
+ bool key_type_setbycontext; // key type set by user or subuser context
+ bool purge_data;
+ bool purge_keys;
+ bool display_name_specified;
+ bool user_email_specified;
+ bool max_buckets_specified;
+ bool perm_specified;
+ bool op_mask_specified;
+ bool caps_specified;
+ bool suspension_op;
+ // initialized here rather than in the constructor body
+ bool admin_specified = false;
+ bool system_specified;
+ bool key_op;
+ bool temp_url_key_specified;
+ bool found_by_uid;
+ bool found_by_email;
+ bool found_by_key;
+ bool mfa_ids_specified;
+
+ // req parameters
+ bool populated;
+ bool initialized;
+ bool key_params_checked;
+ bool subuser_params_checked;
+ bool user_params_checked;
+
+ bool bucket_quota_specified;
+ bool user_quota_specified;
+
+ RGWQuotaInfo bucket_quota;
+ RGWQuotaInfo user_quota;
+
+ // req parameters for listing user
+ std::string marker;
+ uint32_t max_entries;
+
+ /* Record an explicit access key. An empty string is ignored; otherwise the
+  * key is stored, access-key generation is switched off and the request is
+  * flagged as a key operation. */
+ void set_access_key(const std::string& access_key) {
+   if (!access_key.empty()) {
+     id = access_key;
+     id_specified = true;
+     gen_access = false;
+     key_op = true;
+   }
+ }
+
+ /* Record an explicit secret key; same rules as set_access_key(). */
+ void set_secret_key(const std::string& secret_key) {
+   if (!secret_key.empty()) {
+     key = secret_key;
+     key_specified = true;
+     gen_secret = false;
+     key_op = true;
+   }
+ }
+
+ /* Replace the target user id unless the given one is empty. */
+ void set_user_id(rgw_user& id) {
+   if (!id.empty()) {
+     user_id = id;
+   }
+ }
+
+ /* Store the e-mail address in lower case. The caller's string is
+  * lower-cased in place as well, and an empty address still counts as
+  * "specified". */
+ void set_user_email(std::string& email) {
+   /* always lowercase email address */
+   boost::algorithm::to_lower(email);
+   user_email_specified = true;
+   user_email = email;
+ }
+
+ /* Record the display name; an empty string is ignored. */
+ void set_display_name(const std::string& name) {
+   if (!name.empty()) {
+     display_name = name;
+     display_name_specified = true;
+   }
+ }
+
+ /* Record the subuser name. The argument is either "<subuser>" or
+  * "<user>:<subuser>" (split at the first ':'). In the second form the part
+  * before the ':' is parsed as an rgw_user: if it carries a tenant it replaces
+  * user_id entirely, otherwise only user_id.id is overwritten. An empty
+  * argument is ignored. */
+ void set_subuser(std::string& _subuser) {
+   if (_subuser.empty())
+     return;
+
+   const size_t sep = _subuser.find(":");
+   if (sep == string::npos) {
+     subuser = _subuser;
+   } else {
+     rgw_user owner;
+     owner.from_str(_subuser.substr(0, sep));
+     if (owner.tenant.empty()) {
+       user_id.id = owner.id;
+     } else {
+       user_id = owner;
+     }
+     subuser = _subuser.substr(sep + 1);
+   }
+
+   subuser_specified = true;
+ }
+
+ /* Record the caps string; an empty string is ignored. */
+ void set_caps(const std::string& _caps) {
+   if (!_caps.empty()) {
+     caps = _caps;
+     caps_specified = true;
+   }
+ }
+
+ // --- value setters: store the value and raise the matching flag ---
+
+ void set_perm(uint32_t perm) {
+ perm_mask = perm;
+ perm_specified = true;
+ }
+
+ void set_op_mask(uint32_t mask) {
+ op_mask = mask;
+ op_mask_specified = true;
+ }
+
+ // Stores `key` under `index` in temp_url_keys, replacing any previous entry.
+ void set_temp_url_key(const string& key, int index) {
+ temp_url_keys[index] = key;
+ temp_url_key_specified = true;
+ }
+
+ void set_key_type(int32_t type) {
+ key_type = type;
+ type_specified = true;
+ }
+
+ void set_suspension(__u8 is_suspended) {
+ suspended = is_suspended;
+ suspension_op = true;
+ }
+
+ void set_admin(__u8 is_admin) {
+ admin = is_admin;
+ admin_specified = true;
+ }
+
+ void set_system(__u8 is_system) {
+ system = is_system;
+ system_specified = true;
+ }
+
+ // The next three store the value only; there is no companion flag.
+ void set_exclusive(__u8 is_exclusive) {
+ exclusive = is_exclusive;
+ }
+
+ void set_fetch_stats(__u8 is_fetch_stats) {
+ fetch_stats = is_fetch_stats;
+ }
+
+ void set_sync_stats(__u8 is_sync_stats) {
+ sync_stats = is_sync_stats;
+ }
+
+ // Copies the whole record and takes user_id from it.
+ void set_user_info(RGWUserInfo& user_info) {
+ user_id = user_info.user_id;
+ info = user_info;
+ }
+
+ void set_max_buckets(int32_t mb) {
+ max_buckets = mb;
+ max_buckets_specified = true;
+ }
+
+ // --- key generation / purge requests: each also marks a key operation ---
+
+ void set_gen_access() {
+ gen_access = true;
+ key_op = true;
+ }
+
+ void set_gen_secret() {
+ gen_secret = true;
+ key_op = true;
+ }
+
+ // Requests generation only for the parts (access key / secret) that are
+ // still empty.
+ void set_generate_key() {
+ if (id.empty())
+ gen_access = true;
+ if (key.empty())
+ gen_secret = true;
+ key_op = true;
+ }
+
+ // Clears both generation requests; key_op is left as it is.
+ void clear_generate_key() {
+ gen_access = false;
+ gen_secret = false;
+ }
+
+ void set_purge_keys() {
+ purge_keys = true;
+ key_op = true;
+ }
+
+ void set_bucket_quota(RGWQuotaInfo& quota) {
+ bucket_quota = quota;
+ bucket_quota_specified = true;
+ }
+
+ void set_user_quota(RGWQuotaInfo& quota) {
+ user_quota = quota;
+ user_quota_specified = true;
+ }
+
+ void set_mfa_ids(const std::set<string>& ids) {
+ mfa_ids = ids;
+ mfa_ids_specified = true;
+ }
+
+ // --- flag queries: each returns the named member unchanged ---
+ bool is_populated() { return populated; }
+ bool is_initialized() { return initialized; }
+ bool has_existing_user() { return existing_user; }
+ bool has_existing_key() { return existing_key; }
+ bool has_existing_subuser() { return existing_subuser; }
+ bool has_existing_email() { return existing_email; }
+ bool has_subuser() { return subuser_specified; }
+ bool has_key_op() { return key_op; }
+ bool has_caps_op() { return caps_specified; }
+ bool has_suspension_op() { return suspension_op; }
+ bool has_subuser_perm() { return perm_specified; }
+ bool has_op_mask() { return op_mask_specified; }
+ bool will_gen_access() { return gen_access; }
+ bool will_gen_secret() { return gen_secret; }
+ bool will_gen_subuser() { return gen_subuser; }
+ bool will_purge_keys() { return purge_keys; }
+ bool will_purge_data() { return purge_data; }
+ // same value as will_gen_subuser()
+ bool will_generate_subuser() { return gen_subuser; }
+ bool has_bucket_quota() { return bucket_quota_specified; }
+ bool has_user_quota() { return user_quota_specified; }
+ // --- flag setters ---
+ void set_populated() { populated = true; }
+ void clear_populated() { populated = false; }
+ void set_initialized() { initialized = true; }
+ void set_existing_user(bool flag) { existing_user = flag; }
+ void set_existing_key(bool flag) { existing_key = flag; }
+ void set_existing_subuser(bool flag) { existing_subuser = flag; }
+ void set_existing_email(bool flag) { existing_email = flag; }
+ void set_purge_data(bool flag) { purge_data = flag; }
+ void set_generate_subuser(bool flag) { gen_subuser = flag; }
+ // --- value getters ---
+ __u8 get_suspension_status() { return suspended; }
+ int32_t get_key_type() {return key_type; }
+ uint32_t get_subuser_perm() { return perm_mask; }
+ int32_t get_max_buckets() { return max_buckets; }
+ uint32_t get_op_mask() { return op_mask; }
+ RGWQuotaInfo& get_bucket_quota() { return bucket_quota; }
+ RGWQuotaInfo& get_user_quota() { return user_quota; }
+ set<string>& get_mfa_ids() { return mfa_ids; }
+
+ // get_user_id() hands out a mutable reference; the string getters below
+ // return copies.
+ rgw_user& get_user_id() { return user_id; }
+ std::string get_subuser() { return subuser; }
+ std::string get_access_key() { return id; }
+ std::string get_secret_key() { return key; }
+ std::string get_caps() { return caps; }
+ std::string get_user_email() { return user_email; }
+ std::string get_display_name() { return display_name; }
+ map<int, std::string>& get_temp_url_keys() { return temp_url_keys; }
+
+ RGWUserInfo& get_user_info() { return info; }
+
+ // Pointers into `info`; they stay valid only as long as this object does.
+ map<std::string, RGWAccessKey> *get_swift_keys() { return &info.swift_keys; }
+ map<std::string, RGWAccessKey> *get_access_keys() { return &info.access_keys; }
+ map<std::string, RGWSubUser> *get_subusers() { return &info.subusers; }
+
+ RGWUserCaps *get_caps_obj() { return &info.caps; }
+
+ /* Build "<user_id as string>:<subuser>". Returns "" when either the user id
+  * or the subuser is empty. */
+ std::string build_default_swift_kid() {
+   if (!user_id.empty() && !subuser.empty()) {
+     std::string kid;
+     user_id.to_str(kid);
+     kid += ":";
+     kid += subuser;
+     return kid;
+   }
+
+   return "";
+ }
+
+ /* Make up a subuser name: the string form of user_id followed by a suffix
+  * obtained from gen_rand_alphanumeric_upper(). The result is stored in
+  * `subuser` and returned. Returns "" (and leaves `subuser` untouched) when
+  * user_id is empty or the generated suffix is empty. */
+ std::string generate_subuser() {
+   if (user_id.empty())
+     return "";
+
+   std::string name;
+   user_id.to_str(name);
+
+   // RAND_SUBUSER_LEN characters plus one extra byte
+   char suffix[RAND_SUBUSER_LEN + 1];
+   int suffix_size = RAND_SUBUSER_LEN + 1;
+   gen_rand_alphanumeric_upper(g_ceph_context, suffix, suffix_size);
+
+   if (suffix[0] == '\0')
+     return "";
+
+   name.append(suffix);
+   subuser = name;
+
+   return name;
+ }
+
+ /* Start from a blank request: user_id is the anonymous user, max_buckets is
+  * RGW_DEFAULT_MAX_BUCKETS, key_type is -1, every byte flag is 0, every
+  * boolean flag is false, and listing defaults to 1000 entries from an empty
+  * marker. */
+ RGWUserAdminOpState() : user_id(RGW_USER_ANON_ID)
+ {
+   max_buckets = RGW_DEFAULT_MAX_BUCKETS;
+   key_type = -1;
+   perm_mask = RGW_PERM_NONE;
+   suspended = 0;
+   admin = 0;
+   system = 0;
+   exclusive = 0;
+   fetch_stats = 0;
+   // give sync_stats a defined value like its sibling flags; it used to stay
+   // indeterminate unless set_sync_stats() was called
+   sync_stats = 0;
+   op_mask = 0;
+
+   existing_user = false;
+   existing_key = false;
+   existing_subuser = false;
+   existing_email = false;
+   subuser_specified = false;
+   caps_specified = false;
+   purge_keys = false;
+   gen_secret = false;
+   gen_access = false;
+   gen_subuser = false;
+   id_specified = false;
+   key_specified = false;
+   type_specified = false;
+   key_type_setbycontext = false;
+   purge_data = false;
+   display_name_specified = false;
+   user_email_specified = false;
+   max_buckets_specified = false;
+   perm_specified = false;
+   op_mask_specified = false;
+   suspension_op = false;
+   system_specified = false;
+   key_op = false;
+   populated = false;
+   initialized = false;
+   key_params_checked = false;
+   subuser_params_checked = false;
+   user_params_checked = false;
+   bucket_quota_specified = false;
+   temp_url_key_specified = false;
+   user_quota_specified = false;
+   found_by_uid = false;
+   found_by_email = false;
+   found_by_key = false;
+   mfa_ids_specified = false;
+   max_entries = 1000;
+   marker = "";
+ }
+};
+
+class RGWUser;
+
+/* Key-management part of an RGWUser. Only declarations are given here; the
+ * member functions are implemented outside this header.
+ * NOTE(review): swift_keys/access_keys are raw pointers -- presumably they
+ * point into the RGWUserInfo maps exposed by RGWUserAdminOpState; confirm in
+ * init(). */
+class RGWAccessKeyPool
+{
+ RGWUser *user;
+
+ std::map<std::string, int, ltstr_nocase> key_type_map;
+ rgw_user user_id;
+ RGWRados *store;
+
+ map<std::string, RGWAccessKey> *swift_keys;
+ map<std::string, RGWAccessKey> *access_keys;
+
+ // we don't want to allow keys for the anonymous user or a null user
+ bool keys_allowed;
+
+private:
+ int create_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int generate_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int modify_key(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ int check_key_owner(RGWUserAdminOpState& op_state);
+ bool check_existing_key(RGWUserAdminOpState& op_state);
+ int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ /* API Contract Fulfilment */
+ int execute_add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int execute_remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int remove_subuser_keys(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+
+ // private overloads taking an explicit defer_save; reachable by the friend
+ // classes listed at the end of the class
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+public:
+ explicit RGWAccessKeyPool(RGWUser* usr);
+ ~RGWAccessKeyPool();
+
+ int init(RGWUserAdminOpState& op_state);
+
+ /* API Contracted Methods */
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ friend class RGWUser;
+ friend class RGWSubUserPool;
+};
+
+/* Subuser-management part of an RGWUser. Only declarations are given here;
+ * the member functions are implemented outside this header. */
+class RGWSubUserPool
+{
+ RGWUser *user;
+
+ rgw_user user_id;
+ RGWRados *store;
+ bool subusers_allowed;
+
+ // NOTE(review): raw pointer, presumably into the owning user's
+ // RGWUserInfo::subusers -- confirm in init()
+ map<string, RGWSubUser> *subuser_map;
+
+private:
+ int check_op(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ /* API Contract Fulfillment */
+ int execute_add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int execute_remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+
+ // private overloads taking an explicit defer_save
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int modify(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+public:
+ explicit RGWSubUserPool(RGWUser *user);
+ ~RGWSubUserPool();
+
+ bool exists(std::string subuser);
+ int init(RGWUserAdminOpState& op_state);
+
+ /* API contracted methods */
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int modify(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ friend class RGWUser;
+};
+
+/* Capability-management part of an RGWUser. Only declarations are given
+ * here; the member functions are implemented outside this header. */
+class RGWUserCapPool
+{
+ RGWUserCaps *caps;
+ bool caps_allowed;
+ RGWUser *user;
+
+private:
+ // private overloads taking an explicit defer_save
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg, bool defer_save);
+
+public:
+ explicit RGWUserCapPool(RGWUser *user);
+ ~RGWUserCapPool();
+
+ int init(RGWUserAdminOpState& op_state);
+
+ /* API contracted methods */
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ friend class RGWUser;
+};
+
+/* Administrative view of one RGW user. It aggregates the caps, keys and
+ * subusers pools as public members and grants those pool classes friend
+ * access. Only declarations are given here. */
+class RGWUser
+{
+
+private:
+ RGWUserInfo old_info;
+ RGWRados *store;
+
+ rgw_user user_id;
+ // toggled by set_populated()/clear_populated() below
+ bool info_stored;
+
+ void set_populated() { info_stored = true; }
+ void clear_populated() { info_stored = false; }
+ bool is_populated() { return info_stored; }
+
+ int check_op(RGWUserAdminOpState& req, std::string *err_msg);
+ int update(RGWUserAdminOpState& op_state, std::string *err_msg);
+
+ void clear_members();
+ void init_default();
+
+ /* API Contract Fulfillment */
+ int execute_add(RGWUserAdminOpState& op_state, std::string *err_msg);
+ int execute_remove(RGWUserAdminOpState& op_state, std::string *err_msg);
+ int execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg);
+
+public:
+ RGWUser();
+ ~RGWUser();
+
+ int init(RGWRados *storage, RGWUserAdminOpState& op_state);
+
+ int init_storage(RGWRados *storage);
+ int init(RGWUserAdminOpState& op_state);
+ int init_members(RGWUserAdminOpState& op_state);
+
+ RGWRados *get_store() { return store; }
+
+ /* API Contracted Members */
+ RGWUserCapPool caps;
+ RGWAccessKeyPool keys;
+ RGWSubUserPool subusers;
+
+ /* API Contracted Methods */
+ int add(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+ int remove(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ /* remove an already populated RGWUser */
+ int remove(std::string *err_msg = NULL);
+
+ int modify(RGWUserAdminOpState& op_state, std::string *err_msg = NULL);
+
+ /* retrieve info from an existing user in the RGW system */
+ int info(RGWUserAdminOpState& op_state, RGWUserInfo& fetched_info, std::string *err_msg = NULL);
+
+ /* info from an already populated RGWUser */
+ int info (RGWUserInfo& fetched_info, std::string *err_msg = NULL);
+
+ /* list the existing users */
+ int list(RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ friend class RGWAccessKeyPool;
+ friend class RGWSubUserPool;
+ friend class RGWUserCapPool;
+};
+
+/* Wrappers for admin API functionality */
+
+/* Static entry points for user-level admin operations. Each takes the store,
+ * the request state and a formatter flusher.
+ * NOTE(review): presumably the result is written through `flusher` and the
+ * int is 0 or a negative error code -- confirm in the implementation. */
+class RGWUserAdminOp_User
+{
+public:
+ static int list(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int info(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int create(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int modify(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int remove(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+};
+
+/* Static entry points for subuser admin operations (create/modify/remove);
+ * same parameter pattern as the other RGWUserAdminOp_* wrappers. */
+class RGWUserAdminOp_Subuser
+{
+public:
+ static int create(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int modify(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int remove(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+};
+
+/* Static entry points for key admin operations (create/remove); same
+ * parameter pattern as the other RGWUserAdminOp_* wrappers. */
+class RGWUserAdminOp_Key
+{
+public:
+ static int create(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int remove(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+};
+
+/* Static entry points for capability admin operations (add/remove); same
+ * parameter pattern as the other RGWUserAdminOp_* wrappers. */
+class RGWUserAdminOp_Caps
+{
+public:
+ static int add(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+
+ static int remove(RGWRados *store,
+ RGWUserAdminOpState& op_state, RGWFormatterFlusher& flusher);
+};
+
+class RGWMetadataManager;
+
+extern void rgw_user_init(RGWRados *store);
+
+#endif
diff --git a/src/rgw/rgw_web_idp.h b/src/rgw/rgw_web_idp.h
new file mode 100644
index 00000000..b357338c
--- /dev/null
+++ b/src/rgw/rgw_web_idp.h
@@ -0,0 +1,29 @@
+#ifndef CEPH_RGW_WEB_IDP_H
+#define CEPH_RGW_WEB_IDP_H
+
+#include <utility>
+#include <boost/optional.hpp>
+#include <boost/utility/string_view.hpp>
+
+#include "rgw_auth.h"
+#include "rgw_common.h"
+
+namespace rgw {
+namespace web_idp {
+
+//WebToken contains some claims from the decoded token which are of interest to us.
+// WebTokenClaims holds the claims from a decoded web token that are of
+// interest to us.
+struct WebTokenClaims {
+ // Subject of the token
+ string sub;
+ // Intended audience for this token
+ string aud;
+ // Issuer of this token
+ string iss;
+ // Human-readable id for the resource owner
+ string user_name;
+};
+
+}; /* namespace web_idp */
+}; /* namespace rgw */
+
+#endif /* CEPH_RGW_WEB_IDP_H */
diff --git a/src/rgw/rgw_website.cc b/src/rgw/rgw_website.cc
new file mode 100644
index 00000000..13a3b1de
--- /dev/null
+++ b/src/rgw/rgw_website.cc
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda@redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson@dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/debug.h"
+#include "common/ceph_json.h"
+
+#include "acconfig.h"
+
+#include <errno.h>
+#include <string>
+#include <list>
+#include "include/types.h"
+#include "rgw_website.h"
+
+
+
+/* True when `key` begins with key_prefix_equals (an empty prefix matches
+ * every key). */
+bool RGWBWRoutingRuleCondition::check_key_condition(const string& key) {
+  const size_t prefix_len = key_prefix_equals.size();
+  if (key.size() < prefix_len) {
+    return false;
+  }
+  return key.compare(0, prefix_len, key_prefix_equals) == 0;
+}
+
+
+/* Build the redirect target for `key` into *new_url as
+ * "<protocol>://<hostname>/<path>".
+ *  - protocol/hostname come from the rule, falling back to the defaults
+ *    passed in when the rule leaves them empty;
+ *  - path is replace_key_prefix_with + (key minus the condition prefix),
+ *    else replace_key_with, else the key itself.
+ * *redirect_code is written only when the rule carries a positive code. */
+void RGWBWRoutingRule::apply_rule(const string& default_protocol, const string& default_hostname,
+                                  const string& key, string *new_url, int *redirect_code)
+{
+  RGWRedirectInfo& target = redirect_info.redirect;
+
+  string protocol = default_protocol;
+  if (!target.protocol.empty()) {
+    protocol = target.protocol;
+  }
+  string hostname = default_hostname;
+  if (!target.hostname.empty()) {
+    hostname = target.hostname;
+  }
+
+  string& url = *new_url;
+  url = protocol + "://" + hostname + "/";
+
+  if (!redirect_info.replace_key_prefix_with.empty()) {
+    // NOTE(review): substr() throws std::out_of_range when key is shorter
+    // than condition.key_prefix_equals, so this relies on the caller having
+    // matched the key condition first.
+    url += redirect_info.replace_key_prefix_with;
+    url += key.substr(condition.key_prefix_equals.size());
+  } else if (!redirect_info.replace_key_with.empty()) {
+    url += redirect_info.replace_key_with;
+  } else {
+    url += key;
+  }
+
+  if (target.http_redirect_code > 0) {
+    *redirect_code = target.http_redirect_code;
+  }
+}
+
+bool RGWBWRoutingRules::check_key_and_error_code_condition(const string &key, int error_code, RGWBWRoutingRule **rule)
+{
+ for (list<RGWBWRoutingRule>::iterator iter = rules.begin(); iter != rules.end(); ++iter) {
+ if (iter->check_key_condition(key) && iter->check_error_code_condition(error_code)) {
+ *rule = &(*iter);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Find the first rule whose key condition holds for `key`. On a match *rule
+ * points at that rule and true is returned; otherwise *rule is not written. */
+bool RGWBWRoutingRules::check_key_condition(const string& key, RGWBWRoutingRule **rule)
+{
+  for (RGWBWRoutingRule& candidate : rules) {
+    if (candidate.check_key_condition(key)) {
+      *rule = &candidate;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool RGWBWRoutingRules::check_error_code_condition(const int http_error_code, RGWBWRoutingRule **rule)
+{
+ for (list<RGWBWRoutingRule>::iterator iter = rules.begin(); iter != rules.end(); ++iter) {
+ if (iter->check_error_code_condition(http_error_code)) {
+ *rule = &(*iter);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Decide whether a request for `key` that ended with `http_error_code`
+ * should be redirected, and if so copy the rule to apply into *redirect.
+ *
+ * A configured redirect-all target (non-empty redirect_all.hostname) wins
+ * over the routing rules and yields a synthetic rule with HTTP code 301.
+ * Otherwise the first routing rule matching both the key and the error code
+ * is used. Returns false, leaving *redirect untouched, when nothing matches.
+ */
+bool RGWBucketWebsiteConf::should_redirect(const string& key, const int http_error_code, RGWBWRoutingRule *redirect)
+{
+  if (!redirect_all.hostname.empty()) {
+    RGWBWRoutingRule redirect_all_rule;
+    redirect_all_rule.redirect_info.redirect = redirect_all;
+    // Put the 301 on the rule being returned. It used to be assigned to the
+    // redirect_all member after the copy above, which left the returned rule
+    // without it and modified the stored configuration as a side effect.
+    redirect_all_rule.redirect_info.redirect.http_redirect_code = 301;
+    *redirect = redirect_all_rule;
+    return true;
+  }
+
+  RGWBWRoutingRule *rule = nullptr;
+  if (!routing_rules.check_key_and_error_code_condition(key, http_error_code, &rule)) {
+    return false;
+  }
+
+  *redirect = *rule;
+
+  return true;
+}
+
+/* Compute the object key to serve for a website request.
+ * Returns false (without writing *effective_key) when no index document is
+ * configured. Otherwise:
+ *   ""            -> index_doc_suffix
+ *   "dir/"        -> "dir/" + index_doc_suffix
+ *   "x", !is_file -> "x/" + index_doc_suffix
+ *   "x", is_file  -> "x"
+ */
+bool RGWBucketWebsiteConf::get_effective_key(const string& key, string *effective_key, bool is_file) const
+{
+  if (index_doc_suffix.empty()) {
+    return false;
+  }
+
+  if (key.empty()) {
+    *effective_key = index_doc_suffix;
+    return true;
+  }
+
+  if (key.back() == '/') {
+    *effective_key = key + index_doc_suffix;
+  } else if (is_file) {
+    *effective_key = key;
+  } else {
+    *effective_key = key + "/" + index_doc_suffix;
+  }
+
+  return true;
+}
diff --git a/src/rgw/rgw_website.h b/src/rgw/rgw_website.h
new file mode 100644
index 00000000..8366f39c
--- /dev/null
+++ b/src/rgw/rgw_website.h
@@ -0,0 +1,246 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda@redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson@dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RGW_WEBSITE_H
+#define RGW_WEBSITE_H
+
+#include <list>
+#include <string>
+
+#include "common/ceph_json.h"
+
+#include "rgw_xml.h"
+
+/* Target of a redirect: protocol, hostname and an optional HTTP status code
+ * (0 by default). */
+struct RGWRedirectInfo
+{
+ std::string protocol;
+ std::string hostname;
+ uint16_t http_redirect_code = 0;
+
+ // Encoded as version 1: protocol, hostname, http_redirect_code, in that
+ // order; decode() reads them back in the same order.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(protocol, bl);
+ encode(hostname, bl);
+ encode(http_redirect_code, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(protocol, bl);
+ decode(hostname, bl);
+ decode(http_redirect_code, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWRedirectInfo)
+
+
+/* Redirect part of a routing rule: the redirect target plus two optional
+ * key rewrites, a replacement for the matched key prefix and a replacement
+ * for the whole key. */
+struct RGWBWRedirectInfo
+{
+ RGWRedirectInfo redirect;
+ std::string replace_key_prefix_with;
+ std::string replace_key_with;
+
+ // Encoded as version 1: redirect, replace_key_prefix_with,
+ // replace_key_with, in that order.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(redirect, bl);
+ encode(replace_key_prefix_with, bl);
+ encode(replace_key_with, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(redirect, bl);
+ decode(replace_key_prefix_with, bl);
+ decode(replace_key_with, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ void decode_xml(XMLObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWBWRedirectInfo)
+
+/* Condition part of a routing rule: a key prefix and an HTTP error code
+ * (0 by default). */
+struct RGWBWRoutingRuleCondition
+{
+ std::string key_prefix_equals;
+ uint16_t http_error_code_returned_equals = 0;
+
+ // Encoded as version 1: key_prefix_equals, then
+ // http_error_code_returned_equals.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(key_prefix_equals, bl);
+ encode(http_error_code_returned_equals, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(key_prefix_equals, bl);
+ decode(http_error_code_returned_equals, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ void decode_xml(XMLObj *obj);
+
+ bool check_key_condition(const std::string& key);
+ // Exact match on the stored code. error_code is narrowed to uint16_t before
+ // the comparison, so an unset condition (0) matches only error_code values
+ // that narrow to 0.
+ bool check_error_code_condition(const int error_code) {
+ return (uint16_t)error_code == http_error_code_returned_equals;
+ }
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRuleCondition)
+
+/* One routing rule: a condition and the redirect to apply when it holds. */
+struct RGWBWRoutingRule
+{
+ RGWBWRoutingRuleCondition condition;
+ RGWBWRedirectInfo redirect_info;
+
+ // Encoded as version 1: condition, then redirect_info.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(condition, bl);
+ encode(redirect_info, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(condition, bl);
+ decode(redirect_info, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ void decode_xml(XMLObj *obj);
+
+ // Both checks simply forward to `condition`.
+ bool check_key_condition(const std::string& key) {
+ return condition.check_key_condition(key);
+ }
+ bool check_error_code_condition(int error_code) {
+ return condition.check_error_code_condition(error_code);
+ }
+
+ // Writes the redirect URL for `key` into *redirect and, when the rule has a
+ // positive code, that code into *redirect_code.
+ void apply_rule(const std::string& default_protocol,
+ const std::string& default_hostname,
+ const std::string& key,
+ std::string *redirect,
+ int *redirect_code);
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRule)
+
+/* Ordered list of routing rules. The check_*() lookups return the first rule
+ * in list order that matches, through the `rule` out-parameter. */
+struct RGWBWRoutingRules
+{
+ std::list<RGWBWRoutingRule> rules;
+
+ // Encoded as version 1: the rules list.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(rules, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(rules, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void dump_xml(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+
+ bool check_key_condition(const std::string& key, RGWBWRoutingRule **rule);
+ bool check_error_code_condition(int error_code, RGWBWRoutingRule **rule);
+ bool check_key_and_error_code_condition(const std::string& key,
+ const int error_code,
+ RGWBWRoutingRule **rule);
+};
+WRITE_CLASS_ENCODER(RGWBWRoutingRules)
+
+/* Static-website configuration of a bucket. */
+struct RGWBucketWebsiteConf
+{
+ RGWRedirectInfo redirect_all;
+ std::string index_doc_suffix;
+ std::string error_doc;
+ std::string subdir_marker;
+ std::string listing_css_doc;
+ bool listing_enabled;
+ // The next two flags are not part of encode()/decode() below.
+ bool is_redirect_all;
+ bool is_set_index_doc;
+ RGWBWRoutingRules routing_rules;
+
+ RGWBucketWebsiteConf()
+ : listing_enabled(false) {
+ is_redirect_all = false;
+ is_set_index_doc = false;
+ }
+
+ // Encoded as version 2 (compatible with 1). The last three fields were
+ // added in version 2, which is why decode() reads them only when
+ // struct_v >= 2.
+ void encode(bufferlist& bl) const {
+ ENCODE_START(2, 1, bl);
+ encode(index_doc_suffix, bl);
+ encode(error_doc, bl);
+ encode(routing_rules, bl);
+ encode(redirect_all, bl);
+ encode(subdir_marker, bl);
+ encode(listing_css_doc, bl);
+ encode(listing_enabled, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(2, bl);
+ decode(index_doc_suffix, bl);
+ decode(error_doc, bl);
+ decode(routing_rules, bl);
+ decode(redirect_all, bl);
+ if (struct_v >= 2) {
+ decode(subdir_marker, bl);
+ decode(listing_css_doc, bl);
+ decode(listing_enabled, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ void decode_xml(XMLObj *obj);
+ void dump_xml(Formatter *f) const;
+
+ bool should_redirect(const std::string& key,
+ const int http_error_code,
+ RGWBWRoutingRule *redirect);
+
+ bool get_effective_key(const std::string& key,
+ std::string *effective_key, bool is_file) const;
+
+ const std::string& get_index_doc() const {
+ return index_doc_suffix;
+ }
+
+ // NOTE(review): redirect_all and routing_rules are not consulted here, so a
+ // configuration that only sets a redirect-all target or routing rules is
+ // reported as empty -- confirm this is what callers expect.
+ bool is_empty() const {
+ return index_doc_suffix.empty() &&
+ error_doc.empty() &&
+ subdir_marker.empty() &&
+ listing_css_doc.empty() &&
+ ! listing_enabled;
+ }
+};
+WRITE_CLASS_ENCODER(RGWBucketWebsiteConf)
+
+#endif
diff --git a/src/rgw/rgw_xml.cc b/src/rgw/rgw_xml.cc
new file mode 100755
index 00000000..4ecd9d66
--- /dev/null
+++ b/src/rgw/rgw_xml.cc
@@ -0,0 +1,500 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include <iostream>
+#include <map>
+
+#include <expat.h>
+
+#include "include/types.h"
+#include "include/utime.h"
+
+#include "rgw_xml.h"
+
+// No setup required: both cursors are default-constructed until set() is called.
+XMLObjIter::XMLObjIter() = default;
+
+// The iterator owns nothing, so there is nothing to release.
+XMLObjIter::~XMLObjIter() = default;
+
+// Point this iterator at the half-open range [_cur, _end).
+void XMLObjIter::set(const XMLObjIter::map_iter_t &_cur,
+                     const XMLObjIter::map_iter_t &_end)
+{
+  end = _end;
+  cur = _cur;
+}
+
+// Return the object under the cursor and advance, or NULL once the range
+// is exhausted.
+XMLObj *XMLObjIter::get_next()
+{
+  if (cur == end) {
+    return NULL;
+  }
+
+  XMLObj *const current = cur->second;
+  ++cur;
+  return current;
+}
+
+// Copy the key of the element under the cursor into `name`.
+// Returns false (leaving `name` untouched) when the range is exhausted.
+bool XMLObjIter::get_name(std::string& name) const
+{
+  const bool has_element = (cur != end);
+  if (has_element) {
+    name = cur->first;
+  }
+  return has_element;
+}
+
+// Print an object as "<tag name>: <character data>".
+ostream& operator<<(ostream &out, const XMLObj &obj)
+{
+  return out << obj.obj_type << ": " << obj.data;
+}
+
+// Out-of-line virtual destructor; nothing to clean up here.
+XMLObj::~XMLObj() = default;
+
+// Record the parent, the tag name and the tag's attributes.
+// `attr` is walked as a NULL-terminated sequence of name/value pairs.
+// Always returns true.
+bool XMLObj::xml_start(XMLObj *parent, const char *el, const char **attr)
+{
+  this->parent = parent;
+  obj_type = el;
+
+  for (const char **kv = attr; kv[0]; kv += 2) {
+    attr_map[kv[0]] = std::string(kv[1]);
+  }
+  return true;
+}
+
+// Default end-of-tag hook: accepts every element.
+bool XMLObj::xml_end(const char *el)
+{
+  return true;
+}
+
+// Default character-data hook: accumulate the `len` bytes at `s` into `data`.
+void XMLObj::xml_handle_data(const char *s, int len)
+{
+  data.append(s, len);
+}
+
+// Accumulated character data of this element.
+const std::string& XMLObj::get_data() const
+{
+  return data;
+}
+
+// Tag name recorded by xml_start().
+const std::string& XMLObj::get_obj_type() const
+{
+  return obj_type;
+}
+
+// Enclosing element recorded by xml_start(); nullptr if none was set.
+XMLObj *XMLObj::get_parent()
+{
+  return parent;
+}
+
+// Register `obj` as a child under tag name `el`; several children may share
+// the same name.
+void XMLObj::add_child(const std::string& el, XMLObj *obj)
+{
+  typedef std::pair<std::string, XMLObj *> child_entry_t;
+  children.insert(child_entry_t(el, obj));
+}
+
+// Look up attribute `name`. On success copy its value into `attr` and return
+// true; otherwise return false and leave `attr` unchanged.
+bool XMLObj::get_attr(const std::string& name, std::string& attr) const
+{
+  const auto found = attr_map.find(name);
+  if (found != attr_map.end()) {
+    attr = found->second;
+    return true;
+  }
+  return false;
+}
+
+// Return an iterator over every child whose tag name equals `name`.
+//
+// `children` is a multimap. multimap::find() is allowed to return any one of
+// several equal keys, so pairing it with upper_bound() could skip earlier
+// matches. equal_range() always yields the complete run of matches; when
+// nothing matches the run is empty and get_next() returns NULL right away.
+XMLObjIter XMLObj::find(const std::string& name)
+{
+  const auto range = children.equal_range(name);
+
+  XMLObjIter iter;
+  iter.set(range.first, range.second);
+  return iter;
+}
+
+// Return an iterator positioned at the first child and spanning all children.
+XMLObjIter XMLObj::find_first()
+{
+  XMLObjIter all_children;
+  all_children.set(children.begin(), children.end());
+  return all_children;
+}
+
+// Return the first child whose tag name equals `name`, or nullptr if there is
+// none.
+//
+// equal_range() is used instead of multimap::find() because find() may
+// return any of several equal keys, not necessarily the first one.
+XMLObj *XMLObj::find_first(const std::string& name)
+{
+  const auto range = children.equal_range(name);
+  if (range.first == range.second) {
+    return nullptr;
+  }
+  return range.first->second;
+}
+
+// Create the underlying expat parser and start with an empty buffer.
+// init() must still be called before parse(); it also reports a failed
+// parser creation.
+RGWXMLParser::RGWXMLParser()
+  : p(XML_ParserCreate(nullptr)),
+    buf(nullptr),
+    buf_len(0),
+    cur_obj(nullptr),
+    success(true),
+    init_called(false)
+{
+}
+
+// Release the expat parser, the accumulated input buffer and every object
+// that alloc_obj() handed to us.
+RGWXMLParser::~RGWXMLParser()
+{
+  XML_ParserFree(p);
+  free(buf);
+
+  for (XMLObj *owned : allocated_objs) {
+    delete owned;
+  }
+}
+
+// Start-tag callback. `user_data` is the RGWXMLParser registered in init().
+// Creates (or asks the derived class to create) the object for tag `el`,
+// links it below the currently open element and makes it the current one.
+void RGWXMLParser::call_xml_start(void* user_data, const char *el, const char **attr)
+{
+  RGWXMLParser *const self = static_cast<RGWXMLParser *>(user_data);
+
+  XMLObj *node = self->alloc_obj(el);
+  if (node) {
+    // heap object from the derived class: deleted in the destructor
+    self->allocated_objs.push_back(node);
+  } else {
+    // no specialised object for this tag: keep a plain XMLObj by value
+    self->unallocated_objs.push_back(XMLObj());
+    node = &self->unallocated_objs.back();
+  }
+
+  if (!node->xml_start(self->cur_obj, el, attr)) {
+    self->success = false;
+    return;
+  }
+
+  if (self->cur_obj) {
+    self->cur_obj->add_child(el, node);
+  } else {
+    // top-level element: it becomes a child of the parser itself
+    self->children.insert(std::pair<std::string, XMLObj *>(el, node));
+  }
+  self->cur_obj = node;
+
+  self->objs.push_back(node);
+}
+
+// End-tag callback. Runs xml_end() on the element being closed; on success
+// its parent becomes the current element, otherwise the parse is flagged as
+// failed and the current element is left unchanged.
+void RGWXMLParser::call_xml_end(void* user_data, const char *el)
+{
+  RGWXMLParser *const self = static_cast<RGWXMLParser *>(user_data);
+  XMLObj *const closing = self->cur_obj;
+  // fetch the parent before xml_end() runs, as the hook may be overridden
+  XMLObj *const enclosing = closing->get_parent();
+
+  if (closing->xml_end(el)) {
+    self->cur_obj = enclosing;
+  } else {
+    self->success = false;
+  }
+}
+
+// Character-data callback: forward the chunk to the currently open element.
+void RGWXMLParser::call_xml_handle_data(void* user_data, const char *s, int len)
+{
+  RGWXMLParser *const self = static_cast<RGWXMLParser *>(user_data);
+  XMLObj *const target = self->cur_obj;
+  target->xml_handle_data(s, len);
+}
+
+// Register the static callbacks with the expat parser, passing this object
+// as their user data. Returns false if the parser was never created.
+bool RGWXMLParser::init()
+{
+  if (p) {
+    init_called = true;
+    XML_SetElementHandler(p, RGWXMLParser::call_xml_start, RGWXMLParser::call_xml_end);
+    XML_SetCharacterDataHandler(p, RGWXMLParser::call_xml_handle_data);
+    XML_SetUserData(p, (void *)this);
+    return true;
+  }
+  return false;
+}
+
+// Append `len` bytes from `_buf` to the accumulated document and feed them to
+// the expat parser. `done` is non-zero for the final chunk. May be called
+// repeatedly for incremental parsing; init() must have been called first.
+//
+// Returns false if the length is invalid, memory cannot be allocated, expat
+// reports an error, or one of the element callbacks flagged a failure.
+bool RGWXMLParser::parse(const char *_buf, int len, int done)
+{
+  ceph_assert(init_called);
+
+  // Refuse negative lengths and sizes that would overflow the int-typed
+  // buf_len before touching the buffer.
+  if (len < 0 || len > INT_MAX - buf_len) {
+    return false;
+  }
+
+  const int pos = buf_len;
+
+  // Only grow the buffer when there is something to add. A zero-size
+  // realloc() may free the block and return NULL, which the failure path
+  // below would then free a second time.
+  if (len > 0) {
+    char *tmp_buf = (char *)realloc(buf, buf_len + len);
+    if (tmp_buf == NULL) {
+      free(buf);
+      buf = NULL;
+      buf_len = 0;  // keep the length in sync with the dropped buffer
+      return false;
+    }
+    buf = tmp_buf;
+
+    memcpy(&buf[pos], _buf, len);
+    buf_len += len;
+  }
+
+  // With an empty chunk there is nothing of ours to point at (buf may still
+  // be NULL), so hand expat the caller's pointer together with length 0.
+  const char *chunk = (len > 0) ? &buf[pos] : _buf;
+
+  success = true;
+  if (!XML_Parse(p, chunk, len, done)) {
+    fprintf(stderr, "Parse error at line %d:\n%s\n",
+	      (int)XML_GetCurrentLineNumber(p),
+	      XML_ErrorString(XML_GetErrorCode(p)));
+    success = false;
+  }
+
+  return success;
+}
+
+// Parse the element's character data as a base-10 unsigned long.
+// Trailing whitespace is accepted; anything else after the digits, an empty
+// field or an out-of-range value raises RGWXMLDecoder::err.
+// Note that `val` is written before validation finishes.
+void decode_xml_obj(unsigned long& val, XMLObj *obj)
+{
+  const std::string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoul(start, &p, 10);
+
+  /* Check for various possible errors */
+
+  if ((errno == ERANGE && val == ULONG_MAX) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  for (; *p != '\0'; ++p) {
+    // isspace() is only defined for unsigned char values (and EOF), so a
+    // plain char holding a byte >= 0x80 must be converted first
+    if (!isspace(static_cast<unsigned char>(*p))) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+
+// Parse the element's character data as a base-10 long.
+// Trailing whitespace is accepted; anything else after the digits, an empty
+// field or an out-of-range value raises RGWXMLDecoder::err.
+// Note that `val` is written before validation finishes.
+void decode_xml_obj(long& val, XMLObj *obj)
+{
+  // bind to the stored string instead of copying it
+  const std::string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtol(start, &p, 10);
+
+  /* Check for various possible errors */
+
+  if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  for (; *p != '\0'; ++p) {
+    // isspace() is only defined for unsigned char values (and EOF), so a
+    // plain char holding a byte >= 0x80 must be converted first
+    if (!isspace(static_cast<unsigned char>(*p))) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+// Parse the element's character data as a base-10 long long.
+// Trailing whitespace is accepted; anything else after the digits, an empty
+// field or an out-of-range value raises RGWXMLDecoder::err.
+// Note that `val` is written before validation finishes.
+void decode_xml_obj(long long& val, XMLObj *obj)
+{
+  // bind to the stored string instead of copying it
+  const std::string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoll(start, &p, 10);
+
+  /* Check for various possible errors */
+
+  if ((errno == ERANGE && (val == LLONG_MAX || val == LLONG_MIN)) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  for (; *p != '\0'; ++p) {
+    // isspace() is only defined for unsigned char values (and EOF), so a
+    // plain char holding a byte >= 0x80 must be converted first
+    if (!isspace(static_cast<unsigned char>(*p))) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+// Parse the element's character data as a base-10 unsigned long long.
+// Trailing whitespace is accepted; anything else after the digits, an empty
+// field or an out-of-range value raises RGWXMLDecoder::err.
+// Note that `val` is written before validation finishes.
+void decode_xml_obj(unsigned long long& val, XMLObj *obj)
+{
+  // bind to the stored string instead of copying it
+  const std::string& s = obj->get_data();
+  const char *start = s.c_str();
+  char *p;
+
+  errno = 0;
+  val = strtoull(start, &p, 10);
+
+  /* Check for various possible errors */
+
+  if ((errno == ERANGE && val == ULLONG_MAX) ||
+      (errno != 0 && val == 0)) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  if (p == start) {
+    throw RGWXMLDecoder::err("failed to parse number");
+  }
+
+  for (; *p != '\0'; ++p) {
+    // isspace() is only defined for unsigned char values (and EOF), so a
+    // plain char holding a byte >= 0x80 must be converted first
+    if (!isspace(static_cast<unsigned char>(*p))) {
+      throw RGWXMLDecoder::err("failed to parse number");
+    }
+  }
+}
+
+// Decode an int by parsing a long and, where long is wider than int,
+// rejecting values that do not fit.
+void decode_xml_obj(int& val, XMLObj *obj)
+{
+  long parsed;
+  decode_xml_obj(parsed, obj);
+#if LONG_MAX > INT_MAX
+  const bool fits = (parsed >= INT_MIN && parsed <= INT_MAX);
+  if (!fits) {
+    throw RGWXMLDecoder::err("integer out of range");
+  }
+#endif
+
+  val = (int)parsed;
+}
+
+// Decode an unsigned by parsing an unsigned long and, where unsigned long is
+// wider than unsigned, rejecting values that do not fit.
+void decode_xml_obj(unsigned& val, XMLObj *obj)
+{
+  unsigned long parsed;
+  decode_xml_obj(parsed, obj);
+#if ULONG_MAX > UINT_MAX
+  const bool fits = (parsed <= UINT_MAX);
+  if (!fits) {
+    throw RGWXMLDecoder::err("unsigned integer out of range");
+  }
+#endif
+
+  val = (unsigned)parsed;
+}
+
+// Decode a boolean: "true"/"false" in any letter case, otherwise an integer
+// where any non-zero value means true.
+void decode_xml_obj(bool& val, XMLObj *obj)
+{
+  const std::string s = obj->get_data();
+  const char *text = s.c_str();
+
+  if (strncasecmp(text, "true", 8) == 0) {
+    val = true;
+    return;
+  }
+  if (strncasecmp(text, "false", 8) == 0) {
+    val = false;
+    return;
+  }
+
+  // not a keyword: fall back to the integer decoder (throws on bad input)
+  int numeric;
+  decode_xml_obj(numeric, obj);
+  val = (numeric != 0);
+}
+
+// Decode base64 character data into `val`; a malformed payload is reported
+// as RGWXMLDecoder::err.
+void decode_xml_obj(bufferlist& val, XMLObj *obj)
+{
+  const std::string s = obj->get_data();
+
+  bufferlist encoded;
+  encoded.append(s.c_str(), s.size());
+
+  try {
+    val.decode_base64(encoded);
+  } catch (buffer::error& err) {
+    throw RGWXMLDecoder::err("failed to decode base64");
+  }
+}
+
+// Decode a date string via utime_t::parse_date(); throws RGWXMLDecoder::err
+// when the string cannot be parsed.
+void decode_xml_obj(utime_t& val, XMLObj *obj)
+{
+  const std::string s = obj->get_data();
+  uint64_t epoch;
+  uint64_t nsec;
+
+  if (utime_t::parse_date(s, &epoch, &nsec) != 0) {
+    throw RGWXMLDecoder::err("failed to decode utime_t");
+  }
+  val = utime_t(epoch, nsec);
+}
+
+// Emit a string value as element `name`.
+void encode_xml(const char *name, const string& val, Formatter *f) {
+  f->dump_string(name, val);
+}
+
+// Emit a C string value as element `name`.
+void encode_xml(const char *name, const char *val, Formatter *f) {
+  f->dump_string(name, val);
+}
+
+// Emit a boolean as the literal text "True" or "False".
+void encode_xml(const char *name, bool val, Formatter *f)
+{
+  const std::string text(val ? "True" : "False");
+  f->dump_string(name, text);
+}
+
+// Emit an int through the formatter's signed-integer path.
+void encode_xml(const char *name, int val, Formatter *f) {
+  f->dump_int(name, val);
+}
+
+// Emit a long through the formatter's signed-integer path.
+void encode_xml(const char *name, long val, Formatter *f) {
+  f->dump_int(name, val);
+}
+
+// Emit an unsigned through the formatter's unsigned-integer path.
+void encode_xml(const char *name, unsigned val, Formatter *f) {
+  f->dump_unsigned(name, val);
+}
+
+// Emit an unsigned long through the formatter's unsigned-integer path.
+void encode_xml(const char *name, unsigned long val, Formatter *f) {
+  f->dump_unsigned(name, val);
+}
+
+// Emit an unsigned long long through the formatter's unsigned-integer path.
+void encode_xml(const char *name, unsigned long long val, Formatter *f) {
+  f->dump_unsigned(name, val);
+}
+
+// Emit a long long through the formatter's signed-integer path.
+void encode_xml(const char *name, long long val, Formatter *f) {
+  f->dump_int(name, val);
+}
+
+// Emit a timestamp by letting utime_t::gmtime() write into the element's
+// stream.
+void encode_xml(const char *name, const utime_t& val, Formatter *f) {
+  val.gmtime(f->dump_stream(name));
+}
+
+// Emit binary data as a base64 string element.
+void encode_xml(const char *name, const bufferlist& bl, Formatter *f)
+{
+  /* work on a copy: the input is const and encoding needs a mutable list */
+  bufferlist raw = bl;
+
+  bufferlist encoded;
+  raw.encode_base64(encoded);
+
+  const std::string text(encoded.c_str(), encoded.length());
+
+  encode_xml(name, text, f);
+}
+
diff --git a/src/rgw/rgw_xml.h b/src/rgw/rgw_xml.h
new file mode 100644
index 00000000..227e1cba
--- /dev/null
+++ b/src/rgw/rgw_xml.h
@@ -0,0 +1,352 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_XML_H
+#define CEPH_RGW_XML_H
+
+#include <map>
+#include <string>
+#include <iosfwd>
+#include <include/types.h>
+#include <common/Formatter.h>
+
+class XMLObj;
+class RGWXMLParser;
+
+// Forward cursor over a [cur, end) range of an XMLObj's children.
+//
+// XMLObj stores its children in a std::multimap, so the iterator typedefs
+// are taken from that container. std::map and std::multimap iterators are
+// not guaranteed to be the same type, even though common standard libraries
+// happen to share one.
+class XMLObjIter {
+public:
+  typedef std::multimap<std::string, XMLObj *>::iterator map_iter_t;
+  // kept as a separate name for existing users; it is a mutable iterator,
+  // because set() stores it into the mutable cursor members below
+  typedef std::multimap<std::string, XMLObj *>::iterator const_map_iter_t;
+
+  XMLObjIter();
+  ~XMLObjIter();
+  // position the cursor on the half-open range [_cur, _end)
+  void set(const XMLObjIter::const_map_iter_t &_cur, const XMLObjIter::const_map_iter_t &_end);
+  // return the current object and advance; NULL once the range is exhausted
+  XMLObj *get_next();
+  // copy the current element's key into name; false if the range is exhausted
+  bool get_name(std::string& name) const;
+
+private:
+  map_iter_t cur;
+  map_iter_t end;
+};
+
+/**
+ * One element of a parsed XML document.
+ * Holds the element's tag name, its character data, its attributes as a
+ * name->value map and its child elements keyed by tag name.
+ * It is not the entry point for parsing; see RGWXMLParser for that.
+ */
+class XMLObj
+{
+private:
+  XMLObj *parent = nullptr;
+  std::string obj_type;
+
+protected:
+  std::string data;
+  std::multimap<std::string, XMLObj *> children;
+  std::map<std::string, std::string> attr_map;
+
+  // invoked at the start of the XML tag; records the parent, the tag name
+  // and the attributes
+  bool xml_start(XMLObj *parent, const char *el, const char **attr);
+  // callback invoked at the end of the XML tag
+  // override in the derived class if objects are created while parsing
+  virtual bool xml_end(const char *el);
+  // callback invoked to store the character data of the XML tag
+  // override in the derived class if the data needs to be manipulated
+  virtual void xml_handle_data(const char *s, int len);
+  // get the parent object
+  XMLObj *get_parent();
+  // add a child XML object under the given tag name
+  void add_child(const std::string& el, XMLObj *obj);
+
+public:
+  XMLObj() = default;
+  virtual ~XMLObj();
+
+  // character data of the element (as string)
+  const std::string& get_data() const;
+  // tag name of the element (as string)
+  const std::string& get_obj_type() const;
+  // copy the value of attribute `name` into `attr`; false if it is absent
+  bool get_attr(const std::string& name, std::string& attr) const;
+  // iterate over the sub-tags matching the name
+  XMLObjIter find(const std::string& name);
+  // iterate over all sub-tags, starting at the first one
+  XMLObjIter find_first();
+  // first sub-tag matching the name, or nullptr
+  XMLObj *find_first(const std::string& name);
+
+  friend ostream& operator<<(ostream &out, const XMLObj &obj);
+  friend RGWXMLParser;
+};
+
+struct XML_ParserStruct;
+
+// an XML parser is an XML object without a parent (root of the tree)
+// the parser could be used in 2 ways:
+//
+// (1) lazy object creation/intrusive API: usually used through the RGWXMLDecoder namespace (as RGWXMLDecoder::XMLParser)
+// the parser will parse the input and store info, but will not generate the target object. The object can be allocated outside
+// of the parser (stack or heap), and is required to implement the decode_xml() API for the values to be populated.
+// note that the decode_xml() calls may throw exceptions if parsing fails
+//
+// (2) object creation while parsing: a new class needs to be derived from RGWXMLParser and implement alloc_obj()
+// API that should create a set of classes derived from XMLObj implementing xml_end() to create the actual target objects
+//
+// The 2 types can be mixed and matched; control over that is in the alloc_obj() call,
+// which decides for which tags objects are allocated during parsing and for which tags object allocation is external
+
+// Root of the parsed XML tree and driver of the underlying expat parser.
+//
+// The parser owns an expat handle and a malloc'ed copy of the input, both
+// released in the destructor. Copying it would release them twice, so the
+// copy operations are deleted.
+class RGWXMLParser : public XMLObj
+{
+private:
+  XML_ParserStruct *p;                  // expat parser handle
+  char *buf;                            // all input passed to parse() so far
+  int buf_len;                          // number of valid bytes in buf
+  XMLObj *cur_obj;                      // innermost element currently open
+  std::vector<XMLObj *> objs;           // every element, in start-tag order
+  std::list<XMLObj *> allocated_objs;   // from alloc_obj(); deleted by us
+  std::list<XMLObj> unallocated_objs;   // plain elements stored by value
+  bool success;
+  bool init_called;
+
+  // calls xml_start() on each parsed object
+  // passed as static callback to actual parser, passes itself as user_data
+  static void call_xml_start(void* user_data, const char *el, const char **attr);
+  // calls xml_end() on each parsed object
+  // passed as static callback to actual parser, passes itself as user_data
+  static void call_xml_end(void* user_data, const char *el);
+  // calls xml_handle_data() on each parsed object
+  // passed as static callback to actual parser, passes itself as user_data
+  static void call_xml_handle_data(void* user_data, const char *s, int len);
+
+protected:
+  // if objects are created while parsing, this should be implemented in the derived class
+  // and be a factory for creating the classes derived from XMLObj
+  // note that not all sub-tags have to be constructed here; any tag which is not
+  // constructed here is represented by a plain XMLObj and decoded lazily when
+  // decode_xml() is invoked on it
+  //
+  // note that in case of different tags sharing the same name at different levels
+  // this method should not be used
+  virtual XMLObj *alloc_obj(const char *el) {
+    return nullptr;
+  }
+
+public:
+  RGWXMLParser();
+  ~RGWXMLParser() override;
+
+  // non-copyable: see the ownership note above the class
+  RGWXMLParser(const RGWXMLParser&) = delete;
+  RGWXMLParser& operator=(const RGWXMLParser&) = delete;
+
+  // initialize the parser, must be called before parsing
+  bool init();
+  // parse the XML buffer (can be invoked multiple times for incremental parsing)
+  // receives the buffer to parse, its length, and boolean indication (0,1)
+  // whether this is the final chunk of the buffer
+  bool parse(const char *buf, int len, int done);
+  // get the XML blob being parsed
+  // the blob is NOT NUL-terminated: parse() stores exactly the bytes it was given
+  const char *get_xml() const { return buf; }
+};
+
+// Helpers for decoding values out of a parsed XML tree.
+namespace RGWXMLDecoder {
+  // exception type thrown by the decode helpers; carries a readable message
+  struct err {
+    std::string message;
+
+    explicit err(const std::string& m) : message(m) {}
+  };
+
+  using XMLParser = RGWXMLParser;
+
+  // decode the first sub-tag `name` of obj into val; returns whether it was found
+  template<class T>
+  bool decode_xml(const char *name, T& val, XMLObj* obj, bool mandatory = false);
+
+  // decode every sub-tag `name` of obj into v; returns whether any was found
+  template<class T>
+  bool decode_xml(const char *name, std::vector<T>& v, XMLObj* obj, bool mandatory = false);
+
+  // decode the first sub-tag `name` of obj into container with the help of cb
+  template<class C>
+  bool decode_xml(const char *name, C& container, void (*cb)(C&, XMLObj *obj), XMLObj *obj, bool mandatory = false);
+
+  // decode the first sub-tag `name` of obj into val, or use default_val if absent
+  template<class T>
+  void decode_xml(const char *name, T& val, T& default_val, XMLObj* obj);
+}
+
+static inline ostream& operator<<(ostream &out, RGWXMLDecoder::err& err)
+{
+ return out << err.message;
+}
+
+// Generic fallback: any type providing decode_xml(XMLObj *) decodes itself.
+template<class T>
+void decode_xml_obj(T& val, XMLObj *obj) {
+  val.decode_xml(obj);
+}
+
+// Strings take the element's character data verbatim.
+static inline void decode_xml_obj(string& val, XMLObj *obj) {
+  val = obj->get_data();
+}
+
+void decode_xml_obj(unsigned long long& val, XMLObj *obj);
+void decode_xml_obj(long long& val, XMLObj *obj);
+void decode_xml_obj(unsigned long& val, XMLObj *obj);
+void decode_xml_obj(long& val, XMLObj *obj);
+void decode_xml_obj(unsigned& val, XMLObj *obj);
+void decode_xml_obj(int& val, XMLObj *obj);
+void decode_xml_obj(bool& val, XMLObj *obj);
+void decode_xml_obj(bufferlist& val, XMLObj *obj);
+class utime_t;
+void decode_xml_obj(utime_t& val, XMLObj *obj);
+
+// Replace the contents of `l` with one decoded T per child of `obj` whose
+// tag name is `name`.
+template<class T>
+void do_decode_xml_obj(list<T>& l, const string& name, XMLObj *obj)
+{
+  l.clear();
+
+  XMLObjIter matches = obj->find(name);
+  for (XMLObj *child = matches.get_next(); child; child = matches.get_next()) {
+    T val;
+    decode_xml_obj(val, child);
+    l.push_back(val);
+  }
+}
+
+// Decode the first child `name` of `obj` into `val`.
+// Returns true if the child exists. If it does not, `val` is reset to T()
+// and false is returned, unless `mandatory` is set, in which case err is
+// thrown. Decoding failures are rethrown with the field name prepended.
+template<class T>
+bool RGWXMLDecoder::decode_xml(const char *name, T& val, XMLObj *obj, bool mandatory)
+{
+  XMLObjIter matches = obj->find(name);
+  XMLObj *const child = matches.get_next();
+
+  if (child) {
+    try {
+      decode_xml_obj(val, child);
+    } catch (err& e) {
+      string s = string(name) + ": ";
+      s.append(e.message);
+      throw err(s);
+    }
+    return true;
+  }
+
+  if (mandatory) {
+    string s = "missing mandatory field " + string(name);
+    throw err(s);
+  }
+  val = T();
+  return false;
+}
+
+// Decode every child `name` of `obj` into `v`, which is cleared first.
+// Returns true if at least one child exists; a missing mandatory field
+// throws err. Decoding failures are rethrown with the field name prepended.
+template<class T>
+bool RGWXMLDecoder::decode_xml(const char *name, std::vector<T>& v, XMLObj *obj, bool mandatory)
+{
+  v.clear();
+
+  XMLObjIter matches = obj->find(name);
+  XMLObj *child = matches.get_next();
+
+  if (!child) {
+    if (mandatory) {
+      string s = "missing mandatory field " + string(name);
+      throw err(s);
+    }
+    return false;
+  }
+
+  for (; child; child = matches.get_next()) {
+    T val;
+    try {
+      decode_xml_obj(val, child);
+    } catch (err& e) {
+      string s = string(name) + ": ";
+      s.append(e.message);
+      throw err(s);
+    }
+    v.push_back(val);
+  }
+  return true;
+}
+
+// Decode the first child `name` of `obj` into `container` (cleared first)
+// through the three-argument decode_xml_obj(container, cb, child) overload.
+// Returns true if the child exists; a missing mandatory field throws err.
+// Decoding failures are rethrown with the field name prepended.
+template<class C>
+bool RGWXMLDecoder::decode_xml(const char *name, C& container, void (*cb)(C&, XMLObj *), XMLObj *obj, bool mandatory)
+{
+  container.clear();
+
+  XMLObjIter matches = obj->find(name);
+  XMLObj *const child = matches.get_next();
+
+  if (child) {
+    try {
+      decode_xml_obj(container, cb, child);
+    } catch (err& e) {
+      string s = string(name) + ": ";
+      s.append(e.message);
+      throw err(s);
+    }
+    return true;
+  }
+
+  if (mandatory) {
+    string s = "missing mandatory field " + string(name);
+    throw err(s);
+  }
+  return false;
+}
+
+// Decode the first child `name` of `obj` into `val`, falling back to
+// `default_val` when the child is absent. If decoding fails, `val` is set to
+// `default_val` and the error is rethrown with the field name prepended.
+template<class T>
+void RGWXMLDecoder::decode_xml(const char *name, T& val, T& default_val, XMLObj *obj)
+{
+  XMLObjIter matches = obj->find(name);
+  XMLObj *const child = matches.get_next();
+
+  if (child) {
+    try {
+      decode_xml_obj(val, child);
+    } catch (err& e) {
+      val = default_val;
+      string s = string(name) + ": ";
+      s.append(e.message);
+      throw err(s);
+    }
+    return;
+  }
+
+  val = default_val;
+}
+
+// Generic fallback: wrap val.dump_xml() in an object section called `name`.
+template<class T>
+static void encode_xml(const char *name, const T& val, ceph::Formatter *f) {
+  f->open_object_section(name);
+  val.dump_xml(f);
+  f->close_section();
+}
+
+// Same as the generic fallback, but the section is opened in namespace `ns`.
+template<class T>
+static void encode_xml(const char *name, const char *ns, const T& val, ceph::Formatter *f) {
+  f->open_object_section_in_ns(name, ns);
+  val.dump_xml(f);
+  f->close_section();
+}
+
+void encode_xml(const char *name, const string& val, ceph::Formatter *f);
+void encode_xml(const char *name, const char *val, ceph::Formatter *f);
+void encode_xml(const char *name, bool val, ceph::Formatter *f);
+void encode_xml(const char *name, int val, ceph::Formatter *f);
+void encode_xml(const char *name, unsigned val, ceph::Formatter *f);
+void encode_xml(const char *name, long val, ceph::Formatter *f);
+void encode_xml(const char *name, unsigned long val, ceph::Formatter *f);
+void encode_xml(const char *name, long long val, ceph::Formatter *f);
+void encode_xml(const char *name, const utime_t& val, ceph::Formatter *f);
+void encode_xml(const char *name, const bufferlist& bl, ceph::Formatter *f);
+void encode_xml(const char *name, long long unsigned val, ceph::Formatter *f);
+
+// Emit list `l` as an array section `name` whose entries are each encoded
+// under `entry_name`.
+template<class T>
+static void do_encode_xml(const char *name, const std::list<T>& l, const char *entry_name, ceph::Formatter *f)
+{
+  f->open_array_section(name);
+  for (const T& entry : l) {
+    encode_xml(entry_name, entry, f);
+  }
+  f->close_section();
+}
+
+
+
+#endif
diff --git a/src/rgw/rgw_xml_enc.cc b/src/rgw/rgw_xml_enc.cc
new file mode 100644
index 00000000..5473c2f6
--- /dev/null
+++ b/src/rgw/rgw_xml_enc.cc
@@ -0,0 +1,152 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Yehuda Sadeh <yehuda@redhat.com>
+ * Copyright (C) 2015 Robin H. Johnson <robin.johnson@dreamhost.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "rgw_common.h"
+#include "rgw_xml.h"
+
+#include "common/Formatter.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Emit the redirect settings, skipping empty strings and a redirect code
+// that is not positive.
+void RGWBWRedirectInfo::dump_xml(Formatter *f) const
+{
+  // string-valued elements are only written when they carry a value
+  const auto emit_if_set = [f](const char *tag, const auto& value) {
+    if (!value.empty()) {
+      encode_xml(tag, value, f);
+    }
+  };
+
+  emit_if_set("Protocol", redirect.protocol);
+  emit_if_set("HostName", redirect.hostname);
+  if (redirect.http_redirect_code > 0) {
+    encode_xml("HttpRedirectCode", (int)redirect.http_redirect_code, f);
+  }
+  emit_if_set("ReplaceKeyPrefixWith", replace_key_prefix_with);
+  emit_if_set("ReplaceKeyWith", replace_key_with);
+}
+
+#define WEBSITE_HTTP_REDIRECT_CODE_MIN 300
+#define WEBSITE_HTTP_REDIRECT_CODE_MAX 400
+// Decode the redirect settings. Throws RGWXMLDecoder::err when
+// HttpRedirectCode is present but not strictly between the MIN and MAX
+// bounds, or when both key-replacement elements are present.
+void RGWBWRedirectInfo::decode_xml(XMLObj *obj) {
+  RGWXMLDecoder::decode_xml("Protocol", redirect.protocol, obj);
+  RGWXMLDecoder::decode_xml("HostName", redirect.hostname, obj);
+
+  int code = 0;
+  if (RGWXMLDecoder::decode_xml("HttpRedirectCode", code, obj)) {
+    const bool out_of_range = (code <= WEBSITE_HTTP_REDIRECT_CODE_MIN ||
+                               code >= WEBSITE_HTTP_REDIRECT_CODE_MAX);
+    if (out_of_range) {
+      throw RGWXMLDecoder::err("The provided HTTP redirect code is not valid. Valid codes are 3XX except 300.");
+    }
+  }
+  redirect.http_redirect_code = code;
+
+  const bool got_prefix = RGWXMLDecoder::decode_xml("ReplaceKeyPrefixWith", replace_key_prefix_with, obj);
+  const bool got_key = RGWXMLDecoder::decode_xml("ReplaceKeyWith", replace_key_with, obj);
+  if (got_prefix && got_key) {
+    throw RGWXMLDecoder::err("You can only define ReplaceKeyPrefix or ReplaceKey but not both.");
+  }
+}
+
+void RGWBWRoutingRuleCondition::dump_xml(Formatter *f) const
+{
+ if (!key_prefix_equals.empty()) {
+ encode_xml("KeyPrefixEquals", key_prefix_equals, f);
+ }
+ if (http_error_code_returned_equals > 0) {
+ encode_xml("HttpErrorCodeReturnedEquals", (int)http_error_code_returned_equals, f);
+ }
+}
+
+#define WEBSITE_HTTP_ERROR_CODE_RETURNED_EQUALS_MIN 400
+#define WEBSITE_HTTP_ERROR_CODE_RETURNED_EQUALS_MAX 600
+// Decode the routing-rule condition. Throws RGWXMLDecoder::err when
+// HttpErrorCodeReturnedEquals is present but outside [MIN, MAX).
+void RGWBWRoutingRuleCondition::decode_xml(XMLObj *obj) {
+  RGWXMLDecoder::decode_xml("KeyPrefixEquals", key_prefix_equals, obj);
+  int code = 0;
+  bool has_http_error_code_returned_equals = RGWXMLDecoder::decode_xml("HttpErrorCodeReturnedEquals", code, obj);
+  if (has_http_error_code_returned_equals &&
+      !(code >= WEBSITE_HTTP_ERROR_CODE_RETURNED_EQUALS_MIN &&
+        code < WEBSITE_HTTP_ERROR_CODE_RETURNED_EQUALS_MAX)) {
+    // this validates the error code of the condition, not a redirect code
+    throw RGWXMLDecoder::err("The provided HTTP error code is not valid. Valid codes are 4XX or 5XX.");
+  }
+  http_error_code_returned_equals = code;
+}
+
+// Emit the rule as a Condition section followed by a Redirect section.
+void RGWBWRoutingRule::dump_xml(Formatter *f) const {
+  encode_xml("Condition", condition, f);
+  encode_xml("Redirect", redirect_info, f);
+}
+
+// Decode the optional Condition and Redirect children of a routing rule.
+void RGWBWRoutingRule::decode_xml(XMLObj *obj)
+{
+  RGWXMLDecoder::decode_xml("Condition", condition, obj);
+  RGWXMLDecoder::decode_xml("Redirect", redirect_info, obj);
+}
+
+// Emit a list of routing rules as an array section called `name`, with one
+// "RoutingRule" entry per rule. The section name used to be hard-coded and
+// the parameter ignored.
+static void encode_xml(const char *name, const std::list<RGWBWRoutingRule>& l, ceph::Formatter *f)
+{
+  do_encode_xml(name, l, "RoutingRule", f);
+}
+
+// Emit the website configuration. Each section is written only when its
+// source field is non-empty.
+void RGWBucketWebsiteConf::dump_xml(Formatter *f) const
+{
+  const bool has_redirect_host = !redirect_all.hostname.empty();
+  if (has_redirect_host) {
+    f->open_object_section("RedirectAllRequestsTo");
+    encode_xml("HostName", redirect_all.hostname, f);
+    if (!redirect_all.protocol.empty()) {
+      encode_xml("Protocol", redirect_all.protocol, f);
+    }
+    f->close_section();
+  }
+
+  const bool has_index_doc = !index_doc_suffix.empty();
+  if (has_index_doc) {
+    f->open_object_section("IndexDocument");
+    encode_xml("Suffix", index_doc_suffix, f);
+    f->close_section();
+  }
+
+  const bool has_error_doc = !error_doc.empty();
+  if (has_error_doc) {
+    f->open_object_section("ErrorDocument");
+    encode_xml("Key", error_doc, f);
+    f->close_section();
+  }
+
+  const bool has_rules = !routing_rules.rules.empty();
+  if (has_rules) {
+    encode_xml("RoutingRules", routing_rules.rules, f);
+  }
+}
+
+// Decode every "RoutingRule" child of `obj` into `l`.
+void decode_xml_obj(list<RGWBWRoutingRule>& l, XMLObj *obj) {
+  do_decode_xml_obj(l, "RoutingRule", obj);
+}
+
+// Decode the website configuration. If a RedirectAllRequestsTo element is
+// present, only it is read (HostName is mandatory there); otherwise the
+// index document, error document and routing rules are read.
+void RGWBucketWebsiteConf::decode_xml(XMLObj *obj) {
+  XMLObj *const redirect_node = obj->find_first("RedirectAllRequestsTo");
+  if (redirect_node) {
+    is_redirect_all = true;
+    RGWXMLDecoder::decode_xml("HostName", redirect_all.hostname, redirect_node, true);
+    RGWXMLDecoder::decode_xml("Protocol", redirect_all.protocol, redirect_node);
+    return;
+  }
+
+  XMLObj *const index_node = obj->find_first("IndexDocument");
+  if (index_node) {
+    is_set_index_doc = true;
+    RGWXMLDecoder::decode_xml("Suffix", index_doc_suffix, index_node);
+  }
+
+  XMLObj *const error_node = obj->find_first("ErrorDocument");
+  if (error_node) {
+    RGWXMLDecoder::decode_xml("Key", error_doc, error_node);
+  }
+
+  RGWXMLDecoder::decode_xml("RoutingRules", routing_rules.rules, obj);
+}
+
diff --git a/src/rgw/rgw_zone.cc b/src/rgw/rgw_zone.cc
new file mode 100644
index 00000000..667782d1
--- /dev/null
+++ b/src/rgw/rgw_zone.cc
@@ -0,0 +1,1937 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/errno.h"
+
+#include "rgw_zone.h"
+#include "rgw_realm_watcher.h"
+#include "rgw_meta_sync_status.h"
+#include "rgw_sync.h"
+
+#include "services/svc_zone.h"
+#include "services/svc_sys_obj.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Built-in string defaults used by the realm / period / zonegroup / zone
+// code in this file: object-name prefixes, fixed object ids, default entity
+// names, default root pool names and pool-name suffixes. Several accessors
+// below fall back to these values when the matching configuration option is
+// empty.
+namespace rgw_zone_defaults {
+
+std::string zone_info_oid_prefix = "zone_info.";
+std::string zone_names_oid_prefix = "zone_names.";
+std::string region_info_oid_prefix = "region_info.";
+std::string realm_names_oid_prefix = "realms_names.";
+std::string zone_group_info_oid_prefix = "zonegroup_info.";
+std::string realm_info_oid_prefix = "realms.";
+std::string default_region_info_oid = "default.region";
+std::string default_zone_group_info_oid = "default.zonegroup";
+std::string period_info_oid_prefix = "periods.";
+std::string period_latest_epoch_info_oid = ".latest_epoch";
+std::string region_map_oid = "region_map";
+std::string default_realm_info_oid = "default.realm";
+std::string default_zonegroup_name = "default";
+std::string default_zone_name = "default";
+std::string zonegroup_names_oid_prefix = "zonegroups_names.";
+std::string RGW_DEFAULT_ZONE_ROOT_POOL = "rgw.root";
+std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL = "rgw.root";
+std::string RGW_DEFAULT_REALM_ROOT_POOL = "rgw.root";
+std::string RGW_DEFAULT_PERIOD_ROOT_POOL = "rgw.root";
+std::string default_bucket_index_pool_suffix = "rgw.buckets.index";
+std::string default_storage_extra_pool_suffix = "rgw.buckets.non-ec";
+std::string avail_pools = ".pools.avail";
+std::string default_storage_pool_suffix = "rgw.buckets.data";
+
+}
+
+using namespace rgw_zone_defaults;
+
+#define FIRST_EPOCH 1
+
+// Dump the default zonegroup id under the JSON key "default_zonegroup".
+void RGWDefaultZoneGroupInfo::dump(Formatter *f) const {
+ encode_json("default_zonegroup", default_zonegroup, f);
+}
+
+// Read the default zonegroup from JSON. The "default_zonegroup" key is tried
+// first; if that leaves the field empty, the "default_region" key is read
+// into the same field instead.
+void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj)
+{
+  JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj);
+  if (!default_zonegroup.empty()) {
+    return;
+  }
+
+  /* backward compatibility with region */
+  JSONDecoder::decode_json("default_region", default_zonegroup, obj);
+}
+
+// Pool holding zonegroup metadata: the rgw_zonegroup_root_pool option when
+// it is set, otherwise RGW_DEFAULT_ZONEGROUP_ROOT_POOL.
+rgw_pool RGWZoneGroup::get_pool(CephContext *cct_) const
+{
+  const auto& configured = cct_->_conf->rgw_zonegroup_root_pool;
+  if (!configured.empty()) {
+    return rgw_pool(configured);
+  }
+  return rgw_pool(RGW_DEFAULT_ZONEGROUP_ROOT_POOL);
+}
+
+// Set this object up as the default zonegroup and store it.
+//
+// Steps: name the zonegroup and mark it master, register the
+// "default-placement" target, create (or, on -EEXIST, reload) the default
+// zone params, record that zone as the master zone, then call create().
+// An -EEXIST from create() is handled by clearing the id and re-running
+// init() to load what is already stored.
+//
+// @param old_format when true, name is overwritten with id before
+//                   post-processing.
+// @return 0 on success, otherwise the negative error of the failing step.
+int RGWZoneGroup::create_default(bool old_format)
+{
+ name = default_zonegroup_name;
+ api_name = default_zonegroup_name;
+ is_master = true;
+
+ RGWZoneGroupPlacementTarget placement_target;
+ placement_target.name = "default-placement";
+ placement_targets[placement_target.name] = placement_target;
+ default_placement.name = "default-placement";
+
+ RGWZoneParams zone_params(default_zone_name);
+
+ // third argument false: init() is asked not to set up the stored object
+ int r = zone_params.init(cct, sysobj_svc, false);
+ if (r < 0) {
+ ldout(cct, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ r = zone_params.create_default();
+ if (r < 0 && r != -EEXIST) {
+ ldout(cct, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl;
+ return r;
+ } else if (r == -EEXIST) {
+ // the default zone params already exist: drop our id and re-init
+ ldout(cct, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl;
+ zone_params.clear_id();
+ r = zone_params.init(cct, sysobj_svc);
+ if (r < 0) {
+ ldout(cct, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ ldout(cct, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id()
+ << dendl;
+ }
+
+ // operator[] inserts the entry for the default zone if it is not there yet
+ RGWZone& default_zone = zones[zone_params.get_id()];
+ default_zone.name = zone_params.get_name();
+ default_zone.id = zone_params.get_id();
+ master_zone = default_zone.id;
+
+ r = create();
+ if (r < 0 && r != -EEXIST) {
+ ldout(cct, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+
+ if (r == -EEXIST) {
+ // same recovery as above, for the zonegroup object itself
+ ldout(cct, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl;
+ id.clear();
+ r = init(cct, sysobj_svc);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ if (old_format) {
+ name = id;
+ }
+
+ post_process_params();
+
+ return 0;
+}
+
+// Name of the object that records the default zonegroup.
+//
+// With old_region_format the region oid is used (configuration option, or
+// default_region_info_oid when the option is empty). Otherwise the zonegroup
+// oid is used (option or built-in default) with "." + realm_id appended.
+const string RGWZoneGroup::get_default_oid(bool old_region_format) const
+{
+  if (old_region_format) {
+    const auto& region_oid = cct->_conf->rgw_default_region_info_oid;
+    if (!region_oid.empty()) {
+      return region_oid;
+    }
+    return default_region_info_oid;
+  }
+
+  string oid = cct->_conf->rgw_default_zonegroup_info_oid;
+  if (oid.empty()) {
+    oid = default_zone_group_info_oid;
+  }
+
+  oid += "." + realm_id;
+  return oid;
+}
+
+// Prefix of the zonegroup info object name: the region prefix for the old
+// region format, the zonegroup prefix otherwise.
+const string& RGWZoneGroup::get_info_oid_prefix(bool old_region_format) const
+{
+  return old_region_format ? region_info_oid_prefix : zone_group_info_oid_prefix;
+}
+
+// Prefix of the zonegroup name objects (zonegroup_names_oid_prefix).
+const string& RGWZoneGroup::get_names_oid_prefix() const
+{
+ return zonegroup_names_oid_prefix;
+}
+
+// Zonegroup name taken from the rgw_zonegroup configuration option.
+const string& RGWZoneGroup::get_predefined_name(CephContext *cct) const {
+ return cct->_conf->rgw_zonegroup;
+}
+
+// Non-zero when other_zonegroup refers to this zonegroup: either it is the
+// same id, or it is empty and this zonegroup is the master.
+int RGWZoneGroup::equals(const string& other_zonegroup) const
+{
+  const bool empty_means_master = is_master && other_zonegroup.empty();
+  return empty_means_master || (id == other_zonegroup);
+}
+
+// Add a zone to this zonegroup, or modify one that is already a member, and
+// store the result through update().
+//
+// Pointer arguments are optional: a null pointer leaves the matching zone
+// field untouched. An empty endpoints list likewise keeps the existing
+// endpoints.
+//
+// @param zone_params     supplies the zone id and name
+// @param is_master       if set and true, the zone becomes master_zone; if
+//                        set and false while the zone is the current master,
+//                        master_zone is cleared
+// @param read_only       new read_only flag
+// @param endpoints       replacement endpoint list (ignored when empty)
+// @param ptier_type      new tier type; must name a module known to sync_mgr
+// @param psync_from_all  new sync_from_all flag
+// @param sync_from       zones to add to the sync_from set
+// @param sync_from_rm    zones to remove from the sync_from set
+// @param predirect_zone  new redirect zone
+// @param sync_mgr        used to validate ptier_type
+// @return -EEXIST if a different zone already uses the name, -ENOENT if the
+//         tier type is unknown, otherwise the result of update().
+int RGWZoneGroup::add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
+                           const list<string>& endpoints, const string *ptier_type,
+                           bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm,
+                           string *predirect_zone, RGWSyncModulesManager *sync_mgr)
+{
+  auto& zone_id = zone_params.get_id();
+  auto& zone_name = zone_params.get_name();
+
+  // check for duplicate zone name on insert
+  if (!zones.count(zone_id)) {
+    for (const auto& zone : zones) {
+      if (zone.second.name == zone_name) {
+        ldout(cct, 0) << "ERROR: found existing zone name " << zone_name
+                      << " (" << zone.first << ") in zonegroup " << get_name() << dendl;
+        return -EEXIST;
+      }
+    }
+  }
+
+  // Validate the tier type before any member is touched, so that a bad
+  // module name cannot leave master_zone or the zones map half-updated in
+  // memory when we bail out with -ENOENT.
+  if (ptier_type && !sync_mgr->get_module(*ptier_type, nullptr)) {
+    ldout(cct, 0) << "ERROR: could not found sync module: " << *ptier_type
+                  << ", valid sync modules: "
+                  << sync_mgr->get_registered_module_names()
+                  << dendl;
+    return -ENOENT;
+  }
+
+  if (is_master) {
+    if (*is_master) {
+      if (!master_zone.empty() && master_zone != zone_id) {
+        ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone << dendl;
+      }
+      master_zone = zone_id;
+    } else if (master_zone == zone_id) {
+      master_zone.clear();
+    }
+  }
+
+  // operator[] creates the entry when the zone is new
+  RGWZone& zone = zones[zone_id];
+  zone.name = zone_name;
+  zone.id = zone_id;
+  if (!endpoints.empty()) {
+    zone.endpoints = endpoints;
+  }
+  if (read_only) {
+    zone.read_only = *read_only;
+  }
+  if (ptier_type) {
+    zone.tier_type = *ptier_type;
+  }
+
+  if (psync_from_all) {
+    zone.sync_from_all = *psync_from_all;
+  }
+
+  if (predirect_zone) {
+    zone.redirect_zone = *predirect_zone;
+  }
+
+  // iterate by const reference: no per-element string copy
+  for (const auto& add : sync_from) {
+    zone.sync_from.insert(add);
+  }
+
+  for (const auto& rm : sync_from_rm) {
+    zone.sync_from.erase(rm);
+  }
+
+  post_process_params();
+
+  return update();
+}
+
+
+// Set the stored name of the zone identified by zone_params' id to
+// zone_params' name, then persist through update().
+int RGWZoneGroup::rename_zone(const RGWZoneParams& zone_params)
+{
+  // operator[] lookup: an id that is not in the map yet gets a new entry
+  zones[zone_params.get_id()].name = zone_params.get_name();
+  return update();
+}
+
+// Bring derived zonegroup state in line with the current zones map:
+//  - pick the first zone as master when none is set,
+//  - set log_data on every zone (true only when there is more than one zone),
+//  - add a placement target for every placement pool name found in each
+//    zone's params that the zonegroup does not list yet,
+//  - initialise default_placement from the first placement target when empty.
+void RGWZoneGroup::post_process_params()
+{
+  const bool multi_zone = zones.size() > 1;
+
+  if (master_zone.empty() && !zones.empty()) {
+    master_zone = zones.begin()->first;
+  }
+
+  for (auto& zone_entry : zones) {
+    RGWZone& zone = zone_entry.second;
+    zone.log_data = multi_zone;
+
+    RGWZoneParams zone_params(zone.id, zone.name);
+    const int ret = zone_params.init(cct, sysobj_svc);
+    if (ret < 0) {
+      // params could not be loaded: skip this zone's placement pools
+      ldout(cct, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl;
+      continue;
+    }
+
+    for (const auto& pool_entry : zone_params.placement_pools) {
+      const string& placement_name = pool_entry.first;
+      if (placement_targets.count(placement_name) == 0) {
+        RGWZoneGroupPlacementTarget placement_target;
+        placement_target.name = placement_name;
+        placement_targets[placement_name] = placement_target;
+      }
+    }
+  }
+
+  if (default_placement.empty() && !placement_targets.empty()) {
+    default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD);
+  }
+}
+
+// Remove a zone from this zonegroup and persist the change.
+// @return -ENOENT when zone_id is not a member, otherwise update()'s result.
+int RGWZoneGroup::remove_zone(const std::string& zone_id)
+{
+  auto found = zones.find(zone_id);
+  if (found == zones.end()) {
+    ldout(cct, 0) << "zone id " << zone_id << " is not a part of zonegroup "
+                  << name << dendl;
+    return -ENOENT;
+  }
+  zones.erase(found);
+
+  post_process_params();
+  return update();
+}
+
+// Look up the id of the default zonegroup.
+//
+// When realm_id is not set, the default realm is initialised first and its
+// id is stored in realm_id. If that fails, the id is read by the name
+// default_zonegroup_name instead.
+int RGWZoneGroup::read_default_id(string& default_id, bool old_format)
+{
+  if (realm_id.empty()) {
+    /* try using default realm */
+    RGWRealm default_realm;
+    const int r = default_realm.init(cct, sysobj_svc);
+    if (r < 0) {
+      // no default realm exists
+      return read_id(default_zonegroup_name, default_id);
+    }
+    realm_id = default_realm.get_id();
+  }
+
+  return RGWSystemMetaObj::read_default_id(default_id, old_format);
+}
+
+// Record this zonegroup as the default one.
+//
+// When realm_id is not set, the default realm is initialised to obtain it;
+// failure to do so is reported as -EINVAL.
+int RGWZoneGroup::set_as_default(bool exclusive)
+{
+  if (realm_id.empty()) {
+    /* try using default realm */
+    RGWRealm default_realm;
+    const int r = default_realm.init(cct, sysobj_svc);
+    if (r < 0) {
+      ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-r) << dendl;
+      return -EINVAL;
+    }
+    realm_id = default_realm.get_id();
+  }
+
+  return RGWSystemMetaObj::set_as_default(exclusive);
+}
+
+// Re-bind this object to a context and sysobj service without reading
+// anything: stores both pointers and takes the zone service from the
+// sysobj service.
+void RGWSystemMetaObj::reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc)
+{
+ cct = _cct;
+ sysobj_svc = _sysobj_svc;
+ zone_svc = _sysobj_svc->get_zone_svc();
+}
+
+// Bind the object to its services and, unless setup_obj is false, resolve
+// its id and load its stored info.
+//
+// Id resolution when id is empty:
+//   - old_format: the name is used as the id;
+//   - otherwise an empty name is replaced by get_predefined_name();
+//   - a name that is still empty falls back to use_default();
+//   - a non-empty name is translated with read_id().
+//
+// @return 0 when setup_obj is false; otherwise the error of the failing
+//         lookup or the result of read_info().
+int RGWSystemMetaObj::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj, bool old_format)
+{
+ reinit_instance(_cct, _sysobj_svc);
+
+ if (!setup_obj)
+ return 0;
+
+ if (old_format && id.empty()) {
+ id = name;
+ }
+
+ if (id.empty()) {
+ int r;
+ if (name.empty()) {
+ name = get_predefined_name(cct);
+ }
+ if (name.empty()) {
+ r = use_default(old_format);
+ if (r < 0) {
+ return r;
+ }
+ } else if (!old_format) {
+ r = read_id(name, id);
+ if (r < 0) {
+ // -ENOENT is returned without logging; other errors are logged first
+ if (r != -ENOENT) {
+ ldout(cct, 0) << "error in read_id for object name: " << name << " : " << cpp_strerror(-r) << dendl;
+ }
+ return r;
+ }
+ }
+ }
+
+ return read_info(id, old_format);
+}
+
+// Read and decode the "default" marker object named oid from this type's
+// pool into default_info.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWSystemMetaObj::read_default(RGWDefaultSystemMetaObjInfo& default_info, const string& oid)
+{
+  auto pool = get_pool(cct);
+  bufferlist bl;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    return ret;
+  }
+
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(default_info, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Fetch the id recorded in the default marker object.
+// default_id is written only when the read succeeds.
+int RGWSystemMetaObj::read_default_id(string& default_id, bool old_format)
+{
+  RGWDefaultSystemMetaObjInfo info;
+  const int ret = read_default(info, get_default_oid(old_format));
+  if (ret >= 0) {
+    default_id = info.default_id;
+    return 0;
+  }
+  return ret;
+}
+
+// Load the default object's id straight into this object's id member.
+int RGWSystemMetaObj::use_default(bool old_format)
+{
+ return read_default_id(id, old_format);
+}
+
+// Write this object's id into the default marker object.
+// @param exclusive forwarded to the write op via set_exclusive().
+// @return 0 on success or the negative write error.
+int RGWSystemMetaObj::set_as_default(bool exclusive)
+{
+  const string oid = get_default_oid();
+  rgw_pool pool(get_pool(cct));
+
+  RGWDefaultSystemMetaObjInfo default_info;
+  default_info.default_id = id;
+
+  bufferlist bl;
+  using ceph::encode;
+  encode(default_info, bl);
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  const int ret = sysobj.wop()
+                        .set_exclusive(exclusive)
+                        .write(bl);
+  return ret < 0 ? ret : 0;
+}
+
+// Translate an object name to its id by reading the name object
+// (names prefix + obj_name) and decoding the RGWNameToId stored in it.
+// object_id is written only on success.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWSystemMetaObj::read_id(const string& obj_name, string& object_id)
+{
+  rgw_pool pool(get_pool(cct));
+  bufferlist bl;
+  string oid = get_names_oid_prefix() + obj_name;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    return ret;
+  }
+
+  RGWNameToId mapping;
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(mapping, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
+    return -EIO;
+  }
+
+  object_id = mapping.obj_id;
+  return 0;
+}
+
+// Remove the stored objects that belong to this entity, in this order:
+//   1. the default marker, but only if it currently points at this object;
+//   2. the name object (skipped for old_format);
+//   3. the info object (keyed by name for old_format, by id otherwise).
+// The first two steps return on failure; the last one logs and returns the
+// remove result.
+int RGWSystemMetaObj::delete_obj(bool old_format)
+{
+ rgw_pool pool(get_pool(cct));
+
+ auto obj_ctx = sysobj_svc->init_obj_ctx();
+
+ /* check to see if obj is the default */
+ RGWDefaultSystemMetaObjInfo default_info;
+ int ret = read_default(default_info, get_default_oid(old_format));
+ // a missing default marker (-ENOENT) is not an error here
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+ if (default_info.default_id == id || (old_format && default_info.default_id == name)) {
+ string oid = get_default_oid(old_format);
+ rgw_raw_obj default_named_obj(pool, oid);
+ auto sysobj = sysobj_svc->get_obj(obj_ctx, default_named_obj);
+ ret = sysobj.wop().remove();
+ if (ret < 0) {
+ ldout(cct, 0) << "Error delete default obj name " << name << ": " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ }
+ if (!old_format) {
+ string oid = get_names_oid_prefix() + name;
+ rgw_raw_obj object_name(pool, oid);
+ auto sysobj = sysobj_svc->get_obj(obj_ctx, object_name);
+ ret = sysobj.wop().remove();
+ if (ret < 0) {
+ ldout(cct, 0) << "Error delete obj name " << name << ": " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ }
+
+ string oid = get_info_oid_prefix(old_format);
+ if (old_format) {
+ oid += name;
+ } else {
+ oid += id;
+ }
+
+ rgw_raw_obj object_id(pool, oid);
+ auto sysobj = sysobj_svc->get_obj(obj_ctx, object_id);
+ ret = sysobj.wop().remove();
+ if (ret < 0) {
+ ldout(cct, 0) << "Error delete object id " << id << ": " << cpp_strerror(-ret) << dendl;
+ }
+
+ return ret;
+}
+
+// Write the name object (names prefix + name) whose payload is an
+// RGWNameToId holding this object's id.
+// @param exclusive forwarded to the write op via set_exclusive().
+int RGWSystemMetaObj::store_name(bool exclusive)
+{
+  RGWNameToId mapping;
+  mapping.obj_id = id;
+
+  bufferlist bl;
+  using ceph::encode;
+  encode(mapping, bl);
+
+  rgw_pool pool(get_pool(cct));
+  string oid = get_names_oid_prefix() + name;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  return sysobj.wop()
+               .set_exclusive(exclusive)
+               .write(bl);
+}
+
+// Rename this object.
+//
+// Fails with -EEXIST when new_name already resolves to an id. Otherwise the
+// info is stored under the new name via update(), the new name object is
+// written exclusively, and finally the old name object is removed.
+// @return 0 on success or the negative error of the failing step.
+int RGWSystemMetaObj::rename(const string& new_name)
+{
+  string existing_id;
+  int ret = read_id(new_name, existing_id);
+  if (ret == 0) {
+    return -EEXIST;
+  }
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  const string old_name = name;
+  name = new_name;
+
+  ret = update();
+  if (ret < 0) {
+    ldout(cct, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = store_name(true);
+  if (ret < 0) {
+    ldout(cct, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  /* delete old name */
+  rgw_pool pool(get_pool(cct));
+  string oid = get_names_oid_prefix() + old_name;
+  rgw_raw_obj old_name_obj(pool, oid);
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, old_name_obj);
+  ret = sysobj.wop().remove();
+  if (ret < 0) {
+    ldout(cct, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl;
+  }
+
+  return ret;
+}
+
+// Read the info object (info prefix + obj_id) and decode it into *this.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWSystemMetaObj::read_info(const string& obj_id, bool old_format)
+{
+  rgw_pool pool(get_pool(cct));
+  bufferlist bl;
+  string oid = get_info_oid_prefix(old_format) + obj_id;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    ldout(cct, 0) << "failed reading obj info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(*this, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Resolve this object's name to an id, then load the info stored under it.
+int RGWSystemMetaObj::read()
+{
+  const int ret = read_id(name, id);
+  return ret < 0 ? ret : read_info(id);
+}
+
+// Create the stored representation of this object: its info object and its
+// name object.
+//
+// The name is looked up first. With exclusive set, an existing name yields
+// -EEXIST. A fresh random uuid is generated when no id is known after the
+// lookup.
+// @return the result of store_name(), or the negative error of an earlier
+//         failing step.
+int RGWSystemMetaObj::create(bool exclusive)
+{
+  /* check to see the name is not used */
+  int ret = read_id(name, id);
+  if (exclusive && ret == 0) {
+    ldout(cct, 10) << "ERROR: name " << name << " already in use for obj id " << id << dendl;
+    return -EEXIST;
+  }
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << "failed reading obj id " << id << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  if (id.empty()) {
+    /* create unique id */
+    char uuid_str[37];
+    uuid_d new_uuid;
+    new_uuid.generate_random();
+    new_uuid.print(uuid_str);
+    id = uuid_str;
+  }
+
+  ret = store_info(exclusive);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return store_name(exclusive);
+}
+
+// Encode *this and write it to the info object (info prefix + id).
+// @param exclusive forwarded to the write op via set_exclusive().
+int RGWSystemMetaObj::store_info(bool exclusive)
+{
+  bufferlist bl;
+  using ceph::encode;
+  encode(*this, bl);
+
+  rgw_pool pool(get_pool(cct));
+  string oid = get_info_oid_prefix() + id;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
+  return sysobj.wop()
+               .set_exclusive(exclusive)
+               .write(bl);
+}
+
+// Store the info object and then the name object, stopping at the first
+// failure.
+// @return 0 on success or the negative error of the failing step.
+int RGWSystemMetaObj::write(bool exclusive)
+{
+  int r = store_info(exclusive);
+  if (r < 0) {
+    ldout(cct, 20) << __func__ << "(): store_info() returned ret=" << r << dendl;
+    return r;
+  }
+
+  r = store_name(exclusive);
+  if (r < 0) {
+    ldout(cct, 20) << __func__ << "(): store_name() returned ret=" << r << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+
+// Realm name taken from the rgw_realm configuration option.
+const string& RGWRealm::get_predefined_name(CephContext *cct) const {
+ return cct->_conf->rgw_realm;
+}
+
+// Create a realm: store its info/name objects, create its control object,
+// attach a period (a newly created one when current_period is empty,
+// otherwise the existing one), and try to mark the realm as default.
+//
+// Failing to set the realm as default is only logged; the function still
+// returns 0 in that case.
+// @return 0 on success or the negative error of the failing step.
+int RGWRealm::create(bool exclusive)
+{
+ int ret = RGWSystemMetaObj::create(exclusive);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR creating new realm object " << name << ": " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ // create the control object for watch/notify
+ ret = create_control(exclusive);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR creating control for new realm " << name << ": " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ RGWPeriod period;
+ if (current_period.empty()) {
+ /* create new period for the realm */
+ ret = period.init(cct, sysobj_svc, id, name, false);
+ if (ret < 0 ) {
+ return ret;
+ }
+ ret = period.create(true);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: creating new period for realm " << name << ": " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+ } else {
+ period = RGWPeriod(current_period, 0);
+ // this 'ret' shadows the outer one; it is only used for the early return
+ int ret = period.init(cct, sysobj_svc, id, name);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to init period " << current_period << dendl;
+ return ret;
+ }
+ }
+ ret = set_current_period(period);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed set current period " << current_period << dendl;
+ return ret;
+ }
+ // try to set as default. may race with another create, so pass exclusive=true
+ // so we don't override an existing default
+ ret = set_as_default(true);
+ if (ret < 0 && ret != -EEXIST) {
+ ldout(cct, 0) << "WARNING: failed to set realm as default realm, ret=" << ret << dendl;
+ }
+
+ return 0;
+}
+
+// Delete the realm's stored objects and, if that succeeded, its control
+// object as well.
+int RGWRealm::delete_obj()
+{
+  const int ret = RGWSystemMetaObj::delete_obj();
+  return ret < 0 ? ret : delete_control();
+}
+
+// Write the realm's control object with an empty payload.
+// @param exclusive forwarded to the write op via set_exclusive().
+int RGWRealm::create_control(bool exclusive)
+{
+  rgw_pool pool(get_pool(cct));
+  const auto control_oid = get_control_oid();
+  bufferlist empty_bl;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, control_oid});
+  return sysobj.wop()
+               .set_exclusive(exclusive)
+               .write(empty_bl);
+}
+
+// Remove the realm's control object and return the remove result.
+int RGWRealm::delete_control()
+{
+  rgw_pool pool(get_pool(cct));
+  rgw_raw_obj control_obj{pool, get_control_oid()};
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, control_obj);
+  return sysobj.wop().remove();
+}
+
+// Pool holding realm metadata: the rgw_realm_root_pool option when it is
+// set, otherwise RGW_DEFAULT_REALM_ROOT_POOL.
+rgw_pool RGWRealm::get_pool(CephContext *cct) const
+{
+  const auto& configured = cct->_conf->rgw_realm_root_pool;
+  if (!configured.empty()) {
+    return rgw_pool(configured);
+  }
+  return rgw_pool(RGW_DEFAULT_REALM_ROOT_POOL);
+}
+
+// Name of the object recording the default realm: the
+// rgw_default_realm_info_oid option when set, otherwise
+// default_realm_info_oid. The old_format argument is not consulted.
+const string RGWRealm::get_default_oid(bool old_format) const
+{
+  const auto& configured = cct->_conf->rgw_default_realm_info_oid;
+  if (!configured.empty()) {
+    return configured;
+  }
+  return default_realm_info_oid;
+}
+
+// Prefix of the realm name objects (realm_names_oid_prefix).
+const string& RGWRealm::get_names_oid_prefix() const
+{
+ return realm_names_oid_prefix;
+}
+
+// Prefix of the realm info objects (realm_info_oid_prefix); the old_format
+// argument is not consulted.
+const string& RGWRealm::get_info_oid_prefix(bool old_format) const
+{
+ return realm_info_oid_prefix;
+}
+
+// Make 'period' the realm's current period.
+//
+// Rejected with -EINVAL when the period's realm epoch is older than the
+// realm's, or when it is equal but names a different period id. Otherwise
+// the realm's epoch and current_period are updated and stored, and the
+// period is reflected.
+// @return 0 on success or the negative error of the failing step.
+int RGWRealm::set_current_period(RGWPeriod& period)
+{
+  const auto period_epoch = period.get_realm_epoch();
+  const auto& period_id = period.get_id();
+
+  // update realm epoch to match the period's
+  if (epoch > period_epoch) {
+    ldout(cct, 0) << "ERROR: set_current_period with old realm epoch "
+                  << period_epoch << ", current epoch=" << epoch << dendl;
+    return -EINVAL;
+  }
+  if (epoch == period_epoch && current_period != period_id) {
+    ldout(cct, 0) << "ERROR: set_current_period with same realm epoch "
+                  << period_epoch << ", but different period id "
+                  << period_id << " != " << current_period << dendl;
+    return -EINVAL;
+  }
+
+  epoch = period_epoch;
+  current_period = period_id;
+
+  int ret = update();
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: period update: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = period.reflect();
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: period.reflect(): " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+// Name of the realm's control object: info prefix + id + ".control".
+string RGWRealm::get_control_oid() const
+{
+ return get_info_oid_prefix() + id + ".control";
+}
+
+// Send 'bl' as a notification on the realm's control object.
+// @return 0 on success or the negative notify error.
+int RGWRealm::notify_zone(bufferlist& bl)
+{
+  rgw_pool pool{get_pool(cct)};
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_control_oid()});
+  const int ret = sysobj.wn().notify(bl, 0, nullptr);
+  return ret < 0 ? ret : 0;
+}
+
+// Build and send the notification for a new period. The payload is, in
+// order: the ZonesNeedPeriod tag, the encoded period, and the Reload tag.
+int RGWRealm::notify_new_period(const RGWPeriod& period)
+{
+ bufferlist bl;
+ using ceph::encode;
+ // push the period to dependent zonegroups/zones
+ encode(RGWRealmNotify::ZonesNeedPeriod, bl);
+ encode(period, bl);
+ // reload the gateway with the new period
+ encode(RGWRealmNotify::Reload, bl);
+
+ return notify_zone(bl);
+}
+
+// Object name of the period config for a realm: "period_config." followed
+// by the realm id, or by "default" when the realm id is empty.
+std::string RGWPeriodConfig::get_oid(const std::string& realm_id)
+{
+  std::string oid("period_config.");
+  oid += realm_id.empty() ? std::string("default") : realm_id;
+  return oid;
+}
+
+// Pool holding the period config: the rgw_period_root_pool option when it
+// is set, otherwise RGW_DEFAULT_PERIOD_ROOT_POOL.
+rgw_pool RGWPeriodConfig::get_pool(CephContext *cct)
+{
+  const auto& configured = cct->_conf->rgw_period_root_pool;
+  if (!configured.empty()) {
+    return {configured};
+  }
+  return {RGW_DEFAULT_PERIOD_ROOT_POOL};
+}
+
+// Read the period config object for realm_id and decode it into *this.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWPeriodConfig::read(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
+{
+  const rgw_pool pool = get_pool(sysobj_svc->ctx());
+  const std::string oid = get_oid(realm_id);
+  bufferlist bl;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    return ret;
+  }
+
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(*this, iter);
+  } catch (buffer::error& err) {
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Encode *this and write it to the period config object for realm_id.
+// The write is always non-exclusive.
+int RGWPeriodConfig::write(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
+{
+  const rgw_pool pool = get_pool(sysobj_svc->ctx());
+  const std::string oid = get_oid(realm_id);
+
+  bufferlist bl;
+  using ceph::encode;
+  encode(*this, bl);
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
+  return sysobj.wop()
+               .set_exclusive(false)
+               .write(bl);
+}
+
+// Bind the period to its services and to a realm (id and name). When
+// setup_obj is true, continue with the three-argument init() to load the
+// period; otherwise return 0 right away.
+int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const string& period_realm_id,
+                    const string& period_realm_name, bool setup_obj)
+{
+  cct = _cct;
+  sysobj_svc = _sysobj_svc;
+  realm_id = period_realm_id;
+  realm_name = period_realm_name;
+
+  if (setup_obj) {
+    return init(_cct, _sysobj_svc, setup_obj);
+  }
+  return 0;
+}
+
+
+// Bind the period to its services and, unless setup_obj is false, load it.
+//
+// An empty id is filled in from the realm's current period (the realm is
+// initialised from realm_id/realm_name). A zero epoch is replaced by the
+// latest stored epoch. The period info is then read.
+// @return 0 when setup_obj is false; otherwise the error of the failing
+//         step or the result of read_info().
+int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj)
+{
+  cct = _cct;
+  sysobj_svc = _sysobj_svc;
+
+  if (!setup_obj) {
+    return 0;
+  }
+
+  if (id.empty()) {
+    RGWRealm realm(realm_id, realm_name);
+    const int r = realm.init(cct, sysobj_svc);
+    if (r < 0) {
+      ldout(cct, 0) << "RGWPeriod::init failed to init realm " << realm_name << " id " << realm_id << " : " <<
+        cpp_strerror(-r) << dendl;
+      return r;
+    }
+    id = realm.get_current_period();
+    realm_id = realm.get_id();
+  }
+
+  if (!epoch) {
+    const int r = use_latest_epoch();
+    if (r < 0) {
+      ldout(cct, 0) << "failed to use_latest_epoch period id " << id << " realm " << realm_name << " id " << realm_id
+                    << " : " << cpp_strerror(-r) << dendl;
+      return r;
+    }
+  }
+
+  return read_info();
+}
+
+
+// Copy a zonegroup out of the period map. An empty zonegroup_id looks up
+// the key "default".
+// @return 0 when found (zonegroup is assigned), -ENOENT otherwise.
+int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup,
+                             const string& zonegroup_id) const
+{
+  const string key = zonegroup_id.empty() ? string("default") : zonegroup_id;
+
+  const auto found = period_map.zonegroups.find(key);
+  if (found == period_map.zonegroups.end()) {
+    return -ENOENT;
+  }
+
+  zonegroup = found->second;
+  return 0;
+}
+
+// Suffix of the latest-epoch object name: the
+// rgw_period_latest_epoch_info_oid option when set, otherwise
+// period_latest_epoch_info_oid.
+const string& RGWPeriod::get_latest_epoch_oid() const
+{
+  const auto& configured = cct->_conf->rgw_period_latest_epoch_info_oid;
+  if (!configured.empty()) {
+    return configured;
+  }
+  return period_latest_epoch_info_oid;
+}
+
+// Prefix of the period info objects (period_info_oid_prefix).
+const string& RGWPeriod::get_info_oid_prefix() const
+{
+ return period_info_oid_prefix;
+}
+
+// Common prefix of this period's object names: info prefix + period id.
+const string RGWPeriod::get_period_oid_prefix() const
+{
+ return get_info_oid_prefix() + id;
+}
+
+// Object name for this period at its current epoch: the period prefix
+// followed by "." and the epoch, except for the staging period, which has
+// no epoch suffix.
+const string RGWPeriod::get_period_oid() const
+{
+  // skip the epoch for the staging period
+  const bool is_staging = (id == get_staging_id(realm_id));
+
+  std::ostringstream oss;
+  oss << get_period_oid_prefix();
+  if (!is_staging) {
+    oss << "." << epoch;
+  }
+  return oss.str();
+}
+
+// Read and decode this period's latest-epoch object into 'info'.
+//
+// NOTE(review): 'objv' is accepted but not referenced anywhere in this
+// body, so no version information is returned to the caller through it.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWPeriod::read_latest_epoch(RGWPeriodLatestEpochInfo& info,
+                                 RGWObjVersionTracker *objv)
+{
+  rgw_pool pool(get_pool(cct));
+  bufferlist bl;
+  string oid = get_period_oid_prefix() + get_latest_epoch_oid();
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    ldout(cct, 1) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
+    return ret;
+  }
+
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(info, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Return the stored latest epoch through latest_epoch, which is written
+// only when the read succeeds.
+int RGWPeriod::get_latest_epoch(epoch_t& latest_epoch)
+{
+  RGWPeriodLatestEpochInfo latest;
+  const int ret = read_latest_epoch(latest);
+  if (ret >= 0) {
+    latest_epoch = latest.epoch;
+    return 0;
+  }
+  return ret;
+}
+
+// Set this period's epoch member to the stored latest epoch; the member is
+// left untouched when the read fails.
+int RGWPeriod::use_latest_epoch()
+{
+  RGWPeriodLatestEpochInfo latest;
+  const int ret = read_latest_epoch(latest);
+  if (ret >= 0) {
+    epoch = latest.epoch;
+    return 0;
+  }
+  return ret;
+}
+
+// Write 'epoch' to this period's latest-epoch object.
+//
+// The 'epoch' parameter hides the member of the same name inside this
+// function. NOTE(review): 'objv' is accepted but not referenced anywhere
+// in this body.
+// @param exclusive forwarded to the write op via set_exclusive().
+int RGWPeriod::set_latest_epoch(epoch_t epoch, bool exclusive,
+                                RGWObjVersionTracker *objv)
+{
+  RGWPeriodLatestEpochInfo info;
+  info.epoch = epoch;
+
+  bufferlist bl;
+  using ceph::encode;
+  encode(info, bl);
+
+  rgw_pool pool(get_pool(cct));
+  string oid = get_period_oid_prefix() + get_latest_epoch_oid();
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  return sysobj.wop()
+               .set_exclusive(exclusive)
+               .write(bl);
+}
+
+// Advance the stored latest epoch to 'epoch', retrying up to MAX_RETRIES
+// times when the write reports -EEXIST or -ECANCELED.
+//
+// A missing latest-epoch object is created with an exclusive write. An
+// existing epoch that is not older than 'epoch' ends the call with -EEXIST.
+//
+// NOTE(review): read_latest_epoch() and set_latest_epoch() as defined in
+// this file take the objv argument but do not use it, so the -ECANCELED
+// branch presumably cannot be triggered by a version conflict today -
+// confirm against the sysobj op interface.
+//
+// @return 0 on success, -EEXIST if the epoch is not newer, -ECANCELED after
+//         running out of retries, or another negative read/write error.
+int RGWPeriod::update_latest_epoch(epoch_t epoch)
+{
+ static constexpr int MAX_RETRIES = 20;
+
+ for (int i = 0; i < MAX_RETRIES; i++) {
+ RGWPeriodLatestEpochInfo info;
+ RGWObjVersionTracker objv;
+ bool exclusive = false;
+
+ // read existing epoch
+ int r = read_latest_epoch(info, &objv);
+ if (r == -ENOENT) {
+ // use an exclusive create to set the epoch atomically
+ exclusive = true;
+ ldout(cct, 20) << "creating initial latest_epoch=" << epoch
+ << " for period=" << id << dendl;
+ } else if (r < 0) {
+ ldout(cct, 0) << "ERROR: failed to read latest_epoch" << dendl;
+ return r;
+ } else if (epoch <= info.epoch) {
+ r = -EEXIST; // fail with EEXIST if epoch is not newer
+ ldout(cct, 10) << "found existing latest_epoch " << info.epoch
+ << " >= given epoch " << epoch << ", returning r=" << r << dendl;
+ return r;
+ } else {
+ ldout(cct, 20) << "updating latest_epoch from " << info.epoch
+ << " -> " << epoch << " on period=" << id << dendl;
+ }
+
+ r = set_latest_epoch(epoch, exclusive, &objv);
+ if (r == -EEXIST) {
+ continue; // exclusive create raced with another update, retry
+ } else if (r == -ECANCELED) {
+ continue; // write raced with a conflicting version, retry
+ }
+ if (r < 0) {
+ ldout(cct, 0) << "ERROR: failed to write latest_epoch" << dendl;
+ return r;
+ }
+ return 0; // return success
+ }
+
+ return -ECANCELED; // fail after max retries
+}
+
+// Remove every stored object of this period: one object per epoch from 1
+// up to the current epoch, then the latest-epoch object.
+//
+// Failures are logged as warnings and do not stop the loop; the value
+// returned is the result of removing the latest-epoch object.
+int RGWPeriod::delete_obj()
+{
+  rgw_pool pool(get_pool(cct));
+
+  // delete the object for each period epoch
+  for (epoch_t e = 1; e <= epoch; e++) {
+    RGWPeriod epoch_period{get_id(), e};
+    rgw_raw_obj epoch_obj{pool, epoch_period.get_period_oid()};
+    auto obj_ctx = sysobj_svc->init_obj_ctx();
+    auto sysobj = sysobj_svc->get_obj(obj_ctx, epoch_obj);
+    const int r = sysobj.wop().remove();
+    if (r < 0) {
+      ldout(cct, 0) << "WARNING: failed to delete period object " << epoch_obj
+                    << ": " << cpp_strerror(-r) << dendl;
+    }
+  }
+
+  // delete the .latest_epoch object
+  rgw_raw_obj latest_obj{pool, get_period_oid_prefix() + get_latest_epoch_oid()};
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, latest_obj);
+  const int ret = sysobj.wop().remove();
+  if (ret < 0) {
+    ldout(cct, 0) << "WARNING: failed to delete period object " << latest_obj
+                  << ": " << cpp_strerror(-ret) << dendl;
+  }
+  return ret;
+}
+
+// Read the period object named by get_period_oid() and decode it into
+// *this.
+// @return 0 on success, the read error, or -EIO when decoding fails.
+int RGWPeriod::read_info()
+{
+  rgw_pool pool(get_pool(cct));
+  bufferlist bl;
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_period_oid()});
+  const int ret = sysobj.rop().read(&bl);
+  if (ret < 0) {
+    ldout(cct, 0) << "failed reading obj info from " << pool << ":" << get_period_oid() << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  try {
+    using ceph::decode;
+    auto iter = bl.cbegin();
+    decode(*this, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << get_period_oid() << dendl;
+    return -EIO;
+  }
+
+  return 0;
+}
+
+// Create a brand-new period: assign a random uuid as id, start at
+// FIRST_EPOCH, store the period info and record the latest epoch.
+// @param exclusive forwarded to store_info().
+// @return the result of set_latest_epoch(), or the store_info() error.
+int RGWPeriod::create(bool exclusive)
+{
+  /* create unique id */
+  char uuid_str[37];
+  uuid_d new_uuid;
+  new_uuid.generate_random();
+  new_uuid.print(uuid_str);
+  id = uuid_str;
+
+  epoch = FIRST_EPOCH;
+  period_map.id = id;
+
+  int ret = store_info(exclusive);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = set_latest_epoch(epoch);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: setting latest epoch " << id << ": " << cpp_strerror(-ret) << dendl;
+  }
+  return ret;
+}
+
+// Encode *this and write it to the object named by get_period_oid().
+// @param exclusive forwarded to the write op via set_exclusive().
+int RGWPeriod::store_info(bool exclusive)
+{
+  bufferlist bl;
+  using ceph::encode;
+  encode(*this, bl);
+
+  rgw_pool pool(get_pool(cct));
+  string oid = get_period_oid();
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+  return sysobj.wop()
+               .set_exclusive(exclusive)
+               .write(bl);
+}
+
+// Pool holding period metadata: the rgw_period_root_pool option when it is
+// set, otherwise RGW_DEFAULT_PERIOD_ROOT_POOL.
+rgw_pool RGWPeriod::get_pool(CephContext *cct) const
+{
+  const auto& configured = cct->_conf->rgw_period_root_pool;
+  if (!configured.empty()) {
+    return rgw_pool(configured);
+  }
+  return rgw_pool(RGW_DEFAULT_PERIOD_ROOT_POOL);
+}
+
+// Merge a zonegroup into the period map and store the period.
+// A zonegroup that belongs to a different realm is ignored and 0 is
+// returned without storing anything.
+int RGWPeriod::add_zonegroup(const RGWZoneGroup& zonegroup)
+{
+  const bool same_realm = (zonegroup.realm_id == realm_id);
+  if (!same_realm) {
+    return 0;
+  }
+
+  const int ret = period_map.update(zonegroup, cct);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return store_info(false);
+}
+
+/*
+ * Rebuild the period map from the zonegroups known to the zone service and
+ * re-read the period config for this realm.
+ *
+ * Zonegroups that fail to initialize are skipped with a warning, and
+ * zonegroups of another realm are skipped silently.  A zonegroup of this
+ * realm without a master zone, or whose master zone is not one of its zones,
+ * aborts the update with -EINVAL.
+ *
+ * Returns 0 on success or a negative error code.  A missing period config
+ * (-ENOENT) is not treated as an error.
+ */
+int RGWPeriod::update()
+{
+  auto zone_svc = sysobj_svc->get_zone_svc();
+  ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl;
+  list<string> zonegroups;
+  int ret = zone_svc->list_zonegroups(zonegroups);
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  // clear zone short ids of removed zones. period_map.update() will add the
+  // remaining zones back
+  period_map.short_zone_ids.clear();
+
+  for (auto& iter : zonegroups) {
+    RGWZoneGroup zg(string(), iter);
+    ret = zg.init(cct, sysobj_svc);
+    if (ret < 0) {
+      ldout(cct, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl;
+      continue;
+    }
+
+    if (zg.realm_id != realm_id) {
+      ldout(cct, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl;
+      continue;
+    }
+
+    if (zg.master_zone.empty()) {
+      ldout(cct, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
+      return -EINVAL;
+    }
+
+    if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
+      ldout(cct,0) << "ERROR: zonegroup " << zg.get_name()
+                   << " has a non existent master zone "<< dendl;
+      return -EINVAL;
+    }
+
+    if (zg.is_master_zonegroup()) {
+      master_zonegroup = zg.get_id();
+      master_zone = zg.master_zone;
+    }
+
+    // reuse the function-level ret instead of declaring a shadowing one
+    ret = period_map.update(zg, cct);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  ret = period_config.read(sysobj_svc, realm_id);
+  if (ret < 0 && ret != -ENOENT) {
+    // negate like every other cpp_strerror() call in this file
+    ldout(cct, 0) << "ERROR: failed to read period config: "
+                  << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  return 0;
+}
+
+/*
+ * Write every zonegroup of the period map, then the period config, back to
+ * their own objects.  The master zonegroup is additionally offered as the
+ * default zonegroup; a failure to set it is not an error.
+ * Returns 0 on success or the first negative write error.
+ */
+int RGWPeriod::reflect()
+{
+  for (auto& entry : period_map.zonegroups) {
+    RGWZoneGroup& zonegroup = entry.second;
+    zonegroup.reinit_instance(cct, sysobj_svc);
+
+    int r = zonegroup.write(false);
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: failed to store zonegroup info for zonegroup=" << entry.first << ": " << cpp_strerror(-r) << dendl;
+      return r;
+    }
+
+    if (!zonegroup.is_master_zonegroup()) {
+      continue;
+    }
+    // set master as default if no default exists
+    r = zonegroup.set_as_default(true);
+    if (r == 0) {
+      ldout(cct, 1) << "Set the period's master zonegroup " << zonegroup.get_id()
+                    << " as the default" << dendl;
+    }
+  }
+
+  const int config_ret = period_config.write(sysobj_svc, realm_id);
+  if (config_ret < 0) {
+    ldout(cct, 0) << "ERROR: failed to store period config: "
+                  << cpp_strerror(-config_ret) << dendl;
+    return config_ret;
+  }
+  return 0;
+}
+
+/*
+ * Turn this period into a staging successor of itself: the current id
+ * becomes the predecessor, the id becomes the realm's staging id, the period
+ * map is reset and the realm epoch is advanced by one.
+ */
+void RGWPeriod::fork()
+{
+  ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl;
+  // remember where we came from before the id is overwritten
+  predecessor_uuid = id;
+  id = get_staging_id(realm_id);
+  ++realm_epoch;
+  period_map.reset();
+}
+
+/*
+ * Read the metadata sync status into *sync_status using a temporary sync
+ * status manager.  Returns the manager's init error, or otherwise the result
+ * of the read.
+ */
+static int read_sync_status(RGWRados *store, rgw_meta_sync_status *sync_status)
+{
+  // initialize a sync status manager to read the status
+  RGWMetaSyncStatusManager manager(store, store->get_async_rados());
+  const int init_ret = manager.init();
+  if (init_ret < 0) {
+    return init_ret;
+  }
+  const int read_ret = manager.read_sync_status(sync_status);
+  manager.stop();
+  return read_ret;
+}
+
+/*
+ * Capture the metadata sync markers of `current_period` into this period's
+ * sync_status.
+ *
+ * When the local sync status is on the same realm epoch as current_period,
+ * each shard's marker is taken over (markers recorded for another epoch are
+ * emptied first).  When it is behind, the call fails with -EINVAL and an
+ * explanation on error_stream, unless force_if_stale is set or the current
+ * realm epoch is not greater than 1; in that case empty markers are stored.
+ */
+int RGWPeriod::update_sync_status(RGWRados *store, /* for now */
+                                  const RGWPeriod &current_period,
+                                  std::ostream& error_stream,
+                                  bool force_if_stale)
+{
+  rgw_meta_sync_status status;
+  const int ret = read_sync_status(store, &status);
+  if (ret < 0) {
+    ldout(cct, 0) << "period failed to read sync status: "
+                  << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  const auto current_epoch = current_period.get_realm_epoch();
+  std::vector<std::string> markers;
+
+  if (current_epoch == status.sync_info.realm_epoch) {
+    markers.reserve(status.sync_info.num_shards);
+    for (auto& shard : status.sync_markers) {
+      auto& shard_marker = shard.second;
+      // filter out markers from other periods
+      if (shard_marker.realm_epoch != current_epoch) {
+        shard_marker.marker.clear();
+      }
+      markers.emplace_back(std::move(shard_marker.marker));
+    }
+  } else {
+    // no sync status markers for the current period
+    ceph_assert(current_epoch > status.sync_info.realm_epoch);
+    const int behind = current_epoch - status.sync_info.realm_epoch;
+    const bool refuse = !force_if_stale && current_epoch > 1;
+    if (refuse) {
+      error_stream << "ERROR: This zone is " << behind << " period(s) behind "
+          "the current master zone in metadata sync. If this zone is promoted "
+          "to master, any metadata changes during that time are likely to "
+          "be lost.\n"
+          "Waiting for this zone to catch up on metadata sync (see "
+          "'radosgw-admin sync status') is recommended.\n"
+          "To promote this zone to master anyway, add the flag "
+          "--yes-i-really-mean-it." << std::endl;
+      return -EINVAL;
+    }
+    // empty sync status markers - other zones will skip this period during
+    // incremental metadata sync
+    markers.resize(status.sync_info.num_shards);
+  }
+
+  std::swap(sync_status, markers);
+  return 0;
+}
+
+int RGWPeriod::commit(RGWRados *store, // Validate this period against current_period and commit it, either as a new period or as the next epoch.
+ RGWRealm& realm, const RGWPeriod& current_period, // realm: updated/notified on success; current_period: the period being succeeded
+ std::ostream& error_stream, bool force_if_stale) // error_stream: receives user-facing rejection reasons; force_if_stale: forwarded to update_sync_status()
+{ // Returns 0 on success, -EINVAL when a precondition fails, or the negative error of the failing step.
+ auto zone_svc = sysobj_svc->get_zone_svc();
+ ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
+ // gateway must be in the master zone to commit
+ if (master_zone != zone_svc->get_zone_params().get_id()) {
+ error_stream << "Cannot commit period on zone "
+ << zone_svc->get_zone_params().get_id() << ", it must be sent to "
+ "the period's master zone " << master_zone << '.' << std::endl;
+ return -EINVAL;
+ }
+ // period predecessor must match current period
+ if (predecessor_uuid != current_period.get_id()) {
+ error_stream << "Period predecessor " << predecessor_uuid
+ << " does not match current period " << current_period.get_id()
+ << ". Use 'period pull' to get the latest period from the master, "
+ "reapply your changes, and try again." << std::endl;
+ return -EINVAL;
+ }
+ // realm epoch must be 1 greater than current period
+ if (realm_epoch != current_period.get_realm_epoch() + 1) {
+ error_stream << "Period's realm epoch " << realm_epoch
+ << " does not come directly after current realm epoch "
+ << current_period.get_realm_epoch() << ". Use 'realm pull' to get the "
+ "latest realm and period from the master zone, reapply your changes, "
+ "and try again." << std::endl;
+ return -EINVAL;
+ }
+ // did the master zone change?
+ if (master_zone != current_period.get_master_zone()) { // yes: commit as a brand-new period and return from inside this branch
+ // store the current metadata sync status in the period
+ int r = update_sync_status(store, current_period, error_stream, force_if_stale);
+ if (r < 0) {
+ ldout(cct, 0) << "failed to update metadata sync status: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ // create an object with a new period id
+ r = create(true); // create() assigns a new random id and resets epoch to FIRST_EPOCH
+ if (r < 0) {
+ ldout(cct, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ // set as current period
+ r = realm.set_current_period(*this);
+ if (r < 0) {
+ ldout(cct, 0) << "failed to update realm's current period: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ ldout(cct, 4) << "Promoted to master zone and committed new period "
+ << id << dendl;
+ realm.notify_new_period(*this);
+ return 0;
+ }
+ // period must be based on current epoch
+ if (epoch != current_period.get_epoch()) {
+ error_stream << "Period epoch " << epoch << " does not match "
+ "predecessor epoch " << current_period.get_epoch()
+ << ". Use 'period pull' to get the latest epoch from the master zone, "
+ "reapply your changes, and try again." << std::endl;
+ return -EINVAL;
+ }
+ // set period as next epoch
+ set_id(current_period.get_id()); // master zone unchanged: take over the current period's identity
+ set_epoch(current_period.get_epoch() + 1);
+ set_predecessor(current_period.get_predecessor());
+ realm_epoch = current_period.get_realm_epoch(); // undo the realm epoch bump that was checked above
+ // write the period to rados
+ int r = store_info(false);
+ if (r < 0) {
+ ldout(cct, 0) << "failed to store period: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ // set as latest epoch
+ r = update_latest_epoch(epoch);
+ if (r == -EEXIST) {
+ // already have this epoch (or a more recent one)
+ return 0; // note: reflect() and the realm notification are skipped on this path
+ }
+ if (r < 0) {
+ ldout(cct, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ r = reflect(); // write the period's zonegroups and config to their own objects
+ if (r < 0) {
+ ldout(cct, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ ldout(cct, 4) << "Committed new epoch " << epoch
+ << " for period " << id << dendl;
+ realm.notify_new_period(*this);
+ return 0;
+}
+
+/*
+ * Create the zone under the default zone name.  With old_format set, the
+ * zone's name is replaced by its id after a successful create.
+ * Returns the result of create().
+ */
+int RGWZoneParams::create_default(bool old_format)
+{
+  name = default_zone_name;
+
+  const int ret = create();
+  if (ret >= 0 && old_format) {
+    name = id;
+  }
+  return ret;
+}
+
+
+/*
+ * Collect into `pool_names` every pool used by the zones listed in `zones`,
+ * leaving out the zone whose id equals `my_zone_id`.
+ *
+ * Each entry of `zones` is used to construct and init() an RGWZoneParams.
+ * For each zone the system pools, plus the index, data and data-extra pools
+ * of every placement target, are added.  lc_pool is included so that
+ * RGWZoneParams::fix_pool_names(), which de-duplicates lc_pool against this
+ * set, can see other zones' lifecycle pools.
+ *
+ * Returns 0 on success, or the negative error of the first zone that fails
+ * to initialize (pool_names may then be partially filled).
+ */
+int get_zones_pool_set(CephContext* cct,
+                       RGWSI_SysObj* sysobj_svc,
+                       const list<string>& zones,
+                       const string& my_zone_id,
+                       set<rgw_pool>& pool_names)
+{
+  for (auto const& zone_name : zones) {
+    RGWZoneParams zone(zone_name);
+    int r = zone.init(cct, sysobj_svc);
+    if (r < 0) {
+      ldout(cct, 0) << "Error: init zone " << zone_name << ":" << cpp_strerror(-r) << dendl;
+      return r;
+    }
+    if (zone.get_id() == my_zone_id) {
+      // our own pools are not conflicts
+      continue;
+    }
+
+    pool_names.insert(zone.domain_root);
+    pool_names.insert(zone.metadata_heap);
+    pool_names.insert(zone.control_pool);
+    pool_names.insert(zone.gc_pool);
+    pool_names.insert(zone.lc_pool);
+    pool_names.insert(zone.log_pool);
+    pool_names.insert(zone.intent_log_pool);
+    pool_names.insert(zone.usage_log_pool);
+    pool_names.insert(zone.user_keys_pool);
+    pool_names.insert(zone.user_email_pool);
+    pool_names.insert(zone.user_swift_pool);
+    pool_names.insert(zone.user_uid_pool);
+    pool_names.insert(zone.otp_pool);
+    pool_names.insert(zone.roles_pool);
+    pool_names.insert(zone.reshard_pool);
+
+    // named so it no longer shadows the outer loop variable
+    for (auto& placement : zone.placement_pools) {
+      pool_names.insert(placement.second.index_pool);
+      for (auto& pi : placement.second.storage_classes.get_all()) {
+        if (pi.second.data_pool) {
+          pool_names.insert(pi.second.data_pool.get());
+        }
+      }
+      pool_names.insert(placement.second.data_extra_pool);
+    }
+  }
+  return 0;
+}
+
+/*
+ * Pick a pool name that is not present in `pools`.
+ *
+ * The candidate is `default_prefix + default_suffix`, unless
+ * `suggested_pool` is non-empty; then its string form is split at the first
+ * '.' into prefix and suffix (the suffix keeps the dot).  While the
+ * candidate is already in `pools`, a new one of the form
+ * prefix + "_" + <std::rand()> + suffix is tried.
+ */
+rgw_pool fix_zone_pool_dup(set<rgw_pool> pools,
+                           const string& default_prefix,
+                           const string& default_suffix,
+                           const rgw_pool& suggested_pool)
+{
+  const string suggested_name = suggested_pool.to_str();
+
+  string prefix;
+  string suffix;
+  if (suggested_pool.empty()) {
+    prefix = default_prefix;
+    suffix = default_suffix;
+  } else {
+    prefix = suggested_name.substr(0, suggested_name.find("."));
+    suffix = suggested_name.substr(prefix.length());
+  }
+
+  rgw_pool candidate(prefix + suffix);
+  while (pools.count(candidate) != 0) {
+    // taken already: retry with a random infix
+    candidate = prefix + "_" + std::to_string(std::rand()) + suffix;
+  }
+  return candidate;
+}
+
+/*
+ * Make sure none of this zone's pools collides with a pool of another zone.
+ *
+ * The pools of all other zones are gathered with get_zones_pool_set(), and
+ * every pool of this zone is passed through fix_zone_pool_dup(), which keeps
+ * the current name when it is free and otherwise derives a new one.
+ * metadata_heap is only processed when it has a name.
+ *
+ * A failure to list the zones is logged and otherwise ignored (the check
+ * then runs against an empty zone list).  Returns 0 on success or the
+ * negative error from get_zones_pool_set().
+ */
+int RGWZoneParams::fix_pool_names()
+{
+
+  list<string> zones;
+  int r = zone_svc->list_zones(zones);
+  if (r < 0) {
+    ldout(cct, 10) << "WARNING: list_zones() returned r=" << r << dendl;
+  }
+
+  set<rgw_pool> pools;
+  r = get_zones_pool_set(cct, sysobj_svc, zones, id, pools);
+  if (r < 0) {
+    ldout(cct, 0) << "Error: get_zones_pool_set returned r=" << r << dendl;
+    return r;
+  }
+
+  domain_root = fix_zone_pool_dup(pools, name, ".rgw.meta:root", domain_root);
+  if (!metadata_heap.name.empty()) {
+    metadata_heap = fix_zone_pool_dup(pools, name, ".rgw.meta:heap", metadata_heap);
+  }
+  control_pool = fix_zone_pool_dup(pools, name, ".rgw.control", control_pool);
+  gc_pool = fix_zone_pool_dup(pools, name, ".rgw.log:gc", gc_pool);
+  lc_pool = fix_zone_pool_dup(pools, name, ".rgw.log:lc", lc_pool);
+  log_pool = fix_zone_pool_dup(pools, name, ".rgw.log", log_pool);
+  intent_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:intent", intent_log_pool);
+  usage_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:usage", usage_log_pool);
+  user_keys_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.keys", user_keys_pool);
+  user_email_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.email", user_email_pool);
+  user_swift_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.swift", user_swift_pool);
+  user_uid_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.uid", user_uid_pool);
+  roles_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:roles", roles_pool);
+  reshard_pool = fix_zone_pool_dup(pools, name, ".rgw.log:reshard", reshard_pool);
+  otp_pool = fix_zone_pool_dup(pools, name, ".rgw.otp", otp_pool);
+
+  for(auto& iter : placement_pools) {
+    iter.second.index_pool = fix_zone_pool_dup(pools, name, "." + default_bucket_index_pool_suffix,
+                                               iter.second.index_pool);
+    for (auto& pi : iter.second.storage_classes.get_all()) {
+      if (pi.second.data_pool) {
+        rgw_pool& pool = pi.second.data_pool.get();
+        pool = fix_zone_pool_dup(pools, name, "." + default_storage_pool_suffix,
+                                 pool);
+      }
+    }
+    iter.second.data_extra_pool= fix_zone_pool_dup(pools, name, "." + default_storage_extra_pool_suffix,
+                                                   iter.second.data_extra_pool);
+  }
+
+  return 0;
+}
+
+/*
+ * Create the zone: set up a "default-placement" target when the legacy
+ * placement-pools object cannot be stat'ed, resolve pool-name clashes with
+ * other zones, store the zone, and finally try to make it the default zone.
+ *
+ * Returns a negative error if fixing pool names or storing the zone fails;
+ * failing to become the default zone is only logged.
+ */
+int RGWZoneParams::create(bool exclusive)
+{
+  /* check for old pools config */
+  auto ctx = sysobj_svc->init_obj_ctx();
+  rgw_raw_obj legacy_obj(domain_root, avail_pools);
+  auto legacy_sysobj = sysobj_svc->get_obj(ctx, legacy_obj);
+
+  int ret = legacy_sysobj.rop().stat();
+  if (ret < 0) {
+    ldout(cct, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl;
+    /* a new system, let's set new placement info */
+    RGWZonePlacementInfo placement;
+    placement.index_pool = name + "." + default_bucket_index_pool_suffix;
+    rgw_pool standard_pool = name + "." + default_storage_pool_suffix;
+    placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &standard_pool, nullptr);
+    placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
+    placement_pools["default-placement"] = placement;
+  }
+
+  ret = fix_pool_names();
+  if (ret < 0) {
+    ldout(cct, 0) << "ERROR: fix_pool_names returned r=" << ret << dendl;
+    return ret;
+  }
+
+  ret = RGWSystemMetaObj::create(exclusive);
+  if (ret < 0) {
+    return ret;
+  }
+
+  // try to set as default. may race with another create, so pass exclusive=true
+  // so we don't override an existing default
+  ret = set_as_default(true);
+  const bool unexpected_failure = (ret < 0 && ret != -EEXIST);
+  if (unexpected_failure) {
+    ldout(cct, 10) << "WARNING: failed to set zone as default, r=" << ret << dendl;
+  }
+
+  return 0;
+}
+
+/*
+ * Pool holding zone objects: the rgw_zone_root_pool option when it is set,
+ * RGW_DEFAULT_ZONE_ROOT_POOL otherwise.
+ */
+rgw_pool RGWZoneParams::get_pool(CephContext *cct) const
+{
+  if (!cct->_conf->rgw_zone_root_pool.empty()) {
+    return rgw_pool(cct->_conf->rgw_zone_root_pool);
+  }
+  return rgw_pool(RGW_DEFAULT_ZONE_ROOT_POOL);
+}
+
+/*
+ * Oid of the object recording the default zone: the configured
+ * rgw_default_zone_info_oid, suffixed with "." + realm_id unless the old
+ * format is requested.
+ */
+const string RGWZoneParams::get_default_oid(bool old_format) const
+{
+  if (!old_format) {
+    return cct->_conf->rgw_default_zone_info_oid + "." + realm_id;
+  }
+  return cct->_conf->rgw_default_zone_info_oid;
+}
+
+/* Oid prefix under which zone name -> id mappings are stored. */
+const string& RGWZoneParams::get_names_oid_prefix() const
+{
+  return zone_names_oid_prefix;
+}
+
+/* Oid prefix of zone info objects; old_format does not affect the result. */
+const string& RGWZoneParams::get_info_oid_prefix(bool old_format) const
+{
+  return zone_info_oid_prefix;
+}
+
+/* Zone name taken from the rgw_zone configuration option. */
+const string& RGWZoneParams::get_predefined_name(CephContext *cct) const
+{
+  return cct->_conf->rgw_zone;
+}
+
+/*
+ * Initialize the zone params, falling back to the configured rgw_zone for
+ * the name when none was given, then delegate to RGWSystemMetaObj::init().
+ */
+int RGWZoneParams::init(CephContext *cct, RGWSI_SysObj *sysobj_svc, bool setup_obj, bool old_format)
+{
+  const bool unnamed = name.empty();
+  if (unnamed) {
+    name = cct->_conf->rgw_zone;
+  }
+  return RGWSystemMetaObj::init(cct, sysobj_svc, setup_obj, old_format);
+}
+
+/*
+ * Look up the id of the default zone.  When no realm id is set, the default
+ * realm is tried first; if it cannot be initialized, the id stored under the
+ * default zone name is read instead.
+ */
+int RGWZoneParams::read_default_id(string& default_id, bool old_format)
+{
+  if (realm_id.empty()) {
+    /* try using default realm */
+    RGWRealm default_realm;
+    const int init_ret = default_realm.init(cct, sysobj_svc);
+    if (init_ret < 0) {
+      // no default realm exists
+      return read_id(default_zone_name, default_id);
+    }
+    realm_id = default_realm.get_id();
+  }
+
+  return RGWSystemMetaObj::read_default_id(default_id, old_format);
+}
+
+
+/*
+ * Record this zone as the default zone.  When no realm id is set, the
+ * default realm's id is adopted first; -EINVAL is returned if that realm
+ * cannot be initialized.
+ */
+int RGWZoneParams::set_as_default(bool exclusive)
+{
+  if (realm_id.empty()) {
+    /* try using default realm */
+    RGWRealm default_realm;
+    const int init_ret = default_realm.init(cct, sysobj_svc);
+    if (init_ret < 0) {
+      ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-init_ret) << dendl;
+      return -EINVAL;
+    }
+    realm_id = default_realm.get_id();
+  }
+
+  return RGWSystemMetaObj::set_as_default(exclusive);
+}
+
+/*
+ * Compression type configured for the placement rule's storage class, or
+ * "none" when the placement target is unknown or nothing is configured.
+ */
+const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const
+{
+  static const std::string NONE{"none"};
+
+  const auto target = placement_pools.find(placement_rule.name);
+  if (target != placement_pools.end()) {
+    const auto& configured = target->second.get_compression_type(placement_rule.get_storage_class());
+    if (!configured.empty()) {
+      return configured;
+    }
+  }
+  return NONE;
+}
+
+void RGWPeriodMap::encode(bufferlist& bl) const { // Serialize the period map into bl; field order must mirror decode().
+ ENCODE_START(2, 1, bl); // first argument (2) matches the struct_v >= 2 check that guards short_zone_ids in decode()
+ encode(id, bl);
+ encode(zonegroups, bl);
+ encode(master_zonegroup, bl);
+ encode(short_zone_ids, bl); // zonegroups_by_api is not encoded; decode() rebuilds it from zonegroups
+ ENCODE_FINISH(bl);
+}
+
+/*
+ * Deserialize the period map.  short_zone_ids is only present from struct
+ * version 2 on.  The by-api index is rebuilt from the decoded zonegroups,
+ * and master_zonegroup is set to the id of a zonegroup flagged as master.
+ */
+void RGWPeriodMap::decode(bufferlist::const_iterator& bl) {
+  DECODE_START(2, bl);
+  decode(id, bl);
+  decode(zonegroups, bl);
+  decode(master_zonegroup, bl);
+  if (struct_v >= 2) {
+    decode(short_zone_ids, bl);
+  }
+  DECODE_FINISH(bl);
+
+  zonegroups_by_api.clear();
+  for (auto& entry : zonegroups) {
+    RGWZoneGroup& zg = entry.second;
+    zonegroups_by_api[zg.api_name] = zg;
+    if (zg.is_master_zonegroup()) {
+      master_zonegroup = zg.get_id();
+    }
+  }
+}
+
+// Run an MD5 hash on the zone_id and return the first 32 bits of the digest,
+// copied out in host byte order.  A result of 0 is bumped to 1, so the
+// function never returns 0 (get_zone_short_id() returns 0 for an unknown
+// zone).  The id is taken by reference to avoid copying it on every call.
+static uint32_t gen_short_zone_id(const std::string& zone_id)
+{
+  unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
+  MD5 hash;
+  hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size());
+  hash.Final(md5);
+
+  uint32_t short_id;
+  memcpy((char *)&short_id, md5, sizeof(short_id));
+  return std::max(short_id, 1u);
+}
+
+/*
+ * Insert or replace `zonegroup` in the period map.
+ *
+ * Fails with -EINVAL when the zonegroup claims to be master while a
+ * different master zonegroup is already recorded.  The by-api index and
+ * master_zonegroup are kept in step, and a short id is generated for every
+ * zone that has none yet; -EEXIST is returned when a generated short id is
+ * already used by another zone.
+ */
+int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct)
+{
+  const bool conflicting_master = zonegroup.is_master_zonegroup() &&
+      !master_zonegroup.empty() &&
+      zonegroup.get_id() != master_zonegroup;
+  if (conflicting_master) {
+    ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl;
+    ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() <<dendl;
+    return -EINVAL;
+  }
+
+  // drop the by-api entry of the version being replaced, if any
+  auto existing = zonegroups.find(zonegroup.get_id());
+  if (existing != zonegroups.end() && !existing->second.api_name.empty()) {
+    zonegroups_by_api.erase(existing->second.api_name);
+  }
+  zonegroups[zonegroup.get_id()] = zonegroup;
+
+  if (!zonegroup.api_name.empty()) {
+    zonegroups_by_api[zonegroup.api_name] = zonegroup;
+  }
+
+  if (zonegroup.is_master_zonegroup()) {
+    master_zonegroup = zonegroup.get_id();
+  } else if (master_zonegroup == zonegroup.get_id()) {
+    master_zonegroup = "";
+  }
+
+  for (auto& zone_entry : zonegroup.zones) {
+    auto& zone = zone_entry.second;
+    if (short_zone_ids.count(zone.id) != 0) {
+      continue;
+    }
+    // calculate the zone's short id
+    const uint32_t short_id = gen_short_zone_id(zone.id);
+
+    // search for an existing zone with the same short id
+    for (auto& known : short_zone_ids) {
+      if (known.second != short_id) {
+        continue;
+      }
+      ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id
+                    << ") generates the same short_zone_id " << short_id
+                    << " as existing zone id " << known.first << dendl;
+      return -EEXIST;
+    }
+
+    short_zone_ids[zone.id] = short_id;
+  }
+
+  return 0;
+}
+
+/* Short id recorded for zone_id, or 0 when the zone is not in the map. */
+uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const
+{
+  const auto found = short_zone_ids.find(zone_id);
+  return (found != short_zone_ids.end()) ? found->second : 0;
+}
+
+/*
+ * Fill this zonegroup map from the period obtained by default-initializing
+ * an RGWPeriod: quotas come from the period config; zonegroups, the by-api
+ * index and the master zonegroup come from the period map.
+ *
+ * Returns 0 on success or the negative error from RGWPeriod::init(), in
+ * which case a message is written to cerr.
+ */
+int RGWZoneGroupMap::read(CephContext *cct, RGWSI_SysObj *sysobj_svc)
+{
+
+  RGWPeriod period;
+  int ret = period.init(cct, sysobj_svc);
+  if (ret < 0) {
+    // terminate the line so the message does not run into later output
+    cerr << "failed to read current period info: " << cpp_strerror(-ret) << std::endl;
+    return ret;
+  }
+
+  bucket_quota = period.get_config().bucket_quota;
+  user_quota = period.get_config().user_quota;
+  zonegroups = period.get_map().zonegroups;
+  zonegroups_by_api = period.get_map().zonegroups_by_api;
+  master_zonegroup = period.get_map().master_zonegroup;
+
+  return 0;
+}
+
+void RGWRegionMap::encode(bufferlist& bl) const { // Serialize the region map into bl; field order must mirror decode().
+ ENCODE_START( 3, 1, bl); // first argument (3) matches the highest struct_v checked in decode()
+ encode(regions, bl);
+ encode(master_region, bl);
+ encode(bucket_quota, bl); // read back by decode() only when struct_v >= 2
+ encode(user_quota, bl); // read back by decode() only when struct_v >= 3
+ ENCODE_FINISH(bl);
+}
+
+/*
+ * Deserialize the region map.  bucket_quota is present from struct version
+ * 2 on and user_quota from version 3 on.
+ */
+void RGWRegionMap::decode(bufferlist::const_iterator& bl) {
+  DECODE_START(3, bl);
+  decode(regions, bl);
+  decode(master_region, bl);
+  if (struct_v >= 2) {
+    decode(bucket_quota, bl);
+  }
+  if (struct_v >= 3) {
+    decode(user_quota, bl);
+  }
+  DECODE_FINISH(bl);
+}
+
+void RGWZoneGroupMap::encode(bufferlist& bl) const { // Serialize the zonegroup map into bl; field order must mirror decode().
+ ENCODE_START( 3, 1, bl); // first argument (3) matches the highest struct_v checked in decode()
+ encode(zonegroups, bl);
+ encode(master_zonegroup, bl);
+ encode(bucket_quota, bl); // read back by decode() only when struct_v >= 2
+ encode(user_quota, bl); // read back by decode() only when struct_v >= 3; zonegroups_by_api is rebuilt on decode
+ ENCODE_FINISH(bl);
+}
+
+/*
+ * Deserialize the zonegroup map.  bucket_quota is present from struct
+ * version 2 on and user_quota from version 3 on.  The by-api index is
+ * rebuilt from the decoded zonegroups, and master_zonegroup is set to the
+ * *name* of a zonegroup flagged as master.
+ */
+void RGWZoneGroupMap::decode(bufferlist::const_iterator& bl) {
+  DECODE_START(3, bl);
+  decode(zonegroups, bl);
+  decode(master_zonegroup, bl);
+  if (struct_v >= 2) {
+    decode(bucket_quota, bl);
+  }
+  if (struct_v >= 3) {
+    decode(user_quota, bl);
+  }
+  DECODE_FINISH(bl);
+
+  zonegroups_by_api.clear();
+  for (auto& entry : zonegroups) {
+    RGWZoneGroup& zg = entry.second;
+    zonegroups_by_api[zg.api_name] = zg;
+    if (zg.is_master_zonegroup()) {
+      master_zonegroup = zg.get_name();
+    }
+  }
+}
+
+
diff --git a/src/rgw/rgw_zone.h b/src/rgw/rgw_zone.h
new file mode 100644
index 00000000..89f635a5
--- /dev/null
+++ b/src/rgw/rgw_zone.h
@@ -0,0 +1,1145 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RGW_ZONE_H
+#define CEPH_RGW_ZONE_H
+
+#include "rgw_common.h"
+
+namespace rgw_zone_defaults { // String constants (oid prefixes, default names, default root pools) shared by the zone code; only declared here.
+
+extern std::string zone_names_oid_prefix; // returned by RGWZoneParams::get_names_oid_prefix()
+extern std::string region_info_oid_prefix;
+extern std::string realm_names_oid_prefix;
+extern std::string zone_group_info_oid_prefix;
+extern std::string realm_info_oid_prefix;
+extern std::string default_region_info_oid;
+extern std::string default_zone_group_info_oid;
+extern std::string region_map_oid;
+extern std::string default_realm_info_oid;
+extern std::string default_zonegroup_name;
+extern std::string default_zone_name; // name assigned by RGWZoneParams::create_default()
+extern std::string zonegroup_names_oid_prefix;
+extern std::string RGW_DEFAULT_ZONE_ROOT_POOL; // used when rgw_zone_root_pool is empty
+extern std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL;
+extern std::string RGW_DEFAULT_REALM_ROOT_POOL;
+extern std::string RGW_DEFAULT_PERIOD_ROOT_POOL; // used when rgw_period_root_pool is empty
+extern std::string avail_pools; // oid stat'ed by RGWZoneParams::create() to detect the old pools config
+extern std::string default_storage_pool_suffix; // appended to "<zone name>." for the default data pool
+
+}
+
+class JSONObj;
+class RGWSyncModulesManager;
+
+struct RGWNameToId { // Encodable record holding a single object id.
+ std::string obj_id; // the only encoded field
+
+ void encode(bufferlist& bl) const { // serialize obj_id into bl
+ ENCODE_START(1, 1, bl);
+ encode(obj_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // inverse of encode()
+ DECODE_START(1, bl);
+ decode(obj_id, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const; // defined elsewhere
+ void decode_json(JSONObj *obj); // defined elsewhere
+};
+WRITE_CLASS_ENCODER(RGWNameToId)
+
+struct RGWDefaultSystemMetaObjInfo { // Encodable record holding the id of a default system meta object.
+ std::string default_id; // the only encoded field
+
+ void encode(bufferlist& bl) const { // serialize default_id into bl
+ ENCODE_START(1, 1, bl);
+ encode(default_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // inverse of encode()
+ DECODE_START(1, bl);
+ decode(default_id, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const; // defined elsewhere
+ void decode_json(JSONObj *obj); // defined elsewhere
+};
+WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
+
+class RGWSI_SysObj;
+class RGWSI_Zone;
+
+class RGWSystemMetaObj { // Abstract base for named, id-bearing system objects; subclasses supply the pool and oid naming via the pure virtuals below.
+protected:
+ std::string id; // encoded
+ std::string name; // encoded
+
+ CephContext *cct{nullptr}; // not encoded; set through reinit_instance()/init()
+ RGWSI_SysObj *sysobj_svc{nullptr}; // not encoded
+ RGWSI_Zone *zone_svc{nullptr}; // not encoded
+
+ int store_name(bool exclusive);
+ int store_info(bool exclusive);
+ int read_info(const std::string& obj_id, bool old_format = false);
+ int read_id(const std::string& obj_name, std::string& obj_id);
+ int read_default(RGWDefaultSystemMetaObjInfo& default_info,
+ const std::string& oid);
+ /* read and use default id */
+ int use_default(bool old_format = false);
+
+public:
+ RGWSystemMetaObj() {}
+ RGWSystemMetaObj(const std::string& _name): name(_name) {} // not explicit: a std::string converts implicitly
+ RGWSystemMetaObj(const std::string& _id, const std::string& _name) : id(_id), name(_name) {}
+ RGWSystemMetaObj(CephContext *_cct, RGWSI_SysObj *_sysobj_svc) {
+ reinit_instance(_cct, _sysobj_svc);
+ }
+ RGWSystemMetaObj(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): name(_name) {
+ reinit_instance(_cct, _sysobj_svc);
+ }
+
+ const std::string& get_name() const { return name; }
+ const std::string& get_id() const { return id; }
+
+ void set_name(const std::string& _name) { name = _name;}
+ void set_id(const std::string& _id) { id = _id;}
+ void clear_id() { id.clear(); }
+
+ virtual ~RGWSystemMetaObj() {}
+
+ virtual void encode(bufferlist& bl) const { // serializes id then name; subclasses may override
+ ENCODE_START(1, 1, bl);
+ encode(id, bl);
+ encode(name, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ virtual void decode(bufferlist::const_iterator& bl) { // inverse of encode()
+ DECODE_START(1, bl);
+ decode(id, bl);
+ decode(name, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc);
+ int init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj = true, bool old_format = false);
+ virtual int read_default_id(std::string& default_id, bool old_format = false);
+ virtual int set_as_default(bool exclusive = false);
+ int delete_default();
+ virtual int create(bool exclusive = true);
+ int delete_obj(bool old_format = false);
+ int rename(const std::string& new_name);
+ int update() { return store_info(false);} // non-exclusive store_info()
+ int update_name() { return store_name(false);} // non-exclusive store_name()
+ int read();
+ int write(bool exclusive);
+
+ virtual rgw_pool get_pool(CephContext *cct) const = 0; // pool holding this kind of object
+ virtual const std::string get_default_oid(bool old_format = false) const = 0;
+ virtual const std::string& get_names_oid_prefix() const = 0;
+ virtual const std::string& get_info_oid_prefix(bool old_format = false) const = 0;
+ virtual const std::string& get_predefined_name(CephContext *cct) const = 0;
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWSystemMetaObj)
+
+struct RGWZoneStorageClass { // Per-storage-class settings; both fields are optional.
+ boost::optional<rgw_pool> data_pool; // unset means no pool configured for this class
+ boost::optional<std::string> compression_type; // unset means no compression type configured
+
+ void encode(bufferlist& bl) const { // serialize both optionals, data_pool first
+ ENCODE_START(1, 1, bl);
+ encode(data_pool, bl);
+ encode(compression_type, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // inverse of encode()
+ DECODE_START(1, bl);
+ decode(data_pool, bl);
+ decode(compression_type, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const; // defined elsewhere
+ void decode_json(JSONObj *obj); // defined elsewhere
+};
+WRITE_CLASS_ENCODER(RGWZoneStorageClass)
+
+
+/*
+ * Map of storage-class name -> RGWZoneStorageClass for one placement target.
+ * An entry for RGW_STORAGE_CLASS_STANDARD always exists: it is created on
+ * construction, copy, assignment and decode, and is cached in
+ * standard_class.
+ */
+class RGWZoneStorageClasses {
+  map<string, RGWZoneStorageClass> m;
+
+  /* in memory only */
+  RGWZoneStorageClass *standard_class;
+
+public:
+  RGWZoneStorageClasses()
+    : standard_class(&m[RGW_STORAGE_CLASS_STANDARD]) {}
+
+  // the cached pointer must refer to this object's own map, never rhs's
+  RGWZoneStorageClasses(const RGWZoneStorageClasses& rhs)
+    : m(rhs.m),
+      standard_class(&m[RGW_STORAGE_CLASS_STANDARD]) {}
+
+  RGWZoneStorageClasses& operator=(const RGWZoneStorageClasses& rhs) {
+    m = rhs.m;
+    standard_class = &m[RGW_STORAGE_CLASS_STANDARD];
+    return *this;
+  }
+
+  const RGWZoneStorageClass& get_standard() const {
+    return *standard_class;
+  }
+
+  /* Look up `sc`; on success store its address in *pstorage_class. */
+  bool find(const string& sc, const RGWZoneStorageClass **pstorage_class) const {
+    const auto found = m.find(sc);
+    if (found != m.end()) {
+      *pstorage_class = &found->second;
+      return true;
+    }
+    return false;
+  }
+
+  /* An empty name counts as existing. */
+  bool exists(const string& sc) const {
+    return sc.empty() || m.find(sc) != m.end();
+  }
+
+  const map<string, RGWZoneStorageClass>& get_all() const {
+    return m;
+  }
+
+  map<string, RGWZoneStorageClass>& get_all() {
+    return m;
+  }
+
+  /*
+   * Create or update storage class `sc` (an empty name selects the standard
+   * class).  Only the settings passed as non-null pointers are overwritten.
+   */
+  void set_storage_class(const string& sc, const rgw_pool *data_pool, const string *compression_type) {
+    const string& key = sc.empty() ? RGW_STORAGE_CLASS_STANDARD : sc;
+    RGWZoneStorageClass& target = m[key];
+    if (data_pool != nullptr) {
+      target.data_pool = *data_pool;
+    }
+    if (compression_type != nullptr) {
+      target.compression_type = *compression_type;
+    }
+  }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(m, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(1, bl);
+    decode(m, bl);
+    // the map was replaced: re-create/re-cache the standard entry
+    standard_class = &m[RGW_STORAGE_CLASS_STANDARD];
+    DECODE_FINISH(bl);
+  }
+
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWZoneStorageClasses)
+
+struct RGWZonePlacementInfo { // Pools and per-storage-class settings of one placement target.
+ rgw_pool index_pool;
+ rgw_pool data_extra_pool; /* if not set we should use data_pool */
+ RGWZoneStorageClasses storage_classes;
+ RGWBucketIndexType index_type;
+
+ RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
+
+ void encode(bufferlist& bl) const { // Serialize; the STANDARD class's pool and compression are also written as separate fields ahead of storage_classes.
+ ENCODE_START(7, 1, bl);
+ encode(index_pool.to_str(), bl);
+ rgw_pool standard_data_pool = get_data_pool(RGW_STORAGE_CLASS_STANDARD);
+ encode(standard_data_pool.to_str(), bl);
+ encode(data_extra_pool.to_str(), bl);
+ encode((uint32_t)index_type, bl);
+ string standard_compression_type = get_compression_type(RGW_STORAGE_CLASS_STANDARD);
+ encode(standard_compression_type, bl);
+ encode(storage_classes, bl); // decoded only when struct_v >= 7
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // Deserialize, accepting older struct versions (fields gated on struct_v below).
+ DECODE_START(7, bl);
+ string index_pool_str;
+ string data_pool_str;
+ decode(index_pool_str, bl);
+ index_pool = rgw_pool(index_pool_str);
+ decode(data_pool_str, bl);
+ rgw_pool standard_data_pool(data_pool_str); // only used by the pre-v7 fallback below
+ if (struct_v >= 4) {
+ string data_extra_pool_str;
+ decode(data_extra_pool_str, bl);
+ data_extra_pool = rgw_pool(data_extra_pool_str);
+ }
+ if (struct_v >= 5) {
+ uint32_t it;
+ decode(it, bl);
+ index_type = (RGWBucketIndexType)it;
+ }
+ string standard_compression_type;
+ if (struct_v >= 6) {
+ decode(standard_compression_type, bl);
+ }
+ if (struct_v >= 7) {
+ decode(storage_classes, bl);
+ } else { // older encoding: build the STANDARD class from the separately decoded fields
+ storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &standard_data_pool,
+ (!standard_compression_type.empty() ? &standard_compression_type : nullptr));
+ }
+ DECODE_FINISH(bl);
+ }
+ const rgw_pool& get_data_extra_pool() const { // data_extra_pool, or the STANDARD data pool when it is empty
+ static rgw_pool no_pool; // returned by reference when no pool is configured at all
+ if (data_extra_pool.empty()) {
+ return storage_classes.get_standard().data_pool.get_value_or(no_pool);
+ }
+ return data_extra_pool;
+ }
+ const rgw_pool& get_data_pool(const string& sc) const { // data pool of class sc; an unknown class falls back to STANDARD
+ const RGWZoneStorageClass *storage_class;
+ static rgw_pool no_pool; // returned by reference when the chosen class has no data pool
+
+ if (!storage_classes.find(sc, &storage_class)) {
+ return storage_classes.get_standard().data_pool.get_value_or(no_pool);
+ }
+
+ return storage_class->data_pool.get_value_or(no_pool);
+ }
+ const rgw_pool& get_standard_data_pool() const {
+ return get_data_pool(RGW_STORAGE_CLASS_STANDARD);
+ }
+
+ const string& get_compression_type(const string& sc) const { // compression type of class sc; empty string when the class is unknown or has none
+ const RGWZoneStorageClass *storage_class;
+ static string no_compression;
+
+ if (!storage_classes.find(sc, &storage_class)) { // unlike get_data_pool(), no fallback to STANDARD here
+ return no_compression;
+ }
+ return storage_class->compression_type.get_value_or(no_compression);
+ }
+
+ bool storage_class_exists(const string& sc) const {
+ return storage_classes.exists(sc);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+
+};
+WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
+
+struct RGWZoneParams : RGWSystemMetaObj { // per-zone settings: pools, system key, placement pools, tier config
+ rgw_pool domain_root;
+ rgw_pool metadata_heap;
+ rgw_pool control_pool;
+ rgw_pool gc_pool;
+ rgw_pool lc_pool;
+ rgw_pool log_pool;
+ rgw_pool intent_log_pool;
+ rgw_pool usage_log_pool;
+
+ rgw_pool user_keys_pool;
+ rgw_pool user_email_pool;
+ rgw_pool user_swift_pool;
+ rgw_pool user_uid_pool;
+ rgw_pool roles_pool;
+ rgw_pool reshard_pool;
+ rgw_pool otp_pool;
+
+ RGWAccessKey system_key;
+
+ map<std::string, RGWZonePlacementInfo> placement_pools; // looked up by placement id / placement rule name below
+
+ std::string realm_id;
+
+ JSONFormattable tier_config;
+
+ RGWZoneParams() : RGWSystemMetaObj() {}
+ explicit RGWZoneParams(const std::string& name) : RGWSystemMetaObj(name){}
+ RGWZoneParams(const std::string& id, const std::string& name) : RGWSystemMetaObj(id, name) {}
+ RGWZoneParams(const std::string& id, const std::string& name, const std::string& _realm_id)
+ : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
+
+ rgw_pool get_pool(CephContext *cct) const override;
+ const std::string get_default_oid(bool old_format = false) const override;
+ const std::string& get_names_oid_prefix() const override;
+ const std::string& get_info_oid_prefix(bool old_format = false) const override;
+ const std::string& get_predefined_name(CephContext *cct) const override;
+
+ int init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj = true,
+ bool old_format = false);
+ using RGWSystemMetaObj::init; // keep the base-class init overloads visible next to the one declared above
+ int read_default_id(std::string& default_id, bool old_format = false) override;
+ int set_as_default(bool exclusive = false) override;
+ int create_default(bool old_format = false);
+ int create(bool exclusive = true) override;
+ int fix_pool_names();
+
+ const string& get_compression_type(const rgw_placement_rule& placement_rule) const;
+
+ void encode(bufferlist& bl) const override { // writes struct version 12 (compat 1); field order is the wire format
+ ENCODE_START(12, 1, bl);
+ encode(domain_root, bl);
+ encode(control_pool, bl);
+ encode(gc_pool, bl);
+ encode(log_pool, bl);
+ encode(intent_log_pool, bl);
+ encode(usage_log_pool, bl);
+ encode(user_keys_pool, bl);
+ encode(user_email_pool, bl);
+ encode(user_swift_pool, bl);
+ encode(user_uid_pool, bl);
+ RGWSystemMetaObj::encode(bl);
+ encode(system_key, bl);
+ encode(placement_pools, bl);
+ encode(metadata_heap, bl);
+ encode(realm_id, bl);
+ encode(lc_pool, bl);
+ map<std::string, std::string, ltstr_nocase> old_tier_config; // legacy slot read by decode() for v8+: always written empty
+ encode(old_tier_config, bl);
+ encode(roles_pool, bl);
+ encode(reshard_pool, bl);
+ encode(otp_pool, bl);
+ encode(tier_config, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) override { // reads up to version 12; fields absent in older versions get the fallbacks below
+ DECODE_START(12, bl);
+ decode(domain_root, bl);
+ decode(control_pool, bl);
+ decode(gc_pool, bl);
+ decode(log_pool, bl);
+ decode(intent_log_pool, bl);
+ decode(usage_log_pool, bl);
+ decode(user_keys_pool, bl);
+ decode(user_email_pool, bl);
+ decode(user_swift_pool, bl);
+ decode(user_uid_pool, bl);
+ if (struct_v >= 6) {
+ RGWSystemMetaObj::decode(bl);
+ } else if (struct_v >= 2) { // v2..v5 carried only a name; the id is set to the same value
+ decode(name, bl);
+ id = name;
+ }
+ if (struct_v >= 3)
+ decode(system_key, bl);
+ if (struct_v >= 4)
+ decode(placement_pools, bl);
+ if (struct_v >= 5)
+ decode(metadata_heap, bl);
+ if (struct_v >= 6) {
+ decode(realm_id, bl);
+ }
+ if (struct_v >= 7) {
+ decode(lc_pool, bl);
+ } else { // before v7: derive the lc pool from the log pool name
+ lc_pool = log_pool.name + ":lc";
+ }
+ map<std::string, std::string, ltstr_nocase> old_tier_config; // only used to fill tier_config when struct_v < 12
+ if (struct_v >= 8) {
+ decode(old_tier_config, bl);
+ }
+ if (struct_v >= 9) {
+ decode(roles_pool, bl);
+ } else { // before v9: derive the roles pool from the zone name
+ roles_pool = name + ".rgw.meta:roles";
+ }
+ if (struct_v >= 10) {
+ decode(reshard_pool, bl);
+ } else { // before v10: derive the reshard pool from the log pool name
+ reshard_pool = log_pool.name + ":reshard";
+ }
+ if (struct_v >= 11) {
+ ::decode(otp_pool, bl);
+ } else { // before v11: derive the otp pool from the zone name
+ otp_pool = name + ".rgw.otp";
+ }
+ if (struct_v >= 12) {
+ ::decode(tier_config, bl);
+ } else { // before v12: copy the legacy key/value entries into tier_config
+ for (auto& kv : old_tier_config) {
+ tier_config.set(kv.first, kv.second);
+ }
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<RGWZoneParams*>& o);
+
+ bool get_placement(const std::string& placement_id, RGWZonePlacementInfo *placement) const { // copies the entry into *placement; false if placement_id is unknown
+ auto iter = placement_pools.find(placement_id);
+ if (iter == placement_pools.end()) {
+ return false;
+ }
+ *placement = iter->second;
+ return true;
+ }
+
+ /*
+ * return data pool of the head object
+ */
+ bool get_head_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool) const {
+ const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
+ if (!explicit_placement.data_pool.empty()) { // the bucket's explicit placement wins over the placement rule
+ if (!obj.in_extra_data) {
+ *pool = explicit_placement.data_pool;
+ } else {
+ *pool = explicit_placement.get_data_extra_pool();
+ }
+ return true;
+ }
+ if (placement_rule.empty()) { // no explicit placement and no rule: *pool is left untouched
+ return false;
+ }
+ auto iter = placement_pools.find(placement_rule.name);
+ if (iter == placement_pools.end()) {
+ return false;
+ }
+ if (!obj.in_extra_data) {
+ *pool = iter->second.get_data_pool(placement_rule.storage_class);
+ } else {
+ *pool = iter->second.get_data_extra_pool();
+ }
+ return true;
+ }
+
+ bool valid_placement(const rgw_placement_rule& rule) const { // true if rule.name is a known placement and it has rule.storage_class
+ auto iter = placement_pools.find(rule.name);
+ if (iter == placement_pools.end()) {
+ return false;
+ }
+ return iter->second.storage_class_exists(rule.storage_class);
+ }
+};
+WRITE_CLASS_ENCODER(RGWZoneParams)
+
+struct RGWZone { // description of one zone: identity, endpoints, logging and sync settings
+ std::string id;
+ std::string name;
+ list<std::string> endpoints;
+ bool log_meta;
+ bool log_data;
+ bool read_only;
+ std::string tier_type;
+
+ std::string redirect_zone;
+
+/**
+ * Represents the number of shards for the bucket index object, a value of zero
+ * indicates there is no sharding. By default (no sharding), the name of the object
+ * is '.dir.{marker}'; with sharding, the name is '.dir.{marker}.{sharding_id}', where
+ * sharding_id is a zero-based value. It is not recommended to set too large a value
+ * (e.g. a thousand) as it increases the cost of bucket listing.
+ */
+ uint32_t bucket_index_max_shards;
+
+ bool sync_from_all;
+ set<std::string> sync_from; /* list of zones to sync from */
+
+ RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
+ sync_from_all(true) {}
+
+ void encode(bufferlist& bl) const { // writes struct version 7 (compat 1); field order is the wire format
+ ENCODE_START(7, 1, bl);
+ encode(name, bl);
+ encode(endpoints, bl);
+ encode(log_meta, bl);
+ encode(log_data, bl);
+ encode(bucket_index_max_shards, bl);
+ encode(id, bl);
+ encode(read_only, bl);
+ encode(tier_type, bl);
+ encode(sync_from_all, bl);
+ encode(sync_from, bl);
+ encode(redirect_zone, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // fields missing from older versions keep their current values
+ DECODE_START(7, bl);
+ decode(name, bl);
+ if (struct_v < 4) { // before v4 there was no separate id: use the name
+ id = name;
+ }
+ decode(endpoints, bl);
+ if (struct_v >= 2) {
+ decode(log_meta, bl);
+ decode(log_data, bl);
+ }
+ if (struct_v >= 3) {
+ decode(bucket_index_max_shards, bl);
+ }
+ if (struct_v >= 4) {
+ decode(id, bl);
+ decode(read_only, bl);
+ }
+ if (struct_v >= 5) {
+ decode(tier_type, bl);
+ }
+ if (struct_v >= 6) {
+ decode(sync_from_all, bl);
+ decode(sync_from, bl);
+ }
+ if (struct_v >= 7) {
+ decode(redirect_zone, bl);
+ }
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<RGWZone*>& o);
+
+ bool is_read_only() const { return read_only; }
+
+ bool syncs_from(const std::string& zone_name) const { // true when sync_from_all is set or zone_name is listed in sync_from
+ return (sync_from_all || sync_from.find(zone_name) != sync_from.end());
+ }
+};
+WRITE_CLASS_ENCODER(RGWZone)
+
+struct RGWDefaultZoneGroupInfo { // encodable wrapper around a single default_zonegroup string
+ std::string default_zonegroup;
+
+ void encode(bufferlist& bl) const { // struct version 1, compat 1
+ ENCODE_START(1, 1, bl);
+ encode(default_zonegroup, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // mirror of encode()
+ DECODE_START(1, bl);
+ decode(default_zonegroup, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ //todo: implement ceph-dencoder
+};
+WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
+
+struct RGWZoneGroupPlacementTarget { // a named placement target with optional access tags and its storage classes
+  std::string name;
+  set<std::string> tags;
+  set<std::string> storage_classes;
+
+  bool user_permitted(const list<std::string>& user_tags) const {
+    // a target without tags is open to everyone; otherwise one shared tag is enough
+    bool allowed = tags.empty();
+    auto it = user_tags.begin();
+    while (!allowed && it != user_tags.end()) {
+      allowed = (tags.count(*it) > 0);
+      ++it;
+    }
+
+    return allowed;
+  }
+
+  void encode(bufferlist& bl) const { // struct version 2, compat 1
+    ENCODE_START(2, 1, bl);
+    encode(name, bl);
+    encode(tags, bl);
+    encode(storage_classes, bl);
+    ENCODE_FINISH(bl);
+  }
+
+  void decode(bufferlist::const_iterator& bl) {
+    DECODE_START(2, bl);
+    decode(name, bl);
+    decode(tags, bl);
+    if (struct_v >= 2) { // storage classes were added in version 2
+      decode(storage_classes, bl);
+    }
+    if (storage_classes.empty()) { // nothing decoded: make sure the standard class is present
+      storage_classes.insert(RGW_STORAGE_CLASS_STANDARD);
+    }
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const;
+  void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
+
+struct RGWZoneGroup : public RGWSystemMetaObj { // a zonegroup: its zones, placement targets, endpoints and hostnames
+ std::string api_name;
+ list<std::string> endpoints;
+ bool is_master = false;
+
+ std::string master_zone;
+ map<std::string, RGWZone> zones;
+
+ map<std::string, RGWZoneGroupPlacementTarget> placement_targets;
+ rgw_placement_rule default_placement;
+
+ list<std::string> hostnames;
+ list<std::string> hostnames_s3website;
+ // TODO: Maybe convert hostnames to a map<std::string,list<std::string>> for
+ // endpoint_type->hostnames
+/*
+20:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
+20:05 < _robbat21irssi> but that's a later compatability migration planning bit
+20:06 < yehudasa> more like if (!hostnames.empty()) {
+20:06 < yehudasa> for (list<std::string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
+20:06 < yehudasa> hostname_map["s3"].append(iter->second);
+20:07 < yehudasa> hostname_map["s3website"].append(iter->second);
+20:07 < yehudasa> s/append/push_back/g
+20:08 < _robbat21irssi> inner loop over APIs
+20:08 < yehudasa> yeah, probably
+20:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
+*/
+ map<std::string, list<std::string> > api_hostname_map; // NOTE(review): not written by encode() below
+ map<std::string, list<std::string> > api_endpoints_map; // NOTE(review): not written by encode() below
+
+ std::string realm_id;
+
+ RGWZoneGroup(): is_master(false){}
+ RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
+ explicit RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
+ RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWSI_SysObj* sysobj_svc,
+ const std::string& _realm_id, const list<std::string>& _endpoints)
+ : RGWSystemMetaObj(_name, cct , sysobj_svc), endpoints(_endpoints), is_master(_is_master),
+ realm_id(_realm_id) {}
+
+ bool is_master_zonegroup() const { return is_master;}
+ void update_master(bool _is_master) { // sets the flag, then runs post_process_params()
+ is_master = _is_master;
+ post_process_params();
+ }
+ void post_process_params();
+
+ void encode(bufferlist& bl) const override { // writes struct version 4 (compat 1); field order is the wire format
+ ENCODE_START(4, 1, bl);
+ encode(name, bl);
+ encode(api_name, bl);
+ encode(is_master, bl);
+ encode(endpoints, bl);
+ encode(master_zone, bl);
+ encode(zones, bl);
+ encode(placement_targets, bl);
+ encode(default_placement, bl);
+ encode(hostnames, bl);
+ encode(hostnames_s3website, bl);
+ RGWSystemMetaObj::encode(bl);
+ encode(realm_id, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) override { // reads up to version 4
+ DECODE_START(4, bl);
+ decode(name, bl);
+ decode(api_name, bl);
+ decode(is_master, bl);
+ decode(endpoints, bl);
+ decode(master_zone, bl);
+ decode(zones, bl);
+ decode(placement_targets, bl);
+ decode(default_placement, bl);
+ if (struct_v >= 2) {
+ decode(hostnames, bl);
+ }
+ if (struct_v >= 3) {
+ decode(hostnames_s3website, bl);
+ }
+ if (struct_v >= 4) {
+ RGWSystemMetaObj::decode(bl);
+ decode(realm_id, bl);
+ } else { // before v4 there was no base-object payload: use the name as id
+ id = name;
+ }
+ DECODE_FINISH(bl);
+ }
+
+ int read_default_id(std::string& default_id, bool old_format = false) override;
+ int set_as_default(bool exclusive = false) override;
+ int create_default(bool old_format = false);
+ int equals(const std::string& other_zonegroup) const;
+ int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
+ const list<std::string>& endpoints, const std::string *ptier_type,
+ bool *psync_from_all, list<std::string>& sync_from, list<std::string>& sync_from_rm,
+ std::string *predirect_zone, RGWSyncModulesManager *sync_mgr);
+ int remove_zone(const std::string& zone_id);
+ int rename_zone(const RGWZoneParams& zone_params);
+ rgw_pool get_pool(CephContext *cct) const override;
+ const std::string get_default_oid(bool old_region_format = false) const override;
+ const std::string& get_info_oid_prefix(bool old_region_format = false) const override;
+ const std::string& get_names_oid_prefix() const override;
+ const std::string& get_predefined_name(CephContext *cct) const override;
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<RGWZoneGroup*>& o);
+};
+WRITE_CLASS_ENCODER(RGWZoneGroup)
+
+struct RGWPeriodMap // the zonegroups belonging to one period, plus per-zone short ids
+{
+ std::string id;
+ map<std::string, RGWZoneGroup> zonegroups;
+ map<std::string, RGWZoneGroup> zonegroups_by_api;
+ map<std::string, uint32_t> short_zone_ids;
+
+ std::string master_zonegroup;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+
+ int update(const RGWZoneGroup& zonegroup, CephContext *cct);
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+
+ void reset() { // clears the zonegroup maps and master_zonegroup; id and short_zone_ids are left as they are
+ zonegroups.clear();
+ zonegroups_by_api.clear();
+ master_zonegroup.clear();
+ }
+
+ uint32_t get_zone_short_id(const std::string& zone_id) const;
+};
+WRITE_CLASS_ENCODER(RGWPeriodMap)
+
+struct RGWPeriodConfig // period-level configuration: the bucket and user quota settings
+{
+ RGWQuotaInfo bucket_quota;
+ RGWQuotaInfo user_quota;
+
+ void encode(bufferlist& bl) const { // struct version 1, compat 1
+ ENCODE_START(1, 1, bl);
+ encode(bucket_quota, bl);
+ encode(user_quota, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // mirror of encode()
+ DECODE_START(1, bl);
+ decode(bucket_quota, bl);
+ decode(user_quota, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+
+ // the period config must be stored in a local object outside of the period,
+ // so that it can be used in a default configuration where no realm/period
+ // exists
+ int read(RGWSI_SysObj *sysobj_svc, const std::string& realm_id);
+ int write(RGWSI_SysObj *sysobj_svc, const std::string& realm_id);
+
+ static std::string get_oid(const std::string& realm_id);
+ static rgw_pool get_pool(CephContext *cct);
+};
+WRITE_CLASS_ENCODER(RGWPeriodConfig)
+
+/* for backward compatibility */
+struct RGWRegionMap { // region-named counterpart of RGWZoneGroupMap: regions, master region and quotas
+
+ map<std::string, RGWZoneGroup> regions;
+
+ std::string master_region;
+
+ RGWQuotaInfo bucket_quota;
+ RGWQuotaInfo user_quota;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWRegionMap)
+
+struct RGWZoneGroupMap { // zonegroups (two maps, the second named by api), the master zonegroup and quotas
+
+ map<std::string, RGWZoneGroup> zonegroups;
+ map<std::string, RGWZoneGroup> zonegroups_by_api;
+
+ std::string master_zonegroup;
+
+ RGWQuotaInfo bucket_quota;
+ RGWQuotaInfo user_quota;
+
+ /* construct the map */
+ int read(CephContext *cct, RGWSI_SysObj *sysobj_svc);
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWZoneGroupMap)
+
+class RGWRealm;
+class RGWPeriod;
+
+class RGWRealm : public RGWSystemMetaObj // a realm: tracks its current period id and the realm epoch
+{
+ std::string current_period;
+ epoch_t epoch{0}; //< realm epoch, incremented for each new period
+
+ int create_control(bool exclusive);
+ int delete_control();
+public:
+ RGWRealm() {}
+ RGWRealm(const std::string& _id, const std::string& _name = "") : RGWSystemMetaObj(_id, _name) {}
+ RGWRealm(CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_cct, _sysobj_svc) {}
+ RGWRealm(const std::string& _name, CephContext *_cct, RGWSI_SysObj *_sysobj_svc): RGWSystemMetaObj(_name, _cct, _sysobj_svc){}
+
+ void encode(bufferlist& bl) const override { // struct version 1: base object, then current_period and epoch
+ ENCODE_START(1, 1, bl);
+ RGWSystemMetaObj::encode(bl);
+ encode(current_period, bl);
+ encode(epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) override { // mirror of encode()
+ DECODE_START(1, bl);
+ RGWSystemMetaObj::decode(bl);
+ decode(current_period, bl);
+ decode(epoch, bl);
+ DECODE_FINISH(bl);
+ }
+
+ int create(bool exclusive = true) override;
+ int delete_obj();
+ rgw_pool get_pool(CephContext *cct) const override;
+ const std::string get_default_oid(bool old_format = false) const override;
+ const std::string& get_names_oid_prefix() const override;
+ const std::string& get_info_oid_prefix(bool old_format = false) const override;
+ const std::string& get_predefined_name(CephContext *cct) const override;
+
+ using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<RGWRealm*>& o);
+
+ const std::string& get_current_period() const {
+ return current_period;
+ }
+ int set_current_period(RGWPeriod& period);
+ void clear_current_period_and_epoch() { // resets both fields to their initial (empty / 0) values
+ current_period.clear();
+ epoch = 0;
+ }
+ epoch_t get_epoch() const { return epoch; }
+
+ std::string get_control_oid() const;
+ /// send a notify on the realm control object
+ int notify_zone(bufferlist& bl);
+ /// notify the zone of a new period
+ int notify_new_period(const RGWPeriod& period);
+};
+WRITE_CLASS_ENCODER(RGWRealm)
+
+struct RGWPeriodLatestEpochInfo { // encodable wrapper around a single epoch value
+ epoch_t epoch{0}; // zero-initialized so encode()/dump() never read an indeterminate value
+
+ void encode(bufferlist& bl) const { // struct version 1, compat 1
+ ENCODE_START(1, 1, bl);
+ encode(epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // mirror of encode()
+ DECODE_START(1, bl);
+ decode(epoch, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+};
+WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
+
+class RGWPeriod // one period: its id/epoch, period map, period config and owning realm
+{
+ std::string id;
+ epoch_t epoch{0};
+ std::string predecessor_uuid;
+ std::vector<std::string> sync_status;
+ RGWPeriodMap period_map;
+ RGWPeriodConfig period_config;
+ std::string master_zonegroup;
+ std::string master_zone;
+
+ std::string realm_id;
+ std::string realm_name;
+ epoch_t realm_epoch{1}; //< realm epoch when period was made current
+
+ CephContext *cct{nullptr};
+ RGWSI_SysObj *sysobj_svc{nullptr};
+
+ int read_info();
+ int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info,
+ RGWObjVersionTracker *objv = nullptr);
+ int use_latest_epoch();
+ int use_current_period();
+
+ const std::string get_period_oid() const;
+ const std::string get_period_oid_prefix() const;
+
+ // gather the metadata sync status for each shard; only for use on master zone
+ int update_sync_status(RGWRados *store,
+ const RGWPeriod &current_period,
+ std::ostream& error_stream, bool force_if_stale);
+
+public:
+ RGWPeriod() {}
+
+ RGWPeriod(const std::string& period_id, epoch_t _epoch = 0)
+ : id(period_id), epoch(_epoch) {}
+
+ const std::string& get_id() const { return id; }
+ epoch_t get_epoch() const { return epoch; }
+ epoch_t get_realm_epoch() const { return realm_epoch; }
+ const std::string& get_predecessor() const { return predecessor_uuid; }
+ const std::string& get_master_zone() const { return master_zone; }
+ const std::string& get_master_zonegroup() const { return master_zonegroup; }
+ const std::string& get_realm() const { return realm_id; }
+ const RGWPeriodMap& get_map() const { return period_map; }
+ RGWPeriodConfig& get_config() { return period_config; }
+ const RGWPeriodConfig& get_config() const { return period_config; }
+ const std::vector<std::string>& get_sync_status() const { return sync_status; }
+ rgw_pool get_pool(CephContext *cct) const;
+ const std::string& get_latest_epoch_oid() const;
+ const std::string& get_info_oid_prefix() const;
+
+ void set_user_quota(const RGWQuotaInfo& user_quota) { // the argument is only copied, so it is taken by const reference
+ period_config.user_quota = user_quota;
+ }
+
+ void set_bucket_quota(const RGWQuotaInfo& bucket_quota) { // the argument is only copied, so it is taken by const reference
+ period_config.bucket_quota = bucket_quota;
+ }
+
+ void set_id(const std::string& id) { // keeps period_map.id in step with the period id
+ this->id = id;
+ period_map.id = id;
+ }
+ void set_epoch(epoch_t epoch) { this->epoch = epoch; }
+ void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
+
+ void set_predecessor(const std::string& predecessor)
+ {
+ predecessor_uuid = predecessor;
+ }
+
+ void set_realm_id(const std::string& _realm_id) {
+ realm_id = _realm_id;
+ }
+
+ int reflect();
+
+ int get_zonegroup(RGWZoneGroup& zonegroup,
+ const std::string& zonegroup_id) const;
+
+ bool is_single_zonegroup() const // true for zero or one zonegroup in the period map
+ {
+ return (period_map.zonegroups.size() <= 1);
+ }
+
+ /*
+ returns true if there are several zone groups with at least one zone
+ */
+ bool is_multi_zonegroups_with_zones() const
+ {
+ int count = 0;
+ for (const auto& zg: period_map.zonegroups) {
+ if (zg.second.zones.size() > 0) {
+ if (count++ > 0) { // a second zonegroup that has zones was found
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ int get_latest_epoch(epoch_t& epoch);
+ int set_latest_epoch(epoch_t epoch, bool exclusive = false,
+ RGWObjVersionTracker *objv = nullptr);
+ // update latest_epoch if the given epoch is higher, else return -EEXIST
+ int update_latest_epoch(epoch_t epoch);
+
+ int init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const std::string &period_realm_id, const std::string &period_realm_name = "",
+ bool setup_obj = true);
+ int init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj = true);
+
+ int create(bool exclusive = true);
+ int delete_obj();
+ int store_info(bool exclusive);
+ int add_zonegroup(const RGWZoneGroup& zonegroup);
+
+ void fork();
+ int update();
+
+ // commit a staging period; only for use on master zone
+ int commit(RGWRados *store,
+ RGWRealm& realm, const RGWPeriod &current_period,
+ std::ostream& error_stream, bool force_if_stale = false);
+
+ void encode(bufferlist& bl) const { // struct version 1 (compat 1); field order is the wire format
+ ENCODE_START(1, 1, bl);
+ encode(id, bl);
+ encode(epoch, bl);
+ encode(realm_epoch, bl);
+ encode(predecessor_uuid, bl);
+ encode(sync_status, bl);
+ encode(period_map, bl);
+ encode(master_zone, bl);
+ encode(master_zonegroup, bl);
+ encode(period_config, bl);
+ encode(realm_id, bl);
+ encode(realm_name, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) { // mirror of encode(); must read fields in the same order
+ DECODE_START(1, bl);
+ decode(id, bl);
+ decode(epoch, bl);
+ decode(realm_epoch, bl);
+ decode(predecessor_uuid, bl);
+ decode(sync_status, bl);
+ decode(period_map, bl);
+ decode(master_zone, bl);
+ decode(master_zonegroup, bl);
+ decode(period_config, bl);
+ decode(realm_id, bl);
+ decode(realm_name, bl);
+ DECODE_FINISH(bl);
+ }
+ void dump(Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(list<RGWPeriod*>& o);
+
+ static std::string get_staging_id(const std::string& realm_id) { // "<realm_id>:staging"
+ return realm_id + ":staging";
+ }
+};
+WRITE_CLASS_ENCODER(RGWPeriod)
+
+#endif
diff --git a/src/rgw/services/svc_finisher.cc b/src/rgw/services/svc_finisher.cc
new file mode 100644
index 00000000..d239ff3c
--- /dev/null
+++ b/src/rgw/services/svc_finisher.cc
@@ -0,0 +1,53 @@
+#include "common/Finisher.h"
+
+#include "svc_finisher.h"
+
+int RGWSI_Finisher::do_start()
+{
+  Finisher *fresh = new Finisher(cct); // released again in shutdown()
+  finisher = fresh;
+  fresh->start();
+  return 0;
+}
+
+void RGWSI_Finisher::shutdown()
+{
+  if (finalized) { // only the first call does any work
+    return;
+  }
+
+  if (finisher) {
+    finisher->stop();
+    // the finisher is stopped before the registered shutdown callbacks run
+    map<int, ShutdownCB *> cbs;
+    cbs.swap(shutdown_cbs); /* move cbs out, in case caller unregisters */
+    for (auto& iter : cbs) {
+      iter.second->call();
+    }
+    delete finisher;
+    finisher = nullptr; // do not keep a pointer to the freed finisher
+  }
+  finalized = true;
+}
+
+RGWSI_Finisher::~RGWSI_Finisher()
+{
+ shutdown(); // returns immediately if shutdown() already ran (finalized is set)
+}
+
+void RGWSI_Finisher::register_caller(ShutdownCB *cb, int *phandle)
+{
+ *phandle = ++handles_counter; // hand back the next handle value; pass it to unregister_caller() later
+ shutdown_cbs[*phandle] = cb; // cb is invoked from shutdown() unless it is unregistered first
+}
+
+void RGWSI_Finisher::unregister_caller(int handle)
+{
+ shutdown_cbs.erase(handle); // no effect if the handle is not registered
+}
+
+void RGWSI_Finisher::schedule_context(Context *c)
+{
+ finisher->queue(c); // NOTE(review): finisher is not null-checked; assumes do_start() ran and shutdown() has not
+}
+
diff --git a/src/rgw/services/svc_finisher.h b/src/rgw/services/svc_finisher.h
new file mode 100644
index 00000000..116fd8fd
--- /dev/null
+++ b/src/rgw/services/svc_finisher.h
@@ -0,0 +1,45 @@
+#ifndef CEPH_RGW_SERVICES_FINISHER_H
+#define CEPH_RGW_SERVICES_FINISHER_H
+
+
+#include "rgw/rgw_service.h"
+
+class Context;
+class Finisher;
+
+class RGWSI_Finisher : public RGWServiceInstance // service wrapping a Finisher plus a registry of shutdown callbacks
+{
+ friend struct RGWServices_Def;
+public:
+ class ShutdownCB;
+
+private:
+ Finisher *finisher{nullptr}; // created in do_start(), deleted in shutdown()
+ bool finalized{false}; // set once shutdown() has completed
+
+ void shutdown() override;
+
+ std::map<int, ShutdownCB *> shutdown_cbs; // handle -> callback, see register_caller()
+ std::atomic<int> handles_counter{0}; // source of handle values; pre-incremented, so the first handle is 1
+
+protected:
+ void init() {}
+ int do_start() override;
+
+public:
+ RGWSI_Finisher(CephContext *cct): RGWServiceInstance(cct) {}
+ ~RGWSI_Finisher();
+
+ class ShutdownCB { // interface invoked once from RGWSI_Finisher::shutdown()
+ public:
+ virtual ~ShutdownCB() {}
+ virtual void call() = 0;
+ };
+
+ void register_caller(ShutdownCB *cb, int *phandle);
+ void unregister_caller(int handle);
+
+ void schedule_context(Context *c);
+};
+
+#endif
diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc
new file mode 100644
index 00000000..9ee7f295
--- /dev/null
+++ b/src/rgw/services/svc_notify.cc
@@ -0,0 +1,484 @@
+#include "include/random.h"
+#include "common/errno.h"
+
+#include "svc_notify.h"
+#include "svc_finisher.h"
+#include "svc_zone.h"
+#include "svc_rados.h"
+
+#include "rgw/rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+static string notify_oid_prefix = "notify";
+
+class RGWWatcher : public librados::WatchCtx2 { // watch context for the notify object in slot `index` of svc
+  CephContext *cct;
+  RGWSI_Notify *svc;
+  int index;
+  RGWSI_RADOS::Obj obj;
+  uint64_t watch_handle;
+  int register_ret{0}; // result of the last aio_watch() submission
+  librados::AioCompletion *register_completion{nullptr}; // in-flight async registration, or nullptr
+
+  class C_ReinitWatch : public Context { // calls watcher->reinit() when the context is finished
+    RGWWatcher *watcher;
+  public:
+    explicit C_ReinitWatch(RGWWatcher *_watcher) : watcher(_watcher) {}
+    void finish(int r) override {
+      watcher->reinit();
+    }
+  };
+public:
+  RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, RGWSI_RADOS::Obj& o) : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {}
+  void handle_notify(uint64_t notify_id,
+                     uint64_t cookie,
+                     uint64_t notifier_id,
+                     bufferlist& bl) override {
+    ldout(cct, 10) << "RGWWatcher::handle_notify() "
+                   << " notify_id " << notify_id
+                   << " cookie " << cookie
+                   << " notifier " << notifier_id
+                   << " bl.length()=" << bl.length() << dendl;
+    // fault injection: with the configured probability, drop the notification without acking it
+    if (unlikely(svc->inject_notify_timeout_probability == 1) ||
+        (svc->inject_notify_timeout_probability > 0 &&
+         (svc->inject_notify_timeout_probability >
+          ceph::util::generate_random_number(0.0, 1.0)))) {
+      ldout(cct, 0)
+        << "RGWWatcher::handle_notify() dropping notification! "
+        << "If this isn't what you want, set "
+        << "rgw_inject_notify_timeout_probability to zero!" << dendl;
+      return;
+    }
+
+    svc->watch_cb(notify_id, cookie, notifier_id, bl);
+
+    bufferlist reply_bl; // empty reply payload
+    obj.notify_ack(notify_id, cookie, reply_bl);
+  }
+  void handle_error(uint64_t cookie, int err) override { // watch broke: deregister this slot and schedule reinit()
+    lderr(cct) << "RGWWatcher::handle_error cookie " << cookie
+               << " err " << cpp_strerror(err) << dendl;
+    svc->remove_watcher(index);
+    svc->schedule_context(new C_ReinitWatch(this));
+  }
+
+  void reinit() { // unregister, then register again; gives up (after logging) on the first failure
+    int ret = unregister_watch();
+    if (ret < 0) {
+      ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl;
+      return;
+    }
+    ret = register_watch();
+    if (ret < 0) {
+      ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl;
+      return;
+    }
+  }
+
+  int unregister_watch() { // returns the unwatch error, or 0 after removing this slot from svc
+    int r = svc->unwatch(obj, watch_handle);
+    if (r < 0) {
+      return r;
+    }
+    svc->remove_watcher(index);
+    return 0;
+  }
+
+  int register_watch_async() { // submits the watch; collect the result with register_watch_finish()
+    if (register_completion) {
+      register_completion->release(); // drop a completion left over from an earlier attempt
+    }
+    register_completion = librados::Rados::aio_create_completion(nullptr, nullptr, nullptr);
+    register_ret = obj.aio_watch(register_completion, &watch_handle, this);
+    if (register_ret < 0) {
+      register_completion->release();
+      register_completion = nullptr; // a released completion must not be released again by a later call
+      return register_ret;
+    }
+    return 0;
+  }
+
+  int register_watch_finish() { // waits for the async watch and, on success, adds this slot to svc
+    if (register_ret < 0) {
+      return register_ret;
+    }
+    if (!register_completion) {
+      return -EINVAL;
+    }
+    register_completion->wait_for_safe();
+    int r = register_completion->get_return_value();
+    register_completion->release();
+    register_completion = nullptr;
+    if (r < 0) {
+      return r;
+    }
+    svc->add_watcher(index);
+    return 0;
+  }
+
+  int register_watch() { // synchronous variant: watch, then add this slot to svc
+    int r = obj.watch(&watch_handle, this);
+    if (r < 0) {
+      return r;
+    }
+    svc->add_watcher(index);
+    return 0;
+  }
+};
+
+
+class RGWSI_Notify_ShutdownCB : public RGWSI_Finisher::ShutdownCB // shutdown callback that forwards to RGWSI_Notify::shutdown()
+{
+  RGWSI_Notify *svc; // service to shut down; this class never deletes it
+public:
+  explicit RGWSI_Notify_ShutdownCB(RGWSI_Notify *_svc) : svc(_svc) {}
+  void call() override {
+    svc->shutdown();
+  }
+};
+
+string RGWSI_Notify::get_control_oid(int i)
+{
+  // "<notify_oid_prefix>.<i>", built without the non-standard variable-length array
+  string oid = notify_oid_prefix + "." + std::to_string(i);
+
+  return oid;
+}
+
+// do not call pick_control_obj before init_watch: num_watchers is the divisor below
+RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key)
+{
+  const uint32_t key_hash = ceph_str_hash_linux(key.c_str(), key.size());
+  const int slot = key_hash % num_watchers; // the same key always maps to the same control object
+
+  return notify_objs[slot];
+}
+
+int RGWSI_Notify::init_watch()
+{
+  num_watchers = cct->_conf->rgw_num_control_oids;
+
+  bool compat_oid = (num_watchers == 0); // 0 selects the single, unsuffixed notify object
+
+  if (num_watchers <= 0)
+    num_watchers = 1;
+  // value-initialize: slots stay nullptr if we return early, so finalize_watch() can skip them
+  watchers = new RGWWatcher *[num_watchers]();
+
+  int error = 0;
+
+  notify_objs.resize(num_watchers);
+
+  for (int i=0; i < num_watchers; i++) {
+    string notify_oid;
+
+    if (!compat_oid) {
+      notify_oid = get_control_oid(i);
+    } else {
+      notify_oid = notify_oid_prefix;
+    }
+
+    notify_objs[i] = rados_svc->handle().obj({control_pool, notify_oid});
+    auto& notify_obj = notify_objs[i];
+
+    int r = notify_obj.open();
+    if (r < 0) {
+      ldout(cct, 0) << "ERROR: notify_obj.open() returned r=" << r << dendl;
+      return r;
+    }
+    // make sure the notify object exists; -EEXIST is tolerated
+    librados::ObjectWriteOperation op;
+    op.create(false);
+    r = notify_obj.operate(&op, null_yield);
+    if (r < 0 && r != -EEXIST) {
+      ldout(cct, 0) << "ERROR: notify_obj.operate() returned r=" << r << dendl;
+      return r;
+    }
+
+    RGWWatcher *watcher = new RGWWatcher(cct, this, i, notify_obj);
+    watchers[i] = watcher;
+
+    r = watcher->register_watch_async();
+    if (r < 0) {
+      ldout(cct, 0) << "WARNING: register_watch_aio() returned " << r << dendl;
+      error = r;
+      continue;
+    }
+  }
+  // second pass: wait for every async registration submitted above
+  for (int i = 0; i < num_watchers; ++i) {
+    int r = watchers[i]->register_watch_finish();
+    if (r < 0) {
+      ldout(cct, 0) << "WARNING: async watch returned " << r << dendl;
+      error = r;
+    }
+  }
+
+  if (error < 0) { // report the last registration error seen, if any
+    return error;
+  }
+
+  return 0;
+}
+
+void RGWSI_Notify::finalize_watch()
+{
+  for (int i = 0; i < num_watchers; i++) {
+    RGWWatcher *watcher = watchers[i];
+    if (watcher) watcher->unregister_watch(); // nullptr: init_watch() returned before filling this slot
+    delete watcher;
+  }
+
+  delete[] watchers;
+}
+
+int RGWSI_Notify::do_start()
+{
+ int r = zone_svc->start(); // started first: control_pool is read from the zone params below
+ if (r < 0) {
+ return r;
+ }
+
+ assert(zone_svc->is_started()); /* otherwise there's an ordering problem */
+
+ r = rados_svc->start();
+ if (r < 0) {
+ return r;
+ }
+ r = finisher_svc->start();
+ if (r < 0) {
+ return r;
+ }
+
+ control_pool = zone_svc->get_zone_params().control_pool;
+
+ int ret = init_watch();
+ if (ret < 0) {
+ lderr(cct) << "ERROR: failed to initialize watch: " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ shutdown_cb = new RGWSI_Notify_ShutdownCB(this); // deleted in shutdown()
+ int handle;
+ finisher_svc->register_caller(shutdown_cb, &handle);
+ finisher_handle = handle; // remembered so shutdown() can unregister the callback
+
+ return 0;
+}
+
+void RGWSI_Notify::shutdown()
+{
+ if (finalized) { // only the first call does any work
+ return;
+ }
+
+ if (finisher_handle) { // set only after do_start() registered the shutdown callback
+ finisher_svc->unregister_caller(*finisher_handle);
+ }
+ finalize_watch();
+
+ delete shutdown_cb;
+
+ finalized = true;
+}
+
+RGWSI_Notify::~RGWSI_Notify()
+{
+ shutdown(); // returns immediately if shutdown() already ran (finalized is set)
+}
+
+int RGWSI_Notify::unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle)
+{
+  const int unwatch_ret = obj.unwatch(watch_handle);
+  if (unwatch_ret < 0) {
+    ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << unwatch_ret << dendl;
+    return unwatch_ret;
+  }
+  const int flush_ret = rados_svc->handle().watch_flush(); // flush only after a successful unwatch
+  if (flush_ret < 0) {
+    ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << flush_ret << dendl;
+    return flush_ret;
+  }
+  return 0;
+}
+
+void RGWSI_Notify::add_watcher(int i)
+{
+  ldout(cct, 20) << "add_watcher() i=" << i << dendl;
+  RWLock::WLocker l(watchers_lock);
+  watchers_set.insert(i);
+  // nothing more to do until every watcher slot is registered
+  if (watchers_set.size() != static_cast<size_t>(num_watchers)) { return; }
+  ldout(cct, 2) << "all " << num_watchers << " watchers are set, enabling cache" << dendl;
+  _set_enabled(true);
+}
+
+void RGWSI_Notify::remove_watcher(int i)
+{
+  ldout(cct, 20) << "remove_watcher() i=" << i << dendl;
+  RWLock::WLocker l(watchers_lock);
+  const size_t before = watchers_set.size();
+  watchers_set.erase(i);
+  const bool was_complete = (before == static_cast<size_t>(num_watchers));
+  const bool removed = (watchers_set.size() < before); /* actually removed */
+  if (!was_complete || !removed) { return; }
+  ldout(cct, 2) << "removed watcher, disabling cache" << dendl;
+  _set_enabled(false);
+}
+
+int RGWSI_Notify::watch_cb(uint64_t notify_id,
+                           uint64_t cookie,
+                           uint64_t notifier_id,
+                           bufferlist& bl)
+{
+  RWLock::RLocker l(watchers_lock);
+  if (!cb) { // no callback installed: report success without doing anything
+    return 0;
+  }
+  return cb->watch_cb(notify_id, cookie, notifier_id, bl);
+}
+
+// Locked variant of _set_enabled(): takes watchers_lock for writing first.
+void RGWSI_Notify::set_enabled(bool status)
+{
+  RWLock::WLocker l(watchers_lock);
+  this->_set_enabled(status);
+}
+
+// Remember the new status and pass it on to the callback, if one is registered.
+// Does not take watchers_lock itself; the callers in this file hold it.
+void RGWSI_Notify::_set_enabled(bool status)
+{
+  enabled = status;
+  if (cb == nullptr) {
+    return;
+  }
+  cb->set_enabled(status);
+}
+
+// Send bl as a notification on the control object selected for key.
+// Returns 0 without sending anything while no watchers are configured.
+int RGWSI_Notify::distribute(const string& key, bufferlist& bl)
+{
+  /* RGW keeps its watch/notify objects in the control pool.
+     RGWSI_Notify::do_start() first starts zone_svc and only afterwards calls
+     init_watch(). The first time an RGW starts in a cluster it creates the
+     zone and zonegroup system objects, so it can reach this point before
+     init_watch() has run. num_watchers is still 0 then, and picking a
+     control object (pick_control_obj) would divide by zero.
+  */
+  if (num_watchers <= 0) {
+    return 0;
+  }
+
+  RGWSI_RADOS::Obj notify_obj = pick_control_obj(key);
+
+  ldout(cct, 10) << "distributing notification oid=" << notify_obj.get_ref().obj
+                 << " bl.length()=" << bl.length() << dendl;
+  return robust_notify(notify_obj, bl);
+}
+
+// Send bl as a notify on notify_obj, retrying when the notify fails.
+//
+// After a failed attempt the reply buffer is parsed for the peers that acked.
+// Acks accumulate across attempts. Each retry also parses the peers that
+// timed out; a retry counts as a success (r = 0) when every peer that timed
+// out in it had already acked an earlier attempt. At most
+// max_notify_retries attempts are made in total.
+//
+// Returns the result of the first notify when it is non-negative, 0 when a
+// retry establishes that every peer acked at least once, and otherwise the
+// (negative) result of the last notify.
+int RGWSI_Notify::robust_notify(RGWSI_RADOS::Obj& notify_obj, bufferlist& bl)
+{
+  // The reply of every machine that acks goes in here.
+  boost::container::flat_set<std::pair<uint64_t, uint64_t>> acks;
+  bufferlist rbl;
+
+  // First, try to send, without being fancy about it.
+  auto r = notify_obj.notify(bl, 0, &rbl);
+
+  // If that doesn't work, get serious.
+  if (r < 0) {
+    ldout(cct, 1) << "robust_notify: If at first you don't succeed: "
+                  << cpp_strerror(-r) << dendl;
+
+    auto p = rbl.cbegin();
+    // Gather up the replies to the first attempt.
+    try {
+      uint32_t num_acks;
+      decode(num_acks, p);
+      // Doing this ourselves since we don't care about the payload;
+      for (auto i = 0u; i < num_acks; ++i) {
+        std::pair<uint64_t, uint64_t> id;
+        decode(id, p);
+        acks.insert(id);
+        ldout(cct, 20) << "robust_notify: acked by " << id << dendl;
+        uint32_t blen;
+        decode(blen, p);
+        p.advance(blen);
+      }
+    } catch (const buffer::error& e) {
+      ldout(cct, 0) << "robust_notify: notify response parse failed: "
+                    << e.what() << dendl;
+      acks.clear(); // Throw away junk on failed parse.
+    }
+
+    // Every machine that fails to reply and hasn't acked a previous
+    // attempt goes in here.
+    boost::container::flat_set<std::pair<uint64_t, uint64_t>> timeouts;
+
+    auto tries = 1u;
+    while (r < 0 && tries < max_notify_retries) {
+      ++tries;
+      rbl.clear();
+      // Reset the timeouts, we're only concerned with new ones.
+      timeouts.clear();
+      r = notify_obj.notify(bl, 0, &rbl);
+      if (r < 0) {
+        ldout(cct, 1) << "robust_notify: retry " << tries << " failed: "
+                      << cpp_strerror(-r) << dendl;
+        p = rbl.cbegin();
+        try {
+          uint32_t num_acks;
+          decode(num_acks, p);
+          // Not only do we not care about the payload, but we don't
+          // want to empty the container; we just want to augment it
+          // with any new members.
+          for (auto i = 0u; i < num_acks; ++i) {
+            std::pair<uint64_t, uint64_t> id;
+            decode(id, p);
+            auto ir = acks.insert(id);
+            if (ir.second) {
+              ldout(cct, 20) << "robust_notify: acked by " << id << dendl;
+            }
+            uint32_t blen;
+            decode(blen, p);
+            p.advance(blen);
+          }
+
+          uint32_t num_timeouts;
+          decode(num_timeouts, p);
+          for (auto i = 0u; i < num_timeouts; ++i) {
+            std::pair<uint64_t, uint64_t> id;
+            decode(id, p);
+            // Only track timeouts from hosts that haven't acked previously.
+            // (The check must be "not found": a peer that already acked has
+            // received the update and must not keep the retry loop failing,
+            // while a peer that never acked must not be silently dropped.)
+            if (acks.find(id) == acks.cend()) {
+              ldout(cct, 20) << "robust_notify: " << id << " timed out."
+                             << dendl;
+              timeouts.insert(id);
+            }
+          }
+        } catch (const buffer::error& e) {
+          ldout(cct, 0) << "robust_notify: notify response parse failed: "
+                        << e.what() << dendl;
+          continue;
+        }
+        // If we got a good parse and timeouts is empty, that means
+        // everyone who timed out in one call received the update in a
+        // previous one.
+        if (timeouts.empty()) {
+          r = 0;
+        }
+      }
+    }
+  }
+  return r;
+}
+
+// Install _cb as the notification callback and immediately replay the current
+// enabled status to it. Runs under the watchers write lock.
+void RGWSI_Notify::register_watch_cb(CB *_cb)
+{
+  RWLock::WLocker l(watchers_lock);
+  cb = _cb;
+  const bool current_status = enabled;
+  _set_enabled(current_status);
+}
+
+// Hand the context c over to the finisher service for scheduling.
+void RGWSI_Notify::schedule_context(Context *c)
+{
+  RGWSI_Finisher *finisher = finisher_svc;
+  finisher->schedule_context(c);
+}
diff --git a/src/rgw/services/svc_notify.h b/src/rgw/services/svc_notify.h
new file mode 100644
index 00000000..cd9d9eb8
--- /dev/null
+++ b/src/rgw/services/svc_notify.h
@@ -0,0 +1,100 @@
+#ifndef CEPH_RGW_SERVICES_NOTIFY_H
+#define CEPH_RGW_SERVICES_NOTIFY_H
+
+
+#include "rgw/rgw_service.h"
+
+#include "svc_rados.h"
+
+
+class RGWSI_Zone;
+class RGWSI_Finisher;
+
+class RGWWatcher;
+class RGWSI_Notify_ShutdownCB;
+
+// Service that distributes notifications over RADOS watch/notify objects kept
+// in the zone's control pool and forwards incoming notifications to a single
+// registered CB.
+class RGWSI_Notify : public RGWServiceInstance
+{
+ friend class RGWWatcher;
+ friend class RGWSI_Notify_ShutdownCB;
+ friend class RGWServices_Def;
+
+public:
+ class CB;
+
+private:
+ // Services this one depends on; set by init().
+ RGWSI_Zone *zone_svc{nullptr};
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Finisher *finisher_svc{nullptr};
+
+ // Taken by add_watcher(), remove_watcher(), watch_cb(), set_enabled() and
+ // register_watch_cb().
+ RWLock watchers_lock{"watchers_lock"};
+ // Read from the zone params in do_start().
+ rgw_pool control_pool;
+
+ // Number of watchers expected; distribute() is a no-op while this is 0.
+ int num_watchers{0};
+ RGWWatcher **watchers{nullptr};
+ // Indices of watchers currently registered via add_watcher().
+ std::set<int> watchers_set;
+ vector<RGWSI_RADOS::Obj> notify_objs;
+
+ // Last status passed to _set_enabled(); replayed to a newly registered CB.
+ bool enabled{false};
+
+ double inject_notify_timeout_probability{0};
+ // Upper bound on the total number of notify attempts in robust_notify().
+ unsigned max_notify_retries{0};
+
+ string get_control_oid(int i);
+ RGWSI_RADOS::Obj pick_control_obj(const string& key);
+
+ // Not owned (NOTE(review): presumably owned by the registrant; confirm).
+ CB *cb{nullptr};
+
+ // Handle returned by the finisher service when registering shutdown_cb.
+ std::optional<int> finisher_handle;
+ // Allocated in do_start(), deleted in shutdown().
+ RGWSI_Notify_ShutdownCB *shutdown_cb{nullptr};
+
+ // Set by shutdown() so that a second call does nothing.
+ bool finalized{false};
+
+ int init_watch();
+ void finalize_watch();
+
+ // Store the service dependencies.
+ void init(RGWSI_Zone *_zone_svc,
+ RGWSI_RADOS *_rados_svc,
+ RGWSI_Finisher *_finisher_svc) {
+ zone_svc = _zone_svc;
+ rados_svc = _rados_svc;
+ finisher_svc = _finisher_svc;
+ }
+ int do_start() override;
+ void shutdown() override;
+
+ // Remove a watch from obj and flush pending watch callbacks.
+ int unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle);
+ // Track watcher i; enables/disables the CB when the set becomes complete or
+ // stops being complete.
+ void add_watcher(int i);
+ void remove_watcher(int i);
+
+ // Forward an incoming notification to the registered CB (0 if none).
+ int watch_cb(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl);
+ // _set_enabled() does not lock; set_enabled() takes watchers_lock first.
+ void _set_enabled(bool status);
+ void set_enabled(bool status);
+
+ // Notify with retries; see the definition for the exact success criteria.
+ int robust_notify(RGWSI_RADOS::Obj& notify_obj, bufferlist& bl);
+
+ // Schedule c on the finisher service.
+ void schedule_context(Context *c);
+public:
+ RGWSI_Notify(CephContext *cct): RGWServiceInstance(cct) {}
+ ~RGWSI_Notify();
+
+ // Interface implemented by the consumer of notifications.
+ class CB {
+ public:
+ virtual ~CB() {}
+ // Called for each incoming notification.
+ virtual int watch_cb(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) = 0;
+ // Called whenever the enabled status is (re)applied.
+ virtual void set_enabled(bool status) = 0;
+ };
+
+ // Send bl on the control object picked for key; returns 0 without sending
+ // while num_watchers is 0.
+ int distribute(const string& key, bufferlist& bl);
+
+ // Install cb and replay the current enabled status to it.
+ void register_watch_cb(CB *cb);
+};
+
+#endif
+
diff --git a/src/rgw/services/svc_quota.cc b/src/rgw/services/svc_quota.cc
new file mode 100644
index 00000000..f2baac36
--- /dev/null
+++ b/src/rgw/services/svc_quota.cc
@@ -0,0 +1,15 @@
+#include "svc_quota.h"
+#include "svc_zone.h"
+
+#include "rgw/rgw_zone.h"
+
+// Bucket quota as configured in the current period.
+const RGWQuotaInfo& RGWSI_Quota::get_bucket_quota() const
+{
+  const auto& period_config = zone_svc->get_current_period().get_config();
+  return period_config.bucket_quota;
+}
+
+// User quota as configured in the current period.
+const RGWQuotaInfo& RGWSI_Quota::get_user_quota() const
+{
+  const auto& period_config = zone_svc->get_current_period().get_config();
+  return period_config.user_quota;
+}
+
diff --git a/src/rgw/services/svc_quota.h b/src/rgw/services/svc_quota.h
new file mode 100644
index 00000000..7dfbf19b
--- /dev/null
+++ b/src/rgw/services/svc_quota.h
@@ -0,0 +1,23 @@
+#ifndef CEPH_RGW_SERVICES_QUOTA_H
+#define CEPH_RGW_SERVICES_QUOTA_H
+
+
+#include "rgw/rgw_service.h"
+
+
+// Service exposing the bucket and user quota of the current period, which it
+// reads through the zone service.
+class RGWSI_Quota : public RGWServiceInstance
+{
+  RGWSI_Zone *zone_svc{nullptr};
+
+public:
+  RGWSI_Quota(CephContext *cct): RGWServiceInstance(cct) {}
+
+  // Store the zone service the getters read from.
+  void init(RGWSI_Zone *zone) {
+    zone_svc = zone;
+  }
+
+  const RGWQuotaInfo& get_bucket_quota() const;
+  const RGWQuotaInfo& get_user_quota() const;
+};
+
+#endif
diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc
new file mode 100644
index 00000000..408d25d9
--- /dev/null
+++ b/src/rgw/services/svc_rados.cc
@@ -0,0 +1,308 @@
+#include "svc_rados.h"
+
+#include "include/rados/librados.hpp"
+#include "common/errno.h"
+#include "osd/osd_types.h"
+#include "rgw/rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Initialize the librados handle from the CephContext and connect it.
+// Returns 0 on success or the first negative error.
+int RGWSI_RADOS::do_start()
+{
+  if (int r = rados.init_with_context(cct); r < 0) {
+    return r;
+  }
+  if (int r = rados.connect(); r < 0) {
+    return r;
+  }
+  return 0;
+}
+
+// Pointer to the librados handle owned by this service.
+librados::Rados* RGWSI_RADOS::get_rados_handle()
+{
+  librados::Rados *handle = &rados;
+  return handle;
+}
+
+// Instance id reported by the underlying librados handle.
+uint64_t RGWSI_RADOS::instance_id()
+{
+  librados::Rados *handle = get_rados_handle();
+  return handle->get_instance_id();
+}
+
+// Open io_ctx on pool through rgw_init_ioctx(), asking it to create the pool
+// if it doesn't exist.
+int RGWSI_RADOS::open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx)
+{
+  librados::Rados *handle = get_rados_handle();
+  return rgw_init_ioctx(handle, pool, io_ctx, true /* create */);
+}
+
+// Advance iter over at most num objects of io_ctx, appending one entry to objs
+// for every oid that passes filter (all oids when filter is null).
+//
+// Returns -ENOENT when iter is already at the end on entry, otherwise the
+// total size of objs. When is_truncated is non-null it is set to whether
+// objects remain after the last one visited.
+int RGWSI_RADOS::pool_iterate(librados::IoCtx& io_ctx,
+                              librados::NObjectIterator& iter,
+                              uint32_t num, vector<rgw_bucket_dir_entry>& objs,
+                              RGWAccessListFilter *filter,
+                              bool *is_truncated)
+{
+  if (iter == io_ctx.nobjects_end())
+    return -ENOENT;
+
+  uint32_t visited = 0;
+  while (visited < num && iter != io_ctx.nobjects_end()) {
+    string oid = iter->get_oid();
+    ldout(cct, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
+
+    // filtered-out objects still count towards num
+    const bool accepted = !filter || filter->filter(oid, oid);
+    if (accepted) {
+      // fill it in with initial values; we may correct later
+      rgw_bucket_dir_entry e;
+      e.key = oid;
+      objs.push_back(e);
+    }
+
+    ++visited;
+    ++iter;
+  }
+
+  if (is_truncated)
+    *is_truncated = (iter != io_ctx.nobjects_end());
+
+  return objs.size();
+}
+
+// Remember which raw object this handle refers to; the ioctx is opened later
+// by open().
+void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj)
+{
+  this->ref.obj = obj;
+}
+
+// Open the ioctx for the object's pool and set the object's locator key on it.
+// Returns 0 on success or the negative error from opening the pool.
+int RGWSI_RADOS::Obj::open()
+{
+  if (int r = rados_svc->open_pool_ctx(ref.obj.pool, ref.ioctx); r < 0) {
+    return r;
+  }
+
+  ref.ioctx.locator_set_key(ref.obj.loc);
+  return 0;
+}
+
+// Run a write operation on this object via rgw_rados_operate().
+int RGWSI_RADOS::Obj::operate(librados::ObjectWriteOperation *op,
+                              optional_yield y)
+{
+  const auto& oid = ref.obj.oid;
+  return rgw_rados_operate(ref.ioctx, oid, op, y);
+}
+
+// Run a read operation on this object via rgw_rados_operate(); pbl is passed
+// through as the output buffer.
+int RGWSI_RADOS::Obj::operate(librados::ObjectReadOperation *op, bufferlist *pbl,
+                              optional_yield y)
+{
+  const auto& oid = ref.obj.oid;
+  return rgw_rados_operate(ref.ioctx, oid, op, pbl, y);
+}
+
+// Submit an asynchronous write operation on this object; c receives completion.
+int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op)
+{
+  const auto& oid = ref.obj.oid;
+  return ref.ioctx.aio_operate(oid, c, op);
+}
+
+// Submit an asynchronous read operation on this object; c receives completion
+// and pbl is passed through as the output buffer.
+int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
+                                  bufferlist *pbl)
+{
+  const auto& oid = ref.obj.oid;
+  return ref.ioctx.aio_operate(oid, c, op, pbl);
+}
+
+// Register ctx as a watcher on this object (IoCtx::watch2); the watch handle
+// is written to *handle.
+int RGWSI_RADOS::Obj::watch(uint64_t *handle, librados::WatchCtx2 *ctx)
+{
+  const auto& oid = ref.obj.oid;
+  return ref.ioctx.watch2(oid, handle, ctx);
+}
+
+// Asynchronous counterpart of watch(): c receives completion.
+int RGWSI_RADOS::Obj::aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx)
+{
+  const auto& oid = ref.obj.oid;
+  return ref.ioctx.aio_watch(oid, c, handle, ctx);
+}
+
+// Remove the watch identified by handle (IoCtx::unwatch2).
+int RGWSI_RADOS::Obj::unwatch(uint64_t handle)
+{
+  librados::IoCtx& ioctx = ref.ioctx;
+  return ioctx.unwatch2(handle);
+}
+
+// Send bl as a notification on this object (IoCtx::notify2). timeout_ms and
+// the reply buffer pbl are passed through unchanged.
+int RGWSI_RADOS::Obj::notify(bufferlist& bl,
+                             uint64_t timeout_ms,
+                             bufferlist *pbl)
+{
+  const auto& oid = ref.obj.oid;
+  return ref.ioctx.notify2(oid, bl, timeout_ms, pbl);
+}
+
+// Acknowledge the notification notify_id received on watch cookie, replying
+// with bl.
+void RGWSI_RADOS::Obj::notify_ack(uint64_t notify_id,
+                                  uint64_t cookie,
+                                  bufferlist& bl)
+{
+  const auto& oid = ref.obj.oid;
+  ref.ioctx.notify_ack(oid, notify_id, cookie, bl);
+}
+
+// Last version reported by this object's ioctx.
+uint64_t RGWSI_RADOS::Obj::get_last_version()
+{
+  librados::IoCtx& ioctx = ref.ioctx;
+  return ioctx.get_last_version();
+}
+
+// Create this pool, open an ioctx on it and enable the RGW application on it.
+// Returns 0 on success; the first failing step is logged and its error
+// returned.
+int RGWSI_RADOS::Pool::create()
+{
+  librados::Rados *rad = rados_svc->get_rados_handle();
+  const char *pool_name = pool.name.c_str();
+
+  if (int r = rad->pool_create(pool_name); r < 0) {
+    ldout(rados_svc->cct, 0) << "WARNING: pool_create returned " << r << dendl;
+    return r;
+  }
+
+  librados::IoCtx io_ctx;
+  if (int r = rad->ioctx_create(pool_name, io_ctx); r < 0) {
+    ldout(rados_svc->cct, 0) << "WARNING: ioctx_create returned " << r << dendl;
+    return r;
+  }
+
+  if (int r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false); r < 0) {
+    ldout(rados_svc->cct, 0) << "WARNING: application_enable returned " << r << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+// Create several pools and enable the RGW application on each of them.
+//
+// Works in three phases: (1) asynchronous pool creation, (2) opening an ioctx
+// per pool, (3) asynchronous application_enable. After each phase *retcodes
+// holds one result per entry of pools, in the same order. If a phase reports
+// an error, the function stops there and *retcodes describes that phase.
+//
+// Always returns 0; callers must inspect *retcodes for per-pool results.
+// -EOPNOTSUPP from application_enable is reported as 0.
+int RGWSI_RADOS::Pool::create(const vector<rgw_pool>& pools, vector<int> *retcodes)
+{
+  librados::Rados *rad = rados_svc->get_rados_handle();
+
+  // Phase 1: submit every pool creation before waiting on any of them.
+  vector<librados::PoolAsyncCompletion *> completions;
+  vector<int> rets;
+  completions.reserve(pools.size());
+  rets.reserve(pools.size());
+  for (const auto& p : pools) {
+    librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion();
+    completions.push_back(c);
+    rets.push_back(rad->pool_create_async(p.name.c_str(), c));
+  }
+
+  bool error = false;
+  ceph_assert(rets.size() == completions.size());
+  // Start from a clean slate, as the later phases do, so that the result
+  // always lines up with `pools` even if the caller's vector was not empty.
+  retcodes->clear();
+  for (size_t i = 0; i < completions.size(); ++i) {
+    int r = rets[i];
+    librados::PoolAsyncCompletion *c = completions[i];
+    if (r == 0) {
+      c->wait();
+      r = c->get_return_value();
+      if (r < 0) {
+        ldout(rados_svc->cct, 0) << "WARNING: async pool_create returned " << r << dendl;
+        error = true;
+      }
+    }
+    c->release();
+    retcodes->push_back(r);
+  }
+  if (error) {
+    return 0;
+  }
+
+  // Phase 2: open an ioctx on every pool.
+  std::vector<librados::IoCtx> io_ctxs;
+  io_ctxs.reserve(pools.size());
+  retcodes->clear();
+  for (const auto& p : pools) {
+    io_ctxs.emplace_back();
+    int ret = rad->ioctx_create(p.name.c_str(), io_ctxs.back());
+    if (ret < 0) {
+      ldout(rados_svc->cct, 0) << "WARNING: ioctx_create returned " << ret << dendl;
+      error = true;
+    }
+    retcodes->push_back(ret);
+  }
+  if (error) {
+    return 0;
+  }
+
+  // Phase 3: enable the RGW application on every pool.
+  completions.clear();
+  for (auto &io_ctx : io_ctxs) {
+    librados::PoolAsyncCompletion *c =
+      librados::Rados::pool_async_create_completion();
+    completions.push_back(c);
+    int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW,
+                                              false, c);
+    ceph_assert(ret == 0);
+  }
+
+  retcodes->clear();
+  for (auto c : completions) {
+    c->wait();
+    int ret = c->get_return_value();
+    if (ret == -EOPNOTSUPP) {
+      ret = 0;
+    } else if (ret < 0) {
+      ldout(rados_svc->cct, 0) << "WARNING: async application_enable returned " << ret
+                               << dendl;
+    }
+    c->release();
+    retcodes->push_back(ret);
+  }
+  return 0;
+}
+
+// Look this pool up by name. Returns 0 if the lookup succeeds, otherwise the
+// negative error from librados.
+int RGWSI_RADOS::Pool::lookup()
+{
+  librados::Rados *rad = rados_svc->get_rados_handle();
+  const int ret = rad->pool_lookup(pool.name.c_str());
+  return (ret < 0) ? ret : 0;
+}
+
+// Prepare a listing of the pool starting at the cursor encoded in marker,
+// with an optional filter applied to every oid.
+//
+// Returns -EINVAL if the listing was already initialized or marker cannot be
+// parsed, a negative error if the pool cannot be opened, and 0 on success.
+int RGWSI_RADOS::Pool::List::init(const string& marker, RGWAccessListFilter *filter)
+{
+  if (ctx.initialized) {
+    return -EINVAL;
+  }
+
+  if (int r = pool.rados_svc->open_pool_ctx(pool.pool, ctx.ioctx); r < 0) {
+    return r;
+  }
+
+  librados::ObjectCursor cursor;
+  const bool parsed = cursor.from_str(marker);
+  if (!parsed) {
+    ldout(pool.rados_svc->cct, 10) << "failed to parse cursor: " << marker << dendl;
+    return -EINVAL;
+  }
+
+  ctx.iter = ctx.ioctx.nobjects_begin(cursor);
+  ctx.filter = filter;
+  ctx.initialized = true;
+
+  return 0;
+}
+
+// Fetch the next batch of object names from an initialized listing.
+//
+// Visits up to max objects and appends the names of those that pass the
+// filter to *oids. *is_truncated is forwarded to pool_iterate(), which sets
+// it to whether more objects remain.
+//
+// Returns -EINVAL if init() has not succeeded, the negative error from
+// pool_iterate() (-ENOENT once the listing is exhausted; not logged), or the
+// total size of *oids on success.
+int RGWSI_RADOS::Pool::List::get_next(int max,
+                                      std::list<string> *oids,
+                                      bool *is_truncated)
+{
+  if (!ctx.initialized) {
+    return -EINVAL;
+  }
+  vector<rgw_bucket_dir_entry> objs;
+  int r = pool.rados_svc->pool_iterate(ctx.ioctx, ctx.iter, max, objs, ctx.filter, is_truncated);
+  if (r < 0) {
+    if (r != -ENOENT) {
+      ldout(pool.rados_svc->cct, 10) << "failed to list objects pool_iterate returned r=" << r << dendl;
+    }
+    return r;
+  }
+
+  for (auto& o : objs) {
+    oids->push_back(o.key.name);
+  }
+
+  return oids->size();
+}
+
+// Flush pending watch callbacks on the underlying librados handle.
+int RGWSI_RADOS::Handle::watch_flush()
+{
+  return rados_svc->get_rados_handle()->watch_flush();
+}
diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h
new file mode 100644
index 00000000..0453eb0c
--- /dev/null
+++ b/src/rgw/services/svc_rados.h
@@ -0,0 +1,178 @@
+#ifndef CEPH_RGW_SERVICES_RADOS_H
+#define CEPH_RGW_SERVICES_RADOS_H
+
+
+#include "rgw/rgw_service.h"
+
+#include "include/rados/librados.hpp"
+#include "common/async/yield_context.h"
+
+// Predicate applied to object names while listing a pool.
+class RGWAccessListFilter {
+public:
+ virtual ~RGWAccessListFilter() {}
+ // Return true to keep the entry. key is passed by non-const reference;
+ // RGWSI_RADOS::pool_iterate() passes the same oid string for both arguments.
+ virtual bool filter(const string& name, string& key) = 0;
+};
+
+// Filter that accepts only keys starting with a fixed prefix.
+struct RGWAccessListFilterPrefix : public RGWAccessListFilter {
+  string prefix;
+
+  explicit RGWAccessListFilterPrefix(const string& _prefix) : prefix(_prefix) {}
+
+  // True when key begins with prefix (always true for an empty prefix).
+  // name is not consulted. The comparison is done in place, without building
+  // a temporary substring for every listed object.
+  bool filter(const string& name, string& key) override {
+    return key.compare(0, prefix.size(), prefix) == 0;
+  }
+};
+
+// A raw object together with the ioctx used to access it.
+struct rgw_rados_ref {
+ // Pool, oid and locator of the object.
+ rgw_raw_obj obj;
+ // Opened by RGWSI_RADOS::Obj::open() on obj's pool.
+ librados::IoCtx ioctx;
+};
+
+// Service owning the librados handle. Access to objects and pools goes through
+// the lightweight Obj, Pool and Handle helpers it hands out.
+class RGWSI_RADOS : public RGWServiceInstance
+{
+ librados::Rados rados;
+
+ // Initializes and connects `rados`.
+ int do_start() override;
+
+ librados::Rados* get_rados_handle();
+ // Opens io_ctx on pool, creating the pool if it doesn't exist.
+ int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
+ // Visits up to num objects from iter, appending those accepted by filter to
+ // objs; returns -ENOENT if iter is already at the end.
+ int pool_iterate(librados::IoCtx& ioctx,
+ librados::NObjectIterator& iter,
+ uint32_t num, vector<rgw_bucket_dir_entry>& objs,
+ RGWAccessListFilter *filter,
+ bool *is_truncated);
+
+public:
+ RGWSI_RADOS(CephContext *cct) : RGWServiceInstance(cct) {}
+
+ void init() {}
+
+ uint64_t instance_id();
+
+ class Handle;
+
+ // Handle on a single raw object. open() must succeed before the I/O methods
+ // are used, since it is what opens the ioctx.
+ class Obj {
+ friend class RGWSI_RADOS;
+ friend Handle;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+ rgw_rados_ref ref;
+
+ void init(const rgw_raw_obj& obj);
+
+ Obj(RGWSI_RADOS *_rados_svc, const rgw_raw_obj& _obj)
+ : rados_svc(_rados_svc) {
+ init(_obj);
+ }
+
+ public:
+ // Default-constructed objects have no service attached (rados_svc is null).
+ Obj() {}
+
+ // Opens the pool ioctx and sets the locator key.
+ int open();
+
+ int operate(librados::ObjectWriteOperation *op, optional_yield y);
+ int operate(librados::ObjectReadOperation *op, bufferlist *pbl,
+ optional_yield y);
+ int aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op);
+ int aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
+ bufferlist *pbl);
+
+ int watch(uint64_t *handle, librados::WatchCtx2 *ctx);
+ int aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx);
+ int unwatch(uint64_t handle);
+ int notify(bufferlist& bl,
+ uint64_t timeout_ms,
+ bufferlist *pbl);
+ void notify_ack(uint64_t notify_id,
+ uint64_t cookie,
+ bufferlist& bl);
+
+ uint64_t get_last_version();
+
+ rgw_rados_ref& get_ref() { return ref; }
+ const rgw_rados_ref& get_ref() const { return ref; }
+ };
+
+ // Handle on a pool (or on no particular pool, for the multi-pool create()).
+ class Pool {
+ friend class RGWSI_RADOS;
+ friend Handle;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+ rgw_pool pool;
+
+ Pool(RGWSI_RADOS *_rados_svc,
+ const rgw_pool& _pool) : rados_svc(_rados_svc),
+ pool(_pool) {}
+
+ Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
+ public:
+ Pool() {}
+
+ // Creates this pool and enables the RGW application on it.
+ int create();
+ // Creates all of `pools`; always returns 0, per-pool results go to retcodes.
+ int create(const std::vector<rgw_pool>& pools, std::vector<int> *retcodes);
+ // Returns 0 if the pool lookup succeeds, a negative error otherwise.
+ int lookup();
+
+ // Incremental listing of the objects in a pool.
+ struct List {
+ // Reference to the Pool that created this listing; that Pool must outlive it.
+ Pool& pool;
+
+ struct Ctx {
+ bool initialized{false};
+ librados::IoCtx ioctx;
+ librados::NObjectIterator iter;
+ RGWAccessListFilter *filter{nullptr};
+ } ctx;
+
+ List(Pool& _pool) : pool(_pool) {}
+
+ // Must be called (once) before get_next(); returns -EINVAL if repeated.
+ int init(const string& marker, RGWAccessListFilter *filter = nullptr);
+ // Appends up to max filtered names to oids; returns -ENOENT when exhausted.
+ int get_next(int max,
+ std::list<string> *oids,
+ bool *is_truncated);
+ };
+
+ List op() {
+ return List(*this);
+ }
+
+ friend List;
+ };
+
+ // Factory bound to the service: creates Obj/Pool helpers and exposes
+ // watch_flush().
+ class Handle {
+ friend class RGWSI_RADOS;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+
+ Handle(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
+ public:
+ Obj obj(const rgw_raw_obj& o) {
+ return Obj(rados_svc, o);
+ }
+
+ Pool pool(const rgw_pool& p) {
+ return Pool(rados_svc, p);
+ }
+
+ int watch_flush();
+ };
+
+ Handle handle() {
+ return Handle(this);
+ }
+
+ Obj obj(const rgw_raw_obj& o) {
+ return Obj(this, o);
+ }
+
+ Pool pool() {
+ return Pool(this);
+ }
+
+ Pool pool(const rgw_pool& p) {
+ return Pool(this, p);
+ }
+
+ friend Obj;
+ friend Pool;
+ friend Pool::List;
+};
+
+#endif
diff --git a/src/rgw/services/svc_sync_modules.cc b/src/rgw/services/svc_sync_modules.cc
new file mode 100644
index 00000000..ca6a7a30
--- /dev/null
+++ b/src/rgw/services/svc_sync_modules.cc
@@ -0,0 +1,15 @@
+#include "svc_sync_modules.h"
+
+#include "rgw/rgw_sync_module.h"
+
+// Allocate the sync modules manager and register the sync modules with it.
+// The manager is released by the destructor.
+void RGWSI_SyncModules::init()
+{
+  sync_modules_manager = new RGWSyncModulesManager();
+  RGWSyncModulesManager *manager = sync_modules_manager;
+  rgw_register_sync_modules(manager);
+}
+
+// Release the manager allocated by init() (deleting nullptr is a no-op when
+// init() was never called).
+RGWSI_SyncModules::~RGWSI_SyncModules()
+{
+  delete this->sync_modules_manager;
+}
+
diff --git a/src/rgw/services/svc_sync_modules.h b/src/rgw/services/svc_sync_modules.h
new file mode 100644
index 00000000..19c4ec57
--- /dev/null
+++ b/src/rgw/services/svc_sync_modules.h
@@ -0,0 +1,26 @@
+#ifndef CEPH_RGW_SERVICES_SYNC_MODULES_H
+#define CEPH_RGW_SERVICES_SYNC_MODULES_H
+
+
+#include "rgw/rgw_service.h"
+
+
+class RGWSyncModulesManager;
+
+// Service owning the RGWSyncModulesManager.
+class RGWSI_SyncModules : public RGWServiceInstance
+{
+ // Allocated by init(), deleted by the destructor.
+ RGWSyncModulesManager *sync_modules_manager{nullptr};
+
+public:
+ RGWSI_SyncModules(CephContext *cct): RGWServiceInstance(cct) {}
+ ~RGWSI_SyncModules();
+
+ // Returns nullptr until init() has been called.
+ RGWSyncModulesManager *get_manager() {
+ return sync_modules_manager;
+ }
+
+ // Creates the manager and registers the sync modules with it.
+ void init();
+};
+
+#endif
+
diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc
new file mode 100644
index 00000000..1eda37f8
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj.cc
@@ -0,0 +1,192 @@
+#include "svc_sys_obj.h"
+#include "svc_sys_obj_core.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw/rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Create a fresh object context bound to this service.
+RGWSysObjectCtx RGWSI_SysObj::init_obj_ctx()
+{
+  RGWSysObjectCtx obj_ctx(this);
+  return obj_ctx;
+}
+
+// Build an Obj handle for obj that operates through obj_ctx and the core
+// service.
+RGWSI_SysObj::Obj RGWSI_SysObj::get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj)
+{
+  Obj handle(core_svc, obj_ctx, obj);
+  return handle;
+}
+
+// Invalidate this object's entry in the associated object context.
+void RGWSI_SysObj::Obj::invalidate()
+{
+  get_ctx().invalidate(obj);
+}
+
+// Stat the source object through the core service, using the outputs and
+// options configured on this ROp.
+int RGWSI_SysObj::Obj::ROp::stat()
+{
+  return source.core_svc->stat(source.get_ctx(), state, source.obj,
+                               attrs, raw_attrs,
+                               lastmod, obj_size,
+                               objv_tracker);
+}
+
+// Read the range [ofs, end] of the source object into bl through the core
+// service, using the options configured on this ROp.
+int RGWSI_SysObj::Obj::ROp::read(int64_t ofs, int64_t end, bufferlist *bl)
+{
+  return source.core_svc->read(source.get_ctx(), state,
+                               objv_tracker,
+                               source.get_obj(), bl, ofs, end,
+                               attrs,
+                               raw_attrs,
+                               cache_info,
+                               refresh_version);
+}
+
+// Fetch the attribute called name of the source object into dest.
+int RGWSI_SysObj::Obj::ROp::get_attr(const char *name, bufferlist *dest)
+{
+  return source.core_svc->get_attr(source.get_obj(), name, dest);
+}
+
+// Remove the source object through the core service.
+int RGWSI_SysObj::Obj::WOp::remove()
+{
+  return source.core_svc->remove(source.get_ctx(),
+                                 objv_tracker,
+                                 source.get_obj());
+}
+
+// Write bl to the source object together with the attrs, mtime and flags
+// configured on this WOp.
+int RGWSI_SysObj::Obj::WOp::write(bufferlist& bl)
+{
+  return source.core_svc->write(source.get_obj(), pmtime, attrs, exclusive,
+                                bl, objv_tracker, mtime);
+}
+
+// Write only the data bl to the source object (no attrs).
+int RGWSI_SysObj::Obj::WOp::write_data(bufferlist& bl)
+{
+  return source.core_svc->write_data(source.get_obj(), bl, exclusive, objv_tracker);
+}
+
+// Set the attrs configured on this WOp on the source object (no data).
+int RGWSI_SysObj::Obj::WOp::write_attrs()
+{
+  return source.core_svc->set_attrs(source.get_obj(), attrs, nullptr, objv_tracker);
+}
+
+// Set the single attribute name to bl on the source object.
+int RGWSI_SysObj::Obj::WOp::write_attr(const char *name, bufferlist& bl)
+{
+  map<string, bufferlist> single_attr;
+  single_attr[name] = bl;
+
+  return source.core_svc->set_attrs(source.get_obj(), single_attr, nullptr, objv_tracker);
+}
+
+// List the objects of the source pool whose names start with prefix.
+//
+// For every matching name that is strictly longer than prefix, the part after
+// the prefix is appended to *result. The pool is walked in batches until the
+// listing reports that it is no longer truncated.
+//
+// Returns 0 on success, or the first negative error from initializing or
+// advancing the listing (including -ENOENT, which get_next() returns when
+// there is nothing left to iterate).
+int RGWSI_SysObj::Pool::Op::list_prefixed_objs(const string& prefix, list<string> *result)
+{
+  // Number of objects requested per get_next() call.
+  constexpr int max_objs_per_call = 1000;
+
+  bool is_truncated = false;
+
+  auto rados_pool = source.rados_svc->pool(source.pool);
+
+  auto op = rados_pool.op();
+
+  RGWAccessListFilterPrefix filter(prefix);
+
+  int r = op.init(string(), &filter);
+  if (r < 0) {
+    return r;
+  }
+
+  do {
+    list<string> oids;
+    r = op.get_next(max_objs_per_call, &oids, &is_truncated);
+    if (r < 0) {
+      return r;
+    }
+    for (const auto& val : oids) {
+      if (val.size() > prefix.size()) {
+        result->push_back(val.substr(prefix.size()));
+      }
+    }
+  } while (is_truncated);
+
+  return 0;
+}
+
+// Fetch every omap key/value of the source object into m.
+int RGWSI_SysObj::Obj::OmapOp::get_all(std::map<string, bufferlist> *m)
+{
+  return source.core_svc->omap_get_all(source.obj, m);
+}
+
+// Fetch up to count omap entries of the source object into m, starting from
+// marker; pmore is passed through to the core service.
+int RGWSI_SysObj::Obj::OmapOp::get_vals(const string& marker,
+                                        uint64_t count,
+                                        std::map<string, bufferlist> *m,
+                                        bool *pmore)
+{
+  return source.core_svc->omap_get_vals(source.obj, marker, count, m, pmore);
+}
+
+// Set the omap entry key to bl on the source object, honoring must_exist.
+int RGWSI_SysObj::Obj::OmapOp::set(const std::string& key, bufferlist& bl)
+{
+  return source.core_svc->omap_set(source.obj, key, bl, must_exist);
+}
+
+// Set all omap entries in m on the source object, honoring must_exist.
+int RGWSI_SysObj::Obj::OmapOp::set(const map<std::string, bufferlist>& m)
+{
+  return source.core_svc->omap_set(source.obj, m, must_exist);
+}
+
+// Delete the omap entry key from the source object.
+int RGWSI_SysObj::Obj::OmapOp::del(const std::string& key)
+{
+  return source.core_svc->omap_del(source.obj, key);
+}
+
+// Send bl as a notification on the source object through the core service;
+// timeout_ms and the reply buffer pbl are passed through unchanged.
+int RGWSI_SysObj::Obj::WNOp::notify(bufferlist& bl,
+                                    uint64_t timeout_ms,
+                                    bufferlist *pbl)
+{
+  return source.core_svc->notify(source.obj, bl, timeout_ms, pbl);
+}
+
+// Zone service as known to the core service.
+RGWSI_Zone *RGWSI_SysObj::get_zone_svc()
+{
+  RGWSI_SysObj_Core *core = core_svc;
+  return core->get_zone_svc();
+}
diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h
new file mode 100644
index 00000000..f6cd77ce
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj.h
@@ -0,0 +1,275 @@
+#ifndef CEPH_RGW_SERVICES_SYS_OBJ_H
+#define CEPH_RGW_SERVICES_SYS_OBJ_H
+
+
+#include <utility>
+
+#include "rgw/rgw_service.h"
+
+#include "svc_rados.h"
+#include "svc_sys_obj_core.h"
+
+
+class RGWSI_Zone;
+class RGWSI_SysObj;
+class RGWSysObjectCtx;
+
+struct rgw_cache_entry_info;
+
+// Front-end service for system objects. It hands out short-lived Obj and Pool
+// helpers whose nested operation structs (ROp, WOp, OmapOp, WNOp, Pool::Op)
+// collect options through chained setters and then forward to the core
+// service.
+class RGWSI_SysObj : public RGWServiceInstance
+{
+  friend struct RGWServices_Def;
+
+public:
+  // Handle on one system object within an object context.
+  class Obj {
+    friend class ROp;
+
+    RGWSI_SysObj_Core *core_svc;
+    RGWSysObjectCtx& ctx;
+    rgw_raw_obj obj;
+
+  public:
+    Obj(RGWSI_SysObj_Core *_core_svc,
+        RGWSysObjectCtx& _ctx,
+        const rgw_raw_obj& _obj) : core_svc(_core_svc),
+                                   ctx(_ctx),
+                                   obj(_obj) {}
+
+    // Invalidate this object's entry in the context.
+    void invalidate();
+
+    RGWSysObjectCtx& get_ctx() {
+      return ctx;
+    }
+
+    rgw_raw_obj& get_obj() {
+      return obj;
+    }
+
+    // Read-side operation. The pointer members are optional outputs/options
+    // passed through to the core service; all default to "not requested".
+    struct ROp {
+      // The Obj this operation was created from; it must outlive the op.
+      Obj& source;
+
+      RGWSI_SysObj_Core::GetObjState state;
+
+      RGWObjVersionTracker *objv_tracker{nullptr};
+      map<string, bufferlist> *attrs{nullptr};
+      bool raw_attrs{false};
+      boost::optional<obj_version> refresh_version{boost::none};
+      ceph::real_time *lastmod{nullptr};
+      uint64_t *obj_size{nullptr};
+      rgw_cache_entry_info *cache_info{nullptr};
+
+      ROp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+        objv_tracker = _objv_tracker;
+        return *this;
+      }
+
+      ROp& set_last_mod(ceph::real_time *_lastmod) {
+        lastmod = _lastmod;
+        return *this;
+      }
+
+      ROp& set_obj_size(uint64_t *_obj_size) {
+        obj_size = _obj_size;
+        return *this;
+      }
+
+      ROp& set_attrs(map<string, bufferlist> *_attrs) {
+        attrs = _attrs;
+        return *this;
+      }
+
+      ROp& set_raw_attrs(bool ra) {
+        raw_attrs = ra;
+        return *this;
+      }
+
+      ROp& set_refresh_version(boost::optional<obj_version>& rf) {
+        refresh_version = rf;
+        return *this;
+      }
+
+      ROp& set_cache_info(rgw_cache_entry_info *ci) {
+        cache_info = ci;
+        return *this;
+      }
+
+      ROp(Obj& _source) : source(_source) {}
+
+      int stat();
+      int read(int64_t ofs, int64_t end, bufferlist *pbl);
+      // Convenience overload: read(0, -1, pbl).
+      int read(bufferlist *pbl) {
+        return read(0, -1, pbl);
+      }
+      int get_attr(const char *name, bufferlist *dest);
+    };
+
+    // Write-side operation.
+    struct WOp {
+      // The Obj this operation was created from; it must outlive the op.
+      Obj& source;
+
+      RGWObjVersionTracker *objv_tracker{nullptr};
+      map<string, bufferlist> attrs;
+      ceph::real_time mtime;
+      ceph::real_time *pmtime{nullptr};
+      bool exclusive{false};
+
+      WOp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+        objv_tracker = _objv_tracker;
+        return *this;
+      }
+
+      // Copies _attrs.
+      WOp& set_attrs(map<string, bufferlist>& _attrs) {
+        attrs = _attrs;
+        return *this;
+      }
+
+      // Takes over the contents of _attrs. A named rvalue reference is an
+      // lvalue, so without std::move this overload would silently copy.
+      WOp& set_attrs(map<string, bufferlist>&& _attrs) {
+        attrs = std::move(_attrs);
+        return *this;
+      }
+
+      WOp& set_mtime(const ceph::real_time& _mtime) {
+        mtime = _mtime;
+        return *this;
+      }
+
+      WOp& set_pmtime(ceph::real_time *_pmtime) {
+        pmtime = _pmtime;
+        return *this;
+      }
+
+      WOp& set_exclusive(bool _exclusive = true) {
+        exclusive = _exclusive;
+        return *this;
+      }
+
+      WOp(Obj& _source) : source(_source) {}
+
+      int remove();
+      int write(bufferlist& bl);
+
+      int write_data(bufferlist& bl); /* write data only */
+      int write_attrs(); /* write attrs only */
+      int write_attr(const char *name, bufferlist& bl); /* write attrs only */
+    };
+
+    // Omap operation.
+    struct OmapOp {
+      // The Obj this operation was created from; it must outlive the op.
+      Obj& source;
+
+      // Passed to the core service by both set() overloads.
+      bool must_exist{false};
+
+      OmapOp& set_must_exist(bool _must_exist = true) {
+        must_exist = _must_exist;
+        return *this;
+      }
+
+      OmapOp(Obj& _source) : source(_source) {}
+
+      int get_all(std::map<string, bufferlist> *m);
+      int get_vals(const string& marker,
+                   uint64_t count,
+                   std::map<string, bufferlist> *m,
+                   bool *pmore);
+      int set(const std::string& key, bufferlist& bl);
+      int set(const map<std::string, bufferlist>& m);
+      int del(const std::string& key);
+    };
+
+    // Watch/notify operation.
+    struct WNOp {
+      // The Obj this operation was created from; it must outlive the op.
+      Obj& source;
+
+      WNOp(Obj& _source) : source(_source) {}
+
+      int notify(bufferlist& bl,
+                 uint64_t timeout_ms,
+                 bufferlist *pbl);
+    };
+
+    // Factories for the operation structs; each returned op refers back to
+    // this Obj.
+    ROp rop() {
+      return ROp(*this);
+    }
+
+    WOp wop() {
+      return WOp(*this);
+    }
+
+    OmapOp omap() {
+      return OmapOp(*this);
+    }
+
+    WNOp wn() {
+      return WNOp(*this);
+    }
+  };
+
+  // Handle on one pool.
+  class Pool {
+    friend class Op;
+
+    RGWSI_RADOS *rados_svc;
+    RGWSI_SysObj_Core *core_svc;
+    rgw_pool pool;
+
+  public:
+    Pool(RGWSI_RADOS *_rados_svc,
+         RGWSI_SysObj_Core *_core_svc,
+         const rgw_pool& _pool) : rados_svc(_rados_svc),
+                                  core_svc(_core_svc),
+                                  pool(_pool) {}
+
+    rgw_pool& get_pool() {
+      return pool;
+    }
+
+    struct Op {
+      // The Pool this operation was created from; it must outlive the op.
+      Pool& source;
+
+      Op(Pool& _source) : source(_source) {}
+
+      // Appends to *result the names (with prefix stripped) of the objects
+      // whose names start with prefix.
+      int list_prefixed_objs(const std::string& prefix, std::list<std::string> *result);
+    };
+
+    Op op() {
+      return Op(*this);
+    }
+  };
+
+  friend class Obj;
+  friend class Obj::ROp;
+  friend class Obj::WOp;
+  friend class Pool;
+  friend class Pool::Op;
+
+protected:
+  RGWSI_RADOS *rados_svc{nullptr};
+  RGWSI_SysObj_Core *core_svc{nullptr};
+
+  // Store the service dependencies.
+  void init(RGWSI_RADOS *_rados_svc,
+            RGWSI_SysObj_Core *_core_svc) {
+    rados_svc = _rados_svc;
+    core_svc = _core_svc;
+  }
+
+public:
+  RGWSI_SysObj(CephContext *cct): RGWServiceInstance(cct) {}
+
+  RGWSysObjectCtx init_obj_ctx();
+  Obj get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj);
+
+  Pool get_pool(const rgw_pool& pool) {
+    return Pool(rados_svc, core_svc, pool);
+  }
+
+  RGWSI_Zone *get_zone_svc();
+};
+
+using RGWSysObj = RGWSI_SysObj::Obj;
+
+// Object context bound to a RGWSI_SysObj service, so that object handles can
+// be obtained directly from the context.
+class RGWSysObjectCtx : public RGWSysObjectCtxBase
+{
+  RGWSI_SysObj *sysobj_svc;
+public:
+  RGWSysObjectCtx(RGWSI_SysObj *svc) : sysobj_svc(svc) {}
+
+  // Shorthand for sysobj_svc->get_obj(*this, obj).
+  RGWSI_SysObj::Obj get_obj(const rgw_raw_obj& obj) {
+    RGWSysObjectCtx& self = *this;
+    return sysobj_svc->get_obj(self, obj);
+  }
+};
+
+#endif
+
diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc
new file mode 100644
index 00000000..9130e054
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_cache.cc
@@ -0,0 +1,506 @@
+#include "svc_sys_obj_cache.h"
+#include "svc_zone.h"
+#include "svc_notify.h"
+
+#include "rgw/rgw_zone.h"
+#include "rgw/rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Adapter registered with the notify service: both callbacks simply forward
+ * to the owning RGWSI_SysObj_Cache.
+ * NOTE(review): neither method is marked `override`; presumably both are
+ * virtuals of RGWSI_Notify::CB - confirm against svc_notify.h.
+ */
+class RGWSI_SysObj_Cache_CB : public RGWSI_Notify::CB
+{
+ RGWSI_SysObj_Cache *svc;
+public:
+ RGWSI_SysObj_Cache_CB(RGWSI_SysObj_Cache *_svc) : svc(_svc) {}
+ /* Forwards an incoming notification payload to the cache service. */
+ int watch_cb(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) {
+ return svc->watch_cb(notify_id, cookie, notifier_id, bl);
+ }
+
+ /* Forwards the enable/disable request to the cache service. */
+ void set_enabled(bool status) {
+ svc->set_enabled(status);
+ }
+};
+
+/*
+ * Start the cache service: start the core sysobj service, then the notify
+ * service, and finally register a callback object that forwards
+ * notifications to this cache. Returns the first negative result from either
+ * start call, 0 otherwise.
+ */
+int RGWSI_SysObj_Cache::do_start()
+{
+  if (int ret = RGWSI_SysObj_Core::do_start(); ret < 0) {
+    return ret;
+  }
+
+  if (int ret = notify_svc->start(); ret < 0) {
+    return ret;
+  }
+
+  assert(notify_svc->is_started());
+
+  /* cb owns the callback object; notify_svc is handed the raw pointer */
+  cb.reset(new RGWSI_SysObj_Cache_CB(this));
+  notify_svc->register_watch_cb(cb.get());
+
+  return 0;
+}
+
+/* Builds the cache key "<pool name>+<pool namespace>+<oid>". */
+static string normal_name(rgw_pool& pool, const std::string& oid) {
+  std::string key;
+  /* the two extra bytes are the '+' separators */
+  key.reserve(pool.name.size() + pool.ns.size() + oid.size() + 2);
+  key += pool.name;
+  key += "+";
+  key += pool.ns;
+  key += "+";
+  key += oid;
+  return key;
+}
+
+/*
+ * Resolve the (pool, oid) pair used for cache keys. A non-empty src_obj is
+ * passed through unchanged. An empty src_obj is mapped to an object named
+ * after the source pool, located in the zone's domain_root pool.
+ */
+void RGWSI_SysObj_Cache::normalize_pool_and_obj(const rgw_pool& src_pool, const string& src_obj, rgw_pool& dst_pool, string& dst_obj)
+{
+  if (src_obj.empty()) {
+    dst_pool = zone_svc->get_zone_params().domain_root;
+    dst_obj = src_pool.name;
+    return;
+  }
+
+  dst_pool = src_pool;
+  dst_obj = src_obj;
+}
+
+
+/*
+ * Drop the object from the local cache, broadcast a REMOVE_OBJ notification,
+ * then remove the object through the core service. A failed broadcast is only
+ * logged; the return value is that of the core remove().
+ */
+int RGWSI_SysObj_Cache::remove(RGWSysObjectCtxBase& obj_ctx,
+                               RGWObjVersionTracker *objv_tracker,
+                               const rgw_raw_obj& obj)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  string name = normal_name(norm_pool, norm_oid);
+  cache.remove(name);
+
+  /* empty info: the notification carries only the op and the object */
+  ObjectCacheInfo info;
+  if (int r = distribute_cache(name, obj, info, REMOVE_OBJ); r < 0) {
+    ldout(cct, 0) << "ERROR: " << __func__ << "(): failed to distribute cache: r=" << r << dendl;
+  }
+
+  return RGWSI_SysObj_Core::remove(obj_ctx, objv_tracker, obj);
+}
+
+/*
+ * Read a system object, serving it from the cache when possible.
+ *
+ * Only reads starting at offset 0 go through the cache; any other offset is
+ * passed straight to the core service.
+ *
+ * Cache hit (and, if refresh_version is given, the cached version compares
+ * equal to it): a negative cached status is returned as-is; otherwise the
+ * cached data is copied into *obl, the tracker's read_version and *attrs are
+ * filled from the cache, and the data length is returned.
+ *
+ * A -ENODATA result from the cache lookup is reported as -ENOENT.
+ *
+ * Otherwise the object is read through the core service with unfiltered
+ * attrs and the result is stored in the cache. A -ENOENT result is cached as
+ * a negative entry; other errors are returned without touching the cache.
+ *
+ * Returns the number of bytes read or a negative error code.
+ */
+int RGWSI_SysObj_Cache::read(RGWSysObjectCtxBase& obj_ctx,
+                             GetObjState& read_state,
+                             RGWObjVersionTracker *objv_tracker,
+                             const rgw_raw_obj& obj,
+                             bufferlist *obl, off_t ofs, off_t end,
+                             map<string, bufferlist> *attrs,
+                             bool raw_attrs,
+                             rgw_cache_entry_info *cache_info,
+                             boost::optional<obj_version> refresh_version)
+{
+  rgw_pool pool;
+  string oid;
+  if (ofs != 0) {
+    return RGWSI_SysObj_Core::read(obj_ctx, read_state, objv_tracker,
+                                   obj, obl, ofs, end, attrs, raw_attrs,
+                                   cache_info, refresh_version);
+  }
+
+  normalize_pool_and_obj(obj.pool, obj.oid, pool, oid);
+  string name = normal_name(pool, oid);
+
+  ObjectCacheInfo info;
+
+  /* ask the cache only for the parts the caller wants */
+  uint32_t flags = (end != 0 ? CACHE_FLAG_DATA : 0);
+  if (objv_tracker)
+    flags |= CACHE_FLAG_OBJV;
+  if (attrs)
+    flags |= CACHE_FLAG_XATTRS;
+
+  int r = cache.get(name, info, flags, cache_info);
+  if (r == 0 &&
+      (!refresh_version || !info.version.compare(&(*refresh_version)))) {
+    if (info.status < 0)
+      return info.status;
+
+    bufferlist& bl = info.data;
+
+    bufferlist::iterator i = bl.begin();
+
+    obl->clear();
+
+    i.copy_all(*obl);
+    if (objv_tracker)
+      objv_tracker->read_version = info.version;
+    if (attrs) {
+      if (raw_attrs) {
+        *attrs = info.xattrs;
+      } else {
+        rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs);
+      }
+    }
+    return obl->length();
+  }
+  if (r == -ENODATA)
+    return -ENOENT;
+
+  map<string, bufferlist> unfiltered_attrset;
+  r = RGWSI_SysObj_Core::read(obj_ctx, read_state, objv_tracker,
+                              obj, obl, ofs, end,
+                              (attrs ? &unfiltered_attrset : nullptr),
+                              true, /* cache unfiltered attrs */
+                              cache_info,
+                              refresh_version);
+  if (r < 0) {
+    if (r == -ENOENT) { // only update ENOENT, we'd rather retry other errors
+      info.status = r;
+      cache.put(name, info, cache_info);
+    }
+    return r;
+  }
+
+  if (obl->length() == end + 1) {
+    /* in this case, most likely object contains more data, we can't cache it */
+    flags &= ~CACHE_FLAG_DATA;
+  } else {
+    /* the dead `bufferptr p(r)` scratch allocation that used to be here was never used */
+    bufferlist& bl = info.data;
+    bl.clear();
+    bufferlist::iterator o = obl->begin();
+    o.copy_all(bl);
+  }
+
+  info.status = 0;
+  info.flags = flags;
+  if (objv_tracker) {
+    info.version = objv_tracker->read_version;
+  }
+  if (attrs) {
+    /* the cache keeps the unfiltered set; the caller gets raw or filtered */
+    info.xattrs = std::move(unfiltered_attrset);
+    if (raw_attrs) {
+      *attrs = info.xattrs;
+    } else {
+      rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs);
+    }
+  }
+  cache.put(name, info, cache_info);
+  return r;
+}
+
+/*
+ * Look up a single xattr, preferring the cached xattr set.
+ *
+ * On a cache hit: a negative cached status is returned as-is, -ENODATA is
+ * returned if the attribute is not in the cached set, and otherwise the value
+ * is copied to *dest and its length returned. A -ENODATA result from the
+ * cache lookup itself is reported as -ENOENT. Any other cache result falls
+ * through to the core service, whose result is not cached.
+ */
+int RGWSI_SysObj_Cache::get_attr(const rgw_raw_obj& obj,
+                                 const char *attr_name,
+                                 bufferlist *dest)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+  string name = normal_name(norm_pool, norm_oid);
+
+  ObjectCacheInfo info;
+  uint32_t flags = CACHE_FLAG_XATTRS;
+
+  const int r = cache.get(name, info, flags, nullptr);
+  if (r == -ENODATA) {
+    return -ENOENT;
+  }
+  if (r != 0) {
+    /* don't try to cache this one */
+    return RGWSI_SysObj_Core::get_attr(obj, attr_name, dest);
+  }
+
+  if (info.status < 0) {
+    return info.status;
+  }
+
+  auto found = info.xattrs.find(attr_name);
+  if (found == info.xattrs.end()) {
+    return -ENODATA;
+  }
+
+  *dest = found->second;
+  return dest->length();
+}
+
+/*
+ * Apply xattr changes through the core service and mirror them in the cache.
+ *
+ * On success the cache entry is updated with the new and removed xattrs
+ * (CACHE_FLAG_MODIFY_XATTRS), plus the tracker's read_version when it is
+ * non-zero, and the update is broadcast; a failed broadcast is only logged.
+ * On failure the cache entry is dropped. Returns the core set_attrs() result.
+ */
+int RGWSI_SysObj_Cache::set_attrs(const rgw_raw_obj& obj,
+                                  map<string, bufferlist>& attrs,
+                                  map<string, bufferlist> *rmattrs,
+                                  RGWObjVersionTracker *objv_tracker)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  /* snapshot the requested changes before handing them to the core */
+  ObjectCacheInfo info;
+  info.xattrs = attrs;
+  if (rmattrs) {
+    info.rm_xattrs = *rmattrs;
+  }
+  info.status = 0;
+  info.flags = CACHE_FLAG_MODIFY_XATTRS;
+
+  const int ret = RGWSI_SysObj_Core::set_attrs(obj, attrs, rmattrs, objv_tracker);
+  string name = normal_name(norm_pool, norm_oid);
+
+  if (ret < 0) {
+    cache.remove(name);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    info.version = objv_tracker->read_version;
+    info.flags |= CACHE_FLAG_OBJV;
+  }
+  cache.put(name, info, NULL);
+
+  const int r = distribute_cache(name, obj, info, UPDATE_OBJ);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+/*
+ * Write data and xattrs through the core service and mirror them in the cache.
+ *
+ * *pmtime (if given) receives the mtime reported by the core write, whether
+ * or not the write succeeded. On success the cache entry is replaced with
+ * the written data, xattrs and meta (plus the tracker's read_version when it
+ * is non-zero) and the update is broadcast; a failed broadcast is only
+ * logged. On failure the cache entry is dropped. Returns the core write()
+ * result.
+ */
+int RGWSI_SysObj_Cache::write(const rgw_raw_obj& obj,
+                              real_time *pmtime,
+                              map<std::string, bufferlist>& attrs,
+                              bool exclusive,
+                              const bufferlist& data,
+                              RGWObjVersionTracker *objv_tracker,
+                              real_time set_mtime)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  ObjectCacheInfo info;
+  info.xattrs = attrs;
+  info.status = 0;
+  info.data = data;
+  info.flags = CACHE_FLAG_XATTRS | CACHE_FLAG_DATA | CACHE_FLAG_META;
+
+  ceph::real_time result_mtime;
+  const int ret = RGWSI_SysObj_Core::write(obj, &result_mtime, attrs,
+                                           exclusive, data,
+                                           objv_tracker, set_mtime);
+  if (pmtime) {
+    *pmtime = result_mtime;
+  }
+
+  string name = normal_name(norm_pool, norm_oid);
+  if (ret < 0) {
+    cache.remove(name);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    info.version = objv_tracker->read_version;
+    info.flags |= CACHE_FLAG_OBJV;
+  }
+  info.meta.mtime = result_mtime;
+  info.meta.size = data.length();
+
+  cache.put(name, info, NULL);
+  const int r = distribute_cache(name, obj, info, UPDATE_OBJ);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+/*
+ * Write object data (no xattrs) through the core service and mirror it in the
+ * cache. On success the entry's data and size are updated (plus the tracker's
+ * read_version when it is non-zero) and the update is broadcast; a failed
+ * broadcast is only logged. On failure the cache entry is dropped. Returns
+ * the core write_data() result.
+ */
+int RGWSI_SysObj_Cache::write_data(const rgw_raw_obj& obj,
+                                   const bufferlist& data,
+                                   bool exclusive,
+                                   RGWObjVersionTracker *objv_tracker)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  ObjectCacheInfo info;
+  info.data = data;
+  info.meta.size = data.length();
+  info.status = 0;
+  info.flags = CACHE_FLAG_DATA;
+
+  const int ret = RGWSI_SysObj_Core::write_data(obj, data, exclusive, objv_tracker);
+  string name = normal_name(norm_pool, norm_oid);
+
+  if (ret < 0) {
+    cache.remove(name);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    info.version = objv_tracker->read_version;
+    info.flags |= CACHE_FLAG_OBJV;
+  }
+  cache.put(name, info, NULL);
+
+  const int r = distribute_cache(name, obj, info, UPDATE_OBJ);
+  if (r < 0) {
+    ldout(cct, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+/*
+ * Stat a system object (size, mtime, epoch, xattrs), using the cache when it
+ * holds meta + xattrs (+ version, if a tracker is passed).
+ *
+ * Cache hit: a negative cached status is returned as-is; otherwise the
+ * outputs are filled from the cached entry. Note that first_chunk is not
+ * touched on this path.
+ * A -ENODATA result from the cache lookup is reported as -ENOENT.
+ * Otherwise the core raw_stat() is called and its result cached; -ENOENT is
+ * cached as a negative entry, other errors are returned uncached.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Cache::raw_stat(const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *pepoch,
+ map<string, bufferlist> *attrs, bufferlist *first_chunk,
+ RGWObjVersionTracker *objv_tracker)
+{
+ rgw_pool pool;
+ string oid;
+ normalize_pool_and_obj(obj.pool, obj.oid, pool, oid);
+
+ string name = normal_name(pool, oid);
+
+ /* assigned on both the cache-hit path and the core path before `done` */
+ uint64_t size;
+ real_time mtime;
+ uint64_t epoch;
+
+ ObjectCacheInfo info;
+ uint32_t flags = CACHE_FLAG_META | CACHE_FLAG_XATTRS;
+ if (objv_tracker)
+ flags |= CACHE_FLAG_OBJV;
+ int r = cache.get(name, info, flags, NULL);
+ if (r == 0) {
+ if (info.status < 0)
+ return info.status;
+
+ size = info.meta.size;
+ mtime = info.meta.mtime;
+ epoch = info.epoch;
+ if (objv_tracker)
+ objv_tracker->read_version = info.version;
+ goto done;
+ }
+ if (r == -ENODATA) {
+ return -ENOENT;
+ }
+ /* always fetch into locals / info.xattrs so the entry can be cached regardless of which outputs the caller asked for */
+ r = RGWSI_SysObj_Core::raw_stat(obj, &size, &mtime, &epoch, &info.xattrs, first_chunk, objv_tracker);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ info.status = r;
+ cache.put(name, info, NULL);
+ }
+ return r;
+ }
+ info.status = 0;
+ info.epoch = epoch;
+ info.meta.mtime = mtime;
+ info.meta.size = size;
+ info.flags = CACHE_FLAG_META | CACHE_FLAG_XATTRS;
+ if (objv_tracker) {
+ info.flags |= CACHE_FLAG_OBJV;
+ info.version = objv_tracker->read_version;
+ }
+ cache.put(name, info, NULL);
+done:
+ /* copy out only the values the caller requested */
+ if (psize)
+ *psize = size;
+ if (pmtime)
+ *pmtime = mtime;
+ if (pepoch)
+ *pepoch = epoch;
+ if (attrs)
+ *attrs = info.xattrs;
+ return 0;
+}
+
+/*
+ * Encode an RGWCacheNotifyInfo carrying (op, obj, obj_info) and hand it to the
+ * notify service under the given cache key. Returns the notify service's
+ * result.
+ */
+int RGWSI_SysObj_Cache::distribute_cache(const string& normal_name, const rgw_raw_obj& obj, ObjectCacheInfo& obj_info, int op)
+{
+  RGWCacheNotifyInfo notification;
+  notification.op = op;
+  notification.obj_info = obj_info;
+  notification.obj = obj;
+
+  bufferlist payload;
+  encode(notification, payload);
+
+  return notify_svc->distribute(normal_name, payload);
+}
+
+/*
+ * Handle a cache notification: decode the RGWCacheNotifyInfo in `bl` and apply
+ * it to the local cache (UPDATE_OBJ stores the carried entry, REMOVE_OBJ
+ * drops it). Returns 0 on success, -EIO if the payload cannot be decoded and
+ * -EINVAL for an unknown op. The id/cookie arguments are not used here.
+ */
+int RGWSI_SysObj_Cache::watch_cb(uint64_t notify_id,
+                                 uint64_t cookie,
+                                 uint64_t notifier_id,
+                                 bufferlist& bl)
+{
+  RGWCacheNotifyInfo info;
+
+  try {
+    auto iter = bl.cbegin();
+    decode(info, iter);
+  } catch (buffer::end_of_buffer& err) {
+    ldout(cct, 0) << "ERROR: got bad notification" << dendl;
+    return -EIO;
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "ERROR: buffer::error" << dendl;
+    return -EIO;
+  }
+
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(info.obj.pool, info.obj.oid, norm_pool, norm_oid);
+  string name = normal_name(norm_pool, norm_oid);
+
+  if (info.op == UPDATE_OBJ) {
+    cache.put(name, info.obj_info, NULL);
+    return 0;
+  }
+
+  if (info.op == REMOVE_OBJ) {
+    cache.remove(name);
+    return 0;
+  }
+
+  ldout(cct, 0) << "WARNING: got unknown notification op: " << info.op << dendl;
+  return -EINVAL;
+}
+
+/* Forwards the enabled/disabled status to the underlying ObjectCache. */
+void RGWSI_SysObj_Cache::set_enabled(bool status)
+{
+ cache.set_enabled(status);
+}
+
+/* Forwards to ObjectCache::chain_cache_entry() and returns its result. */
+bool RGWSI_SysObj_Cache::chain_cache_entry(std::initializer_list<rgw_cache_entry_info *> cache_info_entries,
+ RGWChainedCache::Entry *chained_entry)
+{
+ return cache.chain_cache_entry(cache_info_entries, chained_entry);
+}
+
+/* Registers `cc` with the underlying ObjectCache via chain_cache(). */
+void RGWSI_SysObj_Cache::register_chained_cache(RGWChainedCache *cc)
+{
+ cache.chain_cache(cc);
+}
+
+/* Unregisters `cc` from the underlying ObjectCache via unchain_cache(). */
+void RGWSI_SysObj_Cache::unregister_chained_cache(RGWChainedCache *cc)
+{
+ cache.unchain_cache(cc);
+}
+
+/*
+ * Emits one cache entry to the formatter as three fields: "name", "mtime"
+ * (ISO 8601 string) and "size".
+ */
+static void cache_list_dump_helper(Formatter* f,
+ const std::string& name,
+ const ceph::real_time mtime,
+ const std::uint64_t size)
+{
+ f->dump_string("name", name);
+ f->dump_string("mtime", ceph::to_iso_8601(mtime));
+ f->dump_unsigned("size", size);
+}
+
+/*
+ * Dump name/mtime/size for every cache entry, or only for entries whose name
+ * contains *filter when a filter is supplied.
+ */
+void RGWSI_SysObj_Cache::call_list(const std::optional<std::string>& filter, Formatter* f)
+{
+  /* the dump helper is a free function, so the lambda needs no `this` */
+  auto dump_if_selected = [&filter, f] (const string& name, const ObjectCacheEntry& entry) {
+    if (filter && name.find(*filter) == name.npos) {
+      return;
+    }
+    cache_list_dump_helper(f, name, entry.info.meta.mtime,
+                           entry.info.meta.size);
+  };
+
+  cache.for_each(dump_if_selected);
+}
+
+/*
+ * Dump the cache entry stored under `target` as a "cache_entry" section.
+ * The result is a bool converted to int: 1 if the entry was found and dumped,
+ * 0 if there is no such entry.
+ */
+int RGWSI_SysObj_Cache::call_inspect(const std::string& target, Formatter* f)
+{
+  const auto entry = cache.get(target);
+  if (!entry) {
+    return false;
+  }
+
+  f->open_object_section("cache_entry");
+  f->dump_string("name", target.c_str());
+  entry->dump(f);
+  f->close_section();
+  return true;
+}
+
+/* Removes the entry stored under `target`; returns ObjectCache::remove()'s result converted to int. */
+int RGWSI_SysObj_Cache::call_erase(const std::string& target)
+{
+ return cache.remove(target);
+}
+
+/* Invalidates every entry in the cache; always returns 0. */
+int RGWSI_SysObj_Cache::call_zap()
+{
+ cache.invalidate_all();
+ return 0;
+}
diff --git a/src/rgw/services/svc_sys_obj_cache.h b/src/rgw/services/svc_sys_obj_cache.h
new file mode 100644
index 00000000..e48b64f2
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_cache.h
@@ -0,0 +1,176 @@
+
+#ifndef CEPH_RGW_SERVICES_SYS_OBJ_CACHE_H
+#define CEPH_RGW_SERVICES_SYS_OBJ_CACHE_H
+
+
+#include "rgw/rgw_service.h"
+#include "rgw/rgw_cache.h"
+
+#include "svc_sys_obj_core.h"
+
+class RGWSI_Notify;
+
+class RGWSI_SysObj_Cache_CB;
+
+/*
+ * Caching layer on top of RGWSI_SysObj_Core. It holds an ObjectCache and a
+ * pointer to the notify service, and redeclares the core read/write/stat
+ * entry points so their definitions can consult and update the cache.
+ */
+class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core
+{
+ friend class RGWSI_SysObj_Cache_CB;
+ friend class RGWServices_Def;
+
+ RGWSI_Notify *notify_svc{nullptr};
+ ObjectCache cache;
+
+ /* callback object handed to the notify service; created in do_start() */
+ std::shared_ptr<RGWSI_SysObj_Cache_CB> cb;
+
+ void normalize_pool_and_obj(const rgw_pool& src_pool, const string& src_obj, rgw_pool& dst_pool, string& dst_obj);
+protected:
+ /* Wires up the services this layer depends on. */
+ void init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc,
+ RGWSI_Notify *_notify_svc) {
+ core_init(_rados_svc, _zone_svc);
+ notify_svc = _notify_svc;
+ }
+
+ int do_start() override;
+
+ int raw_stat(const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+ map<string, bufferlist> *attrs, bufferlist *first_chunk,
+ RGWObjVersionTracker *objv_tracker) override;
+
+ int read(RGWSysObjectCtxBase& obj_ctx,
+ GetObjState& read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ map<string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>) override;
+
+ int get_attr(const rgw_raw_obj& obj, const char *name, bufferlist *dest) override;
+
+ /* NOTE(review): no `override` here, unlike the siblings above - confirm the base declares set_attrs() virtual. */
+ int set_attrs(const rgw_raw_obj& obj,
+ map<string, bufferlist>& attrs,
+ map<string, bufferlist> *rmattrs,
+ RGWObjVersionTracker *objv_tracker);
+
+ int remove(RGWSysObjectCtxBase& obj_ctx,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj) override;
+
+ int write(const rgw_raw_obj& obj,
+ real_time *pmtime,
+ map<std::string, bufferlist>& attrs,
+ bool exclusive,
+ const bufferlist& data,
+ RGWObjVersionTracker *objv_tracker,
+ real_time set_mtime) override;
+
+ /* NOTE(review): no `override` here either - confirm the base declares write_data() virtual. */
+ int write_data(const rgw_raw_obj& obj,
+ const bufferlist& bl,
+ bool exclusive,
+ RGWObjVersionTracker *objv_tracker);
+
+ /* Broadcasts a cache update/removal for the entry keyed by normal_name. */
+ int distribute_cache(const string& normal_name, const rgw_raw_obj& obj, ObjectCacheInfo& obj_info, int op);
+
+ /* Entry points invoked through RGWSI_SysObj_Cache_CB. */
+ int watch_cb(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl);
+
+ void set_enabled(bool status);
+
+public:
+ RGWSI_SysObj_Cache(CephContext *cct) : RGWSI_SysObj_Core(cct) {
+ cache.set_ctx(cct);
+ }
+
+ /* Chained (secondary) cache management; all forward to ObjectCache. */
+ bool chain_cache_entry(std::initializer_list<rgw_cache_entry_info *> cache_info_entries,
+ RGWChainedCache::Entry *chained_entry);
+ void register_chained_cache(RGWChainedCache *cc);
+ void unregister_chained_cache(RGWChainedCache *cc);
+
+ /* Cache inspection/maintenance helpers: list, inspect one, erase one, drop all. */
+ void call_list(const std::optional<std::string>& filter, Formatter* f);
+ int call_inspect(const std::string& target, Formatter* f);
+ int call_erase(const std::string& target);
+ int call_zap();
+};
+
+/*
+ * Typed key -> T cache chained to an RGWSI_SysObj_Cache.
+ *
+ * Entries are inserted through put(), which asks the sysobj cache to call
+ * chain_cb() back, and are dropped through invalidate()/invalidate_all().
+ * Each entry carries the time it was stored; find() ignores entries older
+ * than `expiry` when expiry is non-zero.
+ */
+template <class T>
+class RGWChainedCacheImpl : public RGWChainedCache {
+  RGWSI_SysObj_Cache *svc{nullptr};
+  /*
+   * Value-initialised to zero so it is well defined even if init() is never
+   * called or returns early on a null service. A zero expiry means entries
+   * never expire (see find()).
+   */
+  ceph::timespan expiry{};
+  RWLock lock;
+
+  std::unordered_map<std::string, std::pair<T, ceph::coarse_mono_time>> entries;
+
+public:
+  RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
+
+  /* Unregisters from the sysobj cache if still registered. */
+  ~RGWChainedCacheImpl() {
+    if (!svc) {
+      return;
+    }
+    svc->unregister_chained_cache(this);
+  }
+
+  /* Clears svc so the destructor will not unregister again. */
+  void unregistered() override {
+    svc = nullptr;
+  }
+
+  /*
+   * Registers with `_svc` and reads rgw_cache_expiry_interval (seconds) from
+   * its config. A null `_svc` is ignored.
+   */
+  void init(RGWSI_SysObj_Cache *_svc) {
+    if (!_svc) {
+      return;
+    }
+    svc = _svc;
+    svc->register_chained_cache(this);
+    expiry = std::chrono::seconds(svc->ctx()->_conf.get_val<uint64_t>(
+        "rgw_cache_expiry_interval"));
+  }
+
+  /*
+   * Returns a copy of the entry stored under `key`, or boost::none if there
+   * is no entry or it is older than a non-zero expiry.
+   */
+  boost::optional<T> find(const string& key) {
+    RWLock::RLocker rl(lock);
+    auto iter = entries.find(key);
+    if (iter == entries.end()) {
+      return boost::none;
+    }
+    if (expiry.count() &&
+        (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) {
+      return boost::none;
+    }
+
+    return iter->second.first;
+  }
+
+  /*
+   * Asks `svc` to chain `entry` under `key` to the given sysobj cache
+   * entries. Returns false when `svc` is null, otherwise the result of
+   * chain_cache_entry().
+   */
+  bool put(RGWSI_SysObj_Cache *svc, const string& key, T *entry,
+           std::initializer_list<rgw_cache_entry_info *> cache_info_entries) {
+    if (!svc) {
+      return false;
+    }
+
+    Entry chain_entry(this, key, entry);
+
+    /* we need the svc cache to call us under its lock to maintain lock ordering */
+    return svc->chain_cache_entry(cache_info_entries, &chain_entry);
+  }
+
+  /* Stores *data (a T) under `key`; timestamps it only when expiry is enabled. */
+  void chain_cb(const string& key, void *data) override {
+    T *entry = static_cast<T *>(data);
+    RWLock::WLocker wl(lock);
+    /* one lookup/insert instead of two operator[] calls */
+    auto& slot = entries[key];
+    slot.first = *entry;
+    if (expiry.count() > 0) {
+      slot.second = ceph::coarse_mono_clock::now();
+    }
+  }
+
+  void invalidate(const string& key) override {
+    RWLock::WLocker wl(lock);
+    entries.erase(key);
+  }
+
+  void invalidate_all() override {
+    RWLock::WLocker wl(lock);
+    entries.clear();
+  }
+}; /* RGWChainedCacheImpl */
+
+#endif
diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc
new file mode 100644
index 00000000..ead6aebd
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_core.cc
@@ -0,0 +1,595 @@
+#include "svc_sys_obj_core.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw/rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+/*
+ * Return (through *pobj) the rados object handle held in this state, opening
+ * it on first use. Returns 0 on success, -EINVAL if obj.oid is empty, or the
+ * negative result of open(). zone_svc is not used here.
+ */
+int RGWSI_SysObj_Core::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc,
+                                                  RGWSI_Zone *zone_svc,
+                                                  const rgw_raw_obj& obj,
+                                                  RGWSI_RADOS::Obj **pobj)
+{
+  if (has_rados_obj) {
+    /* already opened by an earlier call */
+    *pobj = &rados_obj;
+    return 0;
+  }
+
+  if (obj.oid.empty()) {
+    ldout(rados_svc->ctx(), 0) << "ERROR: obj.oid is empty" << dendl;
+    return -EINVAL;
+  }
+
+  rados_obj = rados_svc->obj(obj);
+  const int r = rados_obj.open();
+  if (r < 0) {
+    return r;
+  }
+
+  has_rados_obj = true;
+  *pobj = &rados_obj;
+  return 0;
+}
+
+/*
+ * Create and open a rados object handle for `obj`, storing it in *pobj.
+ * Returns 0 on success, -EINVAL if obj.oid is empty, or the negative result
+ * of open(). The zone_svc argument is not used here.
+ */
+int RGWSI_SysObj_Core::get_rados_obj(RGWSI_Zone *zone_svc,
+                                     const rgw_raw_obj& obj,
+                                     RGWSI_RADOS::Obj *pobj)
+{
+  if (obj.oid.empty()) {
+    ldout(rados_svc->ctx(), 0) << "ERROR: obj.oid is empty" << dendl;
+    return -EINVAL;
+  }
+
+  *pobj = std::move(rados_svc->obj(obj));
+
+  const int r = pobj->open();
+  return (r < 0) ? r : 0;
+}
+
+/*
+ * Fetch (and populate on first use) the per-context state for `obj`.
+ *
+ * *state is set to the context's state entry. If that entry already has
+ * attrs, nothing else is done. Otherwise raw_stat() fills it in. A -ENOENT
+ * result is recorded as exists=false with has_attrs=true and 0 is returned,
+ * so the lookup is not repeated. Other errors are returned as-is.
+ * Returns -EINVAL for an empty obj.
+ */
+int RGWSI_SysObj_Core::get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, const rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
+{
+  if (obj.empty()) {
+    return -EINVAL;
+  }
+
+  RGWSysObjState *s = rctx->get_state(obj);
+  ldout(cct, 20) << "get_system_obj_state: rctx=" << (void *)rctx << " obj=" << obj << " state=" << (void *)s << " s->prefetch_data=" << s->prefetch_data << dendl;
+  *state = s;
+  if (s->has_attrs) {
+    return 0;
+  }
+
+  s->obj = obj;
+
+  /* only ask for the first chunk when prefetch was requested for this object */
+  bufferlist *first_chunk = (s->prefetch_data ? &s->data : nullptr);
+  const int r = raw_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, first_chunk, objv_tracker);
+  if (r == -ENOENT) {
+    s->exists = false;
+    s->has_attrs = true;
+    s->mtime = real_time();
+    return 0;
+  }
+  if (r < 0) {
+    return r;
+  }
+
+  s->exists = true;
+  s->has_attrs = true;
+  s->obj_tag = s->attrset[RGW_ATTR_ID_TAG];
+
+  if (s->obj_tag.length()) {
+    ldout(cct, 20) << "get_system_obj_state: setting s->obj_tag to "
+                   << s->obj_tag.c_str() << dendl;
+  } else {
+    ldout(cct, 20) << "get_system_obj_state: s->obj_tag was set empty" << dendl;
+  }
+
+  return 0;
+}
+
+/*
+ * Wrapper around get_system_obj_state_impl() that retries for as long as it
+ * returns -EAGAIN, then returns its final result.
+ */
+int RGWSI_SysObj_Core::get_system_obj_state(RGWSysObjectCtxBase *rctx, const rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
+{
+  for (;;) {
+    const int ret = get_system_obj_state_impl(rctx, obj, state, objv_tracker);
+    if (ret != -EAGAIN) {
+      return ret;
+    }
+  }
+}
+
+/*
+ * Stat a system object with a single compound read operation.
+ *
+ * The op always fetches xattrs into *attrs, adds a stat when psize or pmtime
+ * is requested, and reads the first rgw_max_chunk_size bytes into
+ * *first_chunk when one is supplied. *epoch (if given) is set from the
+ * object's last version even when the operation failed.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::raw_stat(const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+ map<string, bufferlist> *attrs, bufferlist *first_chunk,
+ RGWObjVersionTracker *objv_tracker)
+{
+ RGWSI_RADOS::Obj rados_obj;
+ int r = get_rados_obj(zone_svc, obj, &rados_obj);
+ if (r < 0) {
+ return r;
+ }
+
+ uint64_t size = 0;
+ /* only written by the stat2 sub-op, and only read below when pmtime is set (which implies stat2 was added) */
+ struct timespec mtime_ts;
+
+ librados::ObjectReadOperation op;
+ if (objv_tracker) {
+ objv_tracker->prepare_op_for_read(&op);
+ }
+ op.getxattrs(attrs, nullptr);
+ if (psize || pmtime) {
+ op.stat2(&size, &mtime_ts, nullptr);
+ }
+ if (first_chunk) {
+ op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr);
+ }
+ bufferlist outbl;
+ r = rados_obj.operate(&op, &outbl, null_yield);
+
+ /* reported before the error check, so callers get it on failure too */
+ if (epoch) {
+ *epoch = rados_obj.get_last_version();
+ }
+
+ if (r < 0)
+ return r;
+
+ if (psize)
+ *psize = size;
+ if (pmtime)
+ *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+
+ return 0;
+}
+
+/*
+ * Stat a system object via the per-context object state.
+ *
+ * Fills *attrs (raw, or filtered by RGW_ATTR_PREFIX when raw_attrs is false),
+ * *obj_size and *lastmod for whichever pointers are non-null. Returns 0 on
+ * success, -ENOENT if the state says the object does not exist, or the error
+ * from get_system_obj_state(). The `state` argument is not used here.
+ */
+int RGWSI_SysObj_Core::stat(RGWSysObjectCtxBase& obj_ctx,
+                            GetObjState& state,
+                            const rgw_raw_obj& obj,
+                            map<string, bufferlist> *attrs,
+                            bool raw_attrs,
+                            real_time *lastmod,
+                            uint64_t *obj_size,
+                            RGWObjVersionTracker *objv_tracker)
+{
+  RGWSysObjState *astate = nullptr;
+
+  const int r = get_system_obj_state(&obj_ctx, obj, &astate, objv_tracker);
+  if (r < 0) {
+    return r;
+  }
+
+  if (!astate->exists) {
+    return -ENOENT;
+  }
+
+  if (attrs) {
+    if (raw_attrs) {
+      *attrs = astate->attrset;
+    } else {
+      rgw_filter_attrset(astate->attrset, RGW_ATTR_PREFIX, attrs);
+    }
+    /* only walk the map when level-20 rgw logging is actually gathered */
+    if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+      for (const auto& kv : *attrs) {
+        ldout(cct, 20) << "Read xattr: " << kv.first << dendl;
+      }
+    }
+  }
+
+  if (obj_size) {
+    *obj_size = astate->size;
+  }
+  if (lastmod) {
+    *lastmod = astate->mtime;
+  }
+
+  return 0;
+}
+
+/*
+ * Read [ofs, end] of a system object, optionally with its xattrs, in one
+ * compound read operation.
+ *
+ * read_state.last_ver remembers the object version seen by the previous read
+ * through the same state; if a later read sees a different version the call
+ * fails with -ECANCELED. cache_info and the trailing optional version are
+ * not used here.
+ *
+ * Returns the number of bytes in *bl or a negative error code.
+ */
+int RGWSI_SysObj_Core::read(RGWSysObjectCtxBase& obj_ctx,
+ GetObjState& read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ map<string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>)
+{
+ uint64_t len;
+ librados::ObjectReadOperation op;
+
+ /* NOTE(review): len 0 presumably asks librados to read to the end of the object - confirm */
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ if (objv_tracker) {
+ objv_tracker->prepare_op_for_read(&op);
+ }
+
+ ldout(cct, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl;
+ op.read(ofs, len, bl, nullptr);
+
+ map<string, bufferlist> unfiltered_attrset;
+
+ /* raw attrs go straight to the caller's map; otherwise fetch into a scratch map and filter after the op */
+ if (attrs) {
+ if (raw_attrs) {
+ op.getxattrs(attrs, nullptr);
+ } else {
+ op.getxattrs(&unfiltered_attrset, nullptr);
+ }
+ }
+
+ RGWSI_RADOS::Obj rados_obj;
+ int r = get_rados_obj(zone_svc, obj, &rados_obj);
+ if (r < 0) {
+ ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+ return r;
+ }
+ r = rados_obj.operate(&op, nullptr, null_yield);
+ if (r < 0) {
+ ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+ return r;
+ }
+ ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+
+ uint64_t op_ver = rados_obj.get_last_version();
+
+ /* a version change between two reads through the same state means a concurrent write */
+ if (read_state.last_ver > 0 &&
+ read_state.last_ver != op_ver) {
+ ldout(cct, 5) << "raced with an object write, abort" << dendl;
+ return -ECANCELED;
+ }
+
+ if (attrs && !raw_attrs) {
+ rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs);
+ }
+
+ read_state.last_ver = op_ver;
+
+ return bl->length();
+}
+
+/**
+ * Fetch a single xattr of a system object.
+ * obj: the object whose attr is read
+ * name: name of the attr to retrieve
+ * dest: bufferlist that receives the attr value
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::get_attr(const rgw_raw_obj& obj,
+                                const char *name,
+                                bufferlist *dest)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectReadOperation op;
+
+  /* per-op result slot required by getxattr(); only the overall result is checked */
+  int rval;
+  op.getxattr(name, dest, &rval);
+
+  r = rados_obj.operate(&op, nullptr, null_yield);
+  return (r < 0) ? r : 0;
+}
+
+/*
+ * Set and/or remove xattrs on a system object in a single write operation.
+ *
+ * obj: the object to modify
+ * attrs: xattrs to set; entries with an empty value are skipped
+ * rmattrs: optional xattrs to remove (only the keys are used)
+ * objv_tracker: optional version tracker; when given, the op is prepared for
+ *   a versioned write and the tracker is advanced after success
+ *
+ * Returns 0 on success (including when there was nothing to send), or a
+ * negative error code.
+ */
+int RGWSI_SysObj_Core::set_attrs(const rgw_raw_obj& obj,
+                                 map<string, bufferlist>& attrs,
+                                 map<string, bufferlist> *rmattrs,
+                                 RGWObjVersionTracker *objv_tracker)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  /* removals are queued before the sets */
+  if (rmattrs) {
+    for (const auto& kv : *rmattrs) {
+      op.rmxattr(kv.first.c_str());
+    }
+  }
+
+  for (auto& kv : attrs) {
+    bufferlist& bl = kv.second;
+
+    if (!bl.length())
+      continue;
+
+    op.setxattr(kv.first.c_str(), bl);
+  }
+
+  /* nothing queued: skip the round trip */
+  if (!op.size())
+    return 0;
+
+  r = rados_obj.operate(&op, null_yield);
+  if (r < 0)
+    return r;
+
+  /*
+   * Advance the tracker the same way write() and write_data() do after a
+   * successful versioned write, so the caller's tracker reflects the version
+   * this op produced.
+   */
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+
+  return 0;
+}
+
+/*
+ * Read up to `count` omap key/value pairs that follow `marker`, merging them
+ * into *m. The loop re-issues the read with the remaining count until the
+ * count is exhausted, the op reports no more entries, or a read returns
+ * nothing. *pmore (if given) receives the last "more" flag reported by the op.
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::omap_get_vals(const rgw_raw_obj& obj,
+ const string& marker,
+ uint64_t count,
+ std::map<string, bufferlist> *m,
+ bool *pmore)
+{
+ RGWSI_RADOS::Obj rados_obj;
+ int r = get_rados_obj(zone_svc, obj, &rados_obj);
+ if (r < 0) {
+ ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+ return r;
+ }
+
+ string start_after = marker;
+ /* filled in by omap_get_vals2 on each iteration */
+ bool more;
+
+ do {
+ librados::ObjectReadOperation op;
+
+ std::map<string, bufferlist> t;
+ int rval;
+ op.omap_get_vals2(start_after, count, &t, &more, &rval);
+
+ r = rados_obj.operate(&op, nullptr, null_yield);
+ if (r < 0) {
+ return r;
+ }
+ if (t.empty()) {
+ break;
+ }
+ /* continue after the last key of this batch, asking only for what is still missing */
+ count -= t.size();
+ start_after = t.rbegin()->first;
+ m->insert(t.begin(), t.end());
+ } while (more && count > 0);
+
+ if (pmore) {
+ *pmore = more;
+ }
+ return 0;
+}
+
+/*
+ * Read every omap key/value pair of the object into *m, fetching
+ * MAX_OMAP_GET_ENTRIES at a time until the op reports no more entries or a
+ * read returns nothing. Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::omap_get_all(const rgw_raw_obj& obj, std::map<string, bufferlist> *m)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+#define MAX_OMAP_GET_ENTRIES 1024
+  const int count = MAX_OMAP_GET_ENTRIES;
+  string start_after;
+  bool more;
+
+  for (;;) {
+    librados::ObjectReadOperation op;
+
+    std::map<string, bufferlist> batch;
+    int rval;
+    op.omap_get_vals2(start_after, count, &batch, &more, &rval);
+
+    r = rados_obj.operate(&op, nullptr, null_yield);
+    if (r < 0) {
+      return r;
+    }
+    if (batch.empty()) {
+      break;
+    }
+
+    /* the next batch starts after the last key seen so far */
+    start_after = batch.rbegin()->first;
+    m->insert(batch.begin(), batch.end());
+
+    if (!more) {
+      break;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Set a single omap key on the object. When must_exist is true the op first
+ * asserts that the object exists. Returns the result of the write operation
+ * (or the error from opening the object).
+ */
+int RGWSI_SysObj_Core::omap_set(const rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  ldout(cct, 15) << "omap_set obj=" << obj << " key=" << key << dendl;
+
+  /* omap_set() takes a map, so wrap the single pair */
+  map<string, bufferlist> single;
+  single[key] = bl;
+
+  librados::ObjectWriteOperation op;
+  if (must_exist) {
+    op.assert_exists();
+  }
+  op.omap_set(single);
+
+  return rados_obj.operate(&op, null_yield);
+}
+
+/*
+ * Set several omap keys on the object in one write operation. When must_exist
+ * is true the op first asserts that the object exists. Returns the result of
+ * the write operation (or the error from opening the object).
+ */
+int RGWSI_SysObj_Core::omap_set(const rgw_raw_obj& obj, const std::map<std::string, bufferlist>& m, bool must_exist)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (must_exist) {
+    op.assert_exists();
+  }
+  op.omap_set(m);
+
+  return rados_obj.operate(&op, null_yield);
+}
+
+/*
+ * Remove a single omap key from the object. Returns the result of the write
+ * operation (or the error from opening the object).
+ */
+int RGWSI_SysObj_Core::omap_del(const rgw_raw_obj& obj, const std::string& key)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  /* omap_rm_keys() takes a set, so wrap the single key */
+  set<string> doomed;
+  doomed.insert(key);
+
+  librados::ObjectWriteOperation op;
+  op.omap_rm_keys(doomed);
+
+  return rados_obj.operate(&op, null_yield);
+}
+
+/*
+ * Open the object and call notify() on it with the given payload, timeout and
+ * reply buffer. Returns notify()'s result (or the error from opening the
+ * object).
+ */
+int RGWSI_SysObj_Core::notify(const rgw_raw_obj& obj,
+                              bufferlist& bl,
+                              uint64_t timeout_ms,
+                              bufferlist *pbl)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  return rados_obj.notify(bl, timeout_ms, pbl);
+}
+
+/*
+ * Remove a system object. When a version tracker is supplied the op is
+ * prepared for a versioned write first. obj_ctx is not used here.
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::remove(RGWSysObjectCtxBase& obj_ctx,
+                              RGWObjVersionTracker *objv_tracker,
+                              const rgw_raw_obj& obj)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+  op.remove();
+
+  r = rados_obj.operate(&op, null_yield);
+  return (r < 0) ? r : 0;
+}
+
+/*
+ * Write a system object's full data and xattrs in one write operation.
+ *
+ * obj: the object to write
+ * pmtime: if non-null, receives the mtime that was applied (on success only)
+ * attrs: xattrs to set; entries with an empty value are skipped
+ * exclusive: when true, create exclusively; otherwise the op queues a remove
+ *   (flagged FAILOK) followed by a non-exclusive create
+ * data: payload written with write_full()
+ * objv_tracker: optional version tracker; prepared before the write and
+ *   advanced with apply_write() after success
+ * set_mtime: mtime to apply; a zero value means "now"
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::write(const rgw_raw_obj& obj,
+                             real_time *pmtime,
+                             map<std::string, bufferlist>& attrs,
+                             bool exclusive,
+                             const bufferlist& data,
+                             RGWObjVersionTracker *objv_tracker,
+                             real_time set_mtime)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (exclusive) {
+    op.create(true); // exclusive create
+  } else {
+    /* FAILOK is set right after the remove, so a failed remove does not abort the op */
+    op.remove();
+    op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
+    op.create(false);
+  }
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  if (real_clock::is_zero(set_mtime)) {
+    set_mtime = real_clock::now();
+  }
+
+  struct timespec mtime_ts = real_clock::to_timespec(set_mtime);
+  op.mtime2(&mtime_ts);
+  op.write_full(data);
+
+  /* the unused `acl_bl` local that used to be declared here has been dropped */
+  for (auto& kv : attrs) {
+    bufferlist& bl = kv.second;
+
+    if (!bl.length())
+      continue;
+
+    op.setxattr(kv.first.c_str(), bl);
+  }
+
+  r = rados_obj.operate(&op, null_yield);
+  if (r < 0) {
+    return r;
+  }
+
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+
+  if (pmtime) {
+    *pmtime = set_mtime;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Replace the object's data with `bl` using write_full(). When exclusive is
+ * true an exclusive create is queued first. A supplied version tracker is
+ * prepared before the write and advanced with apply_write() after success.
+ * Returns 0 on success or a negative error code.
+ */
+int RGWSI_SysObj_Core::write_data(const rgw_raw_obj& obj,
+                                  const bufferlist& bl,
+                                  bool exclusive,
+                                  RGWObjVersionTracker *objv_tracker)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (exclusive) {
+    op.create(true);
+  }
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+  op.write_full(bl);
+
+  r = rados_obj.operate(&op, null_yield);
+  if (r < 0) {
+    return r;
+  }
+
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+  return 0;
+}
+
diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h
new file mode 100644
index 00000000..d033267e
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_core.h
@@ -0,0 +1,201 @@
+#ifndef CEPH_RGW_SERVICES_SYS_OBJ_CORE_H
+#define CEPH_RGW_SERVICES_SYS_OBJ_CORE_H
+
+
+#include "rgw/rgw_service.h"
+
+#include "svc_rados.h"
+
+
+class RGWSI_Zone;
+
+struct rgw_cache_entry_info;
+
+/**
+ * Per-object state record stored by RGWSysObjectCtxBase (one entry per
+ * rgw_raw_obj in its objs_state map).
+ *
+ * The struct has a hand-written copy constructor, so every newly added member
+ * must also be added there.
+ */
+struct RGWSysObjState {
+  rgw_raw_obj obj;
+  bool has_attrs{false};
+  bool exists{false};
+  uint64_t size{0};
+  ceph::real_time mtime;
+  uint64_t epoch{0};
+  bufferlist obj_tag;
+  bool has_data{false};
+  bufferlist data;
+  bool prefetch_data{false};
+  uint64_t pg_ver{0};
+
+  /* important! don't forget to update copy constructor */
+
+  RGWObjVersionTracker objv_tracker;
+
+  map<string, bufferlist> attrset;
+  RGWSysObjState() {}
+  RGWSysObjState(const RGWSysObjState& rhs) : obj (rhs.obj) {
+    has_attrs = rhs.has_attrs;
+    exists = rhs.exists;
+    size = rhs.size;
+    mtime = rhs.mtime;
+    epoch = rhs.epoch;
+    // empty buffers are skipped: the default-constructed member is already empty
+    if (rhs.obj_tag.length()) {
+      obj_tag = rhs.obj_tag;
+    }
+    has_data = rhs.has_data;
+    if (rhs.data.length()) {
+      data = rhs.data;
+    }
+    prefetch_data = rhs.prefetch_data;
+    pg_ver = rhs.pg_ver;
+    objv_tracker = rhs.objv_tracker;
+    // attrset used to be left out here, so a copy carried has_attrs == true
+    // together with an empty attribute map (the implicit copy assignment
+    // does copy it); keep both paths consistent.
+    attrset = rhs.attrset;
+  }
+};
+
+/**
+ * Lock-protected collection of RGWSysObjState records keyed by raw object.
+ *
+ * Pointers handed out by get_state() point into the internal std::map; they
+ * stay valid until the corresponding entry is removed with invalidate().
+ */
+class RGWSysObjectCtxBase {
+  std::map<rgw_raw_obj, RGWSysObjState> objs_state;
+  RWLock lock;
+
+public:
+  explicit RGWSysObjectCtxBase() : lock("RGWSysObjectCtxBase") {}
+
+  // Copies the state map; the new object gets its own lock.
+  RGWSysObjectCtxBase(const RGWSysObjectCtxBase& rhs) : objs_state(rhs.objs_state),
+                                                        lock("RGWSysObjectCtxBase") {}
+  // Takes a non-const rvalue so the map is really moved. The previous
+  // `const&&` signature made std::move() yield a const rvalue, which
+  // selected std::map's copy constructor.
+  RGWSysObjectCtxBase(RGWSysObjectCtxBase&& rhs) : objs_state(std::move(rhs.objs_state)),
+                                                   lock("RGWSysObjectCtxBase") {}
+
+  /**
+   * Return the state record for @p obj, creating an empty one if none exists.
+   * @p obj must not be empty (asserted).
+   */
+  RGWSysObjState *get_state(const rgw_raw_obj& obj) {
+    assert (!obj.empty());
+
+    // Fast path: look the entry up under the read lock.
+    lock.get_read();
+    auto iter = objs_state.find(obj);
+    if (iter != objs_state.end()) {
+      RGWSysObjState *found = &iter->second;
+      lock.unlock();
+      return found;
+    }
+    lock.unlock();
+
+    // Slow path: operator[] inserts the entry, or finds the one another
+    // thread inserted between the two lock acquisitions.
+    RWLock::WLocker wl(lock);
+    return &objs_state[obj];
+  }
+
+  // Flag the record of @p obj (created on demand) for data prefetch.
+  void set_prefetch_data(const rgw_raw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    assert (!obj.empty());
+    objs_state[obj].prefetch_data = true;
+  }
+  // Drop the record of @p obj, if any.
+  void invalidate(const rgw_raw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    objs_state.erase(obj);
+  }
+};
+
+class RGWSI_SysObj_Core : public RGWServiceInstance // Declares the raw system-object operations (stat, read, write, attrs, omap, notify) used through RGWSI_SysObj.
+{
+ friend class RGWServices_Def;
+ friend class RGWSI_SysObj;
+
+protected:
+ RGWSI_RADOS *rados_svc{nullptr}; // assigned by core_init()
+ RGWSI_Zone *zone_svc{nullptr}; // assigned by core_init()
+
+ struct GetObjState { // passed by reference to read() and stat()
+ RGWSI_RADOS::Obj rados_obj;
+ bool has_rados_obj{false};
+ uint64_t last_ver{0};
+
+ GetObjState() {}
+
+ int get_rados_obj(RGWSI_RADOS *rados_svc,
+ RGWSI_Zone *zone_svc,
+ const rgw_raw_obj& obj,
+ RGWSI_RADOS::Obj **pobj);
+ };
+
+
+ void core_init(RGWSI_RADOS *_rados_svc, // stores the two service pointers; performs no other work
+ RGWSI_Zone *_zone_svc) {
+ rados_svc = _rados_svc;
+ zone_svc = _zone_svc;
+ }
+ int get_rados_obj(RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj); // callers in the .cc treat a negative return as failure
+
+ virtual int raw_stat(const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+ map<string, bufferlist> *attrs, bufferlist *first_chunk,
+ RGWObjVersionTracker *objv_tracker);
+
+ virtual int read(RGWSysObjectCtxBase& obj_ctx,
+ GetObjState& read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ map<string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>);
+
+ virtual int remove(RGWSysObjectCtxBase& obj_ctx,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj);
+
+ virtual int write(const rgw_raw_obj& obj, // writes data and xattrs in one operation; a zero set_mtime is replaced by the current time
+ real_time *pmtime,
+ map<std::string, bufferlist>& attrs,
+ bool exclusive,
+ const bufferlist& data,
+ RGWObjVersionTracker *objv_tracker,
+ real_time set_mtime);
+
+ virtual int write_data(const rgw_raw_obj& obj, // replaces only the object data (write_full), no xattrs
+ const bufferlist& bl,
+ bool exclusive,
+ RGWObjVersionTracker *objv_tracker);
+
+ virtual int get_attr(const rgw_raw_obj& obj, const char *name, bufferlist *dest);
+
+ virtual int set_attrs(const rgw_raw_obj& obj,
+ map<string, bufferlist>& attrs,
+ map<string, bufferlist> *rmattrs,
+ RGWObjVersionTracker *objv_tracker);
+
+ virtual int omap_get_all(const rgw_raw_obj& obj, std::map<string, bufferlist> *m);
+ virtual int omap_get_vals(const rgw_raw_obj& obj,
+ const string& marker,
+ uint64_t count,
+ std::map<string, bufferlist> *m,
+ bool *pmore);
+ virtual int omap_set(const rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist = false); // single key/value form
+ virtual int omap_set(const rgw_raw_obj& obj, const map<std::string, bufferlist>& m, bool must_exist = false); // multi key/value form
+ virtual int omap_del(const rgw_raw_obj& obj, const std::string& key);
+
+ virtual int notify(const rgw_raw_obj& obj,
+ bufferlist& bl,
+ uint64_t timeout_ms,
+ bufferlist *pbl);
+
+ /* wrappers */
+ int get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, const rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
+ int get_system_obj_state(RGWSysObjectCtxBase *rctx, const rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
+
+ int stat(RGWSysObjectCtxBase& obj_ctx,
+ GetObjState& state,
+ const rgw_raw_obj& obj,
+ map<string, bufferlist> *attrs,
+ bool raw_attrs,
+ real_time *lastmod,
+ uint64_t *obj_size,
+ RGWObjVersionTracker *objv_tracker);
+
+public:
+ RGWSI_SysObj_Core(CephContext *cct): RGWServiceInstance(cct) {} // NOTE(review): single-argument ctor is not explicit
+
+ RGWSI_Zone *get_zone_svc() { // returns the pointer stored by core_init(); nullptr before that
+ return zone_svc;
+ }
+};
+
+#endif
diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc
new file mode 100644
index 00000000..724f83ae
--- /dev/null
+++ b/src/rgw/services/svc_zone.cc
@@ -0,0 +1,1250 @@
+#include "svc_zone.h"
+#include "svc_rados.h"
+#include "svc_sys_obj.h"
+#include "svc_sync_modules.h"
+
+#include "rgw/rgw_zone.h"
+#include "rgw/rgw_rest_conn.h"
+
+#include "common/errno.h"
+#include "include/random.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace rgw_zone_defaults;
+
+RGWSI_Zone::RGWSI_Zone(CephContext *cct) : RGWServiceInstance(cct) // only forwards cct to the base; init() allocates the realm/zonegroup/zone/period objects
+{
+}
+
+/**
+ * Record the services this one depends on and allocate the (still empty)
+ * realm, zonegroup, zone and period objects.
+ *
+ * The objects allocated here are released by the destructor.
+ */
+void RGWSI_Zone::init(RGWSI_SysObj *_sysobj_svc,
+                      RGWSI_RADOS * _rados_svc,
+                      RGWSI_SyncModules * _sync_modules_svc)
+{
+  // default-constructed placeholders; do_start() loads their contents
+  realm = new RGWRealm();
+  zonegroup = new RGWZoneGroup();
+  zone_public_config = new RGWZone();
+  zone_params = new RGWZoneParams();
+  current_period = new RGWPeriod();
+
+  // service dependencies
+  sync_modules_svc = _sync_modules_svc;
+  rados_svc = _rados_svc;
+  sysobj_svc = _sysobj_svc;
+}
+
+RGWSI_Zone::~RGWSI_Zone() // frees the five objects allocated in init(); REST connections are freed by shutdown(), not here
+{
+ delete realm;
+ delete zonegroup;
+ delete zone_public_config;
+ delete zone_params;
+ delete current_period;
+}
+
+/**
+ * Tell whether @p target_zone pulls data from @p source_zone.
+ *
+ * Both conditions must hold: the target lists the source (by name) as a sync
+ * source, and the sync module for the source's tier type supports data export.
+ */
+bool RGWSI_Zone::zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const
+{
+  if (!target_zone.syncs_from(source_zone.name)) {
+    return false;
+  }
+  return sync_modules_svc->get_manager()->supports_data_export(source_zone.tier_type);
+}
+
+/**
+ * Start the zone service.
+ *
+ * Steps, in order:
+ *  1. start the sysobj, rados and sync-modules services;
+ *  2. load the realm and, if one exists, its current period;
+ *  3. run the legacy region -> zonegroup and regionmap conversions;
+ *  4. pick the zonegroup from the current period, or fall back to the local
+ *     zonegroup configuration (creating defaults if necessary);
+ *  5. load the zone params and locate this zone inside the zonegroup;
+ *  6. build the zone lookup tables and one REST connection per peer zone
+ *     that has endpoints.
+ *
+ * @return 0 on success, negative error code on failure.
+ */
+int RGWSI_Zone::do_start()
+{
+  int ret = sysobj_svc->start();
+  if (ret < 0) {
+    return ret;
+  }
+
+  assert(sysobj_svc->is_started()); /* if not then there's ordering issue */
+
+  ret = rados_svc->start();
+  if (ret < 0) {
+    return ret;
+  }
+  ret = sync_modules_svc->start();
+  if (ret < 0) {
+    return ret;
+  }
+  ret = realm->init(cct, sysobj_svc);
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << "failed reading realm info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  } else if (ret != -ENOENT) {
+    ldout(cct, 20) << "realm " << realm->get_name() << " " << realm->get_id() << dendl;
+    ret = current_period->init(cct, sysobj_svc, realm->get_id(), realm->get_name());
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << "failed reading current period info: " << " " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ldout(cct, 20) << "current period " << current_period->get_id() << dendl;
+  }
+
+  ret = replace_region_with_zonegroup();
+  if (ret < 0) {
+    lderr(cct) << "failed converting region to zonegroup : ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = convert_regionmap();
+  if (ret < 0) {
+    lderr(cct) << "failed converting regionmap: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  bool zg_initialized = false;
+
+  if (!current_period->get_id().empty()) {
+    ret = init_zg_from_period(&zg_initialized);
+    if (ret < 0) {
+      return ret;
+    }
+  }
+
+  bool creating_defaults = false;
+  bool using_local = (!zg_initialized);
+  if (using_local) {
+    ldout(cct, 10) << " cannot find current period zonegroup using local zonegroup" << dendl;
+    ret = init_zg_from_local(&creating_defaults);
+    if (ret < 0) {
+      return ret;
+    }
+    // read period_config into current_period
+    auto& period_config = current_period->get_config();
+    ret = period_config.read(sysobj_svc, zonegroup->realm_id);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << "ERROR: failed to read period config: "
+                    << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+  }
+
+  // NOTE(review): this message is emitted unconditionally, also when the
+  // zonegroup was taken from the current period.
+  ldout(cct, 10) << "Cannot find current period zone using local zone" << dendl;
+  if (creating_defaults && cct->_conf->rgw_zone.empty()) {
+    ldout(cct, 10) << " Using default name "<< default_zone_name << dendl;
+    zone_params->set_name(default_zone_name);
+  }
+
+  ret = zone_params->init(cct, sysobj_svc);
+  if (ret < 0 && ret != -ENOENT) {
+    lderr(cct) << "failed reading zone info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  auto zone_iter = zonegroup->zones.find(zone_params->get_id());
+  if (zone_iter == zonegroup->zones.end()) {
+    if (using_local) {
+      lderr(cct) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl;
+      return -EINVAL;
+    }
+    // the period's zonegroup does not contain this zone: retry with the
+    // locally stored zonegroup before giving up
+    ldout(cct, 1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << "), switching to local zonegroup configuration" << dendl;
+    ret = init_zg_from_local(&creating_defaults);
+    if (ret < 0) {
+      return ret;
+    }
+    zone_iter = zonegroup->zones.find(zone_params->get_id());
+  }
+  if (zone_iter != zonegroup->zones.end()) {
+    *zone_public_config = zone_iter->second;
+    ldout(cct, 20) << "zone " << zone_params->get_name() << dendl;
+  } else {
+    lderr(cct) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl;
+    return -EINVAL;
+  }
+
+  zone_short_id = current_period->get_map().get_zone_short_id(zone_params->get_id());
+
+  RGWSyncModuleRef sm;
+  if (!sync_modules_svc->get_manager()->get_module(zone_public_config->tier_type, &sm)) {
+    lderr(cct) << "ERROR: tier type not found: " << zone_public_config->tier_type << dendl;
+    return -EINVAL;
+  }
+
+  writeable_zone = sm->supports_writes();
+
+  /* first build all zones index */
+  // iterate by const reference: the previous by-value loop copied every
+  // (id, RGWZone) pair once more than necessary
+  for (const auto& ziter : zonegroup->zones) {
+    const string& id = ziter.first;
+    const RGWZone& z = ziter.second;
+    zone_id_by_name[z.name] = id;
+    zone_by_id[id] = z;
+  }
+
+  if (zone_by_id.find(zone_id()) == zone_by_id.end()) {
+    ldout(cct, 0) << "WARNING: could not find zone config in zonegroup for local zone (" << zone_id() << "), will use defaults" << dendl;
+  }
+  // operator[] default-constructs the entry when it is missing ("defaults" above)
+  *zone_public_config = zone_by_id[zone_id()];
+  for (const auto& ziter : zonegroup->zones) {
+    const string& id = ziter.first;
+    const RGWZone& z = ziter.second;
+    if (id == zone_id()) {
+      continue;
+    }
+    if (z.endpoints.empty()) {
+      // name and id were printed in swapped positions before
+      ldout(cct, 0) << "WARNING: can't generate connection for zone " << z.name << " id " << z.id << ": no endpoints defined" << dendl;
+      continue;
+    }
+    ldout(cct, 20) << "generating connection object for zone " << z.name << " id " << z.id << dendl;
+    RGWRESTConn *conn = new RGWRESTConn(cct, this, z.id, z.endpoints);
+    zone_conn_map[id] = conn;
+
+    // evaluate each sync direction once
+    const bool pull_from_peer = zone_syncs_from(*zone_public_config, z);
+    const bool peer_pulls_from_us = zone_syncs_from(z, *zone_public_config);
+    if (pull_from_peer) {
+      // &z points into zonegroup->zones, which outlives this list
+      data_sync_source_zones.push_back(&z);
+    }
+    if (peer_pulls_from_us) {
+      zone_data_notify_to_map[id] = conn;
+    }
+    if (!pull_from_peer && !peer_pulls_from_us) {
+      ldout(cct, 20) << "NOTICE: not syncing to/from zone " << z.name << " id " << z.id << dendl;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Free every REST connection owned by the service.
+ *
+ * After freeing, the pointer and the maps are reset so that no dangling
+ * pointer stays reachable: a repeated shutdown() is then a no-op and the
+ * get_zone_conn_*() lookups report "not found" instead of returning freed
+ * memory.
+ */
+void RGWSI_Zone::shutdown()
+{
+  delete rest_master_conn;
+  rest_master_conn = nullptr;
+
+  // entries of this map alias connections owned by zone_conn_map
+  // (see do_start()); forget them before the owners are freed
+  zone_data_notify_to_map.clear();
+
+  for (auto& entry : zone_conn_map) {
+    delete entry.second;
+  }
+  zone_conn_map.clear();
+
+  for (auto& entry : zonegroup_conn_map) {
+    delete entry.second;
+  }
+  zonegroup_conn_map.clear();
+}
+
+/**
+ * Append to @p regions the names found under the legacy region-info oid
+ * prefix in the zonegroup pool.
+ *
+ * @return the result of list_prefixed_objs()
+ */
+int RGWSI_Zone::list_regions(list<string>& regions)
+{
+  // a default-constructed zonegroup is only used to resolve the pool
+  RGWZoneGroup pool_probe;
+  RGWSI_SysObj::Pool zg_pool = sysobj_svc->get_pool(pool_probe.get_pool(cct));
+
+  const int ret = zg_pool.op().list_prefixed_objs(region_info_oid_prefix, &regions);
+  return ret;
+}
+
+/**
+ * Append to @p zonegroups the names found under the zonegroup-names oid
+ * prefix in the zonegroup pool.
+ *
+ * @return the result of list_prefixed_objs()
+ */
+int RGWSI_Zone::list_zonegroups(list<string>& zonegroups)
+{
+  // a default-constructed zonegroup is only used to resolve the pool
+  RGWZoneGroup pool_probe;
+  RGWSI_SysObj::Pool zg_pool = sysobj_svc->get_pool(pool_probe.get_pool(cct));
+
+  const int ret = zg_pool.op().list_prefixed_objs(zonegroup_names_oid_prefix, &zonegroups);
+  return ret;
+}
+
+/**
+ * Append to @p zones the names found under the zone-names oid prefix in the
+ * zone params pool.
+ *
+ * @return the result of list_prefixed_objs()
+ */
+int RGWSI_Zone::list_zones(list<string>& zones)
+{
+  // default-constructed zone params are only used to resolve the pool
+  RGWZoneParams pool_probe;
+  RGWSI_SysObj::Pool zone_pool = sysobj_svc->get_pool(pool_probe.get_pool(cct));
+
+  const int ret = zone_pool.op().list_prefixed_objs(zone_names_oid_prefix, &zones);
+  return ret;
+}
+
+/**
+ * Append to @p realms the names found under the realm-names oid prefix in
+ * the realm pool.
+ *
+ * @return the result of list_prefixed_objs()
+ */
+int RGWSI_Zone::list_realms(list<string>& realms)
+{
+  // a throw-away realm is only used to resolve the pool
+  RGWRealm pool_probe(cct, sysobj_svc);
+  RGWSI_SysObj::Pool realm_pool = sysobj_svc->get_pool(pool_probe.get_pool(cct));
+
+  const int ret = realm_pool.op().list_prefixed_objs(realm_names_oid_prefix, &realms);
+  return ret;
+}
+
+/**
+ * Collect the distinct period ids stored in the period pool.
+ *
+ * Every listed object name is cut at its first '.'; the resulting ids are
+ * appended to @p periods, which is then sorted and de-duplicated as a whole.
+ *
+ * @return 0 on success, negative error code if the listing fails
+ */
+int RGWSI_Zone::list_periods(list<string>& periods)
+{
+  RGWPeriod period;
+  list<string> listed_oids;
+  RGWSI_SysObj::Pool period_pool = sysobj_svc->get_pool(period.get_pool(cct));
+  const int ret = period_pool.op().list_prefixed_objs(period.get_info_oid_prefix(), &listed_oids);
+  if (ret < 0) {
+    return ret;
+  }
+
+  for (const auto& oid : listed_oids) {
+    // find() yields npos when there is no '.', and substr(0, npos) is the
+    // whole string, so such names are kept unchanged
+    periods.push_back(oid.substr(0, oid.find(".")));
+  }
+
+  // unique() only removes adjacent duplicates, hence the sort first
+  periods.sort();
+  periods.unique();
+  return 0;
+}
+
+
+/**
+ * Walk the predecessor chain starting at @p current_period and append each
+ * visited period id to @p periods (newest first).
+ *
+ * The walk stops at the first period with an empty predecessor id.
+ *
+ * @return the last non-negative init() result (0 if the chain was empty), or
+ *         the negative error code of the period that failed to load
+ */
+int RGWSI_Zone::list_periods(const string& current_period, list<string>& periods)
+{
+  int ret = 0;
+  for (string cursor = current_period; !cursor.empty(); ) {
+    RGWPeriod period(cursor);
+    ret = period.init(cct, sysobj_svc);
+    if (ret < 0) {
+      return ret;
+    }
+    periods.push_back(period.get_id());
+    cursor = period.get_predecessor();
+  }
+
+  return ret;
+}
+
+/**
+ * Replace all region configuration with zonegroup for
+ * backward compatibility.
+ *
+ * The conversion is guarded by a marker object named "converted" in the
+ * zonegroup pool: if it can be read, nothing is done. Otherwise the legacy
+ * regions are listed and, for each of them, a zonegroup (and its zones'
+ * params) is written in the new format; a realm is created when none exists
+ * and a master region/zone is known; the current period, if any, is updated;
+ * finally the old region objects are deleted and the marker is written.
+ *
+ * Returns 0 on success, -ERR# on failure.
+ */
+int RGWSI_Zone::replace_region_with_zonegroup()
+{
+  RGWZoneGroup default_zonegroup;
+  rgw_pool pool{default_zonegroup.get_pool(cct)};
+  string oid = "converted";
+  bufferlist bl;
+
+  RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx();
+  RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+
+  int ret = sysobj.rop().read(&bl);
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << __func__ << " failed to read converted: ret "<< ret << " " << cpp_strerror(-ret)
+                  << dendl;
+    return ret;
+  } else if (ret != -ENOENT) {
+    ldout(cct, 20) << "System already converted " << dendl;
+    return 0;
+  }
+
+  /* read the old default region */
+  string default_region;
+  ret = default_zonegroup.init(cct, sysobj_svc, false, true);
+  if (ret < 0) {
+    ldout(cct, 0) << __func__ << " failed init default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  ret = default_zonegroup.read_default_id(default_region, true);
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << __func__ << " failed reading old default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  /* convert regions to zonegroups */
+  list<string> regions;
+  ret = list_regions(regions);
+  if (ret < 0 && ret != -ENOENT) {
+    ldout(cct, 0) << __func__ << " failed to list regions: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  } else if (ret == -ENOENT || regions.empty()) {
+    // nothing to convert; only make sure an existing default zonegroup has
+    // a master zone
+    RGWZoneParams zoneparams(default_zone_name);
+    ret = zoneparams.init(cct, sysobj_svc);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << __func__ << ": error initializing default zone params: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    /* update master zone */
+    RGWZoneGroup default_zg(default_zonegroup_name);
+    ret = default_zg.init(cct, sysobj_svc);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << __func__ << ": error in initializing default zonegroup: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    if (ret != -ENOENT && default_zg.master_zone.empty()) {
+      default_zg.master_zone = zoneparams.get_id();
+      return default_zg.update();
+    }
+    return 0;
+  }
+
+  /* find the master region and its master zone (default region excluded) */
+  string master_region, master_zone;
+  for (const auto& region_name : regions) {
+    if (region_name == default_zonegroup_name) {
+      continue;
+    }
+    RGWZoneGroup region(region_name);
+    ret = region.init(cct, sysobj_svc, true, true);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed init region "<< region_name << ": " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    if (region.is_master_zonegroup()) {
+      master_region = region.get_id();
+      master_zone = region.master_zone;
+    }
+  }
+
+  /* create realm if there is none.
+     The realm name will be the region and zone concatenated
+     realm id will be md5 of its name */
+  if (realm->get_id().empty() && !master_region.empty() && !master_zone.empty()) {
+    string new_realm_name = master_region + "." + master_zone;
+    unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
+    char md5_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
+    MD5 hash;
+    hash.Update((const unsigned char *)new_realm_name.c_str(), new_realm_name.length());
+    hash.Final(md5);
+    buf_to_hex(md5, CEPH_CRYPTO_MD5_DIGESTSIZE, md5_str);
+    string new_realm_id(md5_str);
+    RGWRealm new_realm(new_realm_id,new_realm_name);
+    ret = new_realm.init(cct, sysobj_svc, false);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " Error initing new realm: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = new_realm.create();
+    if (ret < 0 && ret != -EEXIST) {
+      ldout(cct, 0) << __func__ << " Error creating new realm: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = new_realm.set_as_default();
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " Error setting realm as default: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    // reload the service-wide realm and period now that a default realm exists
+    ret = realm->init(cct, sysobj_svc);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " Error initing realm: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = current_period->init(cct, sysobj_svc, realm->get_id(), realm->get_name());
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " Error initing current period: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+  }
+
+  /* create zonegroups */
+  for (const auto& region_name : regions) {
+    ldout(cct, 0) << __func__ << " Converting " << region_name << dendl;
+    /* check to see if we don't have already a zonegroup with this name */
+    RGWZoneGroup new_zonegroup(region_name);
+    ret = new_zonegroup.init(cct , sysobj_svc);
+    if (ret == 0 && new_zonegroup.get_id() != region_name) {
+      ldout(cct, 0) << __func__ << " zonegroup "<< region_name << " already exists id " << new_zonegroup.get_id () <<
+        " skipping conversion " << dendl;
+      continue;
+    }
+    // load the region in the old format, using its name as id
+    RGWZoneGroup zonegroup(region_name);
+    zonegroup.set_id(region_name);
+    ret = zonegroup.init(cct, sysobj_svc, true, true);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed init zonegroup: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    zonegroup.realm_id = realm->get_id();
+    /* fix default region master zone */
+    if (region_name == default_zonegroup_name && zonegroup.master_zone.empty()) {
+      ldout(cct, 0) << __func__ << " Setting default zone as master for default region" << dendl;
+      zonegroup.master_zone = default_zone_name;
+    }
+    ret = zonegroup.update();
+    if (ret < 0 && ret != -EEXIST) {
+      ldout(cct, 0) << __func__ << " failed to update zonegroup " << region_name << ": ret "<< ret << " " << cpp_strerror(-ret)
+        << dendl;
+      return ret;
+    }
+    ret = zonegroup.update_name();
+    if (ret < 0 && ret != -EEXIST) {
+      ldout(cct, 0) << __func__ << " failed to update_name for zonegroup " << region_name << ": ret "<< ret << " " << cpp_strerror(-ret)
+        << dendl;
+      return ret;
+    }
+    if (zonegroup.get_name() == default_region) {
+      ret = zonegroup.set_as_default();
+      if (ret < 0) {
+        ldout(cct, 0) << __func__ << " failed to set_as_default " << region_name << ": ret "<< ret << " " << cpp_strerror(-ret)
+          << dendl;
+        return ret;
+      }
+    }
+    /* convert the params of every zone listed in this zonegroup */
+    for (const auto& zone_entry : zonegroup.zones) {
+      const string& zone_key = zone_entry.first;
+      ldout(cct, 0) << __func__ << " Converting zone " << zone_key << dendl;
+      RGWZoneParams zoneparams(zone_key, zone_key);
+      zoneparams.set_id(zone_key);
+      zoneparams.realm_id = realm->get_id();
+      ret = zoneparams.init(cct, sysobj_svc);
+      if (ret < 0 && ret != -ENOENT) {
+        ldout(cct, 0) << __func__ << " failed to init zoneparams " << zone_key << ": " << cpp_strerror(-ret) << dendl;
+        return ret;
+      } else if (ret == -ENOENT) {
+        ldout(cct, 0) << __func__ << " zone is part of another cluster " << zone_key << " skipping " << dendl;
+        continue;
+      }
+      // NOTE(review): repeats the assignment made before the zone loop;
+      // kept as it was.
+      zonegroup.realm_id = realm->get_id();
+      ret = zoneparams.update();
+      if (ret < 0 && ret != -EEXIST) {
+        ldout(cct, 0) << __func__ << " failed to update zoneparams " << zone_key << ": " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+      ret = zoneparams.update_name();
+      if (ret < 0 && ret != -EEXIST) {
+        // this used to report "failed to init zoneparams", hiding which step failed
+        ldout(cct, 0) << __func__ << " failed to update_name for zoneparams " << zone_key << ": " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+    }
+
+    if (!current_period->get_id().empty()) {
+      ret = current_period->add_zonegroup(zonegroup);
+      if (ret < 0) {
+        ldout(cct, 0) << __func__ << " failed to add zonegroup to current_period: " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+    }
+  }
+
+  if (!current_period->get_id().empty()) {
+    ret = current_period->update();
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed to update new period: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = current_period->store_info(false);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed to store new period: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = current_period->reflect();
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed to update local objects: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+  }
+
+  /* delete the old-format region objects */
+  for (const auto& region_name : regions) {
+    RGWZoneGroup zonegroup(region_name);
+    ret = zonegroup.init(cct, sysobj_svc, true, true);
+    if (ret < 0) {
+      ldout(cct, 0) << __func__ << " failed init zonegroup " << region_name << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = zonegroup.delete_obj(true);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << __func__ << " failed to delete region " << region_name << ": ret "<< ret << " " << cpp_strerror(-ret)
+        << dendl;
+      return ret;
+    }
+  }
+
+  /* mark as converted */
+  // bl is still empty here: the read above returned -ENOENT
+  ret = sysobj.wop()
+              .set_exclusive(true)
+              .write(bl);
+  if (ret < 0 ) {
+    ldout(cct, 0) << __func__ << " failed to mark cluster as converted: ret "<< ret << " " << cpp_strerror(-ret)
+      << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+/**
+ * Store a connection for a zonegroup, replacing (and freeing) any
+ * connection already registered under the same zonegroup id.
+ * @param zonegroup_conn_map map which new connection will be added to
+ * @param zonegroup zonegroup which new connection will connect to
+ * @param new_connection pointer to new connection instance
+ */
+static void add_new_connection_to_map(map<string, RGWRESTConn *> &zonegroup_conn_map,
+                                      const RGWZoneGroup &zonegroup, RGWRESTConn *new_connection)
+{
+  // operator[] yields the existing slot, or a fresh one holding a null
+  // pointer; deleting a null pointer is a no-op, so both cases collapse
+  // into the same two statements.
+  RGWRESTConn *&slot = zonegroup_conn_map[zonegroup.get_id()];
+  delete slot;
+  slot = new_connection;
+}
+
+/**
+ * Try to set up the zonegroup (and zone params) from the current period.
+ *
+ * If the locally configured zonegroup is part of the period map, the
+ * period's copy replaces it and the zone params are loaded. In addition a
+ * REST connection is created for every non-empty zonegroup of the period,
+ * using the endpoints of that zonegroup's master zone; a single-zone
+ * zonegroup without master zone gets that zone stored as master.
+ *
+ * @param initialized  set to true only when the whole procedure succeeded;
+ *                     left false when there is no period or no local
+ *                     zonegroup object (-ENOENT)
+ * @return 0 on success or "nothing to do", negative error code on failure
+ */
+int RGWSI_Zone::init_zg_from_period(bool *initialized)
+{
+  *initialized = false;
+
+  if (current_period->get_id().empty()) {
+    return 0;
+  }
+
+  int ret = zonegroup->init(cct, sysobj_svc);
+  ldout(cct, 20) << "period zonegroup init ret " << ret << dendl;
+  if (ret == -ENOENT) {
+    return 0;
+  }
+  if (ret < 0) {
+    ldout(cct, 0) << "failed reading zonegroup info: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+  ldout(cct, 20) << "period zonegroup name " << zonegroup->get_name() << dendl;
+
+  map<string, RGWZoneGroup>::const_iterator iter =
+    current_period->get_map().zonegroups.find(zonegroup->get_id());
+
+  if (iter != current_period->get_map().zonegroups.end()) {
+    ldout(cct, 20) << "using current period zonegroup " << zonegroup->get_name() << dendl;
+    *zonegroup = iter->second;
+    ret = zonegroup->init(cct, sysobj_svc, false);
+    if (ret < 0) {
+      ldout(cct, 0) << "failed init zonegroup: " << " " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = zone_params->init(cct, sysobj_svc);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << "failed reading zone params info: " << " " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    if (ret ==-ENOENT && zonegroup->get_name() == default_zonegroup_name) {
+      // no zone params found: retry under the default zone name
+      ldout(cct, 10) << " Using default name "<< default_zone_name << dendl;
+      zone_params->set_name(default_zone_name);
+      ret = zone_params->init(cct, sysobj_svc);
+      if (ret < 0 && ret != -ENOENT) {
+        ldout(cct, 0) << "failed reading zone params info: " << " " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+    }
+  }
+  for (iter = current_period->get_map().zonegroups.begin();
+       iter != current_period->get_map().zonegroups.end(); ++iter){
+    const RGWZoneGroup& zg = iter->second;
+    // use endpoints from the zonegroup's master zone
+    auto master = zg.zones.find(zg.master_zone);
+    if (master == zg.zones.end()) {
+      // Check for empty zonegroup which can happen if zone was deleted before removal
+      if (zg.zones.size() == 0)
+        continue;
+      // fix missing master zone for a single zone zonegroup
+      if (zg.master_zone.empty() && zg.zones.size() == 1) {
+        master = zg.zones.begin();
+        ldout(cct, 0) << "zonegroup " << zg.get_name() << " missing master_zone, setting zone " <<
+          master->second.name << " id:" << master->second.id << " as master" << dendl;
+        if (zonegroup->get_id() == zg.get_id()) {
+          zonegroup->master_zone = master->second.id;
+          ret = zonegroup->update();
+          if (ret < 0) {
+            ldout(cct, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+        } else {
+          RGWZoneGroup fixed_zg(zg.get_id(),zg.get_name());
+          ret = fixed_zg.init(cct, sysobj_svc);
+          if (ret < 0) {
+            ldout(cct, 0) << "error initializing zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+          fixed_zg.master_zone = master->second.id;
+          ret = fixed_zg.update();
+          if (ret < 0) {
+            // this used to say "error initializing", which named the wrong step
+            ldout(cct, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+        }
+      } else {
+        ldout(cct, 0) << "zonegroup " << zg.get_name() << " missing zone for master_zone=" <<
+          zg.master_zone << dendl;
+        return -EINVAL;
+      }
+    }
+    const auto& endpoints = master->second.endpoints;
+    add_new_connection_to_map(zonegroup_conn_map, zg, new RGWRESTConn(cct, this, zg.get_id(), endpoints));
+    if (!current_period->get_master_zonegroup().empty() &&
+        zg.get_id() == current_period->get_master_zonegroup()) {
+      // free a connection left by an earlier initialisation instead of
+      // leaking it (deleting a null pointer is a no-op)
+      delete rest_master_conn;
+      rest_master_conn = new RGWRESTConn(cct, this, zg.get_id(), endpoints);
+    }
+  }
+
+  *initialized = true;
+
+  return 0;
+}
+
+/**
+ * Set up the zonegroup from the locally stored configuration.
+ *
+ * When no zonegroup object exists and none was requested through the
+ * rgw_zonegroup option, a default zonegroup is created and loaded. If the
+ * resulting zonegroup is the master zonegroup, a REST connection to its
+ * master zone is created; a single-zone zonegroup without master zone gets
+ * that zone stored as master first.
+ *
+ * @param creating_defaults  set to true when the default zonegroup had to be
+ *                           created; otherwise left untouched
+ * @return 0 on success, negative error code on failure
+ */
+int RGWSI_Zone::init_zg_from_local(bool *creating_defaults)
+{
+  int ret = zonegroup->init(cct, sysobj_svc);
+  if ( (ret < 0 && ret != -ENOENT) || (ret == -ENOENT && !cct->_conf->rgw_zonegroup.empty())) {
+    ldout(cct, 0) << "failed reading zonegroup info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  } else if (ret == -ENOENT) {
+    *creating_defaults = true;
+    ldout(cct, 10) << "Creating default zonegroup " << dendl;
+    ret = zonegroup->create_default();
+    if (ret < 0) {
+      ldout(cct, 0) << "failure in zonegroup create_default: ret "<< ret << " " << cpp_strerror(-ret)
+        << dendl;
+      return ret;
+    }
+    ret = zonegroup->init(cct, sysobj_svc);
+    if (ret < 0) {
+      // this used to repeat the create_default message for the init step
+      ldout(cct, 0) << "failure in zonegroup init after create_default: ret "<< ret << " " << cpp_strerror(-ret)
+        << dendl;
+      return ret;
+    }
+  }
+  ldout(cct, 20) << "zonegroup " << zonegroup->get_name() << dendl;
+  if (zonegroup->is_master_zonegroup()) {
+    // use endpoints from the zonegroup's master zone
+    auto master = zonegroup->zones.find(zonegroup->master_zone);
+    if (master == zonegroup->zones.end()) {
+      // fix missing master zone for a single zone zonegroup
+      if (zonegroup->master_zone.empty() && zonegroup->zones.size() == 1) {
+        master = zonegroup->zones.begin();
+        ldout(cct, 0) << "zonegroup " << zonegroup->get_name() << " missing master_zone, setting zone " <<
+          master->second.name << " id:" << master->second.id << " as master" << dendl;
+        zonegroup->master_zone = master->second.id;
+        ret = zonegroup->update();
+        if (ret < 0) {
+          // this used to say "error initializing", which named the wrong step
+          ldout(cct, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+          return ret;
+        }
+      } else {
+        ldout(cct, 0) << "zonegroup " << zonegroup->get_name() << " missing zone for "
+          "master_zone=" << zonegroup->master_zone << dendl;
+        return -EINVAL;
+      }
+    }
+    const auto& endpoints = master->second.endpoints;
+    // do_start() can call this after init_zg_from_period() already created a
+    // master connection; free that one instead of leaking it (deleting a
+    // null pointer is a no-op)
+    delete rest_master_conn;
+    rest_master_conn = new RGWRESTConn(cct, this, zonegroup->get_id(), endpoints);
+  }
+
+  return 0;
+}
+
+/**
+ * Convert a legacy region map object, if one exists, to zonegroups.
+ *
+ * The region map is read from the zone root pool. Every zonegroup it
+ * contains is updated (or created when the update reports -ENOENT), its
+ * user and bucket quotas are copied into the current period, and the region
+ * map object is removed so that the conversion is not attempted again.
+ *
+ * @return 0 on success or when there is no region map, -EIO when the map
+ *         cannot be decoded, otherwise the negative error of the failing step
+ */
+int RGWSI_Zone::convert_regionmap()
+{
+  RGWZoneGroupMap zonegroupmap;
+
+  string pool_name = cct->_conf->rgw_zone_root_pool;
+  if (pool_name.empty()) {
+    pool_name = RGW_DEFAULT_ZONE_ROOT_POOL;
+  }
+  string oid = region_map_oid;
+
+  rgw_pool pool(pool_name);
+  bufferlist bl;
+
+  RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx();
+  RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
+
+  int ret = sysobj.rop().read(&bl);
+  if (ret < 0 && ret != -ENOENT) {
+    return ret;
+  } else if (ret == -ENOENT) {
+    // no legacy region map: nothing to convert
+    return 0;
+  }
+
+  try {
+    auto iter = bl.cbegin();
+    decode(zonegroupmap, iter);
+  } catch (buffer::error& err) {
+    ldout(cct, 0) << "error decoding regionmap from " << pool << ":" << oid << dendl;
+    return -EIO;
+  }
+
+  for (map<string, RGWZoneGroup>::iterator iter = zonegroupmap.zonegroups.begin();
+       iter != zonegroupmap.zonegroups.end(); ++iter) {
+    RGWZoneGroup& zonegroup = iter->second;
+    ret = zonegroup.init(cct, sysobj_svc, false);
+    if (ret < 0) {
+      // the init result used to be overwritten by update() without a check
+      ldout(cct, 0) << "Error could not init zonegroup " << zonegroup.get_name() << ": " <<
+        cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    ret = zonegroup.update();
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << "Error could not update zonegroup " << zonegroup.get_name() << ": " <<
+        cpp_strerror(-ret) << dendl;
+      return ret;
+    } else if (ret == -ENOENT) {
+      ret = zonegroup.create();
+      if (ret < 0) {
+        ldout(cct, 0) << "Error could not create " << zonegroup.get_name() << ": " <<
+          cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+    }
+  }
+
+  current_period->set_user_quota(zonegroupmap.user_quota);
+  current_period->set_bucket_quota(zonegroupmap.bucket_quota);
+
+  // remove the region_map so we don't try to convert again
+  ret = sysobj.wop().remove();
+  if (ret < 0) {
+    ldout(cct, 0) << "Error could not remove " << sysobj.get_obj()
+      << " after upgrading to zonegroup map: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+const RGWZoneParams& RGWSI_Zone::get_zone_params() const // read-only view of the zone_params object allocated in init()
+{
+ return *zone_params;
+}
+
+const RGWZone& RGWSI_Zone::get_zone() const // read-only view of zone_public_config, which do_start() fills from the zonegroup's zone table
+{
+ return *zone_public_config;
+}
+
+const RGWZoneGroup& RGWSI_Zone::get_zonegroup() const // read-only view of the zonegroup object allocated in init()
+{
+ return *zonegroup;
+}
+
+/**
+ * Copy the zonegroup identified by @p id into @p zg.
+ *
+ * The local zonegroup is used when its id matches; otherwise the lookup is
+ * delegated to the current period, if there is one.
+ *
+ * @return 0 when the local zonegroup matched, the period's lookup result
+ *         when delegated. When neither applies (no match and no period) 0 is
+ *         returned and @p zg is left untouched.
+ */
+int RGWSI_Zone::get_zonegroup(const string& id, RGWZoneGroup& zg) const
+{
+  if (id == zonegroup->get_id()) {
+    zg = *zonegroup;
+    return 0;
+  }
+
+  if (current_period->get_id().empty()) {
+    return 0;
+  }
+
+  return current_period->get_zonegroup(zg, id);
+}
+
+const RGWRealm& RGWSI_Zone::get_realm() const // read-only view of the realm object allocated in init()
+{
+ return *realm;
+}
+
+const RGWPeriod& RGWSI_Zone::get_current_period() const // read-only view of the current_period object allocated in init()
+{
+ return *current_period;
+}
+
+const string& RGWSI_Zone::get_current_period_id() // id of current_period; code in this file treats an empty id as "no period"
+{
+ return current_period->get_id();
+}
+
+/**
+ * Tell whether some zonegroup is registered under the API name @p api.
+ *
+ * With a current period, only the period map's zonegroups_by_api table is
+ * consulted; without one, only the local zonegroup's api_name is compared.
+ */
+bool RGWSI_Zone::has_zonegroup_api(const std::string& api) const
+{
+  const bool have_period = !current_period->get_id().empty();
+  if (!have_period) {
+    return zonegroup->api_name == api;
+  }
+
+  const auto& by_api = current_period->get_map().zonegroups_by_api;
+  return by_api.find(api) != by_api.end();
+}
+
+/* True only when writeable_zone is set and the zone is not marked read-only. */
+bool RGWSI_Zone::zone_is_writeable()
+{
+  if (!writeable_zone) {
+    return false;
+  }
+  return !get_zone().is_read_only();
+}
+
+/* Returns the stored zone_short_id value. */
+uint32_t RGWSI_Zone::get_zone_short_id() const
+{
+  const uint32_t short_id = zone_short_id;
+  return short_id;
+}
+
+/* Name of the zone, as reported by the zone params. */
+const string& RGWSI_Zone::zone_name()
+{
+  const RGWZoneParams& params = get_zone_params();
+  return params.get_name();
+}
+/* Id of the zone, as reported by the zone params. */
+const string& RGWSI_Zone::zone_id()
+{
+  const RGWZoneParams& params = get_zone_params();
+  return params.get_id();
+}
+
+/*
+ * Look up a zone in zone_by_id.
+ * On a hit, *zone is pointed at the stored entry and true is returned;
+ * on a miss, *zone is left untouched and false is returned.
+ */
+bool RGWSI_Zone::find_zone_by_id(const string& id, RGWZone **zone)
+{
+  auto found = zone_by_id.find(id);
+  const bool exists = (found != zone_by_id.end());
+  if (exists) {
+    *zone = &(found->second);
+  }
+  return exists;
+}
+
+/* Returns the connection registered in zone_conn_map for id, or a null pointer if there is none. */
+RGWRESTConn *RGWSI_Zone::get_zone_conn_by_id(const string& id) {
+  auto entry = zone_conn_map.find(id);
+  if (entry != zone_conn_map.end()) {
+    return entry->second;
+  }
+
+  return nullptr;
+}
+
+/*
+ * Translates a zone name to its id via zone_id_by_name and returns the
+ * connection for that id; a null pointer if the name is unknown or no
+ * connection is registered for the id.
+ */
+RGWRESTConn *RGWSI_Zone::get_zone_conn_by_name(const string& name) {
+  auto name_entry = zone_id_by_name.find(name);
+  if (name_entry != zone_id_by_name.end()) {
+    return get_zone_conn_by_id(name_entry->second);
+  }
+
+  return nullptr;
+}
+
+/*
+ * Look up a zone id by zone name.
+ * On a hit, *id receives the id and true is returned; on a miss, *id is
+ * left untouched and false is returned.
+ */
+bool RGWSI_Zone::find_zone_id_by_name(const string& name, string *id) {
+  auto name_entry = zone_id_by_name.find(name);
+  const bool known = (name_entry != zone_id_by_name.end());
+  if (known) {
+    *id = name_entry->second;
+  }
+  return known;
+}
+
+/* Mirrors the log_data flag of the public zone configuration. */
+bool RGWSI_Zone::need_to_log_data() const
+{
+  const bool log_data = zone_public_config->log_data;
+  return log_data;
+}
+
+/*
+ * True when the zonegroup is the master zonegroup and this zone's id
+ * equals the zonegroup's master_zone.
+ */
+bool RGWSI_Zone::is_meta_master() const
+{
+  return zonegroup->is_master_zonegroup() &&
+         (zonegroup->master_zone == zone_public_config->id);
+}
+
+/*
+ * Metadata is logged only on the meta master, and only when the zonegroup
+ * has more than one zone or the current period reports multiple
+ * zonegroups with zones.
+ */
+bool RGWSI_Zone::need_to_log_metadata() const
+{
+  if (!is_meta_master()) {
+    return false;
+  }
+  if (zonegroup->zones.size() > 1) {
+    return true;
+  }
+  return current_period->is_multi_zonegroups_with_zones();
+}
+
+/*
+ * Resharding is allowed when there is no current period (empty id), or
+ * when the zonegroup has exactly one zone and the period has a single
+ * zonegroup.
+ */
+bool RGWSI_Zone::can_reshard() const
+{
+  if (current_period->get_id().empty()) {
+    return true;
+  }
+  return zonegroup->zones.size() == 1 && current_period->is_single_zonegroup();
+}
+
+/**
+ * Check whether bucket metadata could be synced from this zone.
+ * bucket: the bucket to check (not consulted by the current checks)
+ * Returns false if the bucket metadata is not synced, i.e. when there is
+ * no current period, the zonegroup is not the master zonegroup, the
+ * deployment is a single zonegroup with a single zone, or this zone is
+ * not the zonegroup's master zone.
+ */
+bool RGWSI_Zone::is_syncing_bucket_meta(const rgw_bucket& bucket)
+{
+  /* a current period and the master zonegroup are both required */
+  if (current_period->get_id().empty() || !zonegroup->is_master_zonegroup()) {
+    return false;
+  }
+
+  /* a lone zone in a lone zonegroup has nobody to sync with */
+  const bool standalone =
+    current_period->is_single_zonegroup() && zonegroup->zones.size() == 1;
+  if (standalone) {
+    return false;
+  }
+
+  /* finally, this zone must be the master zone */
+  return zonegroup->master_zone.compare(zone_public_config->id) == 0;
+}
+
+
+/*
+ * Choose the placement rule for a new bucket and resolve it against the
+ * local zone.
+ *
+ * The rule is taken, in order of precedence, from the request, from the
+ * user's default placement, or from the zonegroup's default placement.
+ * The chosen rule must exist in the zonegroup's placement targets and the
+ * user must be permitted to use it. The storage class comes from the
+ * request when set there, otherwise from the chosen rule.
+ *
+ * pselected_rule_name, when non-null, receives the selected rule.
+ * rule_info is passed on to select_bucket_location_by_rule().
+ *
+ * Returns a negative error if the zonegroup lookup fails, the rule is
+ * missing or misconfigured, or the user is not permitted; otherwise the
+ * result of select_bucket_location_by_rule().
+ */
+int RGWSI_Zone::select_new_bucket_location(const RGWUserInfo& user_info, const string& zonegroup_id,
+                                           const rgw_placement_rule& request_rule,
+                                           rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info)
+{
+  /* the zonegroup has to be resolvable before any rule can be picked */
+  RGWZoneGroup zonegroup;
+  const int ret = get_zonegroup(zonegroup_id, zonegroup);
+  if (ret < 0) {
+    ldout(cct, 0) << "could not find zonegroup " << zonegroup_id << " in current period" << dendl;
+    return ret;
+  }
+
+  const auto& targets = zonegroup.placement_targets;
+  auto titer = targets.end();
+  const rgw_placement_rule *used_rule = nullptr;
+
+  /* precedence: request rule > user default rule > zonegroup default rule */
+  if (!request_rule.name.empty()) {
+    used_rule = &request_rule;
+    titer = targets.find(request_rule.name);
+    if (titer == targets.end()) {
+      ldout(cct, 0) << "could not find requested placement id " << request_rule
+                    << " within zonegroup " << dendl;
+      return -ERR_INVALID_LOCATION_CONSTRAINT;
+    }
+  } else if (!user_info.default_placement.name.empty()) {
+    used_rule = &user_info.default_placement;
+    titer = targets.find(user_info.default_placement.name);
+    if (titer == targets.end()) {
+      ldout(cct, 0) << "could not find user default placement id " << user_info.default_placement
+                    << " within zonegroup " << dendl;
+      return -ERR_INVALID_LOCATION_CONSTRAINT;
+    }
+  } else if (zonegroup.default_placement.name.empty()) {
+    /* the zonegroup default is the last fallback and must be configured */
+    ldout(cct, 0) << "misconfiguration, zonegroup default placement id should not be empty." << dendl;
+    return -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION;
+  } else {
+    used_rule = &zonegroup.default_placement;
+    titer = targets.find(zonegroup.default_placement.name);
+    if (titer == targets.end()) {
+      ldout(cct, 0) << "could not find zonegroup default placement id " << zonegroup.default_placement
+                    << " within zonegroup " << dendl;
+      return -ERR_INVALID_LOCATION_CONSTRAINT;
+    }
+  }
+
+  /* the target's tags decide whether this user may use the rule */
+  if (!titer->second.user_permitted(user_info.placement_tags)) {
+    ldout(cct, 0) << "user not permitted to use placement rule " << titer->first << dendl;
+    return -EPERM;
+  }
+
+  /* a storage class given in the request wins over the one of the chosen rule */
+  const string& storage_class = request_rule.storage_class.empty()
+                                  ? used_rule->storage_class
+                                  : request_rule.storage_class;
+
+  rgw_placement_rule rule(titer->first, storage_class);
+
+  if (pselected_rule_name) {
+    *pselected_rule_name = rule;
+  }
+
+  return select_bucket_location_by_rule(rule, rule_info);
+}
+
+/*
+ * Resolve a placement rule to the placement info configured in the local
+ * zone.
+ *
+ * An empty rule name falls back to the legacy placement (only when
+ * rule_info is non-null; otherwise 0 is returned right away).
+ * Returns -EINVAL if the local zone has no placement pool for the rule or
+ * the rule's storage class does not exist there. On success, *rule_info
+ * (if non-null) receives a copy of the zone's placement info and 0 is
+ * returned.
+ */
+int RGWSI_Zone::select_bucket_location_by_rule(const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info)
+{
+  if (location_rule.name.empty()) {
+    /* only reachable when setting a bucket location for a bucket that was
+     * created on a different zone with a legacy / default pool configuration
+     */
+    return rule_info ? select_legacy_bucket_placement(rule_info) : 0;
+  }
+
+  /*
+   * The rule must be configured in the local zone, since that is where
+   * this bucket object is going to reside.
+   */
+  auto piter = zone_params->placement_pools.find(location_rule.name);
+  if (piter == zone_params->placement_pools.end()) {
+    /* no such rule here: data for this bucket cannot be placed in this zone */
+    ldout(cct, 0) << "ERROR: This zone does not contain placement rule "
+                  << location_rule << " present in the zonegroup!" << dendl;
+    return -EINVAL;
+  }
+
+  RGWZonePlacementInfo& placement_info = piter->second;
+
+  auto storage_class = location_rule.get_storage_class();
+  if (!placement_info.storage_class_exists(storage_class)) {
+    ldout(cct, 5) << "requested storage class does not exist: " << storage_class << dendl;
+    return -EINVAL;
+  }
+
+  if (rule_info) {
+    *rule_info = placement_info;
+  }
+
+  return 0;
+}
+
+/*
+ * Entry point for bucket placement selection.
+ *
+ * When the zone has placement pools configured, the choice is delegated
+ * to select_new_bucket_location(). Otherwise the selected rule (if
+ * requested) is cleared and the legacy placement is used to fill
+ * rule_info (if requested).
+ */
+int RGWSI_Zone::select_bucket_placement(const RGWUserInfo& user_info, const string& zonegroup_id,
+                                        const rgw_placement_rule& placement_rule,
+                                        rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info)
+{
+  const bool legacy = zone_params->placement_pools.empty();
+  if (!legacy) {
+    return select_new_bucket_location(user_info, zonegroup_id, placement_rule,
+                                      pselected_rule, rule_info);
+  }
+
+  if (pselected_rule) {
+    pselected_rule->clear();
+  }
+
+  return rule_info ? select_legacy_bucket_placement(rule_info) : 0;
+}
+
+/*
+ * Fill rule_info from the legacy "avail pools" object in the domain root.
+ *
+ * The pool map is read from the object data first; if that read fails or
+ * yields nothing, the object's omap is used instead and the object data
+ * is rewritten from it. If no pool is known at all, a
+ * "default.<suffix>" pool is created and registered. One of the known
+ * pools is then picked (at random when there are several) and used for
+ * the data, data-extra and index pools of rule_info.
+ *
+ * Returns 0 on success, or the negative error from creating or
+ * registering the default pool. A failure to rewrite the object data is
+ * only logged.
+ */
+int RGWSI_Zone::select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info)
+{
+  map<string, bufferlist> m;
+
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+
+  /* first choice: the encoded map stored as the object's data */
+  bufferlist map_bl;
+  int ret = sysobj.rop().read(&map_bl);
+  if (ret >= 0) {
+    try {
+      auto iter = map_bl.cbegin();
+      decode(m, iter);
+    } catch (buffer::error& err) {
+      ldout(cct, 0) << "ERROR: couldn't decode avail_pools" << dendl;
+    }
+  }
+
+  /* second choice: the omap entries; remember to refresh the object data */
+  bool write_map = false;
+  if (m.empty()) {
+    ret = sysobj.omap().get_all(&m);
+
+    write_map = true;
+  }
+
+  /* nothing usable anywhere: create and register a default pool */
+  if (ret < 0 || m.empty()) {
+    string default_pool = string("default.") + default_storage_pool_suffix;
+    vector<rgw_pool> pools;
+    pools.push_back(rgw_pool(default_pool));
+    vector<int> retcodes;
+    ret = rados_svc->pool().create(pools, &retcodes);
+    if (ret < 0)
+      return ret;
+
+    bufferlist bl;
+    ret = sysobj.omap().set(default_pool, bl);
+    if (ret < 0)
+      return ret;
+    m[default_pool] = bl;
+  }
+
+  if (write_map) {
+    bufferlist new_bl;
+    encode(m, new_bl);
+    ret = sysobj.wop().write(new_bl);
+    if (ret < 0) {
+      ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl;
+    }
+  }
+
+  auto miter = m.begin();
+  if (m.size() > 1) {
+    // several candidates: choose a pool at random
+    auto r = ceph::util::generate_random_number<size_t>(0, m.size() - 1);
+    std::advance(miter, r);
+  }
+  string pool_name = miter->first;
+
+  rgw_pool pool = pool_name;
+
+  rule_info->storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
+  rule_info->data_extra_pool = pool_name;
+  rule_info->index_pool = pool_name;
+  rule_info->index_type = RGWBIType_Normal;
+
+  return 0;
+}
+
+/*
+ * Rewrite the data of the "avail pools" object from its omap entries.
+ *
+ * Returns the negative error from reading the omap, or the result of the
+ * write (a failed write is also logged as a warning).
+ */
+int RGWSI_Zone::update_placement_map()
+{
+  map<string, bufferlist> m;
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+
+  int ret = sysobj.omap().get_all(&m);
+  if (ret < 0)
+    return ret;
+
+  bufferlist new_bl;
+  encode(m, new_bl);
+  ret = sysobj.wop().write(new_bl);
+  if (ret < 0) {
+    ldout(cct, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl;
+  }
+
+  return ret;
+}
+
+/*
+ * Register new_pool in the "avail pools" omap.
+ *
+ * The pool must pass a lookup first; a failed lookup is returned as is.
+ * The object data is refreshed afterwards on a best-effort basis, and the
+ * result of the omap update is what gets returned.
+ */
+int RGWSI_Zone::add_bucket_placement(const rgw_pool& new_pool)
+{
+  const int lookup_ret = rados_svc->pool(new_pool).lookup();
+  if (lookup_ret < 0) { // DNE, or something
+    return lookup_ret;
+  }
+
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+
+  bufferlist empty_bl;
+  const int set_ret = sysobj.omap().set(new_pool.to_str(), empty_bl);
+
+  // best effort: the outcome of the refresh is deliberately ignored
+  update_placement_map();
+
+  return set_ret;
+}
+
+/*
+ * Drop old_pool from the "avail pools" omap.
+ *
+ * The object data is refreshed afterwards on a best-effort basis; the
+ * result of the omap deletion is what gets returned.
+ */
+int RGWSI_Zone::remove_bucket_placement(const rgw_pool& old_pool)
+{
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+
+  const int del_ret = sysobj.omap().del(old_pool.to_str());
+
+  // best effort: the outcome of the refresh is deliberately ignored
+  update_placement_map();
+
+  return del_ret;
+}
+
+/*
+ * Collect the pools registered in the "avail pools" omap.
+ *
+ * names is cleared and refilled with one rgw_pool per omap key.
+ * Returns the number of pools in names, or the negative error from
+ * reading the omap (names is left untouched in that case).
+ */
+int RGWSI_Zone::list_placement_set(set<rgw_pool>& names)
+{
+  map<string, bufferlist> m;
+
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto obj_ctx = sysobj_svc->init_obj_ctx();
+  auto sysobj = obj_ctx.get_obj(obj);
+  int ret = sysobj.omap().get_all(&m);
+  if (ret < 0)
+    return ret;
+
+  names.clear();
+  for (const auto& entry : m) {
+    names.insert(rgw_pool(entry.first));
+  }
+
+  return names.size();
+}
+
+/*
+ * Fetch the URL of the zone that requests should be redirected to.
+ *
+ * Returns false when no redirect zone is configured, when no connection
+ * is registered for it in zone_conn_map, or when the connection cannot
+ * provide a URL (the latter two are logged). On success, *endpoint holds
+ * the URL and true is returned.
+ */
+bool RGWSI_Zone::get_redirect_zone_endpoint(string *endpoint)
+{
+  if (zone_public_config->redirect_zone.empty()) {
+    return false;
+  }
+
+  auto iter = zone_conn_map.find(zone_public_config->redirect_zone);
+  if (iter == zone_conn_map.end()) {
+    ldout(cct, 0) << "ERROR: cannot find entry for redirect zone: " << zone_public_config->redirect_zone << dendl;
+    return false;
+  }
+
+  RGWRESTConn *conn = iter->second;
+
+  int ret = conn->get_url(*endpoint);
+  if (ret < 0) {
+    /* name the call that actually failed so the log matches the code */
+    ldout(cct, 0) << "ERROR: redirect zone, conn->get_url() returned ret=" << ret << dendl;
+    return false;
+  }
+
+  return true;
+}
+
diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h
new file mode 100644
index 00000000..8c8dbeba
--- /dev/null
+++ b/src/rgw/services/svc_zone.h
@@ -0,0 +1,134 @@
+#ifndef CEPH_RGW_SERVICES_ZONE_H
+#define CEPH_RGW_SERVICES_ZONE_H
+
+
+#include "rgw/rgw_service.h"
+
+
+class RGWSI_RADOS;
+class RGWSI_SysObj;
+class RGWSI_SyncModules;
+
+class RGWRealm;
+class RGWZoneGroup;
+class RGWZone;
+class RGWZoneParams;
+class RGWPeriod;
+class RGWZonePlacementInfo;
+
+class RGWRESTConn;
+
+/*
+ * Zone service: holds pointers to the realm, zonegroup, zone and current
+ * period objects, lookup tables for zones and REST connections, and the
+ * bucket placement selection helpers.
+ */
+class RGWSI_Zone : public RGWServiceInstance
+{
+ friend struct RGWServices_Def;
+
+ /* services this one depends on; set through init() */
+ RGWSI_SysObj *sysobj_svc{nullptr};
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_SyncModules *sync_modules_svc{nullptr};
+
+ RGWRealm *realm{nullptr};
+ RGWZoneGroup *zonegroup{nullptr};
+ RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
+ RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */
+ RGWPeriod *current_period{nullptr};
+ uint32_t zone_short_id{0};
+ bool writeable_zone{false};
+
+ RGWRESTConn *rest_master_conn{nullptr};
+ /* looked up by zone id (see get_zone_conn_by_id) */
+ map<string, RGWRESTConn *> zone_conn_map;
+ std::vector<const RGWZone*> data_sync_source_zones;
+ map<string, RGWRESTConn *> zone_data_notify_to_map;
+ map<string, RGWRESTConn *> zonegroup_conn_map;
+
+ /* zone name -> zone id */
+ map<string, string> zone_id_by_name;
+ /* zone id -> zone */
+ map<string, RGWZone> zone_by_id;
+
+ void init(RGWSI_SysObj *_sysobj_svc,
+ RGWSI_RADOS *_rados_svc,
+ RGWSI_SyncModules *_sync_modules_svc);
+ int do_start() override;
+ void shutdown() override;
+
+ int replace_region_with_zonegroup();
+ int init_zg_from_period(bool *initialized);
+ int init_zg_from_local(bool *creating_defaults);
+ int convert_regionmap();
+
+ /* rewrites the data of the avail-pools object from its omap entries */
+ int update_placement_map();
+public:
+ RGWSI_Zone(CephContext *cct);
+ ~RGWSI_Zone();
+
+ const RGWZoneParams& get_zone_params() const;
+ const RGWPeriod& get_current_period() const;
+ const RGWRealm& get_realm() const;
+ const RGWZoneGroup& get_zonegroup() const;
+ /* copies the local zonegroup when id matches it, otherwise asks the
+  * current period (only if that period has a non-empty id) */
+ int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) const;
+ const RGWZone& get_zone() const;
+
+ const string& zone_name();
+ const string& zone_id();
+ uint32_t get_zone_short_id() const;
+
+ const string& get_current_period_id();
+ /* checks the current period's zonegroups_by_api index, or the local
+  * zonegroup's api_name when there is no current period */
+ bool has_zonegroup_api(const std::string& api) const;
+
+ /* true only if writeable_zone is set and the zone is not read-only */
+ bool zone_is_writeable();
+ bool zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const;
+ /* stores the URL of the redirect zone's connection in *endpoint; false
+  * if no redirect zone is configured or the URL cannot be obtained */
+ bool get_redirect_zone_endpoint(string *endpoint);
+
+ RGWRESTConn *get_master_conn() {
+ return rest_master_conn;
+ }
+
+ map<string, RGWRESTConn *>& get_zonegroup_conn_map() {
+ return zonegroup_conn_map;
+ }
+
+ map<string, RGWRESTConn *>& get_zone_conn_map() {
+ return zone_conn_map;
+ }
+
+ std::vector<const RGWZone*>& get_data_sync_source_zones() {
+ return data_sync_source_zones;
+ }
+
+ map<string, RGWRESTConn *>& get_zone_data_notify_to_map() {
+ return zone_data_notify_to_map;
+ }
+
+ /* on success points *zone at the entry stored in zone_by_id */
+ bool find_zone_by_id(const string& id, RGWZone **zone);
+
+ /* both return a null pointer when nothing is found */
+ RGWRESTConn *get_zone_conn_by_id(const string& id);
+ RGWRESTConn *get_zone_conn_by_name(const string& name);
+ bool find_zone_id_by_name(const string& name, string *id);
+
+ /* uses select_new_bucket_location() when the zone has placement pools
+  * configured, otherwise falls back to the legacy placement */
+ int select_bucket_placement(const RGWUserInfo& user_info, const string& zonegroup_id,
+ const rgw_placement_rule& rule,
+ rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info);
+ /* rule_info is dereferenced unconditionally and must not be null */
+ int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
+ /* rule precedence: request rule > user default > zonegroup default */
+ int select_new_bucket_location(const RGWUserInfo& user_info, const string& zonegroup_id,
+ const rgw_placement_rule& rule,
+ rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info);
+ int select_bucket_location_by_rule(const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info);
+
+ /* maintain / list the pools registered in the avail-pools object;
+  * list_placement_set() returns the number of pools or a negative error */
+ int add_bucket_placement(const rgw_pool& new_pool);
+ int remove_bucket_placement(const rgw_pool& old_pool);
+ int list_placement_set(set<rgw_pool>& names);
+
+ /* master zonegroup and this zone is its master zone */
+ bool is_meta_master() const;
+
+ bool need_to_log_data() const;
+ bool need_to_log_metadata() const;
+ bool can_reshard() const;
+ /* the bucket argument is not consulted by the current implementation */
+ bool is_syncing_bucket_meta(const rgw_bucket& bucket);
+
+ int list_zonegroups(list<string>& zonegroups);
+ int list_regions(list<string>& regions);
+ int list_zones(list<string>& zones);
+ int list_realms(list<string>& realms);
+ int list_periods(list<string>& periods);
+ int list_periods(const string& current_period, list<string>& periods);
+};
+
+#endif
diff --git a/src/rgw/services/svc_zone_utils.cc b/src/rgw/services/svc_zone_utils.cc
new file mode 100644
index 00000000..ef9c9c88
--- /dev/null
+++ b/src/rgw/services/svc_zone_utils.cc
@@ -0,0 +1,59 @@
+#include "svc_zone_utils.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw/rgw_zone.h"
+
+/* Service start hook: builds the transaction-id suffix and reports success. */
+int RGWSI_ZoneUtils::do_start()
+{
+  // the suffix is derived from the rados and zone services
+  init_unique_trans_id_deps();
+  return 0;
+}
+
+/*
+ * Build a host id of the form "<instance id in hex>-<zone name>-<zonegroup name>".
+ *
+ * The formatting buffer is a string sized at run time rather than a
+ * variable-length array, which is not standard C++.
+ */
+string RGWSI_ZoneUtils::gen_host_id() {
+  const string& zone_name = zone_svc->get_zone().name;
+  const string& zonegroup_name = zone_svc->get_zonegroup().get_name();
+  /* uint64_t needs 16, two '-' separators and a trailing null */
+  string charbuf(16 + zone_name.size() + zonegroup_name.size() + 2 + 1, '\0');
+  snprintf(&charbuf[0], charbuf.size(), "%llx-%s-%s", (unsigned long long)rados_svc->instance_id(), zone_name.c_str(), zonegroup_name.c_str());
+  /* keep only what precedes the terminating null, as the char array did */
+  return string(charbuf.c_str());
+}
+
+/*
+ * Build an id of the form "<zone id>.<instance id>.<unique_num>", with
+ * both numbers printed in decimal.
+ */
+string RGWSI_ZoneUtils::unique_id(uint64_t unique_num)
+{
+  /* each uint64_t needs up to 20 decimal digits, plus two '.' and a
+   * trailing null; a smaller buffer would silently truncate large values */
+  char buf[2 * 20 + 2 + 1];
+  snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)rados_svc->instance_id(), (unsigned long long)unique_num);
+  string s = zone_svc->get_zone_params().get_id() + buf;
+  return s;
+}
+
+/*
+ * Precompute trans_id_suffix as the url-encoded form of
+ * "-<instance id in hex>-<zone name>".
+ */
+void RGWSI_ZoneUtils::init_unique_trans_id_deps() {
+  /* 16 hex digits for a uint64_t, the two hyphens and a trailing null */
+  char prefix[16 + 2 + 1];
+  snprintf(prefix, sizeof(prefix), "-%llx-", (unsigned long long)rados_svc->instance_id());
+
+  const string raw_suffix = string(prefix) + zone_svc->get_zone().name;
+  url_encode(raw_suffix, trans_id_suffix);
+}
+
+/* To stay compatible with the Swift API, a transaction ID has to be at
+ * least 32 characters long and laid out as follows:
+ *  - the first 21 chars are in [0-9a-f]; Swift stores there a fragment
+ *    of a UUID obtained from uuid4() of Python's uuid module;
+ *  - char no. 22 is a hyphen;
+ *  - at least the next 10 chars are a hex-formatted timestamp, padded
+ *    with zeroes if necessary, all in [0-9a-f];
+ *  - the optional remainder is any url-encoded string of any length.
+ * The ID produced here is prefixed with "tx" and ends with the
+ * precomputed trans_id_suffix. */
+string RGWSI_ZoneUtils::unique_trans_id(const uint64_t unique_num) {
+  /* 2 ("tx") + 21 + 1 (hyphen) + 16 (the timestamp can take up to 16) + 1 (null) */
+  char head[41];
+  const time_t now = time(nullptr);
+
+  snprintf(head, sizeof(head), "tx%021llx-%010llx",
+           (unsigned long long)unique_num,
+           (unsigned long long)now);
+
+  string trans_id(head);
+  trans_id += trans_id_suffix;
+  return trans_id;
+}
+
diff --git a/src/rgw/services/svc_zone_utils.h b/src/rgw/services/svc_zone_utils.h
new file mode 100644
index 00000000..158d2a92
--- /dev/null
+++ b/src/rgw/services/svc_zone_utils.h
@@ -0,0 +1,39 @@
+#ifndef CEPH_RGW_SERVICES_ZONEUTILS_H
+#define CEPH_RGW_SERVICES_ZONEUTILS_H
+
+
+#include "rgw/rgw_service.h"
+
+
+class RGWSI_RADOS;
+class RGWSI_Zone;
+
+/*
+ * Helper service that derives identifiers (host id, unique ids,
+ * transaction ids) from the rados instance id and the zone configuration.
+ */
+class RGWSI_ZoneUtils : public RGWServiceInstance
+{
+ friend struct RGWServices_Def;
+
+ /* services this one depends on; set through init() */
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Zone *zone_svc{nullptr};
+
+ /* url-encoded suffix appended to every transaction id; filled in by
+  * init_unique_trans_id_deps() */
+ string trans_id_suffix;
+
+ /* stores the service pointers; performs no other work */
+ void init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc) {
+ rados_svc = _rados_svc;
+ zone_svc = _zone_svc;
+ }
+
+ /* calls init_unique_trans_id_deps() and returns 0 */
+ int do_start() override;
+
+ void init_unique_trans_id_deps();
+
+public:
+ RGWSI_ZoneUtils(CephContext *cct): RGWServiceInstance(cct) {}
+
+ /* "<instance id in hex>-<zone name>-<zonegroup name>" */
+ string gen_host_id();
+ /* "<zone id>.<instance id>.<unique_num>", numbers in decimal */
+ string unique_id(uint64_t unique_num);
+
+ /* "tx" + unique_num as 21 hex digits + "-" + current time in hex (at
+  * least 10 digits) + trans_id_suffix */
+ string unique_trans_id(const uint64_t unique_num);
+};
+
+#endif