summaryrefslogtreecommitdiffstats
path: root/src/crimson/osd/scheduler/scheduler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/crimson/osd/scheduler/scheduler.cc')
-rw-r--r--src/crimson/osd/scheduler/scheduler.cc181
1 files changed, 181 insertions, 0 deletions
diff --git a/src/crimson/osd/scheduler/scheduler.cc b/src/crimson/osd/scheduler/scheduler.cc
new file mode 100644
index 000000000..c85cb388e
--- /dev/null
+++ b/src/crimson/osd/scheduler/scheduler.cc
@@ -0,0 +1,181 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <ostream>
+
+#include <seastar/core/print.hh>
+
+#include "crimson/osd/scheduler/scheduler.h"
+#include "crimson/osd/scheduler/mclock_scheduler.h"
+#include "common/WeightedPriorityQueue.h"
+
+namespace crimson::osd::scheduler {
+
+std::ostream &operator<<(std::ostream &lhs, const scheduler_class_t &c)
+{
+ switch (c) {
+ case scheduler_class_t::background_best_effort:
+ return lhs << "background_best_effort";
+ case scheduler_class_t::background_recovery:
+ return lhs << "background_recovery";
+ case scheduler_class_t::client:
+ return lhs << "client";
+ case scheduler_class_t::repop:
+ return lhs << "repop";
+ case scheduler_class_t::immediate:
+ return lhs << "immediate";
+ default:
+ return lhs;
+ }
+}
+
+/**
+ * Implements Scheduler in terms of OpQueue
+ *
+ * Templated on queue type to avoid dynamic dispatch, T should implement
+ * OpQueue<Scheduleritem_t, client_t>. This adapter is mainly responsible for
+ * the boilerplate priority cutoff/strict concept which is needed for
+ * OpQueue based implementations.
+ */
+template <typename T>
+class ClassedOpQueueScheduler final : public Scheduler {
+ const scheduler_class_t cutoff;
+ T queue;
+
+ using priority_t = uint64_t;
+ std::array<
+ priority_t,
+ static_cast<size_t>(scheduler_class_t::immediate)
+ > priority_map = {
+ // Placeholder, gets replaced with configured values
+ 0, 0, 0
+ };
+
+ static scheduler_class_t get_io_prio_cut(ConfigProxy &conf) {
+ if (conf.get_val<std::string>("osd_op_queue_cut_off") == "debug_random") {
+ srand(time(NULL));
+ return (rand() % 2 < 1) ?
+ scheduler_class_t::repop : scheduler_class_t::immediate;
+ } else if (conf.get_val<std::string>("osd_op_queue_cut_off") == "high") {
+ return scheduler_class_t::immediate;
+ } else {
+ return scheduler_class_t::repop;
+ }
+ }
+
+ bool use_strict(scheduler_class_t kl) const {
+ return static_cast<uint8_t>(kl) >= static_cast<uint8_t>(cutoff);
+ }
+
+ priority_t get_priority(scheduler_class_t kl) const {
+ ceph_assert(static_cast<size_t>(kl) <
+ static_cast<size_t>(scheduler_class_t::immediate));
+ return priority_map[static_cast<size_t>(kl)];
+ }
+
+public:
+ template <typename... Args>
+ ClassedOpQueueScheduler(ConfigProxy &conf, Args&&... args) :
+ cutoff(get_io_prio_cut(conf)),
+ queue(std::forward<Args>(args)...)
+ {
+ priority_map[
+ static_cast<size_t>(scheduler_class_t::background_best_effort)
+ ] = conf.get_val<uint64_t>("osd_scrub_priority");
+ priority_map[
+ static_cast<size_t>(scheduler_class_t::background_recovery)
+ ] = conf.get_val<uint64_t>("osd_recovery_op_priority");
+ priority_map[
+ static_cast<size_t>(scheduler_class_t::client)
+ ] = conf.get_val<uint64_t>("osd_client_op_priority");
+ priority_map[
+ static_cast<size_t>(scheduler_class_t::repop)
+ ] = conf.get_val<uint64_t>("osd_client_op_priority");
+ }
+
+ void enqueue(item_t &&item) final {
+ if (use_strict(item.params.klass))
+ queue.enqueue_strict(
+ item.params.owner, get_priority(item.params.klass), std::move(item));
+ else
+ queue.enqueue(
+ item.params.owner, get_priority(item.params.klass),
+ item.params.cost, std::move(item));
+ }
+
+ void enqueue_front(item_t &&item) final {
+ if (use_strict(item.params.klass))
+ queue.enqueue_strict_front(
+ item.params.owner, get_priority(item.params.klass), std::move(item));
+ else
+ queue.enqueue_front(
+ item.params.owner, get_priority(item.params.klass),
+ item.params.cost, std::move(item));
+ }
+
+ bool empty() const final {
+ return queue.empty();
+ }
+
+ item_t dequeue() final {
+ return queue.dequeue();
+ }
+
+ void dump(ceph::Formatter &f) const final {
+ return queue.dump(&f);
+ }
+
+ void print(std::ostream &out) const final {
+ out << "ClassedOpQueueScheduler(queue=";
+ queue.print(out);
+ out << ", cutoff=" << cutoff << ")";
+ }
+
+ ~ClassedOpQueueScheduler() final {};
+};
+
+SchedulerRef make_scheduler(ConfigProxy &conf)
+{
+ const std::string _type = conf.get_val<std::string>("osd_op_queue");
+ const std::string *type = &_type;
+ if (*type == "debug_random") {
+ static const std::string index_lookup[] = { "mclock_scheduler",
+ "wpq" };
+ srand(time(NULL));
+ unsigned which = rand() % (sizeof(index_lookup) / sizeof(index_lookup[0]));
+ type = &index_lookup[which];
+ }
+
+ if (*type == "wpq" ) {
+ // default is 'wpq'
+ return std::make_unique<
+ ClassedOpQueueScheduler<WeightedPriorityQueue<item_t, client_t>>>(
+ conf,
+ conf.get_val<uint64_t>("osd_op_pq_max_tokens_per_priority"),
+ conf->osd_op_pq_min_cost
+ );
+ } else if (*type == "mclock_scheduler") {
+ return std::make_unique<mClockScheduler>(conf);
+ } else {
+ ceph_assert("Invalid choice of wq" == 0);
+ return std::unique_ptr<mClockScheduler>();
+ }
+}
+
+std::ostream &operator<<(std::ostream &lhs, const Scheduler &rhs) {
+ rhs.print(lhs);
+ return lhs;
+}
+
+}