summaryrefslogtreecommitdiffstats
path: root/streaming/replication.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--streaming/replication.c340
1 files changed, 211 insertions, 129 deletions
diff --git a/streaming/replication.c b/streaming/replication.c
index 7c1f16b4c..a50913a1a 100644
--- a/streaming/replication.c
+++ b/streaming/replication.c
@@ -88,6 +88,7 @@ struct replication_query {
bool locked_data_collection;
bool execute;
bool interrupted;
+ STREAM_CAPABILITIES capabilities;
} query;
time_t wall_clock_time;
@@ -95,7 +96,7 @@ struct replication_query {
size_t points_read;
size_t points_generated;
- struct storage_engine_query_ops *ops;
+ STORAGE_ENGINE_BACKEND backend;
struct replication_request *rq;
size_t dimensions;
@@ -112,7 +113,8 @@ static struct replication_query *replication_query_prepare(
time_t query_after,
time_t query_before,
bool query_enable_streaming,
- time_t wall_clock_time
+ time_t wall_clock_time,
+ STREAM_CAPABILITIES capabilities
) {
size_t dimensions = rrdset_number_of_dimensions(st);
struct replication_query *q = callocz(1, sizeof(struct replication_query) + dimensions * sizeof(struct replication_dimension));
@@ -131,6 +133,7 @@ static struct replication_query *replication_query_prepare(
q->query.after = query_after;
q->query.before = query_before;
q->query.enable_streaming = query_enable_streaming;
+ q->query.capabilities = capabilities;
q->wall_clock_time = wall_clock_time;
@@ -159,7 +162,7 @@ static struct replication_query *replication_query_prepare(
}
}
- q->ops = &st->rrdhost->db[0].eng->api.query_ops;
+ q->backend = st->rrdhost->db[0].eng->backend;
// prepare our array of dimensions
size_t count = 0;
@@ -181,7 +184,7 @@ static struct replication_query *replication_query_prepare(
d->rda = dictionary_acquired_item_dup(rd_dfe.dict, rd_dfe.item);
d->rd = rd;
- q->ops->init(rd->tiers[0].db_metric_handle, &d->handle, q->query.after, q->query.before,
+ storage_engine_query_init(q->backend, rd->tiers[0].db_metric_handle, &d->handle, q->query.after, q->query.before,
q->query.locked_data_collection ? STORAGE_PRIORITY_HIGH : STORAGE_PRIORITY_LOW);
d->enabled = true;
d->skip = false;
@@ -209,32 +212,40 @@ static struct replication_query *replication_query_prepare(
return q;
}
-static void replication_send_chart_collection_state(BUFFER *wb, RRDSET *st) {
+static void replication_send_chart_collection_state(BUFFER *wb, RRDSET *st, STREAM_CAPABILITIES capabilities) {
+ NUMBER_ENCODING encoding = (capabilities & STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
RRDDIM *rd;
- rrddim_foreach_read(rd, st) {
- if(!rd->exposed) continue;
-
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE " \"%s\" %llu %lld " NETDATA_DOUBLE_FORMAT " " NETDATA_DOUBLE_FORMAT "\n",
- rrddim_id(rd),
- (usec_t)rd->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->last_collected_time.tv_usec,
- rd->last_collected_value,
- rd->last_calculated_value,
- rd->last_stored_value
- );
- }
+ rrddim_foreach_read(rd, st){
+ if (!rd->exposed) continue;
+
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE " '",
+ sizeof(PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE) - 1 + 2);
+ buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id));
+ buffer_fast_strcat(wb, "' ", 2);
+ buffer_print_uint64_encoded(wb, encoding, (usec_t) rd->last_collected_time.tv_sec * USEC_PER_SEC +
+ (usec_t) rd->last_collected_time.tv_usec);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_int64_encoded(wb, encoding, rd->last_collected_value);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_netdata_double_encoded(wb, encoding, rd->last_calculated_value);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_netdata_double_encoded(wb, encoding, rd->last_stored_value);
+ buffer_fast_strcat(wb, "\n", 1);
+ }
rrddim_foreach_done(rd);
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE " %llu %llu\n",
- (usec_t)st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)st->last_collected_time.tv_usec,
- (usec_t)st->last_updated.tv_sec * USEC_PER_SEC + (usec_t)st->last_updated.tv_usec
- );
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE " ", sizeof(PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE) - 1 + 1);
+ buffer_print_uint64_encoded(wb, encoding, (usec_t) st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t) st->last_collected_time.tv_usec);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, (usec_t) st->last_updated.tv_sec * USEC_PER_SEC + (usec_t) st->last_updated.tv_usec);
+ buffer_fast_strcat(wb, "\n", 1);
}
static void replication_query_finalize(BUFFER *wb, struct replication_query *q, bool executed) {
size_t dimensions = q->dimensions;
if(wb && q->query.enable_streaming)
- replication_send_chart_collection_state(wb, q->st);
+ replication_send_chart_collection_state(wb, q->st, q->query.capabilities);
if(q->query.locked_data_collection) {
netdata_spinlock_unlock(&q->st->data_collection_lock);
@@ -249,7 +260,7 @@ static void replication_query_finalize(BUFFER *wb, struct replication_query *q,
struct replication_dimension *d = &q->data[i];
if (unlikely(!d->enabled)) continue;
- q->ops->finalize(&d->handle);
+ storage_engine_query_finalize(&d->handle);
dictionary_acquired_item_release(d->dict, d->rda);
@@ -281,7 +292,7 @@ static void replication_query_align_to_optimal_before(struct replication_query *
struct replication_dimension *d = &q->data[i];
if(unlikely(!d->enabled)) continue;
- time_t new_before = q->ops->align_to_optimal_before(&d->handle);
+ time_t new_before = storage_engine_align_to_optimal_before(&d->handle);
if (!expanded_before || new_before < expanded_before)
expanded_before = new_before;
}
@@ -296,13 +307,14 @@ static void replication_query_align_to_optimal_before(struct replication_query *
static bool replication_query_execute(BUFFER *wb, struct replication_query *q, size_t max_msg_size) {
replication_query_align_to_optimal_before(q);
+ NUMBER_ENCODING encoding = (q->query.capabilities & STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
time_t after = q->query.after;
time_t before = q->query.before;
size_t dimensions = q->dimensions;
- struct storage_engine_query_ops *ops = q->ops;
time_t wall_clock_time = q->wall_clock_time;
- size_t points_read = q->points_read, points_generated = q->points_generated;
+ bool finished_with_gap = false;
+ size_t points_read = 0, points_generated = 0;
#ifdef NETDATA_LOG_REPLICATION_REQUESTS
time_t actual_after = 0, actual_before = 0;
@@ -318,8 +330,8 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
// fetch the first valid point for the dimension
int max_skip = 1000;
- while(d->sp.end_time_s < now && !ops->is_finished(&d->handle) && max_skip-- >= 0) {
- d->sp = ops->next_metric(&d->handle);
+ while(d->sp.end_time_s < now && !storage_engine_query_is_finished(&d->handle) && max_skip-- >= 0) {
+ d->sp = storage_engine_query_next_metric(&d->handle);
points_read++;
}
@@ -328,9 +340,10 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
error_limit_static_global_var(erl, 1, 0);
error_limit(&erl,
- "STREAM_SENDER REPLAY ERROR: 'host:%s/chart:%s/dim:%s': db does not advance the query beyond time %llu (tried 1000 times to get the next point and always got back a point in the past)",
- rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st), rrddim_id(d->rd),
- (unsigned long long) now);
+ "STREAM_SENDER REPLAY ERROR: 'host:%s/chart:%s/dim:%s': db does not advance the query "
+ "beyond time %llu (tried 1000 times to get the next point and always got back a point in the past)",
+ rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st), rrddim_id(d->rd),
+ (unsigned long long) now);
continue;
}
@@ -374,9 +387,10 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
else
fix_min_start_time = min_end_time - min_update_every;
+#ifdef NETDATA_INTERNAL_CHECKS
error_limit_static_global_var(erl, 1, 0);
error_limit(&erl, "REPLAY WARNING: 'host:%s/chart:%s' "
- "misaligned dimensions "
+ "misaligned dimensions, "
"update every (min: %ld, max: %ld), "
"start time (min: %ld, max: %ld), "
"end time (min %ld, max %ld), "
@@ -389,6 +403,7 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
now, last_end_time_in_buffer,
fix_min_start_time
);
+#endif
min_start_time = fix_min_start_time;
}
@@ -410,7 +425,8 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
q->query.before = last_end_time_in_buffer;
q->query.enable_streaming = false;
- internal_error(true, "REPLICATION: buffer size %zu is more than the max message size %zu for chart '%s' of host '%s'. "
+ internal_error(true, "REPLICATION: current buffer size %zu is more than the "
+ "max message size %zu for chart '%s' of host '%s'. "
"Interrupting replication request (%ld to %ld, %s) at %ld to %ld, %s.",
buffer_strlen(wb), max_msg_size, rrdset_id(q->st), rrdhost_hostname(q->st->rrdhost),
q->request.after, q->request.before, q->request.enable_streaming?"true":"false",
@@ -422,11 +438,13 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
}
last_end_time_in_buffer = min_end_time;
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN " '' %llu %llu %llu\n",
- (unsigned long long) min_start_time,
- (unsigned long long) min_end_time,
- (unsigned long long) wall_clock_time
- );
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN " '' ", sizeof(PLUGINSD_KEYWORD_REPLAY_BEGIN) - 1 + 4);
+ buffer_print_uint64_encoded(wb, encoding, min_start_time);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, min_end_time);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, wall_clock_time);
+ buffer_fast_strcat(wb, "\n", 1);
// output the replay values for this time
for (size_t i = 0; i < dimensions; i++) {
@@ -438,8 +456,13 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
!storage_point_is_unset(d->sp) &&
!storage_point_is_gap(d->sp))) {
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_SET " \"%s\" " NETDATA_DOUBLE_FORMAT " \"%s\"\n",
- rrddim_id(d->rd), d->sp.sum, d->sp.flags & SN_FLAG_RESET ? "R" : "");
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_SET " \"", sizeof(PLUGINSD_KEYWORD_REPLAY_SET) - 1 + 2);
+ buffer_fast_strcat(wb, rrddim_id(d->rd), string_strlen(d->rd->id));
+ buffer_fast_strcat(wb, "\" ", 2);
+ buffer_print_netdata_double_encoded(wb, encoding, d->sp.sum);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_sn_flags(wb, d->sp.flags, q->query.capabilities & STREAM_CAP_INTERPOLATED);
+ buffer_fast_strcat(wb, "\n", 1);
points_generated++;
}
@@ -450,9 +473,16 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
else if(unlikely(min_end_time < now))
// the query does not progress
break;
- else
+ else {
// we have gap - all points are in the future
now = min_start_time;
+
+ if(min_start_time > before && !points_generated) {
+ before = q->query.before = min_start_time - 1;
+ finished_with_gap = true;
+ break;
+ }
+ }
}
#ifdef NETDATA_LOG_REPLICATION_REQUESTS
@@ -462,28 +492,33 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s
log_date(actual_before_buf, LOG_DATE_LENGTH, actual_before);
internal_error(true,
"STREAM_SENDER REPLAY: 'host:%s/chart:%s': sending data %llu [%s] to %llu [%s] (requested %llu [delta %lld] to %llu [delta %lld])",
- rrdhost_hostname(st->rrdhost), rrdset_id(st),
+ rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st),
(unsigned long long)actual_after, actual_after_buf, (unsigned long long)actual_before, actual_before_buf,
(unsigned long long)after, (long long)(actual_after - after), (unsigned long long)before, (long long)(actual_before - before));
}
else
internal_error(true,
"STREAM_SENDER REPLAY: 'host:%s/chart:%s': nothing to send (requested %llu to %llu)",
- rrdhost_hostname(st->rrdhost), rrdset_id(st),
+ rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st),
(unsigned long long)after, (unsigned long long)before);
#endif // NETDATA_LOG_REPLICATION_REQUESTS
- q->points_read = points_read;
- q->points_generated = points_generated;
+ q->points_read += points_read;
+ q->points_generated += points_generated;
- bool finished_with_gap = false;
if(last_end_time_in_buffer < before - q->st->update_every)
finished_with_gap = true;
return finished_with_gap;
}
-static struct replication_query *replication_response_prepare(RRDSET *st, bool requested_enable_streaming, time_t requested_after, time_t requested_before) {
+static struct replication_query *replication_response_prepare(
+ RRDSET *st,
+ bool requested_enable_streaming,
+ time_t requested_after,
+ time_t requested_before,
+ STREAM_CAPABILITIES capabilities
+ ) {
time_t wall_clock_time = now_realtime_sec();
if(requested_after > requested_before) {
@@ -509,7 +544,8 @@ static struct replication_query *replication_response_prepare(RRDSET *st, bool r
bool query_enable_streaming = requested_enable_streaming;
time_t db_first_entry = 0, db_last_entry = 0;
- rrdset_get_retention_of_tier_for_collected_chart(st, &db_first_entry, &db_last_entry, wall_clock_time, 0);
+ rrdset_get_retention_of_tier_for_collected_chart(
+ st, &db_first_entry, &db_last_entry, wall_clock_time, 0);
if(requested_after == 0 && requested_before == 0 && requested_enable_streaming == true) {
// no data requested - just enable streaming
@@ -543,7 +579,7 @@ static struct replication_query *replication_response_prepare(RRDSET *st, bool r
db_first_entry, db_last_entry,
requested_after, requested_before, requested_enable_streaming,
query_after, query_before, query_enable_streaming,
- wall_clock_time);
+ wall_clock_time, capabilities);
}
void replication_response_cancel_and_finalize(struct replication_query *q) {
@@ -553,6 +589,7 @@ void replication_response_cancel_and_finalize(struct replication_query *q) {
static bool sender_is_still_connected_for_this_request(struct replication_request *rq);
bool replication_response_execute_and_finalize(struct replication_query *q, size_t max_msg_size) {
+ NUMBER_ENCODING encoding = (q->query.capabilities & STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
struct replication_request *rq = q->rq;
RRDSET *st = q->st;
RRDHOST *host = st->rrdhost;
@@ -562,7 +599,11 @@ bool replication_response_execute_and_finalize(struct replication_query *q, size
// holding the host's buffer lock for too long
BUFFER *wb = sender_start(host->sender);
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN " \"%s\"\n", rrdset_id(st));
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN " '", sizeof(PLUGINSD_KEYWORD_REPLAY_BEGIN) - 1 + 2);
+ buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id));
+ buffer_fast_strcat(wb, "'\n", 2);
+
+// buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN " \"%s\"\n", rrdset_id(st));
bool locked_data_collection = q->query.locked_data_collection;
q->query.locked_data_collection = false;
@@ -585,23 +626,22 @@ bool replication_response_execute_and_finalize(struct replication_query *q, size
// end with first/last entries we have, and the first start time and
// last end time of the data we sent
- buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_END " %d %llu %llu %s %llu %llu %llu\n",
- // current chart update every
- (int)st->update_every
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_END " ", sizeof(PLUGINSD_KEYWORD_REPLAY_END) - 1 + 1);
+ buffer_print_int64_encoded(wb, encoding, st->update_every);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, db_first_entry);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, db_last_entry);
- // child first db time, child end db time
- , (unsigned long long)db_first_entry, (unsigned long long)db_last_entry
+ buffer_fast_strcat(wb, enable_streaming ? " true " : " false ", 7);
- // start streaming boolean
- , enable_streaming ? "true" : "false"
-
- // after requested, before requested ('before' can be altered by the child when the request had enable_streaming true)
- , (unsigned long long)after, (unsigned long long)before
-
- // child world clock time
- , (unsigned long long)wall_clock_time
- );
+ buffer_print_uint64_encoded(wb, encoding, after);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, before);
+ buffer_fast_strcat(wb, " ", 1);
+ buffer_print_uint64_encoded(wb, encoding, wall_clock_time);
+ buffer_fast_strcat(wb, "\n", 1);
worker_is_busy(WORKER_JOB_BUFFER_COMMIT);
sender_commit(host->sender, wb);
@@ -733,9 +773,9 @@ static bool send_replay_chart_cmd(struct replication_request_details *r, const c
, msg
, r->last_request.after, r->last_request.before
, r->child_db.first_entry_t, r->child_db.last_entry_t
- , r->child_db.world_time_t, (r->child_db.world_time_t == r->local_db.now) ? "SAME" : (r->child_db.world_time_t < r->local_db.now) ? "BEHIND" : "AHEAD"
+ , r->child_db.wall_clock_time, (r->child_db.wall_clock_time == r->local_db.wall_clock_time) ? "SAME" : (r->child_db.wall_clock_time < r->local_db.wall_clock_time) ? "BEHIND" : "AHEAD"
, r->local_db.first_entry_t, r->local_db.last_entry_t
- , r->local_db.now
+ , r->local_db.wall_clock_time
, r->gap.from, r->gap.to
, (r->gap.from == r->wanted.after) ? "FULL" : "PARTIAL"
, (st->replay.after != 0 || st->replay.before != 0) ? "OVERLAPPING" : ""
@@ -928,11 +968,12 @@ struct replication_sort_entry {
// the global variables for the replication thread
static struct replication_thread {
+ ARAL *aral_rse;
+
SPINLOCK spinlock;
struct {
size_t pending; // number of requests pending in the queue
- Word_t unique_id; // the last unique id we gave to a request (auto-increment, starting from 1)
// statistics
size_t added; // number of requests added to the queue
@@ -951,6 +992,7 @@ static struct replication_thread {
} unsafe; // protected from replication_recursive_lock()
struct {
+ Word_t unique_id; // the last unique id we gave to a request (auto-increment, starting from 1)
size_t executed; // the number of replication requests executed
size_t latest_first_time; // the 'after' timestamp of the last request we executed
size_t memory; // the total memory allocated by replication
@@ -964,10 +1006,10 @@ static struct replication_thread {
} main_thread; // access is allowed only by the main thread
} replication_globals = {
+ .aral_rse = NULL,
.spinlock = NETDATA_SPINLOCK_INITIALIZER,
.unsafe = {
.pending = 0,
- .unique_id = 0,
.added = 0,
.removed = 0,
@@ -984,6 +1026,7 @@ static struct replication_thread {
},
},
.atomic = {
+ .unique_id = 0,
.executed = 0,
.latest_first_time = 0,
.memory = 0,
@@ -1047,17 +1090,15 @@ void replication_set_next_point_in_time(time_t after, size_t unique_id) {
// ----------------------------------------------------------------------------
// replication sort entry management
-static struct replication_sort_entry *replication_sort_entry_create_unsafe(struct replication_request *rq) {
- fatal_when_replication_is_not_locked_for_me();
-
- struct replication_sort_entry *rse = mallocz(sizeof(struct replication_sort_entry));
+static struct replication_sort_entry *replication_sort_entry_create(struct replication_request *rq) {
+ struct replication_sort_entry *rse = aral_mallocz(replication_globals.aral_rse);
__atomic_add_fetch(&replication_globals.atomic.memory, sizeof(struct replication_sort_entry), __ATOMIC_RELAXED);
rrdpush_sender_pending_replication_requests_plus_one(rq->sender);
// copy the request
rse->rq = rq;
- rse->unique_id = ++replication_globals.unsafe.unique_id;
+ rse->unique_id = __atomic_add_fetch(&replication_globals.atomic.unique_id, 1, __ATOMIC_SEQ_CST);
// save the unique id into the request, to be able to delete it later
rq->unique_id = rse->unique_id;
@@ -1068,26 +1109,30 @@ static struct replication_sort_entry *replication_sort_entry_create_unsafe(struc
}
static void replication_sort_entry_destroy(struct replication_sort_entry *rse) {
- freez(rse);
+ aral_freez(replication_globals.aral_rse, rse);
__atomic_sub_fetch(&replication_globals.atomic.memory, sizeof(struct replication_sort_entry), __ATOMIC_RELAXED);
}
static void replication_sort_entry_add(struct replication_request *rq) {
- replication_recursive_lock();
-
if(rrdpush_sender_replication_buffer_full_get(rq->sender)) {
rq->indexed_in_judy = false;
rq->not_indexed_buffer_full = true;
rq->not_indexed_preprocessing = false;
+ replication_recursive_lock();
replication_globals.unsafe.pending_no_room++;
replication_recursive_unlock();
return;
}
- if(rq->not_indexed_buffer_full)
- replication_globals.unsafe.pending_no_room--;
+ // cache this, because it will be changed
+ bool decrement_no_room = rq->not_indexed_buffer_full;
+
+ struct replication_sort_entry *rse = replication_sort_entry_create(rq);
+
+ replication_recursive_lock();
- struct replication_sort_entry *rse = replication_sort_entry_create_unsafe(rq);
+ if(decrement_no_room)
+ replication_globals.unsafe.pending_no_room--;
// if(rq->after < (time_t)replication_globals.protected.queue.after &&
// rq->sender->buffer_used_percentage <= MAX_SENDER_BUFFER_PERCENTAGE_ALLOWED &&
@@ -1371,7 +1416,12 @@ static bool replication_execute_request(struct replication_request *rq, bool wor
if(likely(workers))
worker_is_busy(WORKER_JOB_PREPARE_QUERY);
- rq->q = replication_response_prepare(rq->st, rq->start_streaming, rq->after, rq->before);
+ rq->q = replication_response_prepare(
+ rq->st,
+ rq->start_streaming,
+ rq->after,
+ rq->before,
+ rq->sender->capabilities);
}
if(likely(workers))
@@ -1580,67 +1630,85 @@ static void replication_initialize_workers(bool master) {
#define REQUEST_QUEUE_EMPTY (-1)
#define REQUEST_CHART_NOT_FOUND (-2)
-static int replication_execute_next_pending_request(bool cancel) {
- static __thread int max_requests_ahead = 0;
- static __thread struct replication_request *rqs = NULL;
- static __thread int rqs_last_executed = 0, rqs_last_prepared = 0;
- static __thread size_t queue_rounds = 0; (void)queue_rounds;
+static __thread struct replication_thread_pipeline {
+ int max_requests_ahead;
+ struct replication_request *rqs;
+ int rqs_last_executed, rqs_last_prepared;
+ size_t queue_rounds;
+} rtp = {
+ .max_requests_ahead = 0,
+ .rqs = NULL,
+ .rqs_last_executed = 0,
+ .rqs_last_prepared = 0,
+ .queue_rounds = 0,
+};
+
+static void replication_pipeline_cancel_and_cleanup(void) {
+ if(!rtp.rqs)
+ return;
+
struct replication_request *rq;
+ size_t cancelled = 0;
+
+ do {
+ if (++rtp.rqs_last_executed >= rtp.max_requests_ahead)
+ rtp.rqs_last_executed = 0;
- if(unlikely(cancel)) {
- if(rqs) {
- size_t cancelled = 0;
- do {
- if (++rqs_last_executed >= max_requests_ahead)
- rqs_last_executed = 0;
+ rq = &rtp.rqs[rtp.rqs_last_executed];
- rq = &rqs[rqs_last_executed];
+ if (rq->q) {
+ internal_fatal(rq->executed, "REPLAY FATAL: query has already been executed!");
+ internal_fatal(!rq->found, "REPLAY FATAL: orphan q in rq");
- if (rq->q) {
- internal_fatal(rq->executed, "REPLAY FATAL: query has already been executed!");
- internal_fatal(!rq->found, "REPLAY FATAL: orphan q in rq");
+ replication_response_cancel_and_finalize(rq->q);
+ rq->q = NULL;
+ cancelled++;
+ }
- replication_response_cancel_and_finalize(rq->q);
- rq->q = NULL;
- cancelled++;
- }
+ rq->executed = true;
+ rq->found = false;
- rq->executed = true;
- rq->found = false;
+ } while (rtp.rqs_last_executed != rtp.rqs_last_prepared);
- } while (rqs_last_executed != rqs_last_prepared);
+ internal_error(true, "REPLICATION: cancelled %zu inflight queries", cancelled);
- internal_error(true, "REPLICATION: cancelled %zu inflight queries", cancelled);
- }
- return REQUEST_QUEUE_EMPTY;
- }
+ freez(rtp.rqs);
+ rtp.rqs = NULL;
+ rtp.max_requests_ahead = 0;
+ rtp.rqs_last_executed = 0;
+ rtp.rqs_last_prepared = 0;
+ rtp.queue_rounds = 0;
+}
+
+static int replication_pipeline_execute_next(void) {
+ struct replication_request *rq;
- if(unlikely(!rqs)) {
- max_requests_ahead = get_netdata_cpus() / 2;
+ if(unlikely(!rtp.rqs)) {
+ rtp.max_requests_ahead = (int)get_netdata_cpus() / 2;
- if(max_requests_ahead > libuv_worker_threads * 2)
- max_requests_ahead = libuv_worker_threads * 2;
+ if(rtp.max_requests_ahead > libuv_worker_threads * 2)
+ rtp.max_requests_ahead = libuv_worker_threads * 2;
- if(max_requests_ahead < 2)
- max_requests_ahead = 2;
+ if(rtp.max_requests_ahead < 2)
+ rtp.max_requests_ahead = 2;
- rqs = callocz(max_requests_ahead, sizeof(struct replication_request));
- __atomic_add_fetch(&replication_buffers_allocated, max_requests_ahead * sizeof(struct replication_request), __ATOMIC_RELAXED);
+ rtp.rqs = callocz(rtp.max_requests_ahead, sizeof(struct replication_request));
+ __atomic_add_fetch(&replication_buffers_allocated, rtp.max_requests_ahead * sizeof(struct replication_request), __ATOMIC_RELAXED);
}
// fill the queue
do {
- if(++rqs_last_prepared >= max_requests_ahead) {
- rqs_last_prepared = 0;
- queue_rounds++;
+ if(++rtp.rqs_last_prepared >= rtp.max_requests_ahead) {
+ rtp.rqs_last_prepared = 0;
+ rtp.queue_rounds++;
}
- internal_fatal(rqs[rqs_last_prepared].q,
+ internal_fatal(rtp.rqs[rtp.rqs_last_prepared].q,
"REPLAY FATAL: slot is used by query that has not been executed!");
worker_is_busy(WORKER_JOB_FIND_NEXT);
- rqs[rqs_last_prepared] = replication_request_get_first_available();
- rq = &rqs[rqs_last_prepared];
+ rtp.rqs[rtp.rqs_last_prepared] = replication_request_get_first_available();
+ rq = &rtp.rqs[rtp.rqs_last_prepared];
if(rq->found) {
if (!rq->st) {
@@ -1650,20 +1718,25 @@ static int replication_execute_next_pending_request(bool cancel) {
if (rq->st && !rq->q) {
worker_is_busy(WORKER_JOB_PREPARE_QUERY);
- rq->q = replication_response_prepare(rq->st, rq->start_streaming, rq->after, rq->before);
+ rq->q = replication_response_prepare(
+ rq->st,
+ rq->start_streaming,
+ rq->after,
+ rq->before,
+ rq->sender->capabilities);
}
rq->executed = false;
}
- } while(rq->found && rqs_last_prepared != rqs_last_executed);
+ } while(rq->found && rtp.rqs_last_prepared != rtp.rqs_last_executed);
// pick the first usable
do {
- if (++rqs_last_executed >= max_requests_ahead)
- rqs_last_executed = 0;
+ if (++rtp.rqs_last_executed >= rtp.max_requests_ahead)
+ rtp.rqs_last_executed = 0;
- rq = &rqs[rqs_last_executed];
+ rq = &rtp.rqs[rtp.rqs_last_executed];
if(rq->found) {
internal_fatal(rq->executed, "REPLAY FATAL: query has already been executed!");
@@ -1696,7 +1769,7 @@ static int replication_execute_next_pending_request(bool cancel) {
else
internal_fatal(rq->q, "REPLAY FATAL: slot status says slot is empty, but it has a pending query!");
- } while(!rq->found && rqs_last_executed != rqs_last_prepared);
+ } while(!rq->found && rtp.rqs_last_executed != rtp.rqs_last_prepared);
if(unlikely(!rq->found)) {
worker_is_idle();
@@ -1720,7 +1793,7 @@ static int replication_execute_next_pending_request(bool cancel) {
}
static void replication_worker_cleanup(void *ptr __maybe_unused) {
- replication_execute_next_pending_request(true);
+ replication_pipeline_cancel_and_cleanup();
worker_unregister();
}
@@ -1730,7 +1803,7 @@ static void *replication_worker_thread(void *ptr) {
netdata_thread_cleanup_push(replication_worker_cleanup, ptr);
while(service_running(SERVICE_REPLICATION)) {
- if(unlikely(replication_execute_next_pending_request(false) == REQUEST_QUEUE_EMPTY)) {
+ if(unlikely(replication_pipeline_execute_next() == REQUEST_QUEUE_EMPTY)) {
sender_thread_buffer_free();
worker_is_busy(WORKER_JOB_WAIT);
worker_is_idle();
@@ -1746,7 +1819,7 @@ static void replication_main_cleanup(void *ptr) {
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
- replication_execute_next_pending_request(true);
+ replication_pipeline_cancel_and_cleanup();
int threads = (int)replication_globals.main_thread.threads;
for(int i = 0; i < threads ;i++) {
@@ -1758,12 +1831,21 @@ static void replication_main_cleanup(void *ptr) {
replication_globals.main_thread.threads_ptrs = NULL;
__atomic_sub_fetch(&replication_buffers_allocated, threads * sizeof(netdata_thread_t *), __ATOMIC_RELAXED);
+ aral_destroy(replication_globals.aral_rse);
+ replication_globals.aral_rse = NULL;
+
// custom code
worker_unregister();
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
}
+void replication_initialize(void) {
+ replication_globals.aral_rse = aral_create("rse", sizeof(struct replication_sort_entry),
+ 0, 65536, aral_by_size_statistics(),
+ NULL, NULL, false, false);
+}
+
void *replication_thread_main(void *ptr __maybe_unused) {
replication_initialize_workers(true);
@@ -1863,7 +1945,7 @@ void *replication_thread_main(void *ptr __maybe_unused) {
worker_is_idle();
}
- if(unlikely(replication_execute_next_pending_request(false) == REQUEST_QUEUE_EMPTY)) {
+ if(unlikely(replication_pipeline_execute_next() == REQUEST_QUEUE_EMPTY)) {
worker_is_busy(WORKER_JOB_WAIT);
replication_recursive_lock();