Adding upstream version 5:7.0.15.upstream/5%7.0.15 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 17:31:02 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 17:31:02 +0000
commit: bb12c1fd00eb51118749bbbc69c5596835fcbd3b (patch)
tree: 88038a98bd31c1b765f3390767a2ec12e37c79ec /src/blocked.c
parent: Initial commit. (diff)
download: redis-upstream.tar.xz
redis-upstream.zip
1 files changed, 866 insertions, 0 deletions
diff --git a/src/blocked.c b/src/blocked.c
new file mode 100644
index 0000000..65b5842
--- /dev/null
+++ b/src/blocked.c
@@ -0,0 +1,866 @@
+/* blocked.c - generic support for blocking operations like BLPOP & WAIT.
+ *
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * API:
+ *
+ * blockClient() set the CLIENT_BLOCKED flag in the client, and set the
+ * specified block type 'btype' filed to one of BLOCKED_* macros.
+ *
+ * unblockClient() unblocks the client doing the following:
+ * 1) It calls the btype-specific function to cleanup the state.
+ * 2) It unblocks the client by unsetting the CLIENT_BLOCKED flag.
+ * 3) It puts the client into a list of just unblocked clients that are
+ *    processed ASAP in the beforeSleep() event loop callback, so that
+ *    if there is some query buffer to process, we do it. This is also
+ *    required because otherwise there is no 'readable' event fired, we
+ *    already read the pending commands. We also set the CLIENT_UNBLOCKED
+ *    flag to remember the client is in the unblocked_clients list.
+ *
+ * processUnblockedClients() is called inside the beforeSleep() function
+ * to process the query buffer from unblocked clients and remove the clients
+ * from the blocked_clients queue.
+ *
+ * replyToBlockedClientTimedOut() is called by the cron function when
+ * a client blocked reaches the specified timeout (if the timeout is set
+ * to 0, no timeout is processed).
+ * It usually just needs to send a reply to the client.
+ *
+ * When implementing a new type of blocking operation, the implementation
+ * should modify unblockClient() and replyToBlockedClientTimedOut() in order
+ * to handle the btype-specific behavior of this two functions.
+ * If the blocking operation waits for certain keys to change state, the
+ * clusterRedirectBlockedClientIfNeeded() function should also be updated.
+ */
+
+#include "server.h"
+#include "slowlog.h"
+#include "latency.h"
+#include "monotonic.h"
+
+void serveClientBlockedOnList(client *receiver, robj *o, robj *key, robj *dstkey, redisDb *db, int wherefrom, int whereto, int *deleted);
+int getListPositionFromObjectOrReply(client *c, robj *arg, int *position);
+
+/* This structure represents the blocked key information that we store
+ * in the client structure. Each client blocked on keys, has a
+ * client->bpop.keys hash table. The keys of the hash table are Redis
+ * keys pointers to 'robj' structures. The value is this structure.
+ * The structure has two goals: firstly we store the list node that this
+ * client uses to be listed in the database "blocked clients for this key"
+ * list, so we can later unblock in O(1) without a list scan.
+ * Secondly for certain blocking types, we have additional info. Right now
+ * the only use for additional info we have is when clients are blocked
+ * on streams, as we have to remember the ID it blocked for. */
+typedef struct bkinfo {
+    listNode *listnode;     /* List node for db->blocking_keys[key] list. */
+    streamID stream_id;     /* Stream ID if we blocked in a stream. */
+} bkinfo;
+
+/* Block a client for the specific operation type. Once the CLIENT_BLOCKED
+ * flag is set client query buffer is not longer processed, but accumulated,
+ * and will be processed when the client is unblocked. */
+void blockClient(client *c, int btype) {
+    /* Master client should never be blocked unless pause or module */
+    serverAssert(!(c->flags & CLIENT_MASTER &&
+                   btype != BLOCKED_MODULE &&
+                   btype != BLOCKED_POSTPONE));
+
+    c->flags |= CLIENT_BLOCKED;
+    c->btype = btype;
+    server.blocked_clients++;
+    server.blocked_clients_by_type[btype]++;
+    addClientToTimeoutTable(c);
+    if (btype == BLOCKED_POSTPONE) {
+        listAddNodeTail(server.postponed_clients, c);
+        c->postponed_list_node = listLast(server.postponed_clients);
+        /* Mark this client to execute its command */
+        c->flags |= CLIENT_PENDING_COMMAND;
+    }
+}
+
+/* This function is called after a client has finished a blocking operation
+ * in order to update the total command duration, log the command into
+ * the Slow log if needed, and log the reply duration event if needed. */
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors){
+    const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
+    c->lastcmd->microseconds += total_cmd_duration;
+    if (had_errors)
+        c->lastcmd->failed_calls++;
+    if (server.latency_tracking_enabled)
+        updateCommandLatencyHistogram(&(c->lastcmd->latency_histogram), total_cmd_duration*1000);
+    /* Log the command into the Slow log if needed. */
+    slowlogPushCurrentCommand(c, c->lastcmd, total_cmd_duration);
+    /* Log the reply duration event. */
+    latencyAddSampleIfNeeded("command-unblocking",reply_us/1000);
+}
+
+/* This function is called in the beforeSleep() function of the event loop
+ * in order to process the pending input buffer of clients that were
+ * unblocked after a blocking operation. */
+void processUnblockedClients(void) {
+    listNode *ln;
+    client *c;
+
+    while (listLength(server.unblocked_clients)) {
+        ln = listFirst(server.unblocked_clients);
+        serverAssert(ln != NULL);
+        c = ln->value;
+        listDelNode(server.unblocked_clients,ln);
+        c->flags &= ~CLIENT_UNBLOCKED;
+
+        /* Process remaining data in the input buffer, unless the client
+         * is blocked again. Actually processInputBuffer() checks that the
+         * client is not blocked before to proceed, but things may change and
+         * the code is conceptually more correct this way. */
+        if (!(c->flags & CLIENT_BLOCKED)) {
+            /* If we have a queued command, execute it now. */
+            if (processPendingCommandAndInputBuffer(c) == C_ERR) {
+                c = NULL;
+            }
+        }
+        beforeNextClient(c);
+    }
+}
+
+/* This function will schedule the client for reprocessing at a safe time.
+ *
+ * This is useful when a client was blocked for some reason (blocking operation,
+ * CLIENT PAUSE, or whatever), because it may end with some accumulated query
+ * buffer that needs to be processed ASAP:
+ *
+ * 1. When a client is blocked, its readable handler is still active.
+ * 2. However in this case it only gets data into the query buffer, but the
+ *    query is not parsed or executed once there is enough to proceed as
+ *    usually (because the client is blocked... so we can't execute commands).
+ * 3. When the client is unblocked, without this function, the client would
+ *    have to write some query in order for the readable handler to finally
+ *    call processQueryBuffer*() on it.
+ * 4. With this function instead we can put the client in a queue that will
+ *    process it for queries ready to be executed at a safe time.
+ */
+void queueClientForReprocessing(client *c) {
+    /* The client may already be into the unblocked list because of a previous
+     * blocking operation, don't add back it into the list multiple times. */
+    if (!(c->flags & CLIENT_UNBLOCKED)) {
+        c->flags |= CLIENT_UNBLOCKED;
+        listAddNodeTail(server.unblocked_clients,c);
+    }
+}
+
+/* Unblock a client calling the right function depending on the kind
+ * of operation the client is blocking for. */
+void unblockClient(client *c) {
+    if (c->btype == BLOCKED_LIST ||
+        c->btype == BLOCKED_ZSET ||
+        c->btype == BLOCKED_STREAM) {
+        unblockClientWaitingData(c);
+    } else if (c->btype == BLOCKED_WAIT) {
+        unblockClientWaitingReplicas(c);
+    } else if (c->btype == BLOCKED_MODULE) {
+        if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c);
+        unblockClientFromModule(c);
+    } else if (c->btype == BLOCKED_POSTPONE) {
+        listDelNode(server.postponed_clients,c->postponed_list_node);
+        c->postponed_list_node = NULL;
+    } else if (c->btype == BLOCKED_SHUTDOWN) {
+        /* No special cleanup. */
+    } else {
+        serverPanic("Unknown btype in unblockClient().");
+    }
+
+    /* Reset the client for a new query since, for blocking commands
+     * we do not do it immediately after the command returns (when the
+     * client got blocked) in order to be still able to access the argument
+     * vector from module callbacks and updateStatsOnUnblock. */
+    if (c->btype != BLOCKED_POSTPONE && c->btype != BLOCKED_SHUTDOWN) {
+        freeClientOriginalArgv(c);
+        resetClient(c);
+    }
+
+    /* Clear the flags, and put the client in the unblocked list so that
+     * we'll process new commands in its query buffer ASAP. */
+    server.blocked_clients--;
+    server.blocked_clients_by_type[c->btype]--;
+    c->flags &= ~CLIENT_BLOCKED;
+    c->btype = BLOCKED_NONE;
+    removeClientFromTimeoutTable(c);
+    queueClientForReprocessing(c);
+}
+
+/* This function gets called when a blocked client timed out in order to
+ * send it a reply of some kind. After this function is called,
+ * unblockClient() will be called with the same client as argument. */
+void replyToBlockedClientTimedOut(client *c) {
+    if (c->btype == BLOCKED_LIST ||
+        c->btype == BLOCKED_ZSET ||
+        c->btype == BLOCKED_STREAM) {
+        addReplyNullArray(c);
+    } else if (c->btype == BLOCKED_WAIT) {
+        addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset));
+    } else if (c->btype == BLOCKED_MODULE) {
+        moduleBlockedClientTimedOut(c);
+    } else {
+        serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
+    }
+}
+
+/* If one or more clients are blocked on the SHUTDOWN command, this function
+ * sends them an error reply and unblocks them. */
+void replyToClientsBlockedOnShutdown(void) {
+    if (server.blocked_clients_by_type[BLOCKED_SHUTDOWN] == 0) return;
+    listNode *ln;
+    listIter li;
+    listRewind(server.clients, &li);
+    while((ln = listNext(&li))) {
+        client *c = listNodeValue(ln);
+        if (c->flags & CLIENT_BLOCKED && c->btype == BLOCKED_SHUTDOWN) {
+            addReplyError(c, "Errors trying to SHUTDOWN. Check logs.");
+            unblockClient(c);
+        }
+    }
+}
+
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+    listNode *ln;
+    listIter li;
+
+    listRewind(server.clients,&li);
+    while((ln = listNext(&li))) {
+        client *c = listNodeValue(ln);
+
+        if (c->flags & CLIENT_BLOCKED) {
+            /* POSTPONEd clients are an exception, when they'll be unblocked, the
+             * command processing will start from scratch, and the command will
+             * be either executed or rejected. (unlike LIST blocked clients for
+             * which the command is already in progress in a way. */
+            if (c->btype == BLOCKED_POSTPONE)
+                continue;
+
+            addReplyError(c,
+                "-UNBLOCKED force unblock from blocking operation, "
+                "instance state changed (master -> replica?)");
+            unblockClient(c);
+            c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+        }
+    }
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * when there may be clients blocked on a list key, and there may be new
+ * data to fetch (the key is ready). */
+void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
+    /* Optimization: If no clients are in type BLOCKED_LIST,
+     * we can skip this loop. */
+    if (!server.blocked_clients_by_type[BLOCKED_LIST]) return;
+
+    /* We serve clients in the same order they blocked for
+     * this key, from the first blocked to the last. */
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        while((ln = listNext(&li))) {
+            client *receiver = listNodeValue(ln);
+            if (receiver->btype != BLOCKED_LIST) continue;
+
+            int deleted = 0;
+            robj *dstkey = receiver->bpop.target;
+            int wherefrom = receiver->bpop.blockpos.wherefrom;
+            int whereto = receiver->bpop.blockpos.whereto;
+
+            /* Protect receiver->bpop.target, that will be
+             * freed by the next unblockClient()
+             * call. */
+            if (dstkey) incrRefCount(dstkey);
+
+            long long prev_error_replies = server.stat_total_error_replies;
+            client *old_client = server.current_client;
+            server.current_client = receiver;
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
+            serveClientBlockedOnList(receiver, o,
+                                     rl->key, dstkey, rl->db,
+                                     wherefrom, whereto,
+                                     &deleted);
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+            unblockClient(receiver);
+            afterCommand(receiver);
+            server.current_client = old_client;
+
+            if (dstkey) decrRefCount(dstkey);
+
+            /* The list is empty and has been deleted. */
+            if (deleted) break;
+        }
+    }
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * when there may be clients blocked on a sorted set key, and there may be new
+ * data to fetch (the key is ready). */
+void serveClientsBlockedOnSortedSetKey(robj *o, readyList *rl) {
+    /* Optimization: If no clients are in type BLOCKED_ZSET,
+     * we can skip this loop. */
+    if (!server.blocked_clients_by_type[BLOCKED_ZSET]) return;
+
+    /* We serve clients in the same order they blocked for
+     * this key, from the first blocked to the last. */
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        while((ln = listNext(&li))) {
+            client *receiver = listNodeValue(ln);
+            if (receiver->btype != BLOCKED_ZSET) continue;
+
+            int deleted = 0;
+            long llen = zsetLength(o);
+            long count = receiver->bpop.count;
+            int where = receiver->bpop.blockpos.wherefrom;
+            int use_nested_array = (receiver->lastcmd &&
+                                    receiver->lastcmd->proc == bzmpopCommand)
+                                    ? 1 : 0;
+            int reply_nil_when_empty = use_nested_array;
+
+            long long prev_error_replies = server.stat_total_error_replies;
+            client *old_client = server.current_client;
+            server.current_client = receiver;
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
+            genericZpopCommand(receiver, &rl->key, 1, where, 1, count, use_nested_array, reply_nil_when_empty, &deleted);
+
+            /* Replicate the command. */
+            int argc = 2;
+            robj *argv[3];
+            argv[0] = where == ZSET_MIN ? shared.zpopmin : shared.zpopmax;
+            argv[1] = rl->key;
+            incrRefCount(rl->key);
+            if (count != -1) {
+                /* Replicate it as command with COUNT. */
+                robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
+                argv[2] = count_obj;
+                argc++;
+            }
+            alsoPropagate(receiver->db->id, argv, argc, PROPAGATE_AOF|PROPAGATE_REPL);
+            decrRefCount(argv[1]);
+            if (count != -1) decrRefCount(argv[2]);
+
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+            unblockClient(receiver);
+            afterCommand(receiver);
+            server.current_client = old_client;
+
+            /* The zset is empty and has been deleted. */
+            if (deleted) break;
+        }
+    }
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * when there may be clients blocked on a stream key, and there may be new
+ * data to fetch (the key is ready). */
+void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
+    /* Optimization: If no clients are in type BLOCKED_STREAM,
+     * we can skip this loop. */
+    if (!server.blocked_clients_by_type[BLOCKED_STREAM]) return;
+
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+    stream *s = o->ptr;
+
+    /* We need to provide the new data arrived on the stream
+     * to all the clients that are waiting for an offset smaller
+     * than the current top item. */
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        while((ln = listNext(&li))) {
+            client *receiver = listNodeValue(ln);
+            if (receiver->btype != BLOCKED_STREAM) continue;
+            bkinfo *bki = dictFetchValue(receiver->bpop.keys,rl->key);
+            streamID *gt = &bki->stream_id;
+
+            long long prev_error_replies = server.stat_total_error_replies;
+            client *old_client = server.current_client;
+            server.current_client = receiver;
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
+
+            /* If we blocked in the context of a consumer
+             * group, we need to resolve the group and update the
+             * last ID the client is blocked for: this is needed
+             * because serving other clients in the same consumer
+             * group will alter the "last ID" of the consumer
+             * group, and clients blocked in a consumer group are
+             * always blocked for the ">" ID: we need to deliver
+             * only new messages and avoid unblocking the client
+             * otherwise. */
+            streamCG *group = NULL;
+            if (receiver->bpop.xread_group) {
+                group = streamLookupCG(s,
+                        receiver->bpop.xread_group->ptr);
+                /* If the group was not found, send an error
+                 * to the consumer. */
+                if (!group) {
+                    addReplyError(receiver,
+                        "-NOGROUP the consumer group this client "
+                        "was blocked on no longer exists");
+                    goto unblock_receiver;
+                } else {
+                    *gt = group->last_id;
+                }
+            }
+
+            if (streamCompareID(&s->last_id, gt) > 0) {
+                streamID start = *gt;
+                streamIncrID(&start);
+
+                /* Lookup the consumer for the group, if any. */
+                streamConsumer *consumer = NULL;
+                int noack = 0;
+
+                if (group) {
+                    noack = receiver->bpop.xread_group_noack;
+                    sds name = receiver->bpop.xread_consumer->ptr;
+                    consumer = streamLookupConsumer(group,name,SLC_DEFAULT);
+                    if (consumer == NULL) {
+                        consumer = streamCreateConsumer(group,name,rl->key,
+                                                        rl->db->id,SCC_DEFAULT);
+                        if (noack) {
+                            streamPropagateConsumerCreation(receiver,rl->key,
+                                                            receiver->bpop.xread_group,
+                                                            consumer->name);
+                        }
+                    }
+                }
+
+                /* Emit the two elements sub-array consisting of
+                 * the name of the stream and the data we
+                 * extracted from it. Wrapped in a single-item
+                 * array, since we have just one key. */
+                if (receiver->resp == 2) {
+                    addReplyArrayLen(receiver,1);
+                    addReplyArrayLen(receiver,2);
+                } else {
+                    addReplyMapLen(receiver,1);
+                }
+                addReplyBulk(receiver,rl->key);
+
+                streamPropInfo pi = {
+                    rl->key,
+                    receiver->bpop.xread_group
+                };
+                streamReplyWithRange(receiver,s,&start,NULL,
+                                     receiver->bpop.xread_count,
+                                     0, group, consumer, noack, &pi);
+                /* Note that after we unblock the client, 'gt'
+                 * and other receiver->bpop stuff are no longer
+                 * valid, so we must do the setup above before
+                 * the unblockClient call. */
+
+unblock_receiver:
+                updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+                unblockClient(receiver);
+                afterCommand(receiver);
+                server.current_client = old_client;
+            }
+        }
+    }
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * in order to check if we can serve clients blocked by modules using
+ * RM_BlockClientOnKeys(), when the corresponding key was signaled as ready:
+ * our goal here is to call the RedisModuleBlockedClient reply() callback to
+ * see if the key is really able to serve the client, and in that case,
+ * unblock it. */
+void serveClientsBlockedOnKeyByModule(readyList *rl) {
+    /* Optimization: If no clients are in type BLOCKED_MODULE,
+     * we can skip this loop. */
+    if (!server.blocked_clients_by_type[BLOCKED_MODULE]) return;
+
+    /* We serve clients in the same order they blocked for
+     * this key, from the first blocked to the last. */
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        while((ln = listNext(&li))) {
+            client *receiver = listNodeValue(ln);
+            if (receiver->btype != BLOCKED_MODULE) continue;
+
+            /* Note that if *this* client cannot be served by this key,
+             * it does not mean that another client that is next into the
+             * list cannot be served as well: they may be blocked by
+             * different modules with different triggers to consider if a key
+             * is ready or not. This means we can't exit the loop but need
+             * to continue after the first failure. */
+            long long prev_error_replies = server.stat_total_error_replies;
+            client *old_client = server.current_client;
+            server.current_client = receiver;
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
+            if (!moduleTryServeClientBlockedOnKey(receiver, rl->key)) continue;
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+            moduleUnblockClient(receiver);
+            afterCommand(receiver);
+            server.current_client = old_client;
+        }
+    }
+}
+
+/* Helper function for handleClientsBlockedOnKeys(). This function is called
+ * when there may be clients blocked, via XREADGROUP, on an existing stream which
+ * was deleted. We need to unblock the clients in that case.
+ * The idea is that a client that is blocked via XREADGROUP is different from
+ * any other blocking type in the sense that it depends on the existence of both
+ * the key and the group. Even if the key is deleted and then revived with XADD
+ * it won't help any clients blocked on XREADGROUP because the group no longer
+ * exist, so they would fail with -NOGROUP anyway.
+ * The conclusion is that it's better to unblock these client (with error) upon
+ * the deletion of the key, rather than waiting for the first XADD. */
+void unblockDeletedStreamReadgroupClients(readyList *rl) {
+    /* Optimization: If no clients are in type BLOCKED_STREAM,
+     * we can skip this loop. */
+    if (!server.blocked_clients_by_type[BLOCKED_STREAM]) return;
+
+    /* We serve clients in the same order they blocked for
+     * this key, from the first blocked to the last. */
+    dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+    if (de) {
+        list *clients = dictGetVal(de);
+        listNode *ln;
+        listIter li;
+        listRewind(clients,&li);
+
+        while((ln = listNext(&li))) {
+            client *receiver = listNodeValue(ln);
+            if (receiver->btype != BLOCKED_STREAM || !receiver->bpop.xread_group)
+                continue;
+
+            long long prev_error_replies = server.stat_total_error_replies;
+            client *old_client = server.current_client;
+            server.current_client = receiver;
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
+            addReplyError(receiver, "-UNBLOCKED the stream key no longer exists");
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies);
+            unblockClient(receiver);
+            afterCommand(receiver);
+            server.current_client = old_client;
+        }
+    }
+}
+
+/* This function should be called by Redis every time a single command,
+ * a MULTI/EXEC block, or a Lua script, terminated its execution after
+ * being called by a client. It handles serving clients blocked in
+ * lists, streams, and sorted sets, via a blocking commands.
+ *
+ * All the keys with at least one client blocked that received at least
+ * one new element via some write operation are accumulated into
+ * the server.ready_keys list. This function will run the list and will
+ * serve clients accordingly. Note that the function will iterate again and
+ * again as a result of serving BLMOVE we can have new blocking clients
+ * to serve because of the PUSH side of BLMOVE.
+ *
+ * This function is normally "fair", that is, it will server clients
+ * using a FIFO behavior. However this fairness is violated in certain
+ * edge cases, that is, when we have clients blocked at the same time
+ * in a sorted set and in a list, for the same key (a very odd thing to
+ * do client side, indeed!). Because mismatching clients (blocking for
+ * a different type compared to the current key type) are moved in the
+ * other side of the linked list. However as long as the key starts to
+ * be used only for a single type, like virtually any Redis application will
+ * do, the function is already fair. */
+void handleClientsBlockedOnKeys(void) {
+    /* This function is called only when also_propagate is in its basic state
+     * (i.e. not from call(), module context, etc.) */
+    serverAssert(server.also_propagate.numops == 0);
+    server.core_propagates = 1;
+
+    while(listLength(server.ready_keys) != 0) {
+        list *l;
+
+        /* Point server.ready_keys to a fresh list and save the current one
+         * locally. This way as we run the old list we are free to call
+         * signalKeyAsReady() that may push new elements in server.ready_keys
+         * when handling clients blocked into BLMOVE. */
+        l = server.ready_keys;
+        server.ready_keys = listCreate();
+
+        while(listLength(l) != 0) {
+            listNode *ln = listFirst(l);
+            readyList *rl = ln->value;
+
+            /* First of all remove this key from db->ready_keys so that
+             * we can safely call signalKeyAsReady() against this key. */
+            dictDelete(rl->db->ready_keys,rl->key);
+
+            /* Even if we are not inside call(), increment the call depth
+             * in order to make sure that keys are expired against a fixed
+             * reference time, and not against the wallclock time. This
+             * way we can lookup an object multiple times (BLMOVE does
+             * that) without the risk of it being freed in the second
+             * lookup, invalidating the first one.
+             * See https://github.com/redis/redis/pull/6554. */
+            server.fixed_time_expire++;
+            updateCachedTime(0);
+
+            /* Serve clients blocked on the key. */
+            robj *o = lookupKeyReadWithFlags(rl->db, rl->key, LOOKUP_NONOTIFY | LOOKUP_NOSTATS);
+            if (o != NULL) {
+                int objtype = o->type;
+                if (objtype == OBJ_LIST)
+                    serveClientsBlockedOnListKey(o,rl);
+                else if (objtype == OBJ_ZSET)
+                    serveClientsBlockedOnSortedSetKey(o,rl);
+                else if (objtype == OBJ_STREAM)
+                    serveClientsBlockedOnStreamKey(o,rl);
+                /* We want to serve clients blocked on module keys
+                 * regardless of the object type: we don't know what the
+                 * module is trying to accomplish right now. */
+                serveClientsBlockedOnKeyByModule(rl);
+                /* If we have XREADGROUP clients blocked on this key, and
+                 * the key is not a stream, it must mean that the key was
+                 * overwritten by either SET or something like
+                 * (MULTI, DEL key, SADD key e, EXEC).
+                 * In this case we need to unblock all these clients. */
+                 if (objtype != OBJ_STREAM)
+                     unblockDeletedStreamReadgroupClients(rl);
+            } else {
+                /* Unblock all XREADGROUP clients of this deleted key */
+                unblockDeletedStreamReadgroupClients(rl);
+                /* Edge case: If lookupKeyReadWithFlags decides to expire the key we have to
+                 * take care of the propagation here, because afterCommand wasn't called */
+                if (server.also_propagate.numops > 0)
+                    propagatePendingCommands();
+            }
+            server.fixed_time_expire--;
+
+            /* Free this item. */
+            decrRefCount(rl->key);
+            zfree(rl);
+            listDelNode(l,ln);
+        }
+        listRelease(l); /* We have the new list on place at this point. */
+    }
+
+    serverAssert(server.core_propagates); /* This function should not be re-entrant */
+
+    server.core_propagates = 0;
+}
+
+/* This is how the current blocking lists/sorted sets/streams work, we use
+ * BLPOP as example, but the concept is the same for other list ops, sorted
+ * sets and XREAD.
+ * - If the user calls BLPOP and the key exists and contains a non empty list
+ *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
+ *   if blocking is not required.
+ * - If instead BLPOP is called and the key does not exists or the list is
+ *   empty we need to block. In order to do so we remove the notification for
+ *   new data to read in the client socket (so that we'll not serve new
+ *   requests if the blocking request is not served). Also we put the client
+ *   in a dictionary (db->blocking_keys) mapping keys to a list of clients
+ *   blocking for this keys.
+ * - If a PUSH operation against a key with blocked clients waiting is
+ *   performed, we mark this key as "ready", and after the current command,
+ *   MULTI/EXEC block, or script, is executed, we serve all the clients waiting
+ *   for this list, from the one that blocked first, to the last, accordingly
+ *   to the number of elements we have in the ready list.
+ */
+
+/* Set a client in blocking mode for the specified key (list, zset or stream),
+ * with the specified timeout. The 'type' argument is BLOCKED_LIST,
+ * BLOCKED_ZSET or BLOCKED_STREAM depending on the kind of operation we are
+ * waiting for an empty key in order to awake the client. The client is blocked
+ * for all the 'numkeys' keys as in the 'keys' argument. When we block for
+ * stream keys, we also provide an array of streamID structures: clients will
+ * be unblocked only when items with an ID greater or equal to the specified
+ * one is appended to the stream.
+ *
+ * 'count' for those commands that support the optional count argument.
+ * Otherwise the value is 0. */
+void blockForKeys(client *c, int btype, robj **keys, int numkeys, long count, mstime_t timeout, robj *target, struct blockPos *blockpos, streamID *ids) {
+    dictEntry *de;
+    list *l;
+    int j;
+
+    c->bpop.count = count;
+    c->bpop.timeout = timeout;
+    c->bpop.target = target;
+
+    if (blockpos != NULL) c->bpop.blockpos = *blockpos;
+
+    if (target != NULL) incrRefCount(target);
+
+    for (j = 0; j < numkeys; j++) {
+        /* Allocate our bkinfo structure, associated to each key the client
+         * is blocked for. */
+        bkinfo *bki = zmalloc(sizeof(*bki));
+        if (btype == BLOCKED_STREAM)
+            bki->stream_id = ids[j];
+
+        /* If the key already exists in the dictionary ignore it. */
+        if (dictAdd(c->bpop.keys,keys[j],bki) != DICT_OK) {
+            zfree(bki);
+            continue;
+        }
+        incrRefCount(keys[j]);
+
+        /* And in the other "side", to map keys -> clients */
+        de = dictFind(c->db->blocking_keys,keys[j]);
+        if (de == NULL) {
+            int retval;
+
+            /* For every key we take a list of clients blocked for it */
+            l = listCreate();
+            retval = dictAdd(c->db->blocking_keys,keys[j],l);
+            incrRefCount(keys[j]);
+            serverAssertWithInfo(c,keys[j],retval == DICT_OK);
+        } else {
+            l = dictGetVal(de);
+        }
+        listAddNodeTail(l,c);
+        bki->listnode = listLast(l);
+    }
+    blockClient(c,btype);
+}
+
+/* Unblock a client that's waiting in a blocking operation such as BLPOP.
+ * You should never call this function directly, but unblockClient() instead. */
+void unblockClientWaitingData(client *c) {
+    dictEntry *de;
+    dictIterator *di;
+    list *l;
+
+    serverAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
+    di = dictGetIterator(c->bpop.keys);
+    /* The client may wait for multiple keys, so unblock it for every key. */
+    while((de = dictNext(di)) != NULL) {
+        robj *key = dictGetKey(de);
+        bkinfo *bki = dictGetVal(de);
+
+        /* Remove this client from the list of clients waiting for this key. */
+        l = dictFetchValue(c->db->blocking_keys,key);
+        serverAssertWithInfo(c,key,l != NULL);
+        listDelNode(l,bki->listnode);
+        /* If the list is empty we need to remove it to avoid wasting memory */
+        if (listLength(l) == 0)
+            dictDelete(c->db->blocking_keys,key);
+    }
+    dictReleaseIterator(di);
+
+    /* Cleanup the client structure */
+    dictEmpty(c->bpop.keys,NULL);
+    if (c->bpop.target) {
+        decrRefCount(c->bpop.target);
+        c->bpop.target = NULL;
+    }
+    if (c->bpop.xread_group) {
+        decrRefCount(c->bpop.xread_group);
+        decrRefCount(c->bpop.xread_consumer);
+        c->bpop.xread_group = NULL;
+        c->bpop.xread_consumer = NULL;
+    }
+}
+
+static int getBlockedTypeByType(int type) {
+    switch (type) {
+        case OBJ_LIST: return BLOCKED_LIST;
+        case OBJ_ZSET: return BLOCKED_ZSET;
+        case OBJ_MODULE: return BLOCKED_MODULE;
+        case OBJ_STREAM: return BLOCKED_STREAM;
+        default: return BLOCKED_NONE;
+    }
+}
+
+/* If the specified key has clients blocked waiting for list pushes, this
+ * function will put the key reference into the server.ready_keys list.
+ * Note that db->ready_keys is a hash table that allows us to avoid putting
+ * the same key again and again in the list in case of multiple pushes
+ * made by a script or in the context of MULTI/EXEC.
+ *
+ * The list will be finally processed by handleClientsBlockedOnKeys() */
+void signalKeyAsReady(redisDb *db, robj *key, int type) {
+    readyList *rl;
+
+    /* Quick returns. */
+    int btype = getBlockedTypeByType(type);
+    if (btype == BLOCKED_NONE) {
+        /* The type can never block. */
+        return;
+    }
+    if (!server.blocked_clients_by_type[btype] &&
+        !server.blocked_clients_by_type[BLOCKED_MODULE]) {
+        /* No clients block on this type. Note: Blocked modules are represented
+         * by BLOCKED_MODULE, even if the intention is to wake up by normal
+         * types (list, zset, stream), so we need to check that there are no
+         * blocked modules before we do a quick return here. */
+        return;
+    }
+
+    /* No clients blocking for this key? No need to queue it. */
+    if (dictFind(db->blocking_keys,key) == NULL) return;
+
+    /* Key was already signaled? No need to queue it again. */
+    if (dictFind(db->ready_keys,key) != NULL) return;
+
+    /* Ok, we need to queue this key into server.ready_keys. */
+    rl = zmalloc(sizeof(*rl));
+    rl->key = key;
+    rl->db = db;
+    incrRefCount(key);
+    listAddNodeTail(server.ready_keys,rl);
+
+    /* We also add the key in the db->ready_keys dictionary in order
+     * to avoid adding it multiple times into a list with a simple O(1)
+     * check. */
+    incrRefCount(key);
+    serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 17:31:02 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 17:31:02 +0000
commit	bb12c1fd00eb51118749bbbc69c5596835fcbd3b (patch)
tree	88038a98bd31c1b765f3390767a2ec12e37c79ec /src/blocked.c
parent	Initial commit. (diff)
download	redis-upstream.tar.xz redis-upstream.zip