summaryrefslogtreecommitdiffstats
path: root/src/multi.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 17:31:02 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 17:31:02 +0000
commitbb12c1fd00eb51118749bbbc69c5596835fcbd3b (patch)
tree88038a98bd31c1b765f3390767a2ec12e37c79ec /src/multi.c
parentInitial commit. (diff)
downloadredis-bb12c1fd00eb51118749bbbc69c5596835fcbd3b.tar.xz
redis-bb12c1fd00eb51118749bbbc69c5596835fcbd3b.zip
Adding upstream version 5:7.0.15.upstream/5%7.0.15upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/multi.c')
-rw-r--r--src/multi.c480
1 files changed, 480 insertions, 0 deletions
diff --git a/src/multi.c b/src/multi.c
new file mode 100644
index 0000000..5acffde
--- /dev/null
+++ b/src/multi.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/* ================================ MULTI/EXEC ============================== */
+
+/* Client state initialization for MULTI/EXEC */
+void initClientMultiState(client *c) {
+ c->mstate.commands = NULL;
+ c->mstate.count = 0;
+ c->mstate.cmd_flags = 0;
+ c->mstate.cmd_inv_flags = 0;
+ c->mstate.argv_len_sums = 0;
+ c->mstate.alloc_count = 0;
+}
+
+/* Release all the resources associated with MULTI/EXEC state */
+void freeClientMultiState(client *c) {
+ int j;
+
+ for (j = 0; j < c->mstate.count; j++) {
+ int i;
+ multiCmd *mc = c->mstate.commands+j;
+
+ for (i = 0; i < mc->argc; i++)
+ decrRefCount(mc->argv[i]);
+ zfree(mc->argv);
+ }
+ zfree(c->mstate.commands);
+}
+
+/* Add a new command into the MULTI commands queue */
+void queueMultiCommand(client *c, uint64_t cmd_flags) {
+ multiCmd *mc;
+
+ /* No sense to waste memory if the transaction is already aborted.
+ * this is useful in case client sends these in a pipeline, or doesn't
+ * bother to read previous responses and didn't notice the multi was already
+ * aborted. */
+ if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC))
+ return;
+ if (c->mstate.count == 0) {
+ /* If a client is using multi/exec, assuming it is used to execute at least
+ * two commands. Hence, creating by default size of 2. */
+ c->mstate.commands = zmalloc(sizeof(multiCmd)*2);
+ c->mstate.alloc_count = 2;
+ }
+ if (c->mstate.count == c->mstate.alloc_count) {
+ c->mstate.alloc_count = c->mstate.alloc_count < INT_MAX/2 ? c->mstate.alloc_count*2 : INT_MAX;
+ c->mstate.commands = zrealloc(c->mstate.commands, sizeof(multiCmd)*(c->mstate.alloc_count));
+ }
+ mc = c->mstate.commands+c->mstate.count;
+ mc->cmd = c->cmd;
+ mc->argc = c->argc;
+ mc->argv = c->argv;
+ mc->argv_len = c->argv_len;
+
+ c->mstate.count++;
+ c->mstate.cmd_flags |= cmd_flags;
+ c->mstate.cmd_inv_flags |= ~cmd_flags;
+ c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj*)*c->argc;
+
+ /* Reset the client's args since we copied them into the mstate and shouldn't
+ * reference them from c anymore. */
+ c->argv = NULL;
+ c->argc = 0;
+ c->argv_len_sum = 0;
+ c->argv_len = 0;
+}
+
+void discardTransaction(client *c) {
+ freeClientMultiState(c);
+ initClientMultiState(c);
+ c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
+ unwatchAllKeys(c);
+}
+
+/* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
+ * Should be called every time there is an error while queueing a command. */
+void flagTransaction(client *c) {
+ if (c->flags & CLIENT_MULTI)
+ c->flags |= CLIENT_DIRTY_EXEC;
+}
+
+void multiCommand(client *c) {
+ if (c->flags & CLIENT_MULTI) {
+ addReplyError(c,"MULTI calls can not be nested");
+ return;
+ }
+ c->flags |= CLIENT_MULTI;
+
+ addReply(c,shared.ok);
+}
+
+void discardCommand(client *c) {
+ if (!(c->flags & CLIENT_MULTI)) {
+ addReplyError(c,"DISCARD without MULTI");
+ return;
+ }
+ discardTransaction(c);
+ addReply(c,shared.ok);
+}
+
+/* Aborts a transaction, with a specific error message.
+ * The transaction is always aborted with -EXECABORT so that the client knows
+ * the server exited the multi state, but the actual reason for the abort is
+ * included too.
+ * Note: 'error' may or may not end with \r\n. see addReplyErrorFormat. */
+void execCommandAbort(client *c, sds error) {
+ discardTransaction(c);
+
+ if (error[0] == '-') error++;
+ addReplyErrorFormat(c, "-EXECABORT Transaction discarded because of: %s", error);
+
+ /* Send EXEC to clients waiting data from MONITOR. We did send a MULTI
+ * already, and didn't send any of the queued commands, now we'll just send
+ * EXEC so it is clear that the transaction is over. */
+ replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
+}
+
+void execCommand(client *c) {
+ int j;
+ robj **orig_argv;
+ int orig_argc, orig_argv_len;
+ struct redisCommand *orig_cmd;
+
+ if (!(c->flags & CLIENT_MULTI)) {
+ addReplyError(c,"EXEC without MULTI");
+ return;
+ }
+
+ /* EXEC with expired watched key is disallowed*/
+ if (isWatchedKeyExpired(c)) {
+ c->flags |= (CLIENT_DIRTY_CAS);
+ }
+
+ /* Check if we need to abort the EXEC because:
+ * 1) Some WATCHed key was touched.
+ * 2) There was a previous error while queueing commands.
+ * A failed EXEC in the first case returns a multi bulk nil object
+ * (technically it is not an error but a special behavior), while
+ * in the second an EXECABORT error is returned. */
+ if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) {
+ if (c->flags & CLIENT_DIRTY_EXEC) {
+ addReplyErrorObject(c, shared.execaborterr);
+ } else {
+ addReply(c, shared.nullarray[c->resp]);
+ }
+
+ discardTransaction(c);
+ return;
+ }
+
+ uint64_t old_flags = c->flags;
+
+ /* we do not want to allow blocking commands inside multi */
+ c->flags |= CLIENT_DENY_BLOCKING;
+
+ /* Exec all the queued commands */
+ unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
+
+ server.in_exec = 1;
+
+ orig_argv = c->argv;
+ orig_argv_len = c->argv_len;
+ orig_argc = c->argc;
+ orig_cmd = c->cmd;
+ addReplyArrayLen(c,c->mstate.count);
+ for (j = 0; j < c->mstate.count; j++) {
+ c->argc = c->mstate.commands[j].argc;
+ c->argv = c->mstate.commands[j].argv;
+ c->argv_len = c->mstate.commands[j].argv_len;
+ c->cmd = c->realcmd = c->mstate.commands[j].cmd;
+
+ /* ACL permissions are also checked at the time of execution in case
+ * they were changed after the commands were queued. */
+ int acl_errpos;
+ int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
+ if (acl_retval != ACL_OK) {
+ char *reason;
+ switch (acl_retval) {
+ case ACL_DENIED_CMD:
+ reason = "no permission to execute the command or subcommand";
+ break;
+ case ACL_DENIED_KEY:
+ reason = "no permission to touch the specified keys";
+ break;
+ case ACL_DENIED_CHANNEL:
+ reason = "no permission to access one of the channels used "
+ "as arguments";
+ break;
+ default:
+ reason = "no permission";
+ break;
+ }
+ addACLLogEntry(c,acl_retval,ACL_LOG_CTX_MULTI,acl_errpos,NULL,NULL);
+ addReplyErrorFormat(c,
+ "-NOPERM ACLs rules changed between the moment the "
+ "transaction was accumulated and the EXEC call. "
+ "This command is no longer allowed for the "
+ "following reason: %s", reason);
+ } else {
+ if (c->id == CLIENT_ID_AOF)
+ call(c,CMD_CALL_NONE);
+ else
+ call(c,CMD_CALL_FULL);
+
+ serverAssert((c->flags & CLIENT_BLOCKED) == 0);
+ }
+
+ /* Commands may alter argc/argv, restore mstate. */
+ c->mstate.commands[j].argc = c->argc;
+ c->mstate.commands[j].argv = c->argv;
+ c->mstate.commands[j].argv_len = c->argv_len;
+ c->mstate.commands[j].cmd = c->cmd;
+ }
+
+ // restore old DENY_BLOCKING value
+ if (!(old_flags & CLIENT_DENY_BLOCKING))
+ c->flags &= ~CLIENT_DENY_BLOCKING;
+
+ c->argv = orig_argv;
+ c->argv_len = orig_argv_len;
+ c->argc = orig_argc;
+ c->cmd = c->realcmd = orig_cmd;
+ discardTransaction(c);
+
+ server.in_exec = 0;
+}
+
+/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
+ *
+ * The implementation uses a per-DB hash table mapping keys to list of clients
+ * WATCHing those keys, so that given a key that is going to be modified
+ * we can mark all the associated clients as dirty.
+ *
+ * Also every client contains a list of WATCHed keys so that's possible to
+ * un-watch such keys when the client is freed or when UNWATCH is called. */
+
+/* In the client->watched_keys list we need to use watchedKey structures
+ * as in order to identify a key in Redis we need both the key name and the
+ * DB. This struct is also referenced from db->watched_keys dict, where the
+ * values are lists of watchedKey pointers. */
+typedef struct watchedKey {
+ robj *key;
+ redisDb *db;
+ client *client;
+ unsigned expired:1; /* Flag that we're watching an already expired key. */
+} watchedKey;
+
+/* Watch for the specified key */
+void watchForKey(client *c, robj *key) {
+ list *clients = NULL;
+ listIter li;
+ listNode *ln;
+ watchedKey *wk;
+
+ /* Check if we are already watching for this key */
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ wk = listNodeValue(ln);
+ if (wk->db == c->db && equalStringObjects(key,wk->key))
+ return; /* Key already watched */
+ }
+ /* This key is not already watched in this DB. Let's add it */
+ clients = dictFetchValue(c->db->watched_keys,key);
+ if (!clients) {
+ clients = listCreate();
+ dictAdd(c->db->watched_keys,key,clients);
+ incrRefCount(key);
+ }
+ /* Add the new key to the list of keys watched by this client */
+ wk = zmalloc(sizeof(*wk));
+ wk->key = key;
+ wk->client = c;
+ wk->db = c->db;
+ wk->expired = keyIsExpired(c->db, key);
+ incrRefCount(key);
+ listAddNodeTail(c->watched_keys,wk);
+ listAddNodeTail(clients,wk);
+}
+
+/* Unwatch all the keys watched by this client. To clean the EXEC dirty
+ * flag is up to the caller. */
+void unwatchAllKeys(client *c) {
+ listIter li;
+ listNode *ln;
+
+ if (listLength(c->watched_keys) == 0) return;
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ list *clients;
+ watchedKey *wk;
+
+ /* Lookup the watched key -> clients list and remove the client's wk
+ * from the list */
+ wk = listNodeValue(ln);
+ clients = dictFetchValue(wk->db->watched_keys, wk->key);
+ serverAssertWithInfo(c,NULL,clients != NULL);
+ listDelNode(clients,listSearchKey(clients,wk));
+ /* Kill the entry at all if this was the only client */
+ if (listLength(clients) == 0)
+ dictDelete(wk->db->watched_keys, wk->key);
+ /* Remove this watched key from the client->watched list */
+ listDelNode(c->watched_keys,ln);
+ decrRefCount(wk->key);
+ zfree(wk);
+ }
+}
+
+/* Iterates over the watched_keys list and looks for an expired key. Keys which
+ * were expired already when WATCH was called are ignored. */
+int isWatchedKeyExpired(client *c) {
+ listIter li;
+ listNode *ln;
+ watchedKey *wk;
+ if (listLength(c->watched_keys) == 0) return 0;
+ listRewind(c->watched_keys,&li);
+ while ((ln = listNext(&li))) {
+ wk = listNodeValue(ln);
+ if (wk->expired) continue; /* was expired when WATCH was called */
+ if (keyIsExpired(wk->db, wk->key)) return 1;
+ }
+
+ return 0;
+}
+
+/* "Touch" a key, so that if this key is being WATCHed by some client the
+ * next EXEC will fail. */
+void touchWatchedKey(redisDb *db, robj *key) {
+ list *clients;
+ listIter li;
+ listNode *ln;
+
+ if (dictSize(db->watched_keys) == 0) return;
+ clients = dictFetchValue(db->watched_keys, key);
+ if (!clients) return;
+
+ /* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
+ /* Check if we are already watching for this key */
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ watchedKey *wk = listNodeValue(ln);
+ client *c = wk->client;
+
+ if (wk->expired) {
+ /* The key was already expired when WATCH was called. */
+ if (db == wk->db &&
+ equalStringObjects(key, wk->key) &&
+ dictFind(db->dict, key->ptr) == NULL)
+ {
+ /* Already expired key is deleted, so logically no change. Clear
+ * the flag. Deleted keys are not flagged as expired. */
+ wk->expired = 0;
+ goto skip_client;
+ }
+ break;
+ }
+
+ c->flags |= CLIENT_DIRTY_CAS;
+ /* As the client is marked as dirty, there is no point in getting here
+ * again in case that key (or others) are modified again (or keep the
+ * memory overhead till EXEC). */
+ unwatchAllKeys(c);
+
+ skip_client:
+ continue;
+ }
+}
+
+/* Set CLIENT_DIRTY_CAS to all clients of DB when DB is dirty.
+ * It may happen in the following situations:
+ * FLUSHDB, FLUSHALL, SWAPDB, end of successful diskless replication.
+ *
+ * replaced_with: for SWAPDB, the WATCH should be invalidated if
+ * the key exists in either of them, and skipped only if it
+ * doesn't exist in both. */
+void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
+ listIter li;
+ listNode *ln;
+ dictEntry *de;
+
+ if (dictSize(emptied->watched_keys) == 0) return;
+
+ dictIterator *di = dictGetSafeIterator(emptied->watched_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int exists_in_emptied = dictFind(emptied->dict, key->ptr) != NULL;
+ if (exists_in_emptied ||
+ (replaced_with && dictFind(replaced_with->dict, key->ptr)))
+ {
+ list *clients = dictGetVal(de);
+ if (!clients) continue;
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ watchedKey *wk = listNodeValue(ln);
+ if (wk->expired) {
+ if (!replaced_with || !dictFind(replaced_with->dict, key->ptr)) {
+ /* Expired key now deleted. No logical change. Clear the
+ * flag. Deleted keys are not flagged as expired. */
+ wk->expired = 0;
+ continue;
+ } else if (keyIsExpired(replaced_with, key)) {
+ /* Expired key remains expired. */
+ continue;
+ }
+ } else if (!exists_in_emptied && keyIsExpired(replaced_with, key)) {
+ /* Non-existing key is replaced with an expired key. */
+ wk->expired = 1;
+ continue;
+ }
+ client *c = wk->client;
+ c->flags |= CLIENT_DIRTY_CAS;
+ /* Note - we could potentially call unwatchAllKeys for this specific client in order to reduce
+ * the total number of iterations. BUT this could also free the current next entry pointer
+ * held by the iterator and can lead to use-after-free. */
+ }
+ }
+ }
+ dictReleaseIterator(di);
+}
+
+void watchCommand(client *c) {
+ int j;
+
+ if (c->flags & CLIENT_MULTI) {
+ addReplyError(c,"WATCH inside MULTI is not allowed");
+ return;
+ }
+ /* No point in watching if the client is already dirty. */
+ if (c->flags & CLIENT_DIRTY_CAS) {
+ addReply(c,shared.ok);
+ return;
+ }
+ for (j = 1; j < c->argc; j++)
+ watchForKey(c,c->argv[j]);
+ addReply(c,shared.ok);
+}
+
+void unwatchCommand(client *c) {
+ unwatchAllKeys(c);
+ c->flags &= (~CLIENT_DIRTY_CAS);
+ addReply(c,shared.ok);
+}
+
+size_t multiStateMemOverhead(client *c) {
+ size_t mem = c->mstate.argv_len_sums;
+ /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. */
+ mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey));
+ /* Reserved memory for queued multi commands. */
+ mem += c->mstate.alloc_count * sizeof(multiCmd);
+ return mem;
+}