From 317c0644ccf108aa23ef3fd8358bd66c2840bfc0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 14 Apr 2024 15:40:54 +0200 Subject: Adding upstream version 5:7.2.4. Signed-off-by: Daniel Baumann --- src/t_stream.c | 4038 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4038 insertions(+) create mode 100644 src/t_stream.c (limited to 'src/t_stream.c') diff --git a/src/t_stream.c b/src/t_stream.c new file mode 100644 index 0000000..5fcb631 --- /dev/null +++ b/src/t_stream.c @@ -0,0 +1,4038 @@ +/* + * Copyright (c) 2017, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "endianconv.h" +#include "stream.h" + +/* Every stream item inside the listpack, has a flags field that is used to + * mark the entry as deleted, or having the same field as the "master" + * entry at the start of the listpack> */ +#define STREAM_ITEM_FLAG_NONE 0 /* No special flags. */ +#define STREAM_ITEM_FLAG_DELETED (1<<0) /* Entry is deleted. Skip it. */ +#define STREAM_ITEM_FLAG_SAMEFIELDS (1<<1) /* Same fields as master entry. */ + +/* For stream commands that require multiple IDs + * when the number of IDs is less than 'STREAMID_STATIC_VECTOR_LEN', + * avoid malloc allocation.*/ +#define STREAMID_STATIC_VECTOR_LEN 8 + +/* Max pre-allocation for listpack. This is done to avoid abuse of a user + * setting stream_node_max_bytes to a huge number. */ +#define STREAM_LISTPACK_MAX_PRE_ALLOCATE 4096 + +/* Don't let listpacks grow too big, even if the user config allows it. + * doing so can lead to an overflow (trying to store more than 32bit length + * into the listpack header), or actually an assertion since lpInsert + * will return NULL. */ +#define STREAM_LISTPACK_MAX_SIZE (1<<30) + +void streamFreeCG(streamCG *cg); +void streamFreeNACK(streamNACK *na); +size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer); +int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given); +int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq); + +/* ----------------------------------------------------------------------- + * Low level stream encoding: a radix tree of listpacks. + * ----------------------------------------------------------------------- */ + +/* Create a new stream data structure. */ +stream *streamNew(void) { + stream *s = zmalloc(sizeof(*s)); + s->rax = raxNew(); + s->length = 0; + s->first_id.ms = 0; + s->first_id.seq = 0; + s->last_id.ms = 0; + s->last_id.seq = 0; + s->max_deleted_entry_id.seq = 0; + s->max_deleted_entry_id.ms = 0; + s->entries_added = 0; + s->cgroups = NULL; /* Created on demand to save memory when not used. */ + return s; +} + +/* Free a stream, including the listpacks stored inside the radix tree. */ +void freeStream(stream *s) { + raxFreeWithCallback(s->rax,(void(*)(void*))lpFree); + if (s->cgroups) + raxFreeWithCallback(s->cgroups,(void(*)(void*))streamFreeCG); + zfree(s); +} + +/* Return the length of a stream. */ +unsigned long streamLength(const robj *subject) { + stream *s = subject->ptr; + return s->length; +} + +/* Set 'id' to be its successor stream ID. + * If 'id' is the maximal possible id, it is wrapped around to 0-0 and a + * C_ERR is returned. */ +int streamIncrID(streamID *id) { + int ret = C_OK; + if (id->seq == UINT64_MAX) { + if (id->ms == UINT64_MAX) { + /* Special case where 'id' is the last possible streamID... */ + id->ms = id->seq = 0; + ret = C_ERR; + } else { + id->ms++; + id->seq = 0; + } + } else { + id->seq++; + } + return ret; +} + +/* Set 'id' to be its predecessor stream ID. + * If 'id' is the minimal possible id, it remains 0-0 and a C_ERR is + * returned. */ +int streamDecrID(streamID *id) { + int ret = C_OK; + if (id->seq == 0) { + if (id->ms == 0) { + /* Special case where 'id' is the first possible streamID... */ + id->ms = id->seq = UINT64_MAX; + ret = C_ERR; + } else { + id->ms--; + id->seq = UINT64_MAX; + } + } else { + id->seq--; + } + return ret; +} + +/* Generate the next stream item ID given the previous one. If the current + * milliseconds Unix time is greater than the previous one, just use this + * as time part and start with sequence part of zero. Otherwise we use the + * previous time (and never go backward) and increment the sequence. */ +void streamNextID(streamID *last_id, streamID *new_id) { + uint64_t ms = commandTimeSnapshot(); + if (ms > last_id->ms) { + new_id->ms = ms; + new_id->seq = 0; + } else { + *new_id = *last_id; + streamIncrID(new_id); + } +} + +/* This is a helper function for the COPY command. + * Duplicate a Stream object, with the guarantee that the returned object + * has the same encoding as the original one. + * + * The resulting object always has refcount set to 1 */ +robj *streamDup(robj *o) { + robj *sobj; + + serverAssert(o->type == OBJ_STREAM); + + switch (o->encoding) { + case OBJ_ENCODING_STREAM: + sobj = createStreamObject(); + break; + default: + serverPanic("Wrong encoding."); + break; + } + + stream *s; + stream *new_s; + s = o->ptr; + new_s = sobj->ptr; + + raxIterator ri; + uint64_t rax_key[2]; + raxStart(&ri, s->rax); + raxSeek(&ri, "^", NULL, 0); + size_t lp_bytes = 0; /* Total bytes in the listpack. */ + unsigned char *lp = NULL; /* listpack pointer. */ + /* Get a reference to the listpack node. */ + while (raxNext(&ri)) { + lp = ri.data; + lp_bytes = lpBytes(lp); + unsigned char *new_lp = zmalloc(lp_bytes); + memcpy(new_lp, lp, lp_bytes); + memcpy(rax_key, ri.key, sizeof(rax_key)); + raxInsert(new_s->rax, (unsigned char *)&rax_key, sizeof(rax_key), + new_lp, NULL); + } + new_s->length = s->length; + new_s->first_id = s->first_id; + new_s->last_id = s->last_id; + new_s->max_deleted_entry_id = s->max_deleted_entry_id; + new_s->entries_added = s->entries_added; + raxStop(&ri); + + if (s->cgroups == NULL) return sobj; + + /* Consumer Groups */ + raxIterator ri_cgroups; + raxStart(&ri_cgroups, s->cgroups); + raxSeek(&ri_cgroups, "^", NULL, 0); + while (raxNext(&ri_cgroups)) { + streamCG *cg = ri_cgroups.data; + streamCG *new_cg = streamCreateCG(new_s, (char *)ri_cgroups.key, + ri_cgroups.key_len, &cg->last_id, + cg->entries_read); + + serverAssert(new_cg != NULL); + + /* Consumer Group PEL */ + raxIterator ri_cg_pel; + raxStart(&ri_cg_pel,cg->pel); + raxSeek(&ri_cg_pel,"^",NULL,0); + while(raxNext(&ri_cg_pel)){ + streamNACK *nack = ri_cg_pel.data; + streamNACK *new_nack = streamCreateNACK(NULL); + new_nack->delivery_time = nack->delivery_time; + new_nack->delivery_count = nack->delivery_count; + raxInsert(new_cg->pel, ri_cg_pel.key, sizeof(streamID), new_nack, NULL); + } + raxStop(&ri_cg_pel); + + /* Consumers */ + raxIterator ri_consumers; + raxStart(&ri_consumers, cg->consumers); + raxSeek(&ri_consumers, "^", NULL, 0); + while (raxNext(&ri_consumers)) { + streamConsumer *consumer = ri_consumers.data; + streamConsumer *new_consumer; + new_consumer = zmalloc(sizeof(*new_consumer)); + new_consumer->name = sdsdup(consumer->name); + new_consumer->pel = raxNew(); + raxInsert(new_cg->consumers,(unsigned char *)new_consumer->name, + sdslen(new_consumer->name), new_consumer, NULL); + new_consumer->seen_time = consumer->seen_time; + new_consumer->active_time = consumer->active_time; + + /* Consumer PEL */ + raxIterator ri_cpel; + raxStart(&ri_cpel, consumer->pel); + raxSeek(&ri_cpel, "^", NULL, 0); + while (raxNext(&ri_cpel)) { + streamNACK *new_nack = raxFind(new_cg->pel,ri_cpel.key,sizeof(streamID)); + + serverAssert(new_nack != raxNotFound); + + new_nack->consumer = new_consumer; + raxInsert(new_consumer->pel,ri_cpel.key,sizeof(streamID),new_nack,NULL); + } + raxStop(&ri_cpel); + } + raxStop(&ri_consumers); + } + raxStop(&ri_cgroups); + return sobj; +} + +/* This is a wrapper function for lpGet() to directly get an integer value + * from the listpack (that may store numbers as a string), converting + * the string if needed. + * The 'valid" argument is an optional output parameter to get an indication + * if the record was valid, when this parameter is NULL, the function will + * fail with an assertion. */ +static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) { + int64_t v; + unsigned char *e = lpGet(ele,&v,NULL); + if (e == NULL) { + if (valid) + *valid = 1; + return v; + } + /* The following code path should never be used for how listpacks work: + * they should always be able to store an int64_t value in integer + * encoded form. However the implementation may change. */ + long long ll; + int ret = string2ll((char*)e,v,&ll); + if (valid) + *valid = ret; + else + serverAssert(ret != 0); + v = ll; + return v; +} + +#define lpGetInteger(ele) lpGetIntegerIfValid(ele, NULL) + +/* Get an edge streamID of a given listpack. + * 'master_id' is an input param, used to build the 'edge_id' output param */ +int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamID *edge_id) +{ + if (lp == NULL) + return 0; + + unsigned char *lp_ele; + + /* We need to seek either the first or the last entry depending + * on the direction of the iteration. */ + if (first) { + /* Get the master fields count. */ + lp_ele = lpFirst(lp); /* Seek items count */ + lp_ele = lpNext(lp, lp_ele); /* Seek deleted count. */ + lp_ele = lpNext(lp, lp_ele); /* Seek num fields. */ + int64_t master_fields_count = lpGetInteger(lp_ele); + lp_ele = lpNext(lp, lp_ele); /* Seek first field. */ + + /* If we are iterating in normal order, skip the master fields + * to seek the first actual entry. */ + for (int64_t i = 0; i < master_fields_count; i++) + lp_ele = lpNext(lp, lp_ele); + + /* If we are going forward, skip the previous entry's + * lp-count field (or in case of the master entry, the zero + * term field) */ + lp_ele = lpNext(lp, lp_ele); + if (lp_ele == NULL) + return 0; + } else { + /* If we are iterating in reverse direction, just seek the + * last part of the last entry in the listpack (that is, the + * fields count). */ + lp_ele = lpLast(lp); + + /* If we are going backward, read the number of elements this + * entry is composed of, and jump backward N times to seek + * its start. */ + int64_t lp_count = lpGetInteger(lp_ele); + if (lp_count == 0) /* We reached the master entry. */ + return 0; + + while (lp_count--) + lp_ele = lpPrev(lp, lp_ele); + } + + lp_ele = lpNext(lp, lp_ele); /* Seek ID (lp_ele currently points to 'flags'). */ + + /* Get the ID: it is encoded as difference between the master + * ID and this entry ID. */ + streamID id = *master_id; + id.ms += lpGetInteger(lp_ele); + lp_ele = lpNext(lp, lp_ele); + id.seq += lpGetInteger(lp_ele); + *edge_id = id; + return 1; +} + +/* Debugging function to log the full content of a listpack. Useful + * for development and debugging. */ +void streamLogListpackContent(unsigned char *lp) { + unsigned char *p = lpFirst(lp); + while(p) { + unsigned char buf[LP_INTBUF_SIZE]; + int64_t v; + unsigned char *ele = lpGet(p,&v,buf); + serverLog(LL_WARNING,"- [%d] '%.*s'", (int)v, (int)v, ele); + p = lpNext(lp,p); + } +} + +/* Convert the specified stream entry ID as a 128 bit big endian number, so + * that the IDs can be sorted lexicographically. */ +void streamEncodeID(void *buf, streamID *id) { + uint64_t e[2]; + e[0] = htonu64(id->ms); + e[1] = htonu64(id->seq); + memcpy(buf,e,sizeof(e)); +} + +/* This is the reverse of streamEncodeID(): the decoded ID will be stored + * in the 'id' structure passed by reference. The buffer 'buf' must point + * to a 128 bit big-endian encoded ID. */ +void streamDecodeID(void *buf, streamID *id) { + uint64_t e[2]; + memcpy(e,buf,sizeof(e)); + id->ms = ntohu64(e[0]); + id->seq = ntohu64(e[1]); +} + +/* Compare two stream IDs. Return -1 if a < b, 0 if a == b, 1 if a > b. */ +int streamCompareID(streamID *a, streamID *b) { + if (a->ms > b->ms) return 1; + else if (a->ms < b->ms) return -1; + /* The ms part is the same. Check the sequence part. */ + else if (a->seq > b->seq) return 1; + else if (a->seq < b->seq) return -1; + /* Everything is the same: IDs are equal. */ + return 0; +} + +/* Retrieves the ID of the stream edge entry. An edge is either the first or + * the last ID in the stream, and may be a tombstone. To filter out tombstones, + * set the'skip_tombstones' argument to 1. */ +void streamGetEdgeID(stream *s, int first, int skip_tombstones, streamID *edge_id) +{ + streamIterator si; + int64_t numfields; + streamIteratorStart(&si,s,NULL,NULL,!first); + si.skip_tombstones = skip_tombstones; + int found = streamIteratorGetID(&si,edge_id,&numfields); + if (!found) { + streamID min_id = {0, 0}, max_id = {UINT64_MAX, UINT64_MAX}; + *edge_id = first ? max_id : min_id; + } + streamIteratorStop(&si); +} + +/* Adds a new item into the stream 's' having the specified number of + * field-value pairs as specified in 'numfields' and stored into 'argv'. + * Returns the new entry ID populating the 'added_id' structure. + * + * If 'use_id' is not NULL, the ID is not auto-generated by the function, + * but instead the passed ID is used to add the new entry. In this case + * adding the entry may fail as specified later in this comment. + * + * When 'use_id' is used alongside with a zero 'seq-given', the sequence + * part of the passed ID is ignored and the function will attempt to use an + * auto-generated sequence. + * + * The function returns C_OK if the item was added, this is always true + * if the ID was generated by the function. However the function may return + * C_ERR in several cases: + * 1. If an ID was given via 'use_id', but adding it failed since the + * current top ID is greater or equal. errno will be set to EDOM. + * 2. If a size of a single element or the sum of the elements is too big to + * be stored into the stream. errno will be set to ERANGE. */ +int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id, int seq_given) { + + /* Generate the new entry ID. */ + streamID id; + if (use_id) { + if (seq_given) { + id = *use_id; + } else { + /* The automatically generated sequence can be either zero (new + * timestamps) or the incremented sequence of the last ID. In the + * latter case, we need to prevent an overflow/advancing forward + * in time. */ + if (s->last_id.ms == use_id->ms) { + if (s->last_id.seq == UINT64_MAX) { + errno = EDOM; + return C_ERR; + } + id = s->last_id; + id.seq++; + } else { + id = *use_id; + } + } + } else { + streamNextID(&s->last_id,&id); + } + + /* Check that the new ID is greater than the last entry ID + * or return an error. Automatically generated IDs might + * overflow (and wrap-around) when incrementing the sequence + part. */ + if (streamCompareID(&id,&s->last_id) <= 0) { + errno = EDOM; + return C_ERR; + } + + /* Avoid overflow when trying to add an element to the stream (listpack + * can only host up to 32bit length strings, and also a total listpack size + * can't be bigger than 32bit length. */ + size_t totelelen = 0; + for (int64_t i = 0; i < numfields*2; i++) { + sds ele = argv[i]->ptr; + totelelen += sdslen(ele); + } + if (totelelen > STREAM_LISTPACK_MAX_SIZE) { + errno = ERANGE; + return C_ERR; + } + + /* Add the new entry. */ + raxIterator ri; + raxStart(&ri,s->rax); + raxSeek(&ri,"$",NULL,0); + + size_t lp_bytes = 0; /* Total bytes in the tail listpack. */ + unsigned char *lp = NULL; /* Tail listpack pointer. */ + + if (!raxEOF(&ri)) { + /* Get a reference to the tail node listpack. */ + lp = ri.data; + lp_bytes = lpBytes(lp); + } + raxStop(&ri); + + /* We have to add the key into the radix tree in lexicographic order, + * to do so we consider the ID as a single 128 bit number written in + * big endian, so that the most significant bytes are the first ones. */ + uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/ + streamID master_id; /* ID of the master entry in the listpack. */ + + /* Create a new listpack and radix tree node if needed. Note that when + * a new listpack is created, we populate it with a "master entry". This + * is just a set of fields that is taken as references in order to compress + * the stream entries that we'll add inside the listpack. + * + * Note that while we use the first added entry fields to create + * the master entry, the first added entry is NOT represented in the master + * entry, which is a stand alone object. But of course, the first entry + * will compress well because it's used as reference. + * + * The master entry is composed like in the following example: + * + * +-------+---------+------------+---------+--/--+---------+---------+-+ + * | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0| + * +-------+---------+------------+---------+--/--+---------+---------+-+ + * + * count and deleted just represent respectively the total number of + * entries inside the listpack that are valid, and marked as deleted + * (deleted flag in the entry flags set). So the total number of items + * actually inside the listpack (both deleted and not) is count+deleted. + * + * The real entries will be encoded with an ID that is just the + * millisecond and sequence difference compared to the key stored at + * the radix tree node containing the listpack (delta encoding), and + * if the fields of the entry are the same as the master entry fields, the + * entry flags will specify this fact and the entry fields and number + * of fields will be omitted (see later in the code of this function). + * + * The "0" entry at the end is the same as the 'lp-count' entry in the + * regular stream entries (see below), and marks the fact that there are + * no more entries, when we scan the stream from right to left. */ + + /* First of all, check if we can append to the current macro node or + * if we need to switch to the next one. 'lp' will be set to NULL if + * the current node is full. */ + if (lp != NULL) { + int new_node = 0; + size_t node_max_bytes = server.stream_node_max_bytes; + if (node_max_bytes == 0 || node_max_bytes > STREAM_LISTPACK_MAX_SIZE) + node_max_bytes = STREAM_LISTPACK_MAX_SIZE; + if (lp_bytes + totelelen >= node_max_bytes) { + new_node = 1; + } else if (server.stream_node_max_entries) { + unsigned char *lp_ele = lpFirst(lp); + /* Count both live entries and deleted ones. */ + int64_t count = lpGetInteger(lp_ele) + lpGetInteger(lpNext(lp,lp_ele)); + if (count >= server.stream_node_max_entries) new_node = 1; + } + + if (new_node) { + /* Shrink extra pre-allocated memory */ + lp = lpShrinkToFit(lp); + if (ri.data != lp) + raxInsert(s->rax,ri.key,ri.key_len,lp,NULL); + lp = NULL; + } + } + + int flags = STREAM_ITEM_FLAG_NONE; + if (lp == NULL) { + master_id = id; + streamEncodeID(rax_key,&id); + /* Create the listpack having the master entry ID and fields. + * Pre-allocate some bytes when creating listpack to avoid realloc on + * every XADD. Since listpack.c uses malloc_size, it'll grow in steps, + * and won't realloc on every XADD. + * When listpack reaches max number of entries, we'll shrink the + * allocation to fit the data. */ + size_t prealloc = STREAM_LISTPACK_MAX_PRE_ALLOCATE; + if (server.stream_node_max_bytes > 0 && server.stream_node_max_bytes < prealloc) { + prealloc = server.stream_node_max_bytes; + } + lp = lpNew(prealloc); + lp = lpAppendInteger(lp,1); /* One item, the one we are adding. */ + lp = lpAppendInteger(lp,0); /* Zero deleted so far. */ + lp = lpAppendInteger(lp,numfields); + for (int64_t i = 0; i < numfields; i++) { + sds field = argv[i*2]->ptr; + lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); + } + lp = lpAppendInteger(lp,0); /* Master entry zero terminator. */ + raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); + /* The first entry we insert, has obviously the same fields of the + * master entry. */ + flags |= STREAM_ITEM_FLAG_SAMEFIELDS; + } else { + serverAssert(ri.key_len == sizeof(rax_key)); + memcpy(rax_key,ri.key,sizeof(rax_key)); + + /* Read the master ID from the radix tree key. */ + streamDecodeID(rax_key,&master_id); + unsigned char *lp_ele = lpFirst(lp); + + /* Update count and skip the deleted fields. */ + int64_t count = lpGetInteger(lp_ele); + lp = lpReplaceInteger(lp,&lp_ele,count+1); + lp_ele = lpNext(lp,lp_ele); /* seek deleted. */ + lp_ele = lpNext(lp,lp_ele); /* seek master entry num fields. */ + + /* Check if the entry we are adding, have the same fields + * as the master entry. */ + int64_t master_fields_count = lpGetInteger(lp_ele); + lp_ele = lpNext(lp,lp_ele); + if (numfields == master_fields_count) { + int64_t i; + for (i = 0; i < master_fields_count; i++) { + sds field = argv[i*2]->ptr; + int64_t e_len; + unsigned char buf[LP_INTBUF_SIZE]; + unsigned char *e = lpGet(lp_ele,&e_len,buf); + /* Stop if there is a mismatch. */ + if (sdslen(field) != (size_t)e_len || + memcmp(e,field,e_len) != 0) break; + lp_ele = lpNext(lp,lp_ele); + } + /* All fields are the same! We can compress the field names + * setting a single bit in the flags. */ + if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS; + } + } + + /* Populate the listpack with the new entry. We use the following + * encoding: + * + * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+ + * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N|lp-count| + * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+ + * + * However if the SAMEFIELD flag is set, we have just to populate + * the entry with the values, so it becomes: + * + * +-----+--------+-------+-/-+-------+--------+ + * |flags|entry-id|value-1|...|value-N|lp-count| + * +-----+--------+-------+-/-+-------+--------+ + * + * The entry-id field is actually two separated fields: the ms + * and seq difference compared to the master entry. + * + * The lp-count field is a number that states the number of listpack pieces + * that compose the entry, so that it's possible to travel the entry + * in reverse order: we can just start from the end of the listpack, read + * the entry, and jump back N times to seek the "flags" field to read + * the stream full entry. */ + lp = lpAppendInteger(lp,flags); + lp = lpAppendInteger(lp,id.ms - master_id.ms); + lp = lpAppendInteger(lp,id.seq - master_id.seq); + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) + lp = lpAppendInteger(lp,numfields); + for (int64_t i = 0; i < numfields; i++) { + sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr; + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) + lp = lpAppend(lp,(unsigned char*)field,sdslen(field)); + lp = lpAppend(lp,(unsigned char*)value,sdslen(value)); + } + /* Compute and store the lp-count field. */ + int64_t lp_count = numfields; + lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */ + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) { + /* If the item is not compressed, it also has the fields other than + * the values, and an additional num-fields field. */ + lp_count += numfields+1; + } + lp = lpAppendInteger(lp,lp_count); + + /* Insert back into the tree in order to update the listpack pointer. */ + if (ri.data != lp) + raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL); + s->length++; + s->entries_added++; + s->last_id = id; + if (s->length == 1) s->first_id = id; + if (added_id) *added_id = id; + return C_OK; +} + +typedef struct { + /* XADD options */ + streamID id; /* User-provided ID, for XADD only. */ + int id_given; /* Was an ID different than "*" specified? for XADD only. */ + int seq_given; /* Was an ID different than "ms-*" specified? for XADD only. */ + int no_mkstream; /* if set to 1 do not create new stream */ + + /* XADD + XTRIM common options */ + int trim_strategy; /* TRIM_STRATEGY_* */ + int trim_strategy_arg_idx; /* Index of the count in MAXLEN/MINID, for rewriting. */ + int approx_trim; /* If 1 only delete whole radix tree nodes, so + * the trim argument is not applied verbatim. */ + long long limit; /* Maximum amount of entries to trim. If 0, no limitation + * on the amount of trimming work is enforced. */ + /* TRIM_STRATEGY_MAXLEN options */ + long long maxlen; /* After trimming, leave stream at this length . */ + /* TRIM_STRATEGY_MINID options */ + streamID minid; /* Trim by ID (No stream entries with ID < 'minid' will remain) */ +} streamAddTrimArgs; + +#define TRIM_STRATEGY_NONE 0 +#define TRIM_STRATEGY_MAXLEN 1 +#define TRIM_STRATEGY_MINID 2 + +/* Trim the stream 's' according to args->trim_strategy, and return the + * number of elements removed from the stream. The 'approx' option, if non-zero, + * specifies that the trimming must be performed in a approximated way in + * order to maximize performances. This means that the stream may contain + * entries with IDs < 'id' in case of MINID (or more elements than 'maxlen' + * in case of MAXLEN), and elements are only removed if we can remove + * a *whole* node of the radix tree. The elements are removed from the head + * of the stream (older elements). + * + * The function may return zero if: + * + * 1) The minimal entry ID of the stream is already < 'id' (MINID); or + * 2) The stream is already shorter or equal to the specified max length (MAXLEN); or + * 3) The 'approx' option is true and the head node did not have enough elements + * to be deleted. + * + * args->limit is the maximum number of entries to delete. The purpose is to + * prevent this function from taking to long. + * If 'limit' is 0 then we do not limit the number of deleted entries. + * Much like the 'approx', if 'limit' is smaller than the number of entries + * that should be trimmed, there is a chance we will still have entries with + * IDs < 'id' (or number of elements >= maxlen in case of MAXLEN). + */ +int64_t streamTrim(stream *s, streamAddTrimArgs *args) { + size_t maxlen = args->maxlen; + streamID *id = &args->minid; + int approx = args->approx_trim; + int64_t limit = args->limit; + int trim_strategy = args->trim_strategy; + + if (trim_strategy == TRIM_STRATEGY_NONE) + return 0; + + raxIterator ri; + raxStart(&ri,s->rax); + raxSeek(&ri,"^",NULL,0); + + int64_t deleted = 0; + while (raxNext(&ri)) { + if (trim_strategy == TRIM_STRATEGY_MAXLEN && s->length <= maxlen) + break; + + unsigned char *lp = ri.data, *p = lpFirst(lp); + int64_t entries = lpGetInteger(p); + + /* Check if we exceeded the amount of work we could do */ + if (limit && (deleted + entries) > limit) + break; + + /* Check if we can remove the whole node. */ + int remove_node; + streamID master_id = {0}; /* For MINID */ + if (trim_strategy == TRIM_STRATEGY_MAXLEN) { + remove_node = s->length - entries >= maxlen; + } else { + /* Read the master ID from the radix tree key. */ + streamDecodeID(ri.key, &master_id); + + /* Read last ID. */ + streamID last_id = {0,0}; + lpGetEdgeStreamID(lp, 0, &master_id, &last_id); + + /* We can remove the entire node id its last ID < 'id' */ + remove_node = streamCompareID(&last_id, id) < 0; + } + + if (remove_node) { + lpFree(lp); + raxRemove(s->rax,ri.key,ri.key_len,NULL); + raxSeek(&ri,">=",ri.key,ri.key_len); + s->length -= entries; + deleted += entries; + continue; + } + + /* If we cannot remove a whole element, and approx is true, + * stop here. */ + if (approx) break; + + /* Now we have to trim entries from within 'lp' */ + int64_t deleted_from_lp = 0; + + p = lpNext(lp, p); /* Skip deleted field. */ + p = lpNext(lp, p); /* Skip num-of-fields in the master entry. */ + + /* Skip all the master fields. */ + int64_t master_fields_count = lpGetInteger(p); + p = lpNext(lp,p); /* Skip the first field. */ + for (int64_t j = 0; j < master_fields_count; j++) + p = lpNext(lp,p); /* Skip all master fields. */ + p = lpNext(lp,p); /* Skip the zero master entry terminator. */ + + /* 'p' is now pointing to the first entry inside the listpack. + * We have to run entry after entry, marking entries as deleted + * if they are already not deleted. */ + while (p) { + /* We keep a copy of p (which point to flags part) in order to + * update it after (and if) we actually remove the entry */ + unsigned char *pcopy = p; + + int64_t flags = lpGetInteger(p); + p = lpNext(lp, p); /* Skip flags. */ + int64_t to_skip; + + int64_t ms_delta = lpGetInteger(p); + p = lpNext(lp, p); /* Skip ID ms delta */ + int64_t seq_delta = lpGetInteger(p); + p = lpNext(lp, p); /* Skip ID seq delta */ + + streamID currid = {0}; /* For MINID */ + if (trim_strategy == TRIM_STRATEGY_MINID) { + currid.ms = master_id.ms + ms_delta; + currid.seq = master_id.seq + seq_delta; + } + + int stop; + if (trim_strategy == TRIM_STRATEGY_MAXLEN) { + stop = s->length <= maxlen; + } else { + /* Following IDs will definitely be greater because the rax + * tree is sorted, no point of continuing. */ + stop = streamCompareID(&currid, id) >= 0; + } + if (stop) + break; + + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + to_skip = master_fields_count; + } else { + to_skip = lpGetInteger(p); /* Get num-fields. */ + p = lpNext(lp,p); /* Skip num-fields. */ + to_skip *= 2; /* Fields and values. */ + } + + while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */ + p = lpNext(lp,p); /* Skip the final lp-count field. */ + + /* Mark the entry as deleted. */ + if (!(flags & STREAM_ITEM_FLAG_DELETED)) { + intptr_t delta = p - lp; + flags |= STREAM_ITEM_FLAG_DELETED; + lp = lpReplaceInteger(lp, &pcopy, flags); + deleted_from_lp++; + s->length--; + p = lp + delta; + } + } + deleted += deleted_from_lp; + + /* Now we update the entries/deleted counters. */ + p = lpFirst(lp); + lp = lpReplaceInteger(lp,&p,entries-deleted_from_lp); + p = lpNext(lp,p); /* Skip deleted field. */ + int64_t marked_deleted = lpGetInteger(p); + lp = lpReplaceInteger(lp,&p,marked_deleted+deleted_from_lp); + p = lpNext(lp,p); /* Skip num-of-fields in the master entry. */ + + /* Here we should perform garbage collection in case at this point + * there are too many entries deleted inside the listpack. */ + entries -= deleted_from_lp; + marked_deleted += deleted_from_lp; + if (entries + marked_deleted > 10 && marked_deleted > entries/2) { + /* TODO: perform a garbage collection. */ + } + + /* Update the listpack with the new pointer. */ + raxInsert(s->rax,ri.key,ri.key_len,lp,NULL); + + break; /* If we are here, there was enough to delete in the current + node, so no need to go to the next node. */ + } + raxStop(&ri); + + /* Update the stream's first ID after the trimming. */ + if (s->length == 0) { + s->first_id.ms = 0; + s->first_id.seq = 0; + } else if (deleted) { + streamGetEdgeID(s,1,1,&s->first_id); + } + + return deleted; +} + +/* Trims a stream by length. Returns the number of deleted items. */ +int64_t streamTrimByLength(stream *s, long long maxlen, int approx) { + streamAddTrimArgs args = { + .trim_strategy = TRIM_STRATEGY_MAXLEN, + .approx_trim = approx, + .limit = approx ? 100 * server.stream_node_max_entries : 0, + .maxlen = maxlen + }; + return streamTrim(s, &args); +} + +/* Trims a stream by minimum ID. Returns the number of deleted items. */ +int64_t streamTrimByID(stream *s, streamID minid, int approx) { + streamAddTrimArgs args = { + .trim_strategy = TRIM_STRATEGY_MINID, + .approx_trim = approx, + .limit = approx ? 100 * server.stream_node_max_entries : 0, + .minid = minid + }; + return streamTrim(s, &args); +} + +/* Parse the arguments of XADD/XTRIM. + * + * See streamAddTrimArgs for more details about the arguments handled. + * + * This function returns the position of the ID argument (relevant only to XADD). + * On error -1 is returned and a reply is sent. */ +static int streamParseAddOrTrimArgsOrReply(client *c, streamAddTrimArgs *args, int xadd) { + /* Initialize arguments to defaults */ + memset(args, 0, sizeof(*args)); + + /* Parse options. */ + int i = 2; /* This is the first argument position where we could + find an option, or the ID. */ + int limit_given = 0; + for (; i < c->argc; i++) { + int moreargs = (c->argc-1) - i; /* Number of additional arguments. */ + char *opt = c->argv[i]->ptr; + if (xadd && opt[0] == '*' && opt[1] == '\0') { + /* This is just a fast path for the common case of auto-ID + * creation. */ + break; + } else if (!strcasecmp(opt,"maxlen") && moreargs) { + if (args->trim_strategy != TRIM_STRATEGY_NONE) { + addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible"); + return -1; + } + args->approx_trim = 0; + char *next = c->argv[i+1]->ptr; + /* Check for the form MAXLEN ~ . */ + if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') { + args->approx_trim = 1; + i++; + } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') { + i++; + } + if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->maxlen,NULL) + != C_OK) return -1; + + if (args->maxlen < 0) { + addReplyError(c,"The MAXLEN argument must be >= 0."); + return -1; + } + i++; + args->trim_strategy = TRIM_STRATEGY_MAXLEN; + args->trim_strategy_arg_idx = i; + } else if (!strcasecmp(opt,"minid") && moreargs) { + if (args->trim_strategy != TRIM_STRATEGY_NONE) { + addReplyError(c,"syntax error, MAXLEN and MINID options at the same time are not compatible"); + return -1; + } + args->approx_trim = 0; + char *next = c->argv[i+1]->ptr; + /* Check for the form MINID ~ */ + if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') { + args->approx_trim = 1; + i++; + } else if (moreargs >= 2 && next[0] == '=' && next[1] == '\0') { + i++; + } + + if (streamParseStrictIDOrReply(c,c->argv[i+1],&args->minid,0,NULL) != C_OK) + return -1; + + i++; + args->trim_strategy = TRIM_STRATEGY_MINID; + args->trim_strategy_arg_idx = i; + } else if (!strcasecmp(opt,"limit") && moreargs) { + /* Note about LIMIT: If it was not provided by the caller we set + * it to 100*server.stream_node_max_entries, and that's to prevent the + * trimming from taking too long, on the expense of not deleting entries + * that should be trimmed. + * If user wanted exact trimming (i.e. no '~') we never limit the number + * of trimmed entries */ + if (getLongLongFromObjectOrReply(c,c->argv[i+1],&args->limit,NULL) != C_OK) + return -1; + + if (args->limit < 0) { + addReplyError(c,"The LIMIT argument must be >= 0."); + return -1; + } + limit_given = 1; + i++; + } else if (xadd && !strcasecmp(opt,"nomkstream")) { + args->no_mkstream = 1; + } else if (xadd) { + /* If we are here is a syntax error or a valid ID. */ + if (streamParseStrictIDOrReply(c,c->argv[i],&args->id,0,&args->seq_given) != C_OK) + return -1; + args->id_given = 1; + break; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return -1; + } + } + + if (args->limit && args->trim_strategy == TRIM_STRATEGY_NONE) { + addReplyError(c,"syntax error, LIMIT cannot be used without specifying a trimming strategy"); + return -1; + } + + if (!xadd && args->trim_strategy == TRIM_STRATEGY_NONE) { + addReplyError(c,"syntax error, XTRIM must be called with a trimming strategy"); + return -1; + } + + if (mustObeyClient(c)) { + /* If command came from master or from AOF we must not enforce maxnodes + * (The maxlen/minid argument was re-written to make sure there's no + * inconsistency). */ + args->limit = 0; + } else { + /* We need to set the limit (only if we got '~') */ + if (limit_given) { + if (!args->approx_trim) { + /* LIMIT was provided without ~ */ + addReplyError(c,"syntax error, LIMIT cannot be used without the special ~ option"); + return -1; + } + } else { + /* User didn't provide LIMIT, we must set it. */ + if (args->approx_trim) { + /* In order to prevent from trimming to do too much work and + * cause latency spikes we limit the amount of work it can do. + * We have to cap args->limit from both sides in case + * stream_node_max_entries is 0 or too big (could cause overflow) + */ + args->limit = 100 * server.stream_node_max_entries; /* Maximum 100 rax nodes. */ + if (args->limit <= 0) args->limit = 10000; + if (args->limit > 1000000) args->limit = 1000000; + } else { + /* No LIMIT for exact trimming */ + args->limit = 0; + } + } + } + + return i; +} + +/* Initialize the stream iterator, so that we can call iterating functions + * to get the next items. This requires a corresponding streamIteratorStop() + * at the end. The 'rev' parameter controls the direction. If it's zero the + * iteration is from the start to the end element (inclusive), otherwise + * if rev is non-zero, the iteration is reversed. + * + * Once the iterator is initialized, we iterate like this: + * + * streamIterator myiterator; + * streamIteratorStart(&myiterator,...); + * int64_t numfields; + * while(streamIteratorGetID(&myiterator,&ID,&numfields)) { + * while(numfields--) { + * unsigned char *key, *value; + * size_t key_len, value_len; + * streamIteratorGetField(&myiterator,&key,&value,&key_len,&value_len); + * + * ... do what you want with key and value ... + * } + * } + * streamIteratorStop(&myiterator); */ +void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev) { + /* Initialize the iterator and translates the iteration start/stop + * elements into a 128 big big-endian number. */ + if (start) { + streamEncodeID(si->start_key,start); + } else { + si->start_key[0] = 0; + si->start_key[1] = 0; + } + + if (end) { + streamEncodeID(si->end_key,end); + } else { + si->end_key[0] = UINT64_MAX; + si->end_key[1] = UINT64_MAX; + } + + /* Seek the correct node in the radix tree. */ + raxStart(&si->ri,s->rax); + if (!rev) { + if (start && (start->ms || start->seq)) { + raxSeek(&si->ri,"<=",(unsigned char*)si->start_key, + sizeof(si->start_key)); + if (raxEOF(&si->ri)) raxSeek(&si->ri,"^",NULL,0); + } else { + raxSeek(&si->ri,"^",NULL,0); + } + } else { + if (end && (end->ms || end->seq)) { + raxSeek(&si->ri,"<=",(unsigned char*)si->end_key, + sizeof(si->end_key)); + if (raxEOF(&si->ri)) raxSeek(&si->ri,"$",NULL,0); + } else { + raxSeek(&si->ri,"$",NULL,0); + } + } + si->stream = s; + si->lp = NULL; /* There is no current listpack right now. */ + si->lp_ele = NULL; /* Current listpack cursor. */ + si->rev = rev; /* Direction, if non-zero reversed, from end to start. */ + si->skip_tombstones = 1; /* By default tombstones aren't emitted. */ +} + +/* Return 1 and store the current item ID at 'id' if there are still + * elements within the iteration range, otherwise return 0 in order to + * signal the iteration terminated. */ +int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) { + while(1) { /* Will stop when element > stop_key or end of radix tree. */ + /* If the current listpack is set to NULL, this is the start of the + * iteration or the previous listpack was completely iterated. + * Go to the next node. */ + if (si->lp == NULL || si->lp_ele == NULL) { + if (!si->rev && !raxNext(&si->ri)) return 0; + else if (si->rev && !raxPrev(&si->ri)) return 0; + serverAssert(si->ri.key_len == sizeof(streamID)); + /* Get the master ID. */ + streamDecodeID(si->ri.key,&si->master_id); + /* Get the master fields count. */ + si->lp = si->ri.data; + si->lp_ele = lpFirst(si->lp); /* Seek items count */ + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek deleted count. */ + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek num fields. */ + si->master_fields_count = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */ + si->master_fields_start = si->lp_ele; + /* We are now pointing to the first field of the master entry. + * We need to seek either the first or the last entry depending + * on the direction of the iteration. */ + if (!si->rev) { + /* If we are iterating in normal order, skip the master fields + * to seek the first actual entry. */ + for (uint64_t i = 0; i < si->master_fields_count; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); + } else { + /* If we are iterating in reverse direction, just seek the + * last part of the last entry in the listpack (that is, the + * fields count). */ + si->lp_ele = lpLast(si->lp); + } + } else if (si->rev) { + /* If we are iterating in the reverse order, and this is not + * the first entry emitted for this listpack, then we already + * emitted the current entry, and have to go back to the previous + * one. */ + int64_t lp_count = lpGetInteger(si->lp_ele); + while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + /* Seek lp-count of prev entry. */ + si->lp_ele = lpPrev(si->lp,si->lp_ele); + } + + /* For every radix tree node, iterate the corresponding listpack, + * returning elements when they are within range. */ + while(1) { + if (!si->rev) { + /* If we are going forward, skip the previous entry + * lp-count field (or in case of the master entry, the zero + * term field) */ + si->lp_ele = lpNext(si->lp,si->lp_ele); + if (si->lp_ele == NULL) break; + } else { + /* If we are going backward, read the number of elements this + * entry is composed of, and jump backward N times to seek + * its start. */ + int64_t lp_count = lpGetInteger(si->lp_ele); + if (lp_count == 0) { /* We reached the master entry. */ + si->lp = NULL; + si->lp_ele = NULL; + break; + } + while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + } + + /* Get the flags entry. */ + si->lp_flags = si->lp_ele; + int64_t flags = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. */ + + /* Get the ID: it is encoded as difference between the master + * ID and this entry ID. */ + *id = si->master_id; + id->ms += lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); + id->seq += lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,id); + + /* The number of entries is here or not depending on the + * flags. */ + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + *numfields = si->master_fields_count; + } else { + *numfields = lpGetInteger(si->lp_ele); + si->lp_ele = lpNext(si->lp,si->lp_ele); + } + serverAssert(*numfields>=0); + + /* If current >= start, and the entry is not marked as + * deleted or tombstones are included, emit it. */ + if (!si->rev) { + if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 && + (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED))) + { + if (memcmp(buf,si->end_key,sizeof(streamID)) > 0) + return 0; /* We are already out of range. */ + si->entry_flags = flags; + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) + si->master_fields_ptr = si->master_fields_start; + return 1; /* Valid item returned. */ + } + } else { + if (memcmp(buf,si->end_key,sizeof(streamID)) <= 0 && + (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED))) + { + if (memcmp(buf,si->start_key,sizeof(streamID)) < 0) + return 0; /* We are already out of range. */ + si->entry_flags = flags; + if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) + si->master_fields_ptr = si->master_fields_start; + return 1; /* Valid item returned. */ + } + } + + /* If we do not emit, we have to discard if we are going + * forward, or seek the previous entry if we are going + * backward. */ + if (!si->rev) { + int64_t to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ? + *numfields : *numfields*2; + for (int64_t i = 0; i < to_discard; i++) + si->lp_ele = lpNext(si->lp,si->lp_ele); + } else { + int64_t prev_times = 4; /* flag + id ms + id seq + one more to + go back to the previous entry "count" + field. */ + /* If the entry was not flagged SAMEFIELD we also read the + * number of fields, so go back one more. */ + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) prev_times++; + while(prev_times--) si->lp_ele = lpPrev(si->lp,si->lp_ele); + } + } + + /* End of listpack reached. Try the next/prev radix tree node. */ + } +} + +/* Get the field and value of the current item we are iterating. This should + * be called immediately after streamIteratorGetID(), and for each field + * according to the number of fields returned by streamIteratorGetID(). + * The function populates the field and value pointers and the corresponding + * lengths by reference, that are valid until the next iterator call, assuming + * no one touches the stream meanwhile. */ +void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) { + if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) { + *fieldptr = lpGet(si->master_fields_ptr,fieldlen,si->field_buf); + si->master_fields_ptr = lpNext(si->lp,si->master_fields_ptr); + } else { + *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf); + si->lp_ele = lpNext(si->lp,si->lp_ele); + } + *valueptr = lpGet(si->lp_ele,valuelen,si->value_buf); + si->lp_ele = lpNext(si->lp,si->lp_ele); +} + +/* Remove the current entry from the stream: can be called after the + * GetID() API or after any GetField() call, however we need to iterate + * a valid entry while calling this function. Moreover the function + * requires the entry ID we are currently iterating, that was previously + * returned by GetID(). + * + * Note that after calling this function, next calls to GetField() can't + * be performed: the entry is now deleted. Instead the iterator will + * automatically re-seek to the next entry, so the caller should continue + * with GetID(). */ +void streamIteratorRemoveEntry(streamIterator *si, streamID *current) { + unsigned char *lp = si->lp; + int64_t aux; + + /* We do not really delete the entry here. Instead we mark it as + * deleted by flagging it, and also incrementing the count of the + * deleted entries in the listpack header. + * + * We start flagging: */ + int64_t flags = lpGetInteger(si->lp_flags); + flags |= STREAM_ITEM_FLAG_DELETED; + lp = lpReplaceInteger(lp,&si->lp_flags,flags); + + /* Change the valid/deleted entries count in the master entry. */ + unsigned char *p = lpFirst(lp); + aux = lpGetInteger(p); + + if (aux == 1) { + /* If this is the last element in the listpack, we can remove the whole + * node. */ + lpFree(lp); + raxRemove(si->stream->rax,si->ri.key,si->ri.key_len,NULL); + } else { + /* In the base case we alter the counters of valid/deleted entries. */ + lp = lpReplaceInteger(lp,&p,aux-1); + p = lpNext(lp,p); /* Seek deleted field. */ + aux = lpGetInteger(p); + lp = lpReplaceInteger(lp,&p,aux+1); + + /* Update the listpack with the new pointer. */ + if (si->lp != lp) + raxInsert(si->stream->rax,si->ri.key,si->ri.key_len,lp,NULL); + } + + /* Update the number of entries counter. */ + si->stream->length--; + + /* Re-seek the iterator to fix the now messed up state. */ + streamID start, end; + if (si->rev) { + streamDecodeID(si->start_key,&start); + end = *current; + } else { + start = *current; + streamDecodeID(si->end_key,&end); + } + streamIteratorStop(si); + streamIteratorStart(si,si->stream,&start,&end,si->rev); + + /* TODO: perform a garbage collection here if the ratio between + * deleted and valid goes over a certain limit. */ +} + +/* Stop the stream iterator. The only cleanup we need is to free the rax + * iterator, since the stream iterator itself is supposed to be stack + * allocated. */ +void streamIteratorStop(streamIterator *si) { + raxStop(&si->ri); +} + +/* Return 1 if `id` exists in `s` (and not marked as deleted) */ +int streamEntryExists(stream *s, streamID *id) { + streamIterator si; + streamIteratorStart(&si,s,id,id,0); + streamID myid; + int64_t numfields; + int found = streamIteratorGetID(&si,&myid,&numfields); + streamIteratorStop(&si); + if (!found) + return 0; + serverAssert(streamCompareID(id,&myid) == 0); + return 1; +} + +/* Delete the specified item ID from the stream, returning 1 if the item + * was deleted 0 otherwise (if it does not exist). */ +int streamDeleteItem(stream *s, streamID *id) { + int deleted = 0; + streamIterator si; + streamIteratorStart(&si,s,id,id,0); + streamID myid; + int64_t numfields; + if (streamIteratorGetID(&si,&myid,&numfields)) { + streamIteratorRemoveEntry(&si,&myid); + deleted = 1; + } + streamIteratorStop(&si); + return deleted; +} + +/* Get the last valid (non-tombstone) streamID of 's'. */ +void streamLastValidID(stream *s, streamID *maxid) +{ + streamIterator si; + streamIteratorStart(&si,s,NULL,NULL,1); + int64_t numfields; + if (!streamIteratorGetID(&si,maxid,&numfields) && s->length) + serverPanic("Corrupt stream, length is %llu, but no max id", (unsigned long long)s->length); + streamIteratorStop(&si); +} + +/* Maximum size for a stream ID string. In theory 20*2+1 should be enough, + * But to avoid chance for off by one issues and null-term, in case this will + * be used as parsing buffer, we use a slightly larger buffer. On the other + * hand considering sds header is gonna add 4 bytes, we wanna keep below the + * allocator's 48 bytes bin. */ +#define STREAM_ID_STR_LEN 44 + +sds createStreamIDString(streamID *id) { + /* Optimization: pre-allocate a big enough buffer to avoid reallocs. */ + sds str = sdsnewlen(SDS_NOINIT, STREAM_ID_STR_LEN); + sdssetlen(str, 0); + return sdscatfmt(str,"%U-%U", id->ms,id->seq); +} + +/* Emit a reply in the client output buffer by formatting a Stream ID + * in the standard - format, using the simple string protocol + * of REPL. */ +void addReplyStreamID(client *c, streamID *id) { + addReplyBulkSds(c,createStreamIDString(id)); +} + +void setDeferredReplyStreamID(client *c, void *dr, streamID *id) { + setDeferredReplyBulkSds(c, dr, createStreamIDString(id)); +} + +/* Similar to the above function, but just creates an object, usually useful + * for replication purposes to create arguments. */ +robj *createObjectFromStreamID(streamID *id) { + return createObject(OBJ_STRING, createStreamIDString(id)); +} + +/* Returns non-zero if the ID is 0-0. */ +int streamIDEqZero(streamID *id) { + return !(id->ms || id->seq); +} + +/* A helper that returns non-zero if the range from 'start' to `end` + * contains a tombstone. + * + * NOTE: this assumes that the caller had verified that 'start' is less than + * 's->last_id'. */ +int streamRangeHasTombstones(stream *s, streamID *start, streamID *end) { + streamID start_id, end_id; + + if (!s->length || streamIDEqZero(&s->max_deleted_entry_id)) { + /* The stream is empty or has no tombstones. */ + return 0; + } + + if (streamCompareID(&s->first_id,&s->max_deleted_entry_id) > 0) { + /* The latest tombstone is before the first entry. */ + return 0; + } + + if (start) { + start_id = *start; + } else { + start_id.ms = 0; + start_id.seq = 0; + } + + if (end) { + end_id = *end; + } else { + end_id.ms = UINT64_MAX; + end_id.seq = UINT64_MAX; + } + + if (streamCompareID(&start_id,&s->max_deleted_entry_id) <= 0 && + streamCompareID(&s->max_deleted_entry_id,&end_id) <= 0) + { + /* start_id <= max_deleted_entry_id <= end_id: The range does include a tombstone. */ + return 1; + } + + /* The range doesn't includes a tombstone. */ + return 0; +} + +/* Replies with a consumer group's current lag, that is the number of messages + * in the stream that are yet to be delivered. In case that the lag isn't + * available due to fragmentation, the reply to the client is a null. */ +void streamReplyWithCGLag(client *c, stream *s, streamCG *cg) { + int valid = 0; + long long lag = 0; + + if (!s->entries_added) { + /* The lag of a newly-initialized stream is 0. */ + lag = 0; + valid = 1; + } else if (cg->entries_read != SCG_INVALID_ENTRIES_READ && !streamRangeHasTombstones(s,&cg->last_id,NULL)) { + /* No fragmentation ahead means that the group's logical reads counter + * is valid for performing the lag calculation. */ + lag = (long long)s->entries_added - cg->entries_read; + valid = 1; + } else { + /* Attempt to retrieve the group's last ID logical read counter. */ + long long entries_read = streamEstimateDistanceFromFirstEverEntry(s,&cg->last_id); + if (entries_read != SCG_INVALID_ENTRIES_READ) { + /* A valid counter was obtained. */ + lag = (long long)s->entries_added - entries_read; + valid = 1; + } + } + + if (valid) { + addReplyLongLong(c,lag); + } else { + addReplyNull(c); + } +} + +/* This function returns a value that is the ID's logical read counter, or its + * distance (the number of entries) from the first entry ever to have been added + * to the stream. + * + * A counter is returned only in one of the following cases: + * 1. The ID is the same as the stream's last ID. In this case, the returned + * is the same as the stream's entries_added counter. + * 2. The ID equals that of the currently first entry in the stream, and the + * stream has no tombstones. The returned value, in this case, is the result + * of subtracting the stream's length from its added_entries, incremented by + * one. + * 3. The ID less than the stream's first current entry's ID, and there are no + * tombstones. Here the estimated counter is the result of subtracting the + * stream's length from its added_entries. + * 4. The stream's added_entries is zero, meaning that no entries were ever + * added. + * + * The special return value of ULLONG_MAX signals that the counter's value isn't + * obtainable. It is returned in these cases: + * 1. The provided ID, if it even exists, is somewhere between the stream's + * current first and last entries' IDs, or in the future. + * 2. The stream contains one or more tombstones. */ +long long streamEstimateDistanceFromFirstEverEntry(stream *s, streamID *id) { + /* The counter of any ID in an empty, never-before-used stream is 0. */ + if (!s->entries_added) { + return 0; + } + + /* In the empty stream, if the ID is smaller or equal to the last ID, + * it can set to the current added_entries value. */ + if (!s->length && streamCompareID(id,&s->last_id) < 1) { + return s->entries_added; + } + + int cmp_last = streamCompareID(id,&s->last_id); + if (cmp_last == 0) { + /* Return the exact counter of the last entry in the stream. */ + return s->entries_added; + } else if (cmp_last > 0) { + /* The counter of a future ID is unknown. */ + return SCG_INVALID_ENTRIES_READ; + } + + int cmp_id_first = streamCompareID(id,&s->first_id); + int cmp_xdel_first = streamCompareID(&s->max_deleted_entry_id,&s->first_id); + if (streamIDEqZero(&s->max_deleted_entry_id) || cmp_xdel_first < 0) { + /* There's definitely no fragmentation ahead. */ + if (cmp_id_first < 0) { + /* Return the estimated counter. */ + return s->entries_added - s->length; + } else if (cmp_id_first == 0) { + /* Return the exact counter of the first entry in the stream. */ + return s->entries_added - s->length + 1; + } + } + + /* The ID is either before an XDEL that fragments the stream or an arbitrary + * ID. Either case, so we can't make a prediction. */ + return SCG_INVALID_ENTRIES_READ; +} + +/* As a result of an explicit XCLAIM or XREADGROUP command, new entries + * are created in the pending list of the stream and consumers. We need + * to propagate this changes in the form of XCLAIM commands. */ +void streamPropagateXCLAIM(client *c, robj *key, streamCG *group, robj *groupname, robj *id, streamNACK *nack) { + /* We need to generate an XCLAIM that will work in a idempotent fashion: + * + * XCLAIM 0 TIME + * RETRYCOUNT FORCE JUSTID LASTID . + * + * Note that JUSTID is useful in order to avoid that XCLAIM will do + * useless work in the slave side, trying to fetch the stream item. */ + robj *argv[14]; + argv[0] = shared.xclaim; + argv[1] = key; + argv[2] = groupname; + argv[3] = createStringObject(nack->consumer->name,sdslen(nack->consumer->name)); + argv[4] = shared.integers[0]; + argv[5] = id; + argv[6] = shared.time; + argv[7] = createStringObjectFromLongLong(nack->delivery_time); + argv[8] = shared.retrycount; + argv[9] = createStringObjectFromLongLong(nack->delivery_count); + argv[10] = shared.force; + argv[11] = shared.justid; + argv[12] = shared.lastid; + argv[13] = createObjectFromStreamID(&group->last_id); + + alsoPropagate(c->db->id,argv,14,PROPAGATE_AOF|PROPAGATE_REPL); + + decrRefCount(argv[3]); + decrRefCount(argv[7]); + decrRefCount(argv[9]); + decrRefCount(argv[13]); +} + +/* We need this when we want to propagate the new last-id of a consumer group + * that was consumed by XREADGROUP with the NOACK option: in that case we can't + * propagate the last ID just using the XCLAIM LASTID option, so we emit + * + * XGROUP SETID ENTRIESREAD + */ +void streamPropagateGroupID(client *c, robj *key, streamCG *group, robj *groupname) { + robj *argv[7]; + argv[0] = shared.xgroup; + argv[1] = shared.setid; + argv[2] = key; + argv[3] = groupname; + argv[4] = createObjectFromStreamID(&group->last_id); + argv[5] = shared.entriesread; + argv[6] = createStringObjectFromLongLong(group->entries_read); + + alsoPropagate(c->db->id,argv,7,PROPAGATE_AOF|PROPAGATE_REPL); + + decrRefCount(argv[4]); + decrRefCount(argv[6]); +} + +/* We need this when we want to propagate creation of consumer that was created + * by XREADGROUP with the NOACK option. In that case, the only way to create + * the consumer at the replica is by using XGROUP CREATECONSUMER (see issue #7140) + * + * XGROUP CREATECONSUMER + */ +void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername) { + robj *argv[5]; + argv[0] = shared.xgroup; + argv[1] = shared.createconsumer; + argv[2] = key; + argv[3] = groupname; + argv[4] = createObject(OBJ_STRING,sdsdup(consumername)); + + alsoPropagate(c->db->id,argv,5,PROPAGATE_AOF|PROPAGATE_REPL); + + decrRefCount(argv[4]); +} + +/* Send the stream items in the specified range to the client 'c'. The range + * the client will receive is between start and end inclusive, if 'count' is + * non zero, no more than 'count' elements are sent. + * + * The 'end' pointer can be NULL to mean that we want all the elements from + * 'start' till the end of the stream. If 'rev' is non zero, elements are + * produced in reversed order from end to start. + * + * The function returns the number of entries emitted. + * + * If group and consumer are not NULL, the function performs additional work: + * 1. It updates the last delivered ID in the group in case we are + * sending IDs greater than the current last ID. + * 2. If the requested IDs are already assigned to some other consumer, the + * function will not return it to the client. + * 3. An entry in the pending list will be created for every entry delivered + * for the first time to this consumer. + * 4. The group's read counter is incremented if it is already valid and there + * are no future tombstones, or is invalidated (set to 0) otherwise. If the + * counter is invalid to begin with, we try to obtain it for the last + * delivered ID. + * + * The behavior may be modified passing non-zero flags: + * + * STREAM_RWR_NOACK: Do not create PEL entries, that is, the point "3" above + * is not performed. + * STREAM_RWR_RAWENTRIES: Do not emit array boundaries, but just the entries, + * and return the number of entries emitted as usually. + * This is used when the function is just used in order + * to emit data and there is some higher level logic. + * + * The final argument 'spi' (stream propagation info pointer) is a structure + * filled with information needed to propagate the command execution to AOF + * and slaves, in the case a consumer group was passed: we need to generate + * XCLAIM commands to create the pending list into AOF/slaves in that case. + * + * If 'spi' is set to NULL no propagation will happen even if the group was + * given, but currently such a feature is never used by the code base that + * will always pass 'spi' and propagate when a group is passed. + * + * Note that this function is recursive in certain cases. When it's called + * with a non NULL group and consumer argument, it may call + * streamReplyWithRangeFromConsumerPEL() in order to get entries from the + * consumer pending entries list. However such a function will then call + * streamReplyWithRange() in order to emit single entries (found in the + * PEL by ID) to the client. This is the use case for the STREAM_RWR_RAWENTRIES + * flag. + */ +#define STREAM_RWR_NOACK (1<<0) /* Do not create entries in the PEL. */ +#define STREAM_RWR_RAWENTRIES (1<<1) /* Do not emit protocol for array + boundaries, just the entries. */ +#define STREAM_RWR_HISTORY (1<<2) /* Only serve consumer local PEL. */ +size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev, streamCG *group, streamConsumer *consumer, int flags, streamPropInfo *spi) { + void *arraylen_ptr = NULL; + size_t arraylen = 0; + streamIterator si; + int64_t numfields; + streamID id; + int propagate_last_id = 0; + int noack = flags & STREAM_RWR_NOACK; + + /* If the client is asking for some history, we serve it using a + * different function, so that we return entries *solely* from its + * own PEL. This ensures each consumer will always and only see + * the history of messages delivered to it and not yet confirmed + * as delivered. */ + if (group && (flags & STREAM_RWR_HISTORY)) { + return streamReplyWithRangeFromConsumerPEL(c,s,start,end,count, + consumer); + } + + if (!(flags & STREAM_RWR_RAWENTRIES)) + arraylen_ptr = addReplyDeferredLen(c); + streamIteratorStart(&si,s,start,end,rev); + while(streamIteratorGetID(&si,&id,&numfields)) { + /* Update the group last_id if needed. */ + if (group && streamCompareID(&id,&group->last_id) > 0) { + if (group->entries_read != SCG_INVALID_ENTRIES_READ && !streamRangeHasTombstones(s,&id,NULL)) { + /* A valid counter and no future tombstones mean we can + * increment the read counter to keep tracking the group's + * progress. */ + group->entries_read++; + } else if (s->entries_added) { + /* The group's counter may be invalid, so we try to obtain it. */ + group->entries_read = streamEstimateDistanceFromFirstEverEntry(s,&id); + } + group->last_id = id; + /* Group last ID should be propagated only if NOACK was + * specified, otherwise the last id will be included + * in the propagation of XCLAIM itself. */ + if (noack) propagate_last_id = 1; + } + + /* Emit a two elements array for each item. The first is + * the ID, the second is an array of field-value pairs. */ + addReplyArrayLen(c,2); + addReplyStreamID(c,&id); + + addReplyArrayLen(c,numfields*2); + + /* Emit the field-value pairs. */ + while(numfields--) { + unsigned char *key, *value; + int64_t key_len, value_len; + streamIteratorGetField(&si,&key,&value,&key_len,&value_len); + addReplyBulkCBuffer(c,key,key_len); + addReplyBulkCBuffer(c,value,value_len); + } + + /* If a group is passed, we need to create an entry in the + * PEL (pending entries list) of this group *and* this consumer. + * + * Note that we cannot be sure about the fact the message is not + * already owned by another consumer, because the admin is able + * to change the consumer group last delivered ID using the + * XGROUP SETID command. So if we find that there is already + * a NACK for the entry, we need to associate it to the new + * consumer. */ + if (group && !noack) { + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,&id); + + /* Try to add a new NACK. Most of the time this will work and + * will not require extra lookups. We'll fix the problem later + * if we find that there is already a entry for this ID. */ + streamNACK *nack = streamCreateNACK(consumer); + int group_inserted = + raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL); + int consumer_inserted = + raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL); + + /* Now we can check if the entry was already busy, and + * in that case reassign the entry to the new consumer, + * or update it if the consumer is the same as before. */ + if (group_inserted == 0) { + streamFreeNACK(nack); + nack = raxFind(group->pel,buf,sizeof(buf)); + serverAssert(nack != raxNotFound); + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + /* Update the consumer and NACK metadata. */ + nack->consumer = consumer; + nack->delivery_time = commandTimeSnapshot(); + nack->delivery_count = 1; + /* Add the entry in the new consumer local PEL. */ + raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL); + } else if (group_inserted == 1 && consumer_inserted == 0) { + serverPanic("NACK half-created. Should not be possible."); + } + + consumer->active_time = commandTimeSnapshot(); + + /* Propagate as XCLAIM. */ + if (spi) { + robj *idarg = createObjectFromStreamID(&id); + streamPropagateXCLAIM(c,spi->keyname,group,spi->groupname,idarg,nack); + decrRefCount(idarg); + } + } + + arraylen++; + if (count && count == arraylen) break; + } + + if (spi && propagate_last_id) + streamPropagateGroupID(c,spi->keyname,group,spi->groupname); + + streamIteratorStop(&si); + if (arraylen_ptr) setDeferredArrayLen(c,arraylen_ptr,arraylen); + return arraylen; +} + +/* This is a helper function for streamReplyWithRange() when called with + * group and consumer arguments, but with a range that is referring to already + * delivered messages. In this case we just emit messages that are already + * in the history of the consumer, fetching the IDs from its PEL. + * + * Note that this function does not have a 'rev' argument because it's not + * possible to iterate in reverse using a group. Basically this function + * is only called as a result of the XREADGROUP command. + * + * This function is more expensive because it needs to inspect the PEL and then + * seek into the radix tree of the messages in order to emit the full message + * to the client. However clients only reach this code path when they are + * fetching the history of already retrieved messages, which is rare. */ +size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer) { + raxIterator ri; + unsigned char startkey[sizeof(streamID)]; + unsigned char endkey[sizeof(streamID)]; + streamEncodeID(startkey,start); + if (end) streamEncodeID(endkey,end); + + size_t arraylen = 0; + void *arraylen_ptr = addReplyDeferredLen(c); + raxStart(&ri,consumer->pel); + raxSeek(&ri,">=",startkey,sizeof(startkey)); + while(raxNext(&ri) && (!count || arraylen < count)) { + if (end && memcmp(ri.key,end,ri.key_len) > 0) break; + streamID thisid; + streamDecodeID(ri.key,&thisid); + if (streamReplyWithRange(c,s,&thisid,&thisid,1,0,NULL,NULL, + STREAM_RWR_RAWENTRIES,NULL) == 0) + { + /* Note that we may have a not acknowledged entry in the PEL + * about a message that's no longer here because was removed + * by the user by other means. In that case we signal it emitting + * the ID but then a NULL entry for the fields. */ + addReplyArrayLen(c,2); + addReplyStreamID(c,&thisid); + addReplyNullArray(c); + } else { + streamNACK *nack = ri.data; + nack->delivery_time = commandTimeSnapshot(); + nack->delivery_count++; + } + arraylen++; + } + raxStop(&ri); + setDeferredArrayLen(c,arraylen_ptr,arraylen); + return arraylen; +} + +/* ----------------------------------------------------------------------- + * Stream commands implementation + * ----------------------------------------------------------------------- */ + +/* Look the stream at 'key' and return the corresponding stream object. + * The function creates a key setting it to an empty stream if needed. */ +robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) { + robj *o = lookupKeyWrite(c->db,key); + if (checkType(c,o,OBJ_STREAM)) return NULL; + if (o == NULL) { + if (no_create) { + addReplyNull(c); + return NULL; + } + o = createStreamObject(); + dbAdd(c->db,key,o); + } + return o; +} + +/* Parse a stream ID in the format given by clients to Redis, that is + * -, and converts it into a streamID structure. If + * the specified ID is invalid C_ERR is returned and an error is reported + * to the client, otherwise C_OK is returned. The ID may be in incomplete + * form, just stating the milliseconds time part of the stream. In such a case + * the missing part is set according to the value of 'missing_seq' parameter. + * + * The IDs "-" and "+" specify respectively the minimum and maximum IDs + * that can be represented. If 'strict' is set to 1, "-" and "+" will be + * treated as an invalid ID. + * + * The ID form -* specifies a millisconds-only ID, leaving the sequence part + * to be autogenerated. When a non-NULL 'seq_given' argument is provided, this + * form is accepted and the argument is set to 0 unless the sequence part is + * specified. + * + * If 'c' is set to NULL, no reply is sent to the client. */ +int streamGenericParseIDOrReply(client *c, const robj *o, streamID *id, uint64_t missing_seq, int strict, int *seq_given) { + char buf[128]; + if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid; + memcpy(buf,o->ptr,sdslen(o->ptr)+1); + + if (strict && (buf[0] == '-' || buf[0] == '+') && buf[1] == '\0') + goto invalid; + + if (seq_given != NULL) { + *seq_given = 1; + } + + /* Handle the "-" and "+" special cases. */ + if (buf[0] == '-' && buf[1] == '\0') { + id->ms = 0; + id->seq = 0; + return C_OK; + } else if (buf[0] == '+' && buf[1] == '\0') { + id->ms = UINT64_MAX; + id->seq = UINT64_MAX; + return C_OK; + } + + /* Parse - form. */ + unsigned long long ms, seq; + char *dot = strchr(buf,'-'); + if (dot) *dot = '\0'; + if (string2ull(buf,&ms) == 0) goto invalid; + if (dot) { + size_t seqlen = strlen(dot+1); + if (seq_given != NULL && seqlen == 1 && *(dot + 1) == '*') { + /* Handle the -* form. */ + seq = 0; + *seq_given = 0; + } else if (string2ull(dot+1,&seq) == 0) { + goto invalid; + } + } else { + seq = missing_seq; + } + id->ms = ms; + id->seq = seq; + return C_OK; + +invalid: + if (c) addReplyError(c,"Invalid stream ID specified as stream " + "command argument"); + return C_ERR; +} + +/* Wrapper for streamGenericParseIDOrReply() used by module API. */ +int streamParseID(const robj *o, streamID *id) { + return streamGenericParseIDOrReply(NULL,o,id,0,0,NULL); +} + +/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to + * 0, to be used when - and + are acceptable IDs. */ +int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) { + return streamGenericParseIDOrReply(c,o,id,missing_seq,0,NULL); +} + +/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to + * 1, to be used when we want to return an error if the special IDs + or - + * are provided. */ +int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given) { + return streamGenericParseIDOrReply(c,o,id,missing_seq,1,seq_given); +} + +/* Helper for parsing a stream ID that is a range query interval. When the + * exclude argument is NULL, streamParseIDOrReply() is called and the interval + * is treated as close (inclusive). Otherwise, the exclude argument is set if + * the interval is open (the "(" prefix) and streamParseStrictIDOrReply() is + * called in that case. + */ +int streamParseIntervalIDOrReply(client *c, robj *o, streamID *id, int *exclude, uint64_t missing_seq) { + char *p = o->ptr; + size_t len = sdslen(p); + int invalid = 0; + + if (exclude != NULL) *exclude = (len > 1 && p[0] == '('); + if (exclude != NULL && *exclude) { + robj *t = createStringObject(p+1,len-1); + invalid = (streamParseStrictIDOrReply(c,t,id,missing_seq,NULL) == C_ERR); + decrRefCount(t); + } else + invalid = (streamParseIDOrReply(c,o,id,missing_seq) == C_ERR); + if (invalid) + return C_ERR; + return C_OK; +} + +void streamRewriteApproxSpecifier(client *c, int idx) { + rewriteClientCommandArgument(c,idx,shared.special_equals); +} + +/* We propagate MAXLEN/MINID ~ as MAXLEN/MINID = + * otherwise trimming is no longer deterministic on replicas / AOF. */ +void streamRewriteTrimArgument(client *c, stream *s, int trim_strategy, int idx) { + robj *arg; + if (trim_strategy == TRIM_STRATEGY_MAXLEN) { + arg = createStringObjectFromLongLong(s->length); + } else { + streamID first_id; + streamGetEdgeID(s,1,0,&first_id); + arg = createObjectFromStreamID(&first_id); + } + + rewriteClientCommandArgument(c,idx,arg); + decrRefCount(arg); +} + +/* XADD key [(MAXLEN [~|=] | MINID [~|=] ) [LIMIT ]] [NOMKSTREAM] [field value] [field value] ... */ +void xaddCommand(client *c) { + /* Parse options. */ + streamAddTrimArgs parsed_args; + int idpos = streamParseAddOrTrimArgsOrReply(c, &parsed_args, 1); + if (idpos < 0) + return; /* streamParseAddOrTrimArgsOrReply already replied. */ + int field_pos = idpos+1; /* The ID is always one argument before the first field */ + + /* Check arity. */ + if ((c->argc - field_pos) < 2 || ((c->argc-field_pos) % 2) == 1) { + addReplyErrorArity(c); + return; + } + + /* Return ASAP if minimal ID (0-0) was given so we avoid possibly creating + * a new stream and have streamAppendItem fail, leaving an empty key in the + * database. */ + if (parsed_args.id_given && parsed_args.seq_given && + parsed_args.id.ms == 0 && parsed_args.id.seq == 0) + { + addReplyError(c,"The ID specified in XADD must be greater than 0-0"); + return; + } + + /* Lookup the stream at key. */ + robj *o; + stream *s; + if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1],parsed_args.no_mkstream)) == NULL) return; + s = o->ptr; + + /* Return ASAP if the stream has reached the last possible ID */ + if (s->last_id.ms == UINT64_MAX && s->last_id.seq == UINT64_MAX) { + addReplyError(c,"The stream has exhausted the last possible ID, " + "unable to add more items"); + return; + } + + /* Append using the low level function and return the ID. */ + errno = 0; + streamID id; + if (streamAppendItem(s,c->argv+field_pos,(c->argc-field_pos)/2, + &id,parsed_args.id_given ? &parsed_args.id : NULL,parsed_args.seq_given) == C_ERR) + { + serverAssert(errno != 0); + if (errno == EDOM) + addReplyError(c,"The ID specified in XADD is equal or smaller than " + "the target stream top item"); + else + addReplyError(c,"Elements are too large to be stored"); + return; + } + sds replyid = createStreamIDString(&id); + addReplyBulkCBuffer(c, replyid, sdslen(replyid)); + + signalModifiedKey(c,c->db,c->argv[1]); + notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id); + server.dirty++; + + /* Trim if needed. */ + if (parsed_args.trim_strategy != TRIM_STRATEGY_NONE) { + if (streamTrim(s, &parsed_args)) { + notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id); + } + if (parsed_args.approx_trim) { + /* In case our trimming was limited (by LIMIT or by ~) we must + * re-write the relevant trim argument to make sure there will be + * no inconsistencies in AOF loading or in the replica. + * It's enough to check only args->approx because there is no + * way LIMIT is given without the ~ option. */ + streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1); + streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx); + } + } + + /* Let's rewrite the ID argument with the one actually generated for + * AOF/replication propagation. */ + if (!parsed_args.id_given || !parsed_args.seq_given) { + robj *idarg = createObject(OBJ_STRING, replyid); + rewriteClientCommandArgument(c, idpos, idarg); + decrRefCount(idarg); + } else { + sdsfree(replyid); + } + + /* We need to signal to blocked clients that there is new data on this + * stream. */ + signalKeyAsReady(c->db, c->argv[1], OBJ_STREAM); +} + +/* XRANGE/XREVRANGE actual implementation. + * The 'start' and 'end' IDs are parsed as follows: + * Incomplete 'start' has its sequence set to 0, and 'end' to UINT64_MAX. + * "-" and "+"" mean the minimal and maximal ID values, respectively. + * The "(" prefix means an open (exclusive) range, so XRANGE stream (1-0 (2-0 + * will match anything from 1-1 and 1-UINT64_MAX. + */ +void xrangeGenericCommand(client *c, int rev) { + robj *o; + stream *s; + streamID startid, endid; + long long count = -1; + robj *startarg = rev ? c->argv[3] : c->argv[2]; + robj *endarg = rev ? c->argv[2] : c->argv[3]; + int startex = 0, endex = 0; + + /* Parse start and end IDs. */ + if (streamParseIntervalIDOrReply(c,startarg,&startid,&startex,0) != C_OK) + return; + if (startex && streamIncrID(&startid) != C_OK) { + addReplyError(c,"invalid start ID for the interval"); + return; + } + if (streamParseIntervalIDOrReply(c,endarg,&endid,&endex,UINT64_MAX) != C_OK) + return; + if (endex && streamDecrID(&endid) != C_OK) { + addReplyError(c,"invalid end ID for the interval"); + return; + } + + /* Parse the COUNT option if any. */ + if (c->argc > 4) { + for (int j = 4; j < c->argc; j++) { + int additional = c->argc-j-1; + if (strcasecmp(c->argv[j]->ptr,"COUNT") == 0 && additional >= 1) { + if (getLongLongFromObjectOrReply(c,c->argv[j+1],&count,NULL) + != C_OK) return; + if (count < 0) count = 0; + j++; /* Consume additional arg. */ + } else { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + } + } + + /* Return the specified range to the user. */ + if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyarray)) == NULL || + checkType(c,o,OBJ_STREAM)) return; + + s = o->ptr; + + if (count == 0) { + addReplyNullArray(c); + } else { + if (count == -1) count = 0; + streamReplyWithRange(c,s,&startid,&endid,count,rev,NULL,NULL,0,NULL); + } +} + +/* XRANGE key start end [COUNT ] */ +void xrangeCommand(client *c) { + xrangeGenericCommand(c,0); +} + +/* XREVRANGE key end start [COUNT ] */ +void xrevrangeCommand(client *c) { + xrangeGenericCommand(c,1); +} + +/* XLEN key*/ +void xlenCommand(client *c) { + robj *o; + if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL + || checkType(c,o,OBJ_STREAM)) return; + stream *s = o->ptr; + addReplyLongLong(c,s->length); +} + +/* XREAD [BLOCK ] [COUNT ] STREAMS key_1 key_2 ... key_N + * ID_1 ID_2 ... ID_N + * + * This function also implements the XREADGROUP command, which is like XREAD + * but accepting the [GROUP group-name consumer-name] additional option. + * This is useful because while XREAD is a read command and can be called + * on slaves, XREADGROUP is not. */ +#define XREAD_BLOCKED_DEFAULT_COUNT 1000 +void xreadCommand(client *c) { + long long timeout = -1; /* -1 means, no BLOCK argument given. */ + long long count = 0; + int streams_count = 0; + int streams_arg = 0; + int noack = 0; /* True if NOACK option was specified. */ + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + streamCG **groups = NULL; + int xreadgroup = sdslen(c->argv[0]->ptr) == 10; /* XREAD or XREADGROUP? */ + robj *groupname = NULL; + robj *consumername = NULL; + + /* Parse arguments. */ + for (int i = 1; i < c->argc; i++) { + int moreargs = c->argc-i-1; + char *o = c->argv[i]->ptr; + if (!strcasecmp(o,"BLOCK") && moreargs) { + if (c->flags & CLIENT_SCRIPT) { + /* + * Although the CLIENT_DENY_BLOCKING flag should protect from blocking the client + * on Lua/MULTI/RM_Call we want special treatment for Lua to keep backward compatibility. + * There is no sense to use BLOCK option within Lua. */ + addReplyErrorFormat(c, "%s command is not allowed with BLOCK option from scripts", (char *)c->argv[0]->ptr); + return; + } + i++; + if (getTimeoutFromObjectOrReply(c,c->argv[i],&timeout, + UNIT_MILLISECONDS) != C_OK) return; + } else if (!strcasecmp(o,"COUNT") && moreargs) { + i++; + if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK) + return; + if (count < 0) count = 0; + } else if (!strcasecmp(o,"STREAMS") && moreargs) { + streams_arg = i+1; + streams_count = (c->argc-streams_arg); + if ((streams_count % 2) != 0) { + char symbol = xreadgroup ? '>' : '$'; + addReplyErrorFormat(c,"Unbalanced '%s' list of streams: " + "for each stream key an ID or '%c' must be " + "specified.", c->cmd->fullname,symbol); + return; + } + streams_count /= 2; /* We have two arguments for each stream. */ + break; + } else if (!strcasecmp(o,"GROUP") && moreargs >= 2) { + if (!xreadgroup) { + addReplyError(c,"The GROUP option is only supported by " + "XREADGROUP. You called XREAD instead."); + return; + } + groupname = c->argv[i+1]; + consumername = c->argv[i+2]; + i += 2; + } else if (!strcasecmp(o,"NOACK")) { + if (!xreadgroup) { + addReplyError(c,"The NOACK option is only supported by " + "XREADGROUP. You called XREAD instead."); + return; + } + noack = 1; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + } + + /* STREAMS option is mandatory. */ + if (streams_arg == 0) { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + + /* If the user specified XREADGROUP then it must also + * provide the GROUP option. */ + if (xreadgroup && groupname == NULL) { + addReplyError(c,"Missing GROUP option for XREADGROUP"); + return; + } + + /* Parse the IDs and resolve the group name. */ + if (streams_count > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*streams_count); + if (groupname) groups = zmalloc(sizeof(streamCG*)*streams_count); + + for (int i = streams_arg + streams_count; i < c->argc; i++) { + /* Specifying "$" as last-known-id means that the client wants to be + * served with just the messages that will arrive into the stream + * starting from now. */ + int id_idx = i - streams_arg - streams_count; + robj *key = c->argv[i-streams_count]; + robj *o = lookupKeyRead(c->db,key); + if (checkType(c,o,OBJ_STREAM)) goto cleanup; + streamCG *group = NULL; + + /* If a group was specified, than we need to be sure that the + * key and group actually exist. */ + if (groupname) { + if (o == NULL || + (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL) + { + addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer " + "group '%s' in XREADGROUP with GROUP " + "option", + (char*)key->ptr,(char*)groupname->ptr); + goto cleanup; + } + groups[id_idx] = group; + } + + if (strcmp(c->argv[i]->ptr,"$") == 0) { + if (xreadgroup) { + addReplyError(c,"The $ ID is meaningless in the context of " + "XREADGROUP: you want to read the history of " + "this consumer by specifying a proper ID, or " + "use the > ID to get new messages. The $ ID would " + "just return an empty result set."); + goto cleanup; + } + if (o) { + stream *s = o->ptr; + ids[id_idx] = s->last_id; + } else { + ids[id_idx].ms = 0; + ids[id_idx].seq = 0; + } + continue; + } else if (strcmp(c->argv[i]->ptr,">") == 0) { + if (!xreadgroup) { + addReplyError(c,"The > ID can be specified only when calling " + "XREADGROUP using the GROUP " + " option."); + goto cleanup; + } + /* We use just the maximum ID to signal this is a ">" ID, anyway + * the code handling the blocking clients will have to update the + * ID later in order to match the changing consumer group last ID. */ + ids[id_idx].ms = UINT64_MAX; + ids[id_idx].seq = UINT64_MAX; + continue; + } + if (streamParseStrictIDOrReply(c,c->argv[i],ids+id_idx,0,NULL) != C_OK) + goto cleanup; + } + + /* Try to serve the client synchronously. */ + size_t arraylen = 0; + void *arraylen_ptr = NULL; + for (int i = 0; i < streams_count; i++) { + robj *o = lookupKeyRead(c->db,c->argv[streams_arg+i]); + if (o == NULL) continue; + stream *s = o->ptr; + streamID *gt = ids+i; /* ID must be greater than this. */ + int serve_synchronously = 0; + int serve_history = 0; /* True for XREADGROUP with ID != ">". */ + streamConsumer *consumer = NULL; /* Unused if XREAD */ + streamPropInfo spi = {c->argv[streams_arg+i],groupname}; /* Unused if XREAD */ + + /* Check if there are the conditions to serve the client + * synchronously. */ + if (groups) { + /* If the consumer is blocked on a group, we always serve it + * synchronously (serving its local history) if the ID specified + * was not the special ">" ID. */ + if (gt->ms != UINT64_MAX || + gt->seq != UINT64_MAX) + { + serve_synchronously = 1; + serve_history = 1; + } else if (s->length) { + /* We also want to serve a consumer in a consumer group + * synchronously in case the group top item delivered is smaller + * than what the stream has inside. */ + streamID maxid, *last = &groups[i]->last_id; + streamLastValidID(s, &maxid); + if (streamCompareID(&maxid, last) > 0) { + serve_synchronously = 1; + *gt = *last; + } + } + consumer = streamLookupConsumer(groups[i],consumername->ptr); + if (consumer == NULL) { + consumer = streamCreateConsumer(groups[i],consumername->ptr, + c->argv[streams_arg+i], + c->db->id,SCC_DEFAULT); + if (noack) + streamPropagateConsumerCreation(c,spi.keyname, + spi.groupname, + consumer->name); + } + consumer->seen_time = commandTimeSnapshot(); + } else if (s->length) { + /* For consumers without a group, we serve synchronously if we can + * actually provide at least one item from the stream. */ + streamID maxid; + streamLastValidID(s, &maxid); + if (streamCompareID(&maxid, gt) > 0) { + serve_synchronously = 1; + } + } + + if (serve_synchronously) { + arraylen++; + if (arraylen == 1) arraylen_ptr = addReplyDeferredLen(c); + /* streamReplyWithRange() handles the 'start' ID as inclusive, + * so start from the next ID, since we want only messages with + * IDs greater than start. */ + streamID start = *gt; + streamIncrID(&start); + + /* Emit the two elements sub-array consisting of the name + * of the stream and the data we extracted from it. */ + if (c->resp == 2) addReplyArrayLen(c,2); + addReplyBulk(c,c->argv[streams_arg+i]); + + int flags = 0; + if (noack) flags |= STREAM_RWR_NOACK; + if (serve_history) flags |= STREAM_RWR_HISTORY; + streamReplyWithRange(c,s,&start,NULL,count,0, + groups ? groups[i] : NULL, + consumer, flags, &spi); + if (groups) server.dirty++; + } + } + + /* We replied synchronously! Set the top array len and return to caller. */ + if (arraylen) { + if (c->resp == 2) + setDeferredArrayLen(c,arraylen_ptr,arraylen); + else + setDeferredMapLen(c,arraylen_ptr,arraylen); + goto cleanup; + } + + /* Block if needed. */ + if (timeout != -1) { + /* If we are not allowed to block the client, the only thing + * we can do is treating it as a timeout (even with timeout 0). */ + if (c->flags & CLIENT_DENY_BLOCKING) { + addReplyNullArray(c); + goto cleanup; + } + /* We change the '$' to the current last ID for this stream. this is + * Since later on when we unblock on arriving data - we would like to + * re-process the command and in case '$' stays we will spin-block forever. + */ + for (int id_idx = 0; id_idx < streams_count; id_idx++) { + int arg_idx = id_idx + streams_arg + streams_count; + if (strcmp(c->argv[arg_idx]->ptr,"$") == 0) { + robj *argv_streamid = createObjectFromStreamID(&ids[id_idx]); + rewriteClientCommandArgument(c, arg_idx, argv_streamid); + decrRefCount(argv_streamid); + } + } + blockForKeys(c, BLOCKED_STREAM, c->argv+streams_arg, streams_count, timeout, xreadgroup); + goto cleanup; + } + + /* No BLOCK option, nor any stream we can serve. Reply as with a + * timeout happened. */ + addReplyNullArray(c); + /* Continue to cleanup... */ + +cleanup: /* Cleanup. */ + + /* The command is propagated (in the READGROUP form) as a side effect + * of calling lower level APIs. So stop any implicit propagation. */ + preventCommandPropagation(c); + if (ids != static_ids) zfree(ids); + zfree(groups); +} + +/* ----------------------------------------------------------------------- + * Low level implementation of consumer groups + * ----------------------------------------------------------------------- */ + +/* Create a NACK entry setting the delivery count to 1 and the delivery + * time to the current time. The NACK consumer will be set to the one + * specified as argument of the function. */ +streamNACK *streamCreateNACK(streamConsumer *consumer) { + streamNACK *nack = zmalloc(sizeof(*nack)); + nack->delivery_time = commandTimeSnapshot(); + nack->delivery_count = 1; + nack->consumer = consumer; + return nack; +} + +/* Free a NACK entry. */ +void streamFreeNACK(streamNACK *na) { + zfree(na); +} + +/* Free a consumer and associated data structures. Note that this function + * will not reassign the pending messages associated with this consumer + * nor will delete them from the stream, so when this function is called + * to delete a consumer, and not when the whole stream is destroyed, the caller + * should do some work before. */ +void streamFreeConsumer(streamConsumer *sc) { + raxFree(sc->pel); /* No value free callback: the PEL entries are shared + between the consumer and the main stream PEL. */ + sdsfree(sc->name); + zfree(sc); +} + +/* Create a new consumer group in the context of the stream 's', having the + * specified name, last server ID and reads counter. If a consumer group with + * the same name already exists NULL is returned, otherwise the pointer to the + * consumer group is returned. */ +streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, long long entries_read) { + if (s->cgroups == NULL) s->cgroups = raxNew(); + if (raxFind(s->cgroups,(unsigned char*)name,namelen) != raxNotFound) + return NULL; + + streamCG *cg = zmalloc(sizeof(*cg)); + cg->pel = raxNew(); + cg->consumers = raxNew(); + cg->last_id = *id; + cg->entries_read = entries_read; + raxInsert(s->cgroups,(unsigned char*)name,namelen,cg,NULL); + return cg; +} + +/* Free a consumer group and all its associated data. */ +void streamFreeCG(streamCG *cg) { + raxFreeWithCallback(cg->pel,(void(*)(void*))streamFreeNACK); + raxFreeWithCallback(cg->consumers,(void(*)(void*))streamFreeConsumer); + zfree(cg); +} + +/* Lookup the consumer group in the specified stream and returns its + * pointer, otherwise if there is no such group, NULL is returned. */ +streamCG *streamLookupCG(stream *s, sds groupname) { + if (s->cgroups == NULL) return NULL; + streamCG *cg = raxFind(s->cgroups,(unsigned char*)groupname, + sdslen(groupname)); + return (cg == raxNotFound) ? NULL : cg; +} + +/* Create a consumer with the specified name in the group 'cg' and return. + * If the consumer exists, return NULL. As a side effect, when the consumer + * is successfully created, the key space will be notified and dirty++ unless + * the SCC_NO_NOTIFY or SCC_NO_DIRTIFY flags is specified. */ +streamConsumer *streamCreateConsumer(streamCG *cg, sds name, robj *key, int dbid, int flags) { + if (cg == NULL) return NULL; + int notify = !(flags & SCC_NO_NOTIFY); + int dirty = !(flags & SCC_NO_DIRTIFY); + streamConsumer *consumer = zmalloc(sizeof(*consumer)); + int success = raxTryInsert(cg->consumers,(unsigned char*)name, + sdslen(name),consumer,NULL); + if (!success) { + zfree(consumer); + return NULL; + } + consumer->name = sdsdup(name); + consumer->pel = raxNew(); + consumer->active_time = -1; + consumer->seen_time = commandTimeSnapshot(); + if (dirty) server.dirty++; + if (notify) notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-createconsumer",key,dbid); + return consumer; +} + +/* Lookup the consumer with the specified name in the group 'cg'. */ +streamConsumer *streamLookupConsumer(streamCG *cg, sds name) { + if (cg == NULL) return NULL; + streamConsumer *consumer = raxFind(cg->consumers,(unsigned char*)name, + sdslen(name)); + if (consumer == raxNotFound) return NULL; + return consumer; +} + +/* Delete the consumer specified in the consumer group 'cg'. */ +void streamDelConsumer(streamCG *cg, streamConsumer *consumer) { + /* Iterate all the consumer pending messages, deleting every corresponding + * entry from the global entry. */ + raxIterator ri; + raxStart(&ri,consumer->pel); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + streamNACK *nack = ri.data; + raxRemove(cg->pel,ri.key,ri.key_len,NULL); + streamFreeNACK(nack); + } + raxStop(&ri); + + /* Deallocate the consumer. */ + raxRemove(cg->consumers,(unsigned char*)consumer->name, + sdslen(consumer->name),NULL); + streamFreeConsumer(consumer); +} + +/* ----------------------------------------------------------------------- + * Consumer groups commands + * ----------------------------------------------------------------------- */ + +/* XGROUP CREATE [MKSTREAM] [ENTRIESREAD entries_read] + * XGROUP SETID [ENTRIESREAD entries_read] + * XGROUP DESTROY + * XGROUP CREATECONSUMER + * XGROUP DELCONSUMER */ +void xgroupCommand(client *c) { + stream *s = NULL; + sds grpname = NULL; + streamCG *cg = NULL; + char *opt = c->argv[1]->ptr; /* Subcommand name. */ + int mkstream = 0; + long long entries_read = SCG_INVALID_ENTRIES_READ; + robj *o; + + /* Everything but the "HELP" option requires a key and group name. */ + if (c->argc >= 4) { + /* Parse optional arguments for CREATE and SETID */ + int i = 5; + int create_subcmd = !strcasecmp(opt,"CREATE"); + int setid_subcmd = !strcasecmp(opt,"SETID"); + while (i < c->argc) { + if (create_subcmd && !strcasecmp(c->argv[i]->ptr,"MKSTREAM")) { + mkstream = 1; + i++; + } else if ((create_subcmd || setid_subcmd) && !strcasecmp(c->argv[i]->ptr,"ENTRIESREAD") && i + 1 < c->argc) { + if (getLongLongFromObjectOrReply(c,c->argv[i+1],&entries_read,NULL) != C_OK) + return; + if (entries_read < 0 && entries_read != SCG_INVALID_ENTRIES_READ) { + addReplyError(c,"value for ENTRIESREAD must be positive or -1"); + return; + } + i += 2; + } else { + addReplySubcommandSyntaxError(c); + return; + } + } + + o = lookupKeyWrite(c->db,c->argv[2]); + if (o) { + if (checkType(c,o,OBJ_STREAM)) return; + s = o->ptr; + } + grpname = c->argv[3]->ptr; + } + + /* Check for missing key/group. */ + if (c->argc >= 4 && !mkstream) { + /* At this point key must exist, or there is an error. */ + if (s == NULL) { + addReplyError(c, + "The XGROUP subcommand requires the key to exist. " + "Note that for CREATE you may want to use the MKSTREAM " + "option to create an empty stream automatically."); + return; + } + + /* Certain subcommands require the group to exist. */ + if ((cg = streamLookupCG(s,grpname)) == NULL && + (!strcasecmp(opt,"SETID") || + !strcasecmp(opt,"CREATECONSUMER") || + !strcasecmp(opt,"DELCONSUMER"))) + { + addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' " + "for key name '%s'", + (char*)grpname, (char*)c->argv[2]->ptr); + return; + } + } + + /* Dispatch the different subcommands. */ + if (c->argc == 2 && !strcasecmp(opt,"HELP")) { + const char *help[] = { +"CREATE [option]", +" Create a new consumer group. Options are:", +" * MKSTREAM", +" Create the empty stream if it does not exist.", +" * ENTRIESREAD entries_read", +" Set the group's entries_read counter (internal use).", +"CREATECONSUMER ", +" Create a new consumer in the specified group.", +"DELCONSUMER ", +" Remove the specified consumer.", +"DESTROY ", +" Remove the specified group.", +"SETID [ENTRIESREAD entries_read]", +" Set the current group ID and entries_read counter.", +NULL + }; + addReplyHelp(c, help); + } else if (!strcasecmp(opt,"CREATE") && (c->argc >= 5 && c->argc <= 8)) { + streamID id; + if (!strcmp(c->argv[4]->ptr,"$")) { + if (s) { + id = s->last_id; + } else { + id.ms = 0; + id.seq = 0; + } + } else if (streamParseStrictIDOrReply(c,c->argv[4],&id,0,NULL) != C_OK) { + return; + } + + /* Handle the MKSTREAM option now that the command can no longer fail. */ + if (s == NULL) { + serverAssert(mkstream); + o = createStreamObject(); + dbAdd(c->db,c->argv[2],o); + s = o->ptr; + signalModifiedKey(c,c->db,c->argv[2]); + } + + streamCG *cg = streamCreateCG(s,grpname,sdslen(grpname),&id,entries_read); + if (cg) { + addReply(c,shared.ok); + server.dirty++; + notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-create", + c->argv[2],c->db->id); + } else { + addReplyError(c,"-BUSYGROUP Consumer Group name already exists"); + } + } else if (!strcasecmp(opt,"SETID") && (c->argc == 5 || c->argc == 7)) { + streamID id; + if (!strcmp(c->argv[4]->ptr,"$")) { + id = s->last_id; + } else if (streamParseIDOrReply(c,c->argv[4],&id,0) != C_OK) { + return; + } + cg->last_id = id; + cg->entries_read = entries_read; + addReply(c,shared.ok); + server.dirty++; + notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-setid",c->argv[2],c->db->id); + } else if (!strcasecmp(opt,"DESTROY") && c->argc == 4) { + if (cg) { + raxRemove(s->cgroups,(unsigned char*)grpname,sdslen(grpname),NULL); + streamFreeCG(cg); + addReply(c,shared.cone); + server.dirty++; + notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-destroy", + c->argv[2],c->db->id); + /* We want to unblock any XREADGROUP consumers with -NOGROUP. */ + signalKeyAsReady(c->db,c->argv[2],OBJ_STREAM); + } else { + addReply(c,shared.czero); + } + } else if (!strcasecmp(opt,"CREATECONSUMER") && c->argc == 5) { + streamConsumer *created = streamCreateConsumer(cg,c->argv[4]->ptr,c->argv[2], + c->db->id,SCC_DEFAULT); + addReplyLongLong(c,created ? 1 : 0); + } else if (!strcasecmp(opt,"DELCONSUMER") && c->argc == 5) { + long long pending = 0; + streamConsumer *consumer = streamLookupConsumer(cg,c->argv[4]->ptr); + if (consumer) { + /* Delete the consumer and returns the number of pending messages + * that were yet associated with such a consumer. */ + pending = raxSize(consumer->pel); + streamDelConsumer(cg,consumer); + server.dirty++; + notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-delconsumer", + c->argv[2],c->db->id); + } + addReplyLongLong(c,pending); + } else { + addReplySubcommandSyntaxError(c); + } +} + +/* XSETID [ENTRIESADDED entries_added] [MAXDELETEDID max_deleted_entry_id] + * + * Set the internal "last ID", "added entries" and "maximal deleted entry ID" + * of a stream. */ +void xsetidCommand(client *c) { + streamID id, max_xdel_id = {0, 0}; + long long entries_added = -1; + + if (streamParseStrictIDOrReply(c,c->argv[2],&id,0,NULL) != C_OK) + return; + + int i = 3; + while (i < c->argc) { + int moreargs = (c->argc-1) - i; /* Number of additional arguments. */ + char *opt = c->argv[i]->ptr; + if (!strcasecmp(opt,"ENTRIESADDED") && moreargs) { + if (getLongLongFromObjectOrReply(c,c->argv[i+1],&entries_added,NULL) != C_OK) { + return; + } else if (entries_added < 0) { + addReplyError(c,"entries_added must be positive"); + return; + } + i += 2; + } else if (!strcasecmp(opt,"MAXDELETEDID") && moreargs) { + if (streamParseStrictIDOrReply(c,c->argv[i+1],&max_xdel_id,0,NULL) != C_OK) { + return; + } else if (streamCompareID(&id,&max_xdel_id) < 0) { + addReplyError(c,"The ID specified in XSETID is smaller than the provided max_deleted_entry_id"); + return; + } + i += 2; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + } + + robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr); + if (o == NULL || checkType(c,o,OBJ_STREAM)) return; + stream *s = o->ptr; + + if (streamCompareID(&id,&s->max_deleted_entry_id) < 0) { + addReplyError(c,"The ID specified in XSETID is smaller than current max_deleted_entry_id"); + return; + } + + /* If the stream has at least one item, we want to check that the user + * is setting a last ID that is equal or greater than the current top + * item, otherwise the fundamental ID monotonicity assumption is violated. */ + if (s->length > 0) { + streamID maxid; + streamLastValidID(s,&maxid); + + if (streamCompareID(&id,&maxid) < 0) { + addReplyError(c,"The ID specified in XSETID is smaller than the target stream top item"); + return; + } + + /* If an entries_added was provided, it can't be lower than the length. */ + if (entries_added != -1 && s->length > (uint64_t)entries_added) { + addReplyError(c,"The entries_added specified in XSETID is smaller than the target stream length"); + return; + } + } + + s->last_id = id; + if (entries_added != -1) + s->entries_added = entries_added; + if (!streamIDEqZero(&max_xdel_id)) + s->max_deleted_entry_id = max_xdel_id; + addReply(c,shared.ok); + server.dirty++; + notifyKeyspaceEvent(NOTIFY_STREAM,"xsetid",c->argv[1],c->db->id); +} + +/* XACK ... + * Acknowledge a message as processed. In practical terms we just check the + * pending entries list (PEL) of the group, and delete the PEL entry both from + * the group and the consumer (pending messages are referenced in both places). + * + * Return value of the command is the number of messages successfully + * acknowledged, that is, the IDs we were actually able to resolve in the PEL. + */ +void xackCommand(client *c) { + streamCG *group = NULL; + robj *o = lookupKeyRead(c->db,c->argv[1]); + if (o) { + if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */ + group = streamLookupCG(o->ptr,c->argv[2]->ptr); + } + + /* No key or group? Nothing to ack. */ + if (o == NULL || group == NULL) { + addReply(c,shared.czero); + return; + } + + /* Start parsing the IDs, so that we abort ASAP if there is a syntax + * error: the return value of this command cannot be an error in case + * the client successfully acknowledged some messages, so it should be + * executed in a "all or nothing" fashion. */ + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + int id_count = c->argc-3; + if (id_count > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*id_count); + for (int j = 3; j < c->argc; j++) { + if (streamParseStrictIDOrReply(c,c->argv[j],&ids[j-3],0,NULL) != C_OK) goto cleanup; + } + + int acknowledged = 0; + for (int j = 3; j < c->argc; j++) { + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,&ids[j-3]); + + /* Lookup the ID in the group PEL: it will have a reference to the + * NACK structure that will have a reference to the consumer, so that + * we are able to remove the entry from both PELs. */ + streamNACK *nack = raxFind(group->pel,buf,sizeof(buf)); + if (nack != raxNotFound) { + raxRemove(group->pel,buf,sizeof(buf),NULL); + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + streamFreeNACK(nack); + acknowledged++; + server.dirty++; + } + } + addReplyLongLong(c,acknowledged); +cleanup: + if (ids != static_ids) zfree(ids); +} + +/* XPENDING [[IDLE ] []] + * + * If start and stop are omitted, the command just outputs information about + * the amount of pending messages for the key/group pair, together with + * the minimum and maximum ID of pending messages. + * + * If start and stop are provided instead, the pending messages are returned + * with information about the current owner, number of deliveries and last + * delivery time and so forth. */ +void xpendingCommand(client *c) { + int justinfo = c->argc == 3; /* Without the range just outputs general + information about the PEL. */ + robj *key = c->argv[1]; + robj *groupname = c->argv[2]; + robj *consumername = NULL; + streamID startid, endid; + long long count = 0; + long long minidle = 0; + int startex = 0, endex = 0; + + /* Start and stop, and the consumer, can be omitted. Also the IDLE modifier. */ + if (c->argc != 3 && (c->argc < 6 || c->argc > 9)) { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + + /* Parse start/end/count arguments ASAP if needed, in order to report + * syntax errors before any other error. */ + if (c->argc >= 6) { + int startidx = 3; /* Without IDLE */ + + if (!strcasecmp(c->argv[3]->ptr, "IDLE")) { + if (getLongLongFromObjectOrReply(c, c->argv[4], &minidle, NULL) == C_ERR) + return; + if (c->argc < 8) { + /* If IDLE was provided we must have at least 'start end count' */ + addReplyErrorObject(c,shared.syntaxerr); + return; + } + /* Search for rest of arguments after 'IDLE ' */ + startidx += 2; + } + + /* count argument. */ + if (getLongLongFromObjectOrReply(c,c->argv[startidx+2],&count,NULL) == C_ERR) + return; + if (count < 0) count = 0; + + /* start and end arguments. */ + if (streamParseIntervalIDOrReply(c,c->argv[startidx],&startid,&startex,0) != C_OK) + return; + if (startex && streamIncrID(&startid) != C_OK) { + addReplyError(c,"invalid start ID for the interval"); + return; + } + if (streamParseIntervalIDOrReply(c,c->argv[startidx+1],&endid,&endex,UINT64_MAX) != C_OK) + return; + if (endex && streamDecrID(&endid) != C_OK) { + addReplyError(c,"invalid end ID for the interval"); + return; + } + + if (startidx+3 < c->argc) { + /* 'consumer' was provided */ + consumername = c->argv[startidx+3]; + } + } + + /* Lookup the key and the group inside the stream. */ + robj *o = lookupKeyRead(c->db,c->argv[1]); + streamCG *group; + + if (checkType(c,o,OBJ_STREAM)) return; + if (o == NULL || + (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL) + { + addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer " + "group '%s'", + (char*)key->ptr,(char*)groupname->ptr); + return; + } + + /* XPENDING variant. */ + if (justinfo) { + addReplyArrayLen(c,4); + /* Total number of messages in the PEL. */ + addReplyLongLong(c,raxSize(group->pel)); + /* First and last IDs. */ + if (raxSize(group->pel) == 0) { + addReplyNull(c); /* Start. */ + addReplyNull(c); /* End. */ + addReplyNullArray(c); /* Clients. */ + } else { + /* Start. */ + raxIterator ri; + raxStart(&ri,group->pel); + raxSeek(&ri,"^",NULL,0); + raxNext(&ri); + streamDecodeID(ri.key,&startid); + addReplyStreamID(c,&startid); + + /* End. */ + raxSeek(&ri,"$",NULL,0); + raxNext(&ri); + streamDecodeID(ri.key,&endid); + addReplyStreamID(c,&endid); + raxStop(&ri); + + /* Consumers with pending messages. */ + raxStart(&ri,group->consumers); + raxSeek(&ri,"^",NULL,0); + void *arraylen_ptr = addReplyDeferredLen(c); + size_t arraylen = 0; + while(raxNext(&ri)) { + streamConsumer *consumer = ri.data; + if (raxSize(consumer->pel) == 0) continue; + addReplyArrayLen(c,2); + addReplyBulkCBuffer(c,ri.key,ri.key_len); + addReplyBulkLongLong(c,raxSize(consumer->pel)); + arraylen++; + } + setDeferredArrayLen(c,arraylen_ptr,arraylen); + raxStop(&ri); + } + } else { /* , and provided, return actual pending entries (not just info) */ + streamConsumer *consumer = NULL; + if (consumername) { + consumer = streamLookupConsumer(group,consumername->ptr); + + /* If a consumer name was mentioned but it does not exist, we can + * just return an empty array. */ + if (consumer == NULL) { + addReplyArrayLen(c,0); + return; + } + } + + rax *pel = consumer ? consumer->pel : group->pel; + unsigned char startkey[sizeof(streamID)]; + unsigned char endkey[sizeof(streamID)]; + raxIterator ri; + mstime_t now = commandTimeSnapshot(); + + streamEncodeID(startkey,&startid); + streamEncodeID(endkey,&endid); + raxStart(&ri,pel); + raxSeek(&ri,">=",startkey,sizeof(startkey)); + void *arraylen_ptr = addReplyDeferredLen(c); + size_t arraylen = 0; + + while(count && raxNext(&ri) && memcmp(ri.key,endkey,ri.key_len) <= 0) { + streamNACK *nack = ri.data; + + if (minidle) { + mstime_t this_idle = now - nack->delivery_time; + if (this_idle < minidle) continue; + } + + arraylen++; + count--; + addReplyArrayLen(c,4); + + /* Entry ID. */ + streamID id; + streamDecodeID(ri.key,&id); + addReplyStreamID(c,&id); + + /* Consumer name. */ + addReplyBulkCBuffer(c,nack->consumer->name, + sdslen(nack->consumer->name)); + + /* Milliseconds elapsed since last delivery. */ + mstime_t elapsed = now - nack->delivery_time; + if (elapsed < 0) elapsed = 0; + addReplyLongLong(c,elapsed); + + /* Number of deliveries. */ + addReplyLongLong(c,nack->delivery_count); + } + raxStop(&ri); + setDeferredArrayLen(c,arraylen_ptr,arraylen); + } +} + +/* XCLAIM + * [IDLE ] [TIME ] [RETRYCOUNT ] + * [FORCE] [JUSTID] + * + * Changes ownership of one or multiple messages in the Pending Entries List + * of a given stream consumer group. + * + * If the message ID (among the specified ones) exists, and its idle + * time greater or equal to , then the message new owner + * becomes the specified . If the minimum idle time specified + * is zero, messages are claimed regardless of their idle time. + * + * All the messages that cannot be found inside the pending entries list + * are ignored, but in case the FORCE option is used. In that case we + * create the NACK (representing a not yet acknowledged message) entry in + * the consumer group PEL. + * + * This command creates the consumer as side effect if it does not yet + * exists. Moreover the command reset the idle time of the message to 0, + * even if by using the IDLE or TIME options, the user can control the + * new idle time. + * + * The options at the end can be used in order to specify more attributes + * to set in the representation of the pending message: + * + * 1. IDLE : + * Set the idle time (last time it was delivered) of the message. + * If IDLE is not specified, an IDLE of 0 is assumed, that is, + * the time count is reset because the message has now a new + * owner trying to process it. + * + * 2. TIME : + * This is the same as IDLE but instead of a relative amount of + * milliseconds, it sets the idle time to a specific unix time + * (in milliseconds). This is useful in order to rewrite the AOF + * file generating XCLAIM commands. + * + * 3. RETRYCOUNT : + * Set the retry counter to the specified value. This counter is + * incremented every time a message is delivered again. Normally + * XCLAIM does not alter this counter, which is just served to clients + * when the XPENDING command is called: this way clients can detect + * anomalies, like messages that are never processed for some reason + * after a big number of delivery attempts. + * + * 4. FORCE: + * Creates the pending message entry in the PEL even if certain + * specified IDs are not already in the PEL assigned to a different + * client. However the message must be exist in the stream, otherwise + * the IDs of non existing messages are ignored. + * + * 5. JUSTID: + * Return just an array of IDs of messages successfully claimed, + * without returning the actual message. + * + * 6. LASTID : + * Update the consumer group last ID with the specified ID if the + * current last ID is smaller than the provided one. + * This is used for replication / AOF, so that when we read from a + * consumer group, the XCLAIM that gets propagated to give ownership + * to the consumer, is also used in order to update the group current + * ID. + * + * The command returns an array of messages that the user + * successfully claimed, so that the caller is able to understand + * what messages it is now in charge of. */ +void xclaimCommand(client *c) { + streamCG *group = NULL; + robj *o = lookupKeyRead(c->db,c->argv[1]); + long long minidle; /* Minimum idle time argument. */ + long long retrycount = -1; /* -1 means RETRYCOUNT option not given. */ + mstime_t deliverytime = -1; /* -1 means IDLE/TIME options not given. */ + int force = 0; + int justid = 0; + + if (o) { + if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */ + group = streamLookupCG(o->ptr,c->argv[2]->ptr); + } + + /* No key or group? Send an error given that the group creation + * is mandatory. */ + if (o == NULL || group == NULL) { + addReplyErrorFormat(c,"-NOGROUP No such key '%s' or " + "consumer group '%s'", (char*)c->argv[1]->ptr, + (char*)c->argv[2]->ptr); + return; + } + + if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle, + "Invalid min-idle-time argument for XCLAIM") + != C_OK) return; + if (minidle < 0) minidle = 0; + + /* Start parsing the IDs, so that we abort ASAP if there is a syntax + * error: the return value of this command cannot be an error in case + * the client successfully claimed some message, so it should be + * executed in a "all or nothing" fashion. */ + int j; + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + int id_count = c->argc-5; + if (id_count > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*id_count); + for (j = 5; j < c->argc; j++) { + if (streamParseStrictIDOrReply(NULL,c->argv[j],&ids[j-5],0,NULL) != C_OK) break; + } + int last_id_arg = j-1; /* Next time we iterate the IDs we now the range. */ + + /* If we stopped because some IDs cannot be parsed, perhaps they + * are trailing options. */ + mstime_t now = commandTimeSnapshot(); + streamID last_id = {0,0}; + int propagate_last_id = 0; + for (; j < c->argc; j++) { + int moreargs = (c->argc-1) - j; /* Number of additional arguments. */ + char *opt = c->argv[j]->ptr; + if (!strcasecmp(opt,"FORCE")) { + force = 1; + } else if (!strcasecmp(opt,"JUSTID")) { + justid = 1; + } else if (!strcasecmp(opt,"IDLE") && moreargs) { + j++; + if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime, + "Invalid IDLE option argument for XCLAIM") + != C_OK) goto cleanup; + deliverytime = now - deliverytime; + } else if (!strcasecmp(opt,"TIME") && moreargs) { + j++; + if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime, + "Invalid TIME option argument for XCLAIM") + != C_OK) goto cleanup; + } else if (!strcasecmp(opt,"RETRYCOUNT") && moreargs) { + j++; + if (getLongLongFromObjectOrReply(c,c->argv[j],&retrycount, + "Invalid RETRYCOUNT option argument for XCLAIM") + != C_OK) goto cleanup; + } else if (!strcasecmp(opt,"LASTID") && moreargs) { + j++; + if (streamParseStrictIDOrReply(c,c->argv[j],&last_id,0,NULL) != C_OK) goto cleanup; + } else { + addReplyErrorFormat(c,"Unrecognized XCLAIM option '%s'",opt); + goto cleanup; + } + } + + if (streamCompareID(&last_id,&group->last_id) > 0) { + group->last_id = last_id; + propagate_last_id = 1; + } + + if (deliverytime != -1) { + /* If a delivery time was passed, either with IDLE or TIME, we + * do some sanity check on it, and set the deliverytime to now + * (which is a sane choice usually) if the value is bogus. + * To raise an error here is not wise because clients may compute + * the idle time doing some math starting from their local time, + * and this is not a good excuse to fail in case, for instance, + * the computer time is a bit in the future from our POV. */ + if (deliverytime < 0 || deliverytime > now) deliverytime = now; + } else { + /* If no IDLE/TIME option was passed, we want the last delivery + * time to be now, so that the idle time of the message will be + * zero. */ + deliverytime = now; + } + + /* Do the actual claiming. */ + streamConsumer *consumer = streamLookupConsumer(group,c->argv[3]->ptr); + if (consumer == NULL) { + consumer = streamCreateConsumer(group,c->argv[3]->ptr,c->argv[1],c->db->id,SCC_DEFAULT); + } + consumer->seen_time = commandTimeSnapshot(); + + void *arraylenptr = addReplyDeferredLen(c); + size_t arraylen = 0; + for (int j = 5; j <= last_id_arg; j++) { + streamID id = ids[j-5]; + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,&id); + + /* Lookup the ID in the group PEL. */ + streamNACK *nack = raxFind(group->pel,buf,sizeof(buf)); + + /* Item must exist for us to transfer it to another consumer. */ + if (!streamEntryExists(o->ptr,&id)) { + /* Clear this entry from the PEL, it no longer exists */ + if (nack != raxNotFound) { + /* Propagate this change (we are going to delete the NACK). */ + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack); + propagate_last_id = 0; /* Will be propagated by XCLAIM itself. */ + server.dirty++; + /* Release the NACK */ + raxRemove(group->pel,buf,sizeof(buf),NULL); + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + streamFreeNACK(nack); + } + continue; + } + + /* If FORCE is passed, let's check if at least the entry + * exists in the Stream. In such case, we'll create a new + * entry in the PEL from scratch, so that XCLAIM can also + * be used to create entries in the PEL. Useful for AOF + * and replication of consumer groups. */ + if (force && nack == raxNotFound) { + /* Create the NACK. */ + nack = streamCreateNACK(NULL); + raxInsert(group->pel,buf,sizeof(buf),nack,NULL); + } + + if (nack != raxNotFound) { + /* We need to check if the minimum idle time requested + * by the caller is satisfied by this entry. + * + * Note that the nack could be created by FORCE, in this + * case there was no pre-existing entry and minidle should + * be ignored, but in that case nack->consumer is NULL. */ + if (nack->consumer && minidle) { + mstime_t this_idle = now - nack->delivery_time; + if (this_idle < minidle) continue; + } + + if (nack->consumer != consumer) { + /* Remove the entry from the old consumer. + * Note that nack->consumer is NULL if we created the + * NACK above because of the FORCE option. */ + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + } + nack->delivery_time = deliverytime; + /* Set the delivery attempts counter if given, otherwise + * autoincrement unless JUSTID option provided */ + if (retrycount >= 0) { + nack->delivery_count = retrycount; + } else if (!justid) { + nack->delivery_count++; + } + if (nack->consumer != consumer) { + /* Add the entry in the new consumer local PEL. */ + raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL); + nack->consumer = consumer; + } + /* Send the reply for this entry. */ + if (justid) { + addReplyStreamID(c,&id); + } else { + serverAssert(streamReplyWithRange(c,o->ptr,&id,&id,1,0,NULL,NULL,STREAM_RWR_RAWENTRIES,NULL) == 1); + } + arraylen++; + + consumer->active_time = commandTimeSnapshot(); + + /* Propagate this change. */ + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack); + propagate_last_id = 0; /* Will be propagated by XCLAIM itself. */ + server.dirty++; + } + } + if (propagate_last_id) { + streamPropagateGroupID(c,c->argv[1],group,c->argv[2]); + server.dirty++; + } + setDeferredArrayLen(c,arraylenptr,arraylen); + preventCommandPropagation(c); +cleanup: + if (ids != static_ids) zfree(ids); +} + +/* XAUTOCLAIM [COUNT ] [JUSTID] + * + * Changes ownership of one or multiple messages in the Pending Entries List + * of a given stream consumer group. + * + * For each PEL entry, if its idle time greater or equal to , + * then the message new owner becomes the specified . + * If the minimum idle time specified is zero, messages are claimed + * regardless of their idle time. + * + * This command creates the consumer as side effect if it does not yet + * exists. Moreover the command reset the idle time of the message to 0. + * + * The command returns an array of messages that the user + * successfully claimed, so that the caller is able to understand + * what messages it is now in charge of. */ +void xautoclaimCommand(client *c) { + streamCG *group = NULL; + robj *o = lookupKeyRead(c->db,c->argv[1]); + long long minidle; /* Minimum idle time argument, in milliseconds. */ + long count = 100; /* Maximum entries to claim. */ + const unsigned attempts_factor = 10; + streamID startid; + int startex; + int justid = 0; + + /* Parse idle/start/end/count arguments ASAP if needed, in order to report + * syntax errors before any other error. */ + if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle,"Invalid min-idle-time argument for XAUTOCLAIM") != C_OK) + return; + if (minidle < 0) minidle = 0; + + if (streamParseIntervalIDOrReply(c,c->argv[5],&startid,&startex,0) != C_OK) + return; + if (startex && streamIncrID(&startid) != C_OK) { + addReplyError(c,"invalid start ID for the interval"); + return; + } + + int j = 6; /* options start at argv[6] */ + while(j < c->argc) { + int moreargs = (c->argc-1) - j; /* Number of additional arguments. */ + char *opt = c->argv[j]->ptr; + if (!strcasecmp(opt,"COUNT") && moreargs) { + long max_count = LONG_MAX / (max(sizeof(streamID), attempts_factor)); + if (getRangeLongFromObjectOrReply(c,c->argv[j+1],1,max_count,&count,"COUNT must be > 0") != C_OK) + return; + j++; + } else if (!strcasecmp(opt,"JUSTID")) { + justid = 1; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + j++; + } + + if (o) { + if (checkType(c,o,OBJ_STREAM)) + return; /* Type error. */ + group = streamLookupCG(o->ptr,c->argv[2]->ptr); + } + + /* No key or group? Send an error given that the group creation + * is mandatory. */ + if (o == NULL || group == NULL) { + addReplyErrorFormat(c,"-NOGROUP No such key '%s' or consumer group '%s'", + (char*)c->argv[1]->ptr, + (char*)c->argv[2]->ptr); + return; + } + + streamID *deleted_ids = ztrymalloc(count * sizeof(streamID)); + if (!deleted_ids) { + addReplyError(c, "Insufficient memory, failed allocating transient memory, COUNT too high."); + return; + } + + /* Do the actual claiming. */ + streamConsumer *consumer = streamLookupConsumer(group,c->argv[3]->ptr); + if (consumer == NULL) { + consumer = streamCreateConsumer(group,c->argv[3]->ptr,c->argv[1],c->db->id,SCC_DEFAULT); + } + consumer->seen_time = commandTimeSnapshot(); + + long long attempts = count * attempts_factor; + + addReplyArrayLen(c, 3); /* We add another reply later */ + void *endidptr = addReplyDeferredLen(c); /* reply[0] */ + void *arraylenptr = addReplyDeferredLen(c); /* reply[1] */ + + unsigned char startkey[sizeof(streamID)]; + streamEncodeID(startkey,&startid); + raxIterator ri; + raxStart(&ri,group->pel); + raxSeek(&ri,">=",startkey,sizeof(startkey)); + size_t arraylen = 0; + mstime_t now = commandTimeSnapshot(); + int deleted_id_num = 0; + while (attempts-- && count && raxNext(&ri)) { + streamNACK *nack = ri.data; + + streamID id; + streamDecodeID(ri.key, &id); + + /* Item must exist for us to transfer it to another consumer. */ + if (!streamEntryExists(o->ptr,&id)) { + /* Propagate this change (we are going to delete the NACK). */ + robj *idstr = createObjectFromStreamID(&id); + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack); + decrRefCount(idstr); + server.dirty++; + /* Clear this entry from the PEL, it no longer exists */ + raxRemove(group->pel,ri.key,ri.key_len,NULL); + raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL); + streamFreeNACK(nack); + /* Remember the ID for later */ + deleted_ids[deleted_id_num++] = id; + raxSeek(&ri,">=",ri.key,ri.key_len); + count--; /* Count is a limit of the command response size. */ + continue; + } + + if (minidle) { + mstime_t this_idle = now - nack->delivery_time; + if (this_idle < minidle) + continue; + } + + if (nack->consumer != consumer) { + /* Remove the entry from the old consumer. + * Note that nack->consumer is NULL if we created the + * NACK above because of the FORCE option. */ + if (nack->consumer) + raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL); + } + + /* Update the consumer and idle time. */ + nack->delivery_time = now; + /* Increment the delivery attempts counter unless JUSTID option provided */ + if (!justid) + nack->delivery_count++; + + if (nack->consumer != consumer) { + /* Add the entry in the new consumer local PEL. */ + raxInsert(consumer->pel,ri.key,ri.key_len,nack,NULL); + nack->consumer = consumer; + } + + /* Send the reply for this entry. */ + if (justid) { + addReplyStreamID(c,&id); + } else { + serverAssert(streamReplyWithRange(c,o->ptr,&id,&id,1,0,NULL,NULL,STREAM_RWR_RAWENTRIES,NULL) == 1); + } + arraylen++; + count--; + + consumer->active_time = commandTimeSnapshot(); + + /* Propagate this change. */ + robj *idstr = createObjectFromStreamID(&id); + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack); + decrRefCount(idstr); + server.dirty++; + } + + /* We need to return the next entry as a cursor for the next XAUTOCLAIM call */ + raxNext(&ri); + + streamID endid; + if (raxEOF(&ri)) { + endid.ms = endid.seq = 0; + } else { + streamDecodeID(ri.key, &endid); + } + raxStop(&ri); + + setDeferredArrayLen(c,arraylenptr,arraylen); + setDeferredReplyStreamID(c,endidptr,&endid); + + addReplyArrayLen(c, deleted_id_num); /* reply[2] */ + for (int i = 0; i < deleted_id_num; i++) { + addReplyStreamID(c, &deleted_ids[i]); + } + zfree(deleted_ids); + + preventCommandPropagation(c); +} + +/* XDEL [ ... ] + * + * Removes the specified entries from the stream. Returns the number + * of items actually deleted, that may be different from the number + * of IDs passed in case certain IDs do not exist. */ +void xdelCommand(client *c) { + robj *o; + + if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL + || checkType(c,o,OBJ_STREAM)) return; + stream *s = o->ptr; + + /* We need to sanity check the IDs passed to start. Even if not + * a big issue, it is not great that the command is only partially + * executed because at some point an invalid ID is parsed. */ + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + int id_count = c->argc-2; + if (id_count > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*id_count); + for (int j = 2; j < c->argc; j++) { + if (streamParseStrictIDOrReply(c,c->argv[j],&ids[j-2],0,NULL) != C_OK) goto cleanup; + } + + /* Actually apply the command. */ + int deleted = 0; + int first_entry = 0; + for (int j = 2; j < c->argc; j++) { + streamID *id = &ids[j-2]; + if (streamDeleteItem(s,id)) { + /* We want to know if the first entry in the stream was deleted + * so we can later set the new one. */ + if (streamCompareID(id,&s->first_id) == 0) { + first_entry = 1; + } + /* Update the stream's maximal tombstone if needed. */ + if (streamCompareID(id,&s->max_deleted_entry_id) > 0) { + s->max_deleted_entry_id = *id; + } + deleted++; + }; + } + + /* Update the stream's first ID. */ + if (deleted) { + if (s->length == 0) { + s->first_id.ms = 0; + s->first_id.seq = 0; + } else if (first_entry) { + streamGetEdgeID(s,1,1,&s->first_id); + } + } + + /* Propagate the write if needed. */ + if (deleted) { + signalModifiedKey(c,c->db,c->argv[1]); + notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id); + server.dirty += deleted; + } + addReplyLongLong(c,deleted); +cleanup: + if (ids != static_ids) zfree(ids); +} + +/* General form: XTRIM [... options ...] + * + * List of options: + * + * Trim strategies: + * + * MAXLEN [~|=] -- Trim so that the stream will be capped at + * the specified length. Use ~ before the + * count in order to demand approximated trimming + * (like XADD MAXLEN option). + * MINID [~|=] -- Trim so that the stream will not contain entries + * with IDs smaller than 'id'. Use ~ before the + * count in order to demand approximated trimming + * (like XADD MINID option). + * + * Other options: + * + * LIMIT -- The maximum number of entries to trim. + * 0 means unlimited. Unless specified, it is set + * to a default of 100*server.stream_node_max_entries, + * and that's in order to keep the trimming time sane. + * Has meaning only if `~` was provided. + */ +void xtrimCommand(client *c) { + robj *o; + + /* Argument parsing. */ + streamAddTrimArgs parsed_args; + if (streamParseAddOrTrimArgsOrReply(c, &parsed_args, 0) < 0) + return; /* streamParseAddOrTrimArgsOrReply already replied. */ + + /* If the key does not exist, we are ok returning zero, that is, the + * number of elements removed from the stream. */ + if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL + || checkType(c,o,OBJ_STREAM)) return; + stream *s = o->ptr; + + /* Perform the trimming. */ + int64_t deleted = streamTrim(s, &parsed_args); + if (deleted) { + notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id); + if (parsed_args.approx_trim) { + /* In case our trimming was limited (by LIMIT or by ~) we must + * re-write the relevant trim argument to make sure there will be + * no inconsistencies in AOF loading or in the replica. + * It's enough to check only args->approx because there is no + * way LIMIT is given without the ~ option. */ + streamRewriteApproxSpecifier(c,parsed_args.trim_strategy_arg_idx-1); + streamRewriteTrimArgument(c,s,parsed_args.trim_strategy,parsed_args.trim_strategy_arg_idx); + } + + /* Propagate the write. */ + signalModifiedKey(c, c->db,c->argv[1]); + server.dirty += deleted; + } + addReplyLongLong(c,deleted); +} + +/* Helper function for xinfoCommand. + * Handles the variants of XINFO STREAM */ +void xinfoReplyWithStreamInfo(client *c, stream *s) { + int full = 1; + long long count = 10; /* Default COUNT is 10 so we don't block the server */ + robj **optv = c->argv + 3; /* Options start after XINFO STREAM */ + int optc = c->argc - 3; + + /* Parse options. */ + if (optc == 0) { + full = 0; + } else { + /* Valid options are [FULL] or [FULL COUNT ] */ + if (optc != 1 && optc != 3) { + addReplySubcommandSyntaxError(c); + return; + } + + /* First option must be "FULL" */ + if (strcasecmp(optv[0]->ptr,"full")) { + addReplySubcommandSyntaxError(c); + return; + } + + if (optc == 3) { + /* First option must be "FULL" */ + if (strcasecmp(optv[1]->ptr,"count")) { + addReplySubcommandSyntaxError(c); + return; + } + if (getLongLongFromObjectOrReply(c,optv[2],&count,NULL) == C_ERR) + return; + if (count < 0) count = 10; + } + } + + addReplyMapLen(c,full ? 9 : 10); + addReplyBulkCString(c,"length"); + addReplyLongLong(c,s->length); + addReplyBulkCString(c,"radix-tree-keys"); + addReplyLongLong(c,raxSize(s->rax)); + addReplyBulkCString(c,"radix-tree-nodes"); + addReplyLongLong(c,s->rax->numnodes); + addReplyBulkCString(c,"last-generated-id"); + addReplyStreamID(c,&s->last_id); + addReplyBulkCString(c,"max-deleted-entry-id"); + addReplyStreamID(c,&s->max_deleted_entry_id); + addReplyBulkCString(c,"entries-added"); + addReplyLongLong(c,s->entries_added); + addReplyBulkCString(c,"recorded-first-entry-id"); + addReplyStreamID(c,&s->first_id); + + if (!full) { + /* XINFO STREAM */ + + addReplyBulkCString(c,"groups"); + addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0); + + /* To emit the first/last entry we use streamReplyWithRange(). */ + int emitted; + streamID start, end; + start.ms = start.seq = 0; + end.ms = end.seq = UINT64_MAX; + addReplyBulkCString(c,"first-entry"); + emitted = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL, + STREAM_RWR_RAWENTRIES,NULL); + if (!emitted) addReplyNull(c); + addReplyBulkCString(c,"last-entry"); + emitted = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL, + STREAM_RWR_RAWENTRIES,NULL); + if (!emitted) addReplyNull(c); + } else { + /* XINFO STREAM FULL [COUNT ] */ + + /* Stream entries */ + addReplyBulkCString(c,"entries"); + streamReplyWithRange(c,s,NULL,NULL,count,0,NULL,NULL,0,NULL); + + /* Consumer groups */ + addReplyBulkCString(c,"groups"); + if (s->cgroups == NULL) { + addReplyArrayLen(c,0); + } else { + addReplyArrayLen(c,raxSize(s->cgroups)); + raxIterator ri_cgroups; + raxStart(&ri_cgroups,s->cgroups); + raxSeek(&ri_cgroups,"^",NULL,0); + while(raxNext(&ri_cgroups)) { + streamCG *cg = ri_cgroups.data; + addReplyMapLen(c,7); + + /* Name */ + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,ri_cgroups.key,ri_cgroups.key_len); + + /* Last delivered ID */ + addReplyBulkCString(c,"last-delivered-id"); + addReplyStreamID(c,&cg->last_id); + + /* Read counter of the last delivered ID */ + addReplyBulkCString(c,"entries-read"); + if (cg->entries_read != SCG_INVALID_ENTRIES_READ) { + addReplyLongLong(c,cg->entries_read); + } else { + addReplyNull(c); + } + + /* Group lag */ + addReplyBulkCString(c,"lag"); + streamReplyWithCGLag(c,s,cg); + + /* Group PEL count */ + addReplyBulkCString(c,"pel-count"); + addReplyLongLong(c,raxSize(cg->pel)); + + /* Group PEL */ + addReplyBulkCString(c,"pending"); + long long arraylen_cg_pel = 0; + void *arrayptr_cg_pel = addReplyDeferredLen(c); + raxIterator ri_cg_pel; + raxStart(&ri_cg_pel,cg->pel); + raxSeek(&ri_cg_pel,"^",NULL,0); + while(raxNext(&ri_cg_pel) && (!count || arraylen_cg_pel < count)) { + streamNACK *nack = ri_cg_pel.data; + addReplyArrayLen(c,4); + + /* Entry ID. */ + streamID id; + streamDecodeID(ri_cg_pel.key,&id); + addReplyStreamID(c,&id); + + /* Consumer name. */ + serverAssert(nack->consumer); /* assertion for valgrind (avoid NPD) */ + addReplyBulkCBuffer(c,nack->consumer->name, + sdslen(nack->consumer->name)); + + /* Last delivery. */ + addReplyLongLong(c,nack->delivery_time); + + /* Number of deliveries. */ + addReplyLongLong(c,nack->delivery_count); + + arraylen_cg_pel++; + } + setDeferredArrayLen(c,arrayptr_cg_pel,arraylen_cg_pel); + raxStop(&ri_cg_pel); + + /* Consumers */ + addReplyBulkCString(c,"consumers"); + addReplyArrayLen(c,raxSize(cg->consumers)); + raxIterator ri_consumers; + raxStart(&ri_consumers,cg->consumers); + raxSeek(&ri_consumers,"^",NULL,0); + while(raxNext(&ri_consumers)) { + streamConsumer *consumer = ri_consumers.data; + addReplyMapLen(c,5); + + /* Consumer name */ + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name)); + + /* Seen-time */ + addReplyBulkCString(c,"seen-time"); + addReplyLongLong(c,consumer->seen_time); + + /* Active-time */ + addReplyBulkCString(c,"active-time"); + addReplyLongLong(c,consumer->active_time); + + /* Consumer PEL count */ + addReplyBulkCString(c,"pel-count"); + addReplyLongLong(c,raxSize(consumer->pel)); + + /* Consumer PEL */ + addReplyBulkCString(c,"pending"); + long long arraylen_cpel = 0; + void *arrayptr_cpel = addReplyDeferredLen(c); + raxIterator ri_cpel; + raxStart(&ri_cpel,consumer->pel); + raxSeek(&ri_cpel,"^",NULL,0); + while(raxNext(&ri_cpel) && (!count || arraylen_cpel < count)) { + streamNACK *nack = ri_cpel.data; + addReplyArrayLen(c,3); + + /* Entry ID. */ + streamID id; + streamDecodeID(ri_cpel.key,&id); + addReplyStreamID(c,&id); + + /* Last delivery. */ + addReplyLongLong(c,nack->delivery_time); + + /* Number of deliveries. */ + addReplyLongLong(c,nack->delivery_count); + + arraylen_cpel++; + } + setDeferredArrayLen(c,arrayptr_cpel,arraylen_cpel); + raxStop(&ri_cpel); + } + raxStop(&ri_consumers); + } + raxStop(&ri_cgroups); + } + } +} + +/* XINFO CONSUMERS + * XINFO GROUPS + * XINFO STREAM [FULL [COUNT ]] + * XINFO HELP. */ +void xinfoCommand(client *c) { + stream *s = NULL; + char *opt; + robj *key; + + /* HELP is special. Handle it ASAP. */ + if (!strcasecmp(c->argv[1]->ptr,"HELP")) { + const char *help[] = { +"CONSUMERS ", +" Show consumers of .", +"GROUPS ", +" Show the stream consumer groups.", +"STREAM [FULL [COUNT ]", +" Show information about the stream.", +NULL + }; + addReplyHelp(c, help); + return; + } + + /* With the exception of HELP handled before any other sub commands, all + * the ones are in the form of " ". */ + opt = c->argv[1]->ptr; + key = c->argv[2]; + + /* Lookup the key now, this is common for all the subcommands but HELP. */ + robj *o = lookupKeyReadOrReply(c,key,shared.nokeyerr); + if (o == NULL || checkType(c,o,OBJ_STREAM)) return; + s = o->ptr; + + /* Dispatch the different subcommands. */ + if (!strcasecmp(opt,"CONSUMERS") && c->argc == 4) { + /* XINFO CONSUMERS . */ + streamCG *cg = streamLookupCG(s,c->argv[3]->ptr); + if (cg == NULL) { + addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' " + "for key name '%s'", + (char*)c->argv[3]->ptr, (char*)key->ptr); + return; + } + + addReplyArrayLen(c,raxSize(cg->consumers)); + raxIterator ri; + raxStart(&ri,cg->consumers); + raxSeek(&ri,"^",NULL,0); + mstime_t now = commandTimeSnapshot(); + while(raxNext(&ri)) { + streamConsumer *consumer = ri.data; + mstime_t inactive = consumer->active_time != -1 ? now - consumer->active_time : consumer->active_time; + mstime_t idle = now - consumer->seen_time; + if (idle < 0) idle = 0; + + addReplyMapLen(c,4); + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name)); + addReplyBulkCString(c,"pending"); + addReplyLongLong(c,raxSize(consumer->pel)); + addReplyBulkCString(c,"idle"); + addReplyLongLong(c,idle); + addReplyBulkCString(c,"inactive"); + addReplyLongLong(c,inactive); + } + raxStop(&ri); + } else if (!strcasecmp(opt,"GROUPS") && c->argc == 3) { + /* XINFO GROUPS . */ + if (s->cgroups == NULL) { + addReplyArrayLen(c,0); + return; + } + + addReplyArrayLen(c,raxSize(s->cgroups)); + raxIterator ri; + raxStart(&ri,s->cgroups); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + streamCG *cg = ri.data; + addReplyMapLen(c,6); + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,ri.key,ri.key_len); + addReplyBulkCString(c,"consumers"); + addReplyLongLong(c,raxSize(cg->consumers)); + addReplyBulkCString(c,"pending"); + addReplyLongLong(c,raxSize(cg->pel)); + addReplyBulkCString(c,"last-delivered-id"); + addReplyStreamID(c,&cg->last_id); + addReplyBulkCString(c,"entries-read"); + if (cg->entries_read != SCG_INVALID_ENTRIES_READ) { + addReplyLongLong(c,cg->entries_read); + } else { + addReplyNull(c); + } + addReplyBulkCString(c,"lag"); + streamReplyWithCGLag(c,s,cg); + } + raxStop(&ri); + } else if (!strcasecmp(opt,"STREAM")) { + /* XINFO STREAM [FULL [COUNT ]]. */ + xinfoReplyWithStreamInfo(c,s); + } else { + addReplySubcommandSyntaxError(c); + } +} + +/* Validate the integrity stream listpack entries structure. Both in term of a + * valid listpack, but also that the structure of the entries matches a valid + * stream. return 1 if valid 0 if not valid. */ +int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) { + int valid_record; + unsigned char *p, *next; + + /* Since we don't want to run validation of all records twice, we'll + * run the listpack validation of just the header and do the rest here. */ + if (!lpValidateIntegrity(lp, size, 0, NULL, NULL)) + return 0; + + /* In non-deep mode we just validated the listpack header (encoded size) */ + if (!deep) return 1; + + next = p = lpValidateFirst(lp); + if (!lpValidateNext(lp, &next, size)) return 0; + if (!p) return 0; + + /* entry count */ + int64_t entry_count = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + /* deleted */ + int64_t deleted_count = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + /* num-of-fields */ + int64_t master_fields = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + /* the field names */ + for (int64_t j = 0; j < master_fields; j++) { + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + } + + /* the zero master entry terminator. */ + int64_t zero = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record || zero != 0) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + entry_count += deleted_count; + while (entry_count--) { + if (!p) return 0; + int64_t fields = master_fields, extra_fields = 3; + int64_t flags = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + /* entry id */ + lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) { + /* num-of-fields */ + fields = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + + /* the field names */ + for (int64_t j = 0; j < fields; j++) { + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + } + + extra_fields += fields + 1; + } + + /* the values */ + for (int64_t j = 0; j < fields; j++) { + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + } + + /* lp-count */ + int64_t lp_count = lpGetIntegerIfValid(p, &valid_record); + if (!valid_record) return 0; + if (lp_count != fields + extra_fields) return 0; + p = next; if (!lpValidateNext(lp, &next, size)) return 0; + } + + if (next) + return 0; + + return 1; +} -- cgit v1.2.3