Diffstat (limited to 'src/rdb.c')
-rw-r--r--  src/rdb.c  3722
1 file changed, 3722 insertions, 0 deletions
diff --git a/src/rdb.c b/src/rdb.c
new file mode 100644
index 0000000..ed30b65
--- /dev/null
+++ b/src/rdb.c
@@ -0,0 +1,3722 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "lzf.h" /* LZF compression library */
+#include "zipmap.h"
+#include "endianconv.h"
+#include "fpconv_dtoa.h"
+#include "stream.h"
+#include "functions.h"
+#include "intset.h" /* Compact integer set structure */
+#include "bio.h"
+
+#include <math.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+
+/* This macro is called when the internal RDB structure is corrupt */
+#define rdbReportCorruptRDB(...) rdbReportError(1, __LINE__,__VA_ARGS__)
+/* This macro is called when RDB read failed (possibly a short read) */
+#define rdbReportReadError(...) rdbReportError(0, __LINE__,__VA_ARGS__)
+
+/* This macro tells if we are in the context of a RESTORE command, and not loading an RDB or AOF. */
+#define isRestoreContext() \
+ ((server.current_client == NULL || server.current_client->id == CLIENT_ID_AOF) ? 0 : 1)
+
+char* rdbFileBeingLoaded = NULL; /* used for rdb checking on read error */
+extern int rdbCheckMode;
+void rdbCheckError(const char *fmt, ...);
+void rdbCheckSetError(const char *fmt, ...);
+
+#ifdef __GNUC__
+void rdbReportError(int corruption_error, int linenum, char *reason, ...) __attribute__ ((format (printf, 3, 4)));
+#endif
+void rdbReportError(int corruption_error, int linenum, char *reason, ...) {
+ va_list ap;
+ char msg[1024];
+ int len;
+
+ len = snprintf(msg,sizeof(msg),
+ "Internal error in RDB reading offset %llu, function at rdb.c:%d -> ",
+ (unsigned long long)server.loading_loaded_bytes, linenum);
+ va_start(ap,reason);
+ vsnprintf(msg+len,sizeof(msg)-len,reason,ap);
+ va_end(ap);
+
+ if (isRestoreContext()) {
+ /* If we're in the context of a RESTORE command, just propagate the error. */
+ /* log in VERBOSE, and return (don't exit). */
+ serverLog(LL_VERBOSE, "%s", msg);
+ return;
+ } else if (rdbCheckMode) {
+ /* If we're inside the rdb checker, let it handle the error. */
+ rdbCheckError("%s",msg);
+ } else if (rdbFileBeingLoaded) {
+ /* If we're loading an rdb file from disk, run rdb check (and exit) */
+ serverLog(LL_WARNING, "%s", msg);
+ char *argv[2] = {"",rdbFileBeingLoaded};
+ if (anetIsFifo(argv[1])) {
+ /* Cannot check RDB FIFO because we cannot reopen the FIFO and check already streamed data. */
+ rdbCheckError("Cannot check RDB that is a FIFO: %s", argv[1]);
+ return;
+ }
+ redis_check_rdb_main(2,argv,NULL);
+ } else if (corruption_error) {
+ /* In diskless loading, in case of corrupt file, log and exit. */
+ serverLog(LL_WARNING, "%s. Failure loading rdb format", msg);
+ } else {
+ /* In diskless loading, in case of a short read (not a corrupt
+ * file), log and proceed (don't exit). */
+ serverLog(LL_WARNING, "%s. Failure loading rdb format from socket, assuming connection error, resuming operation.", msg);
+ return;
+ }
+ serverLog(LL_WARNING, "Terminating server after rdb file reading failure.");
+ exit(1);
+}
+
+ssize_t rdbWriteRaw(rio *rdb, void *p, size_t len) {
+ if (rdb && rioWrite(rdb,p,len) == 0)
+ return -1;
+ return len;
+}
+
+int rdbSaveType(rio *rdb, unsigned char type) {
+ return rdbWriteRaw(rdb,&type,1);
+}
+
+/* Load a "type" in RDB format, that is a one byte unsigned integer.
+ * This function is not only used to load object types, but also special
+ * "types" like the end-of-file type, the EXPIRE type, and so forth. */
+int rdbLoadType(rio *rdb) {
+ unsigned char type;
+ if (rioRead(rdb,&type,1) == 0) return -1;
+ return type;
+}
+
+/* This is only used to load old databases stored with the RDB_OPCODE_EXPIRETIME
+ * opcode. New versions of Redis store using the RDB_OPCODE_EXPIRETIME_MS
+ * opcode. On error -1 is returned, however this could be a valid time, so
+ * to check for loading errors the caller should call rioGetReadError() after
+ * calling this function. */
+time_t rdbLoadTime(rio *rdb) {
+ int32_t t32;
+ if (rioRead(rdb,&t32,4) == 0) return -1;
+ return (time_t)t32;
+}
+
+int rdbSaveMillisecondTime(rio *rdb, long long t) {
+ int64_t t64 = (int64_t) t;
+ memrev64ifbe(&t64); /* Store in little endian. */
+ return rdbWriteRaw(rdb,&t64,8);
+}
+
+/* This function loads a time from the RDB file. It gets the version of the
+ * RDB because, unfortunately, before Redis 5 (RDB version 9), the function
+ * failed to convert data to/from little endian, so RDB files with keys having
+ * expires could not be shared between big endian and little endian systems
+ * (because the expire time will be totally wrong). The fix for this is just
+ * to call memrev64ifbe(), however if we fix this for all the RDB versions,
+ * this call will introduce an incompatibility for big endian systems:
+ * after upgrading to Redis version 5 they will no longer be able to load their
+ * own old RDB files. Because of that, we instead fix the function only for new
+ * RDB versions, and load older RDB versions as we used to do in the past,
+ * allowing big endian systems to load their own old RDB files.
+ *
+ * On I/O error the function returns LLONG_MAX, however if this is also a
+ * valid stored value, the caller should use rioGetReadError() to check for
+ * errors after calling this function. */
+long long rdbLoadMillisecondTime(rio *rdb, int rdbver) {
+ int64_t t64;
+ if (rioRead(rdb,&t64,8) == 0) return LLONG_MAX;
+ if (rdbver >= 9) /* Check the top comment of this function. */
+ memrev64ifbe(&t64); /* Convert from little endian if the system is BE. */
+ return (long long)t64;
+}
+
+/* Saves an encoded length. The first two bits in the first byte are used to
+ * hold the encoding type. See the RDB_* definitions for more information
+ * on the types of encoding. */
+int rdbSaveLen(rio *rdb, uint64_t len) {
+ unsigned char buf[2];
+ size_t nwritten;
+
+ if (len < (1<<6)) {
+ /* Save a 6 bit len */
+ buf[0] = (len&0xFF)|(RDB_6BITLEN<<6);
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ nwritten = 1;
+ } else if (len < (1<<14)) {
+ /* Save a 14 bit len */
+ buf[0] = ((len>>8)&0xFF)|(RDB_14BITLEN<<6);
+ buf[1] = len&0xFF;
+ if (rdbWriteRaw(rdb,buf,2) == -1) return -1;
+ nwritten = 2;
+ } else if (len <= UINT32_MAX) {
+ /* Save a 32 bit len */
+ buf[0] = RDB_32BITLEN;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ uint32_t len32 = htonl(len);
+ if (rdbWriteRaw(rdb,&len32,4) == -1) return -1;
+ nwritten = 1+4;
+ } else {
+ /* Save a 64 bit len */
+ buf[0] = RDB_64BITLEN;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ len = htonu64(len);
+ if (rdbWriteRaw(rdb,&len,8) == -1) return -1;
+ nwritten = 1+8;
+ }
+ return nwritten;
+}
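+
+/* Illustrative byte layouts for the four length encodings above (a sketch,
+ * assuming the usual values RDB_6BITLEN == 0, RDB_14BITLEN == 1,
+ * RDB_32BITLEN == 0x80 and RDB_64BITLEN == 0x81):
+ *
+ * len = 5 -> 05 (single byte, top two bits 00)
+ * len = 500 -> 41 F4 (top two bits 01, remaining 14 bits big endian)
+ * len = 100000 -> 80 00 01 86 A0 (0x80 marker + 32 bit big endian)
+ * len = 2^32 -> 81 00 00 00 01 00 00 00 00 (0x81 marker + 64 bit big endian)
+ */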
+
+
+/* Load an encoded length. If the loaded length is a normal length as stored
+ * with rdbSaveLen(), the read length is stored at '*lenptr'. If instead the
+ * loaded length describes a special encoding that follows, then '*isencoded'
+ * is set to 1 and the encoding format is stored at '*lenptr'.
+ *
+ * See the RDB_ENC_* definitions in rdb.h for more information on special
+ * encodings.
+ *
+ * The function returns -1 on error, 0 on success. */
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr) {
+ unsigned char buf[2];
+ int type;
+
+ if (isencoded) *isencoded = 0;
+ if (rioRead(rdb,buf,1) == 0) return -1;
+ type = (buf[0]&0xC0)>>6;
+ if (type == RDB_ENCVAL) {
+ /* Read a 6 bit encoding type. */
+ if (isencoded) *isencoded = 1;
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_6BITLEN) {
+ /* Read a 6 bit len. */
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_14BITLEN) {
+ /* Read a 14 bit len. */
+ if (rioRead(rdb,buf+1,1) == 0) return -1;
+ *lenptr = ((buf[0]&0x3F)<<8)|buf[1];
+ } else if (buf[0] == RDB_32BITLEN) {
+ /* Read a 32 bit len. */
+ uint32_t len;
+ if (rioRead(rdb,&len,4) == 0) return -1;
+ *lenptr = ntohl(len);
+ } else if (buf[0] == RDB_64BITLEN) {
+ /* Read a 64 bit len. */
+ uint64_t len;
+ if (rioRead(rdb,&len,8) == 0) return -1;
+ *lenptr = ntohu64(len);
+ } else {
+ rdbReportCorruptRDB(
+ "Unknown length encoding %d in rdbLoadLen()",type);
+ return -1; /* Never reached. */
+ }
+ return 0;
+}
+
+/* This is like rdbLoadLenByRef() but directly returns the value read
+ * from the RDB stream, signaling an error by returning RDB_LENERR
+ * (since it is a too large count to be applicable in any Redis data
+ * structure). */
+uint64_t rdbLoadLen(rio *rdb, int *isencoded) {
+ uint64_t len;
+
+ if (rdbLoadLenByRef(rdb,isencoded,&len) == -1) return RDB_LENERR;
+ return len;
+}
+
+/* Encodes the "value" argument as integer when it fits in the supported ranges
+ * for encoded types. If the function successfully encodes the integer, the
+ * representation is stored in the buffer pointed to by "enc" and the string
+ * length is returned. Otherwise 0 is returned. */
+int rdbEncodeInteger(long long value, unsigned char *enc) {
+ if (value >= -(1<<7) && value <= (1<<7)-1) {
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT8;
+ enc[1] = value&0xFF;
+ return 2;
+ } else if (value >= -(1<<15) && value <= (1<<15)-1) {
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT16;
+ enc[1] = value&0xFF;
+ enc[2] = (value>>8)&0xFF;
+ return 3;
+ } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT32;
+ enc[1] = value&0xFF;
+ enc[2] = (value>>8)&0xFF;
+ enc[3] = (value>>16)&0xFF;
+ enc[4] = (value>>24)&0xFF;
+ return 5;
+ } else {
+ return 0;
+ }
+}
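+
+/* Worked example for the encoding above (a sketch, assuming the usual values
+ * RDB_ENCVAL == 3, RDB_ENC_INT8 == 0, RDB_ENC_INT16 == 1, RDB_ENC_INT32 == 2):
+ *
+ * value = 123 -> C0 7B (8 bit: header 11|000000, then the byte)
+ * value = 300 -> C1 2C 01 (16 bit: header, then little endian 0x012C)
+ * value = 100000 -> C2 A0 86 01 00 (32 bit: header, then little endian 0x000186A0)
+ *
+ * Values outside the 32 bit signed range are not encodable and 0 is returned. */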
+
+/* Loads an integer-encoded object with the specified encoding type "enctype".
+ * The returned value changes according to the flags, see
+ * rdbGenericLoadStringObject() for more info. */
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
+ int encode = flags & RDB_LOAD_ENC;
+ unsigned char enc[4];
+ long long val;
+
+ if (enctype == RDB_ENC_INT8) {
+ if (rioRead(rdb,enc,1) == 0) return NULL;
+ val = (signed char)enc[0];
+ } else if (enctype == RDB_ENC_INT16) {
+ uint16_t v;
+ if (rioRead(rdb,enc,2) == 0) return NULL;
+ v = ((uint32_t)enc[0])|
+ ((uint32_t)enc[1]<<8);
+ val = (int16_t)v;
+ } else if (enctype == RDB_ENC_INT32) {
+ uint32_t v;
+ if (rioRead(rdb,enc,4) == 0) return NULL;
+ v = ((uint32_t)enc[0])|
+ ((uint32_t)enc[1]<<8)|
+ ((uint32_t)enc[2]<<16)|
+ ((uint32_t)enc[3]<<24);
+ val = (int32_t)v;
+ } else {
+ rdbReportCorruptRDB("Unknown RDB integer encoding type %d",enctype);
+ return NULL; /* Never reached. */
+ }
+ if (plain || sds) {
+ char buf[LONG_STR_SIZE], *p;
+ int len = ll2string(buf,sizeof(buf),val);
+ if (lenptr) *lenptr = len;
+ p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len);
+ memcpy(p,buf,len);
+ return p;
+ } else if (encode) {
+ return createStringObjectFromLongLongForValue(val);
+ } else {
+ return createStringObjectFromLongLongWithSds(val);
+ }
+}
+
+/* String objects in the form "2391" "-100" without any space and with a
+ * range of values that can fit in an 8, 16 or 32 bit signed value can be
+ * encoded as integers to save space */
+int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
+ long long value;
+ if (string2ll(s, len, &value)) {
+ return rdbEncodeInteger(value, enc);
+ } else {
+ return 0;
+ }
+}
+
+ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+ size_t original_len) {
+ unsigned char byte;
+ ssize_t n, nwritten = 0;
+
+ /* Data compressed! Let's save it on disk */
+ byte = (RDB_ENCVAL<<6)|RDB_ENC_LZF;
+ if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
+ nwritten += n;
+
+ if ((n = rdbSaveLen(rdb,compress_len)) == -1) goto writeerr;
+ nwritten += n;
+
+ if ((n = rdbSaveLen(rdb,original_len)) == -1) goto writeerr;
+ nwritten += n;
+
+ if ((n = rdbWriteRaw(rdb,data,compress_len)) == -1) goto writeerr;
+ nwritten += n;
+
+ return nwritten;
+
+writeerr:
+ return -1;
+}
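+
+/* On-disk layout produced by rdbSaveLzfBlob() (a sketch; with the usual
+ * RDB_ENCVAL == 3 and RDB_ENC_LZF == 3 the first byte is 0xC3):
+ *
+ * [C3][compressed length, rdbSaveLen format][original length, rdbSaveLen format][compressed bytes]
+ *
+ * The loader (rdbLoadLzfStringObject) reads the two lengths back, allocates
+ * 'original length' bytes and LZF-decompresses the payload into them. */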
+
+ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+ size_t comprlen, outlen;
+ void *out;
+
+ /* We require at least four bytes compression for this to be worth it */
+ if (len <= 4) return 0;
+ outlen = len-4;
+ if ((out = zmalloc(outlen+1)) == NULL) return 0;
+ comprlen = lzf_compress(s, len, out, outlen);
+ if (comprlen == 0) {
+ zfree(out);
+ return 0;
+ }
+ ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+ zfree(out);
+ return nwritten;
+}
+
+/* Load an LZF compressed string in RDB format. The returned value
+ * changes according to 'flags'. For more info check the
+ * rdbGenericLoadStringObject() function. */
+void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
+ uint64_t len, clen;
+ unsigned char *c = NULL;
+ char *val = NULL;
+
+ if ((clen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if ((c = ztrymalloc(clen)) == NULL) {
+ serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)clen);
+ goto err;
+ }
+
+ /* Allocate our target according to the uncompressed size. */
+ if (plain) {
+ val = ztrymalloc(len);
+ } else {
+ val = sdstrynewlen(SDS_NOINIT,len);
+ }
+ if (!val) {
+ serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)len);
+ goto err;
+ }
+
+ if (lenptr) *lenptr = len;
+
+ /* Load the compressed representation and uncompress it to target. */
+ if (rioRead(rdb,c,clen) == 0) goto err;
+ if (lzf_decompress(c,clen,val,len) != len) {
+ rdbReportCorruptRDB("Invalid LZF compressed string");
+ goto err;
+ }
+ zfree(c);
+
+ if (plain || sds) {
+ return val;
+ } else {
+ return createObject(OBJ_STRING,val);
+ }
+err:
+ zfree(c);
+ if (plain)
+ zfree(val);
+ else
+ sdsfree(val);
+ return NULL;
+}
+
+/* Save a string object as [len][data] on disk. If the object is a string
+ * representation of an integer value we try to save it in a special form */
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
+ int enclen;
+ ssize_t n, nwritten = 0;
+
+ /* Try integer encoding */
+ if (len <= 11) {
+ unsigned char buf[5];
+ if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
+ if (rdbWriteRaw(rdb,buf,enclen) == -1) return -1;
+ return enclen;
+ }
+ }
+
+ /* Try LZF compression - under 20 bytes it's unable to compress even
+ * aaaaaaaaaaaaaaaaaa so skip it */
+ if (server.rdb_compression && len > 20) {
+ n = rdbSaveLzfStringObject(rdb,s,len);
+ if (n == -1) return -1;
+ if (n > 0) return n;
+ /* Return value of 0 means data can't be compressed, save the old way */
+ }
+
+ /* Store verbatim */
+ if ((n = rdbSaveLen(rdb,len)) == -1) return -1;
+ nwritten += n;
+ if (len > 0) {
+ if (rdbWriteRaw(rdb,s,len) == -1) return -1;
+ nwritten += len;
+ }
+ return nwritten;
+}
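+
+/* Illustrative outputs of rdbSaveRawString() (a sketch, assuming the usual
+ * RDB_* constant values):
+ *
+ * "123" -> C0 7B (integer encoded, see rdbEncodeInteger() above)
+ * "hello" -> 05 68 65 6C 6C 6F (6 bit length, then the raw bytes)
+ * a 100 byte string -> LZF blob if compression is enabled and effective,
+ * otherwise [length][raw bytes]. */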
+
+/* Save a long long value as either an encoded string or a string. */
+ssize_t rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
+ unsigned char buf[32];
+ ssize_t n, nwritten = 0;
+ int enclen = rdbEncodeInteger(value,buf);
+ if (enclen > 0) {
+ return rdbWriteRaw(rdb,buf,enclen);
+ } else {
+ /* Encode as string */
+ enclen = ll2string((char*)buf,32,value);
+ serverAssert(enclen < 32);
+ if ((n = rdbSaveLen(rdb,enclen)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbWriteRaw(rdb,buf,enclen)) == -1) return -1;
+ nwritten += n;
+ }
+ return nwritten;
+}
+
+/* Like rdbSaveRawString(), but takes a Redis object instead. */
+ssize_t rdbSaveStringObject(rio *rdb, robj *obj) {
+ /* Avoid decoding the object and then encoding it again, if the
+ * object is already integer encoded. */
+ if (obj->encoding == OBJ_ENCODING_INT) {
+ return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
+ } else {
+ serverAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
+ return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
+ }
+}
+
+/* Load a string object from an RDB file according to flags:
+ *
+ * RDB_LOAD_NONE (no flags): load an RDB object, unencoded.
+ * RDB_LOAD_ENC: If the returned type is a Redis object, try to
+ * encode it in a special way to be more memory
+ * efficient. When this flag is passed the function
+ * no longer guarantees that obj->ptr is an SDS string.
+ * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
+ * instead of a Redis object with an sds in it.
+ * RDB_LOAD_SDS: Return an SDS string instead of a Redis object.
+ *
+ * On I/O error NULL is returned.
+ */
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
+ int isencoded;
+ unsigned long long len;
+
+ len = rdbLoadLen(rdb,&isencoded);
+ if (len == RDB_LENERR) return NULL;
+
+ if (isencoded) {
+ switch(len) {
+ case RDB_ENC_INT8:
+ case RDB_ENC_INT16:
+ case RDB_ENC_INT32:
+ return rdbLoadIntegerObject(rdb,len,flags,lenptr);
+ case RDB_ENC_LZF:
+ return rdbLoadLzfStringObject(rdb,flags,lenptr);
+ default:
+ rdbReportCorruptRDB("Unknown RDB string encoding type %llu",len);
+ return NULL;
+ }
+ }
+
+ if (plain || sds) {
+ void *buf = plain ? ztrymalloc(len) : sdstrynewlen(SDS_NOINIT,len);
+ if (!buf) {
+ serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
+ return NULL;
+ }
+ if (lenptr) *lenptr = len;
+ if (len && rioRead(rdb,buf,len) == 0) {
+ if (plain)
+ zfree(buf);
+ else
+ sdsfree(buf);
+ return NULL;
+ }
+ return buf;
+ } else {
+ robj *o = tryCreateStringObject(SDS_NOINIT,len);
+ if (!o) {
+ serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
+ return NULL;
+ }
+ if (len && rioRead(rdb,o->ptr,len) == 0) {
+ decrRefCount(o);
+ return NULL;
+ }
+ return o;
+ }
+}
+
+robj *rdbLoadStringObject(rio *rdb) {
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+}
+
+robj *rdbLoadEncodedStringObject(rio *rdb) {
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC,NULL);
+}
+
+/* Save a double value. Doubles are saved as strings prefixed by an unsigned
+ * 8 bit integer specifying the length of the representation.
+ * This 8 bit integer has special values in order to specify the following
+ * conditions:
+ * 253: not a number
+ * 254: + inf
+ * 255: - inf
+ */
+int rdbSaveDoubleValue(rio *rdb, double val) {
+ unsigned char buf[128];
+ int len;
+
+ if (isnan(val)) {
+ buf[0] = 253;
+ len = 1;
+ } else if (!isfinite(val)) {
+ len = 1;
+ buf[0] = (val < 0) ? 255 : 254;
+ } else {
+ long long lvalue;
+ /* Integer printing function is much faster, check if we can safely use it. */
+ if (double2ll(val, &lvalue))
+ ll2string((char*)buf+1,sizeof(buf)-1,lvalue);
+ else {
+ const int dlen = fpconv_dtoa(val, (char*)buf+1);
+ buf[dlen+1] = '\0';
+ }
+ buf[0] = strlen((char*)buf+1);
+ len = buf[0]+1;
+ }
+ return rdbWriteRaw(rdb,buf,len);
+}
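+
+/* Examples of the encoding above (a sketch): NaN is the single byte 253,
+ * +inf is 254, -inf is 255. A finite value such as 3.0 takes the integer
+ * fast path and is stored as [01]['3'], while 3.14 is stored as
+ * [04]['3']['.']['1']['4'] via fpconv_dtoa(). */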
+
+/* For information about double serialization check rdbSaveDoubleValue() */
+int rdbLoadDoubleValue(rio *rdb, double *val) {
+ char buf[256];
+ unsigned char len;
+
+ if (rioRead(rdb,&len,1) == 0) return -1;
+ switch(len) {
+ case 255: *val = R_NegInf; return 0;
+ case 254: *val = R_PosInf; return 0;
+ case 253: *val = R_Nan; return 0;
+ default:
+ if (rioRead(rdb,buf,len) == 0) return -1;
+ buf[len] = '\0';
+ if (sscanf(buf, "%lg", val)!=1) return -1;
+ return 0;
+ }
+}
+
+/* Saves a double for RDB 8 or greater, where IEEE 754 binary64 format is assumed.
+ * We just make sure the value is always stored in little endian, otherwise
+ * it is copied verbatim from memory to disk.
+ *
+ * Return -1 on error, the size of the serialized value on success. */
+int rdbSaveBinaryDoubleValue(rio *rdb, double val) {
+ memrev64ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Loads a double from RDB 8 or greater. See rdbSaveBinaryDoubleValue() for
+ * more info. On error -1 is returned, otherwise 0. */
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev64ifbe(val);
+ return 0;
+}
+
+/* Like rdbSaveBinaryDoubleValue() but single precision. */
+int rdbSaveBinaryFloatValue(rio *rdb, float val) {
+ memrev32ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Like rdbLoadBinaryDoubleValue() but single precision. */
+int rdbLoadBinaryFloatValue(rio *rdb, float *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev32ifbe(val);
+ return 0;
+}
+
+/* Save the object type of object "o". */
+int rdbSaveObjectType(rio *rdb, robj *o) {
+ switch (o->type) {
+ case OBJ_STRING:
+ return rdbSaveType(rdb,RDB_TYPE_STRING);
+ case OBJ_LIST:
+ if (o->encoding == OBJ_ENCODING_QUICKLIST || o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb, RDB_TYPE_LIST_QUICKLIST_2);
+ else
+ serverPanic("Unknown list encoding");
+ case OBJ_SET:
+ if (o->encoding == OBJ_ENCODING_INTSET)
+ return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_SET);
+ else if (o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK);
+ else
+ serverPanic("Unknown set encoding");
+ case OBJ_ZSET:
+ if (o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_LISTPACK);
+ else if (o->encoding == OBJ_ENCODING_SKIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_2);
+ else
+ serverPanic("Unknown sorted set encoding");
+ case OBJ_HASH:
+ if (o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb,RDB_TYPE_HASH_LISTPACK);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_HASH);
+ else
+ serverPanic("Unknown hash encoding");
+ case OBJ_STREAM:
+ return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_3);
+ case OBJ_MODULE:
+ return rdbSaveType(rdb,RDB_TYPE_MODULE_2);
+ default:
+ serverPanic("Unknown object type");
+ }
+ return -1; /* avoid warning */
+}
+
+/* Use rdbLoadType() to load a TYPE in RDB format, but returns -1 if the
+ * type is not specifically a valid Object Type. */
+int rdbLoadObjectType(rio *rdb) {
+ int type;
+ if ((type = rdbLoadType(rdb)) == -1) return -1;
+ if (!rdbIsObjectType(type)) return -1;
+ return type;
+}
+
+/* This helper function serializes a consumer group Pending Entries List (PEL)
+ * into the RDB file. The 'nacks' argument tells the function whether to also
+ * persist the information about the not acknowledged messages, or to persist
+ * just the IDs: this is useful because for the global consumer group PEL
+ * we serialize the NACKs as well, but when serializing the local consumer
+ * PELs we just add the IDs, which will be resolved inside the global PEL to
+ * put a reference to the same structure. */
+ssize_t rdbSaveStreamPEL(rio *rdb, rax *pel, int nacks) {
+ ssize_t n, nwritten = 0;
+
+ /* Number of entries in the PEL. */
+ if ((n = rdbSaveLen(rdb,raxSize(pel))) == -1) return -1;
+ nwritten += n;
+
+ /* Save each entry. */
+ raxIterator ri;
+ raxStart(&ri,pel);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ /* We store IDs in raw form as 128 bit big endian numbers, like
+ * they are inside the radix tree key. */
+ if ((n = rdbWriteRaw(rdb,ri.key,sizeof(streamID))) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ if (nacks) {
+ streamNACK *nack = ri.data;
+ if ((n = rdbSaveMillisecondTime(rdb,nack->delivery_time)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,nack->delivery_count)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ /* We don't save the consumer name: we'll save the pending IDs
+ * for each consumer in the consumer PEL, and resolve the consumer
+ * at loading time. */
+ }
+ }
+ raxStop(&ri);
+ return nwritten;
+}
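+
+/* Resulting PEL layout (a sketch): [number of entries, rdbSaveLen format]
+ * followed, for each entry, by the 16 byte big endian stream ID and, only
+ * when 'nacks' is non-zero, by the 8 byte little endian delivery time in
+ * milliseconds and the delivery count in rdbSaveLen format. */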
+
+/* Serialize the consumers of a stream consumer group into the RDB. Helper
+ * function for the stream data type serialization. What we do here is
+ * persist, for each consumer, its metadata and its PEL. */
+size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) {
+ ssize_t n, nwritten = 0;
+
+ /* Number of consumers in this consumer group. */
+ if ((n = rdbSaveLen(rdb,raxSize(cg->consumers))) == -1) return -1;
+ nwritten += n;
+
+ /* Save each consumer. */
+ raxIterator ri;
+ raxStart(&ri,cg->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+
+ /* Consumer name. */
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Seen time. */
+ if ((n = rdbSaveMillisecondTime(rdb,consumer->seen_time)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Active time. */
+ if ((n = rdbSaveMillisecondTime(rdb,consumer->active_time)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Consumer PEL, without the ACKs (see last parameter of the function
+ * passed with value of 0), at loading time we'll lookup the ID
+ * in the consumer group global PEL and will put a reference in the
+ * consumer local PEL. */
+ if ((n = rdbSaveStreamPEL(rdb,consumer->pel,0)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ }
+ raxStop(&ri);
+ return nwritten;
+}
+
+/* Save a Redis object.
+ * Returns -1 on error, number of bytes written on success. */
+ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
+ ssize_t n = 0, nwritten = 0;
+
+ if (o->type == OBJ_STRING) {
+ /* Save a string value */
+ if ((n = rdbSaveStringObject(rdb,o)) == -1) return -1;
+ nwritten += n;
+ } else if (o->type == OBJ_LIST) {
+ /* Save a list value */
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
+
+ if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
+ nwritten += n;
+
+ while(node) {
+ if ((n = rdbSaveLen(rdb,node->container)) == -1) return -1;
+ nwritten += n;
+
+ if (quicklistNodeIsCompressed(node)) {
+ void *data;
+ size_t compress_len = quicklistGetLzf(node, &data);
+ if ((n = rdbSaveLzfBlob(rdb,data,compress_len,node->sz)) == -1) return -1;
+ nwritten += n;
+ } else {
+ if ((n = rdbSaveRawString(rdb,node->entry,node->sz)) == -1) return -1;
+ nwritten += n;
+ }
+ node = node->next;
+ }
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = o->ptr;
+
+ /* Save list listpack as a fake quicklist that only has a single node. */
+ if ((n = rdbSaveLen(rdb,1)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,QUICKLIST_NODE_CONTAINER_PACKED)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveRawString(rdb,lp,lpBytes(lp))) == -1) return -1;
+ nwritten += n;
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (o->type == OBJ_SET) {
+ /* Save a set value */
+ if (o->encoding == OBJ_ENCODING_HT) {
+ dict *set = o->ptr;
+ dictIterator *di = dictGetIterator(set);
+ dictEntry *de;
+
+ if ((n = rdbSaveLen(rdb,dictSize(set))) == -1) {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ nwritten += n;
+
+ while((de = dictNext(di)) != NULL) {
+ sds ele = dictGetKey(de);
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele)))
+ == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ nwritten += n;
+ }
+ dictReleaseIterator(di);
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
+ size_t l = intsetBlobLen((intset*)o->ptr);
+
+ if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
+ nwritten += n;
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ size_t l = lpBytes((unsigned char *)o->ptr);
+ if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1;
+ nwritten += n;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (o->type == OBJ_ZSET) {
+ /* Save a sorted set value */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ size_t l = lpBytes((unsigned char*)o->ptr);
+
+ if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
+ nwritten += n;
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = o->ptr;
+ zskiplist *zsl = zs->zsl;
+
+ if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1;
+ nwritten += n;
+
+ /* We save the skiplist elements from the greatest to the smallest
+ * (that's trivial since the elements are already ordered in the
+ * skiplist): this improves the load process, since the next loaded
+ * element will always be the smaller, so adding to the skiplist
+ * will always immediately stop at the head, making the insertion
+ * O(1) instead of O(log(N)). */
+ zskiplistNode *zn = zsl->tail;
+ while (zn != NULL) {
+ if ((n = rdbSaveRawString(rdb,
+ (unsigned char*)zn->ele,sdslen(zn->ele))) == -1)
+ {
+ return -1;
+ }
+ nwritten += n;
+ if ((n = rdbSaveBinaryDoubleValue(rdb,zn->score)) == -1)
+ return -1;
+ nwritten += n;
+ zn = zn->backward;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (o->type == OBJ_HASH) {
+ /* Save a hash value */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ size_t l = lpBytes((unsigned char*)o->ptr);
+
+ if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
+ nwritten += n;
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ dictIterator *di = dictGetIterator(o->ptr);
+ dictEntry *de;
+
+ if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ nwritten += n;
+
+ while((de = dictNext(di)) != NULL) {
+ sds field = dictGetKey(de);
+ sds value = dictGetVal(de);
+
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)field,
+ sdslen(field))) == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ nwritten += n;
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)value,
+ sdslen(value))) == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
+ nwritten += n;
+ }
+ dictReleaseIterator(di);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (o->type == OBJ_STREAM) {
+ /* Store how many listpacks we have inside the radix tree. */
+ stream *s = o->ptr;
+ rax *rax = s->rax;
+ if ((n = rdbSaveLen(rdb,raxSize(rax))) == -1) return -1;
+ nwritten += n;
+
+ /* Serialize all the listpacks inside the radix tree as they are,
+ * when loading back, we'll use the first entry of each listpack
+ * to insert it back into the radix tree. */
+ raxIterator ri;
+ raxStart(&ri,rax);
+ raxSeek(&ri,"^",NULL,0);
+ while (raxNext(&ri)) {
+ unsigned char *lp = ri.data;
+ size_t lp_bytes = lpBytes(lp);
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ if ((n = rdbSaveRawString(rdb,lp,lp_bytes)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ }
+ raxStop(&ri);
+
+ /* Save the number of elements inside the stream. We cannot obtain
+ * this easily later, since our macro nodes should be checked for
+ * number of items: not a great CPU / space tradeoff. */
+ if ((n = rdbSaveLen(rdb,s->length)) == -1) return -1;
+ nwritten += n;
+ /* Save the last entry ID. */
+ if ((n = rdbSaveLen(rdb,s->last_id.ms)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,s->last_id.seq)) == -1) return -1;
+ nwritten += n;
+ /* Save the first entry ID. */
+ if ((n = rdbSaveLen(rdb,s->first_id.ms)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,s->first_id.seq)) == -1) return -1;
+ nwritten += n;
+ /* Save the maximal tombstone ID. */
+ if ((n = rdbSaveLen(rdb,s->max_deleted_entry_id.ms)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,s->max_deleted_entry_id.seq)) == -1) return -1;
+ nwritten += n;
+ /* Save the offset. */
+ if ((n = rdbSaveLen(rdb,s->entries_added)) == -1) return -1;
+ nwritten += n;
+
+ /* The consumer groups and their clients are part of the stream
+ * type, so serialize every consumer group. */
+
+ /* Save the number of groups. */
+ size_t num_cgroups = s->cgroups ? raxSize(s->cgroups) : 0;
+ if ((n = rdbSaveLen(rdb,num_cgroups)) == -1) return -1;
+ nwritten += n;
+
+ if (num_cgroups) {
+ /* Serialize each consumer group. */
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *cg = ri.data;
+
+ /* Save the group name. */
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Last ID. */
+ if ((n = rdbSaveLen(rdb,cg->last_id.ms)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,cg->last_id.seq)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Save the group's logical reads counter. */
+ if ((n = rdbSaveLen(rdb,cg->entries_read)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Save the global PEL. */
+ if ((n = rdbSaveStreamPEL(rdb,cg->pel,1)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+
+ /* Save the consumers of this group. */
+ if ((n = rdbSaveStreamConsumers(rdb,cg)) == -1) {
+ raxStop(&ri);
+ return -1;
+ }
+ nwritten += n;
+ }
+ raxStop(&ri);
+ }
+ } else if (o->type == OBJ_MODULE) {
+ /* Save a module-specific value. */
+ RedisModuleIO io;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+
+ /* Write the "module" identifier as prefix, so that we'll be able
+ * to call the right module during loading. */
+ int retval = rdbSaveLen(rdb,mt->id);
+ if (retval == -1) return -1;
+ moduleInitIOContext(io,mt,rdb,key,dbid);
+ io.bytes += retval;
+
+ /* Then write the module-specific representation + EOF marker. */
+ mt->rdb_save(&io,mv->value);
+ retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
+ if (retval == -1)
+ io.error = 1;
+ else
+ io.bytes += retval;
+
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return io.error ? -1 : (ssize_t)io.bytes;
+ } else {
+ serverPanic("Unknown object type");
+ }
+ return nwritten;
+}
+
+/* Return the length the object will have on disk if saved with
+ * the rdbSaveObject() function. Currently we use a trick to get
+ * this length with very little changes to the code. In the future
+ * we could switch to a faster solution. */
+size_t rdbSavedObjectLen(robj *o, robj *key, int dbid) {
+ ssize_t len = rdbSaveObject(NULL,o,key,dbid);
+ serverAssertWithInfo(NULL,o,len != -1);
+ return len;
+}
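+
+/* The "trick" mentioned above is that rdbSaveObject() is called with a NULL
+ * rio: rdbWriteRaw() then skips the actual write and simply returns 'len',
+ * so the accumulated return value equals the number of bytes that a real
+ * save would produce, without writing anything. */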
+
+/* Save a key-value pair, with expire time, type, key, value.
+ * On error -1 is returned.
+ * On success if the key was actually saved 1 is returned. */
+int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, int dbid) {
+ int savelru = server.maxmemory_policy & MAXMEMORY_FLAG_LRU;
+ int savelfu = server.maxmemory_policy & MAXMEMORY_FLAG_LFU;
+
+ /* Save the expire time */
+ if (expiretime != -1) {
+ if (rdbSaveType(rdb,RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
+ if (rdbSaveMillisecondTime(rdb,expiretime) == -1) return -1;
+ }
+
+ /* Save the LRU info. */
+ if (savelru) {
+ uint64_t idletime = estimateObjectIdleTime(val);
+ idletime /= 1000; /* Using seconds is enough and requires less space.*/
+ if (rdbSaveType(rdb,RDB_OPCODE_IDLE) == -1) return -1;
+ if (rdbSaveLen(rdb,idletime) == -1) return -1;
+ }
+
+ /* Save the LFU info. */
+ if (savelfu) {
+ uint8_t buf[1];
+ buf[0] = LFUDecrAndReturn(val);
+ /* We can encode this in exactly two bytes: the opcode and an 8
+ * bit counter, since the frequency is logarithmic with a 0-255 range.
+ * Note that we do not store the halving time because to reset it
+ * a single time when loading does not affect the frequency much. */
+ if (rdbSaveType(rdb,RDB_OPCODE_FREQ) == -1) return -1;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ }
+
+ /* Save type, key, value */
+ if (rdbSaveObjectType(rdb,val) == -1) return -1;
+ if (rdbSaveStringObject(rdb,key) == -1) return -1;
+ if (rdbSaveObject(rdb,val,key,dbid) == -1) return -1;
+
+ /* Delay return if required (for testing) */
+ if (server.rdb_key_save_delay)
+ debugDelay(server.rdb_key_save_delay);
+
+ return 1;
+}
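+
+/* Serialized key-value pair layout (a sketch of what the function above
+ * emits): an optional [RDB_OPCODE_EXPIRETIME_MS][8 byte little endian ms]
+ * prefix, an optional [RDB_OPCODE_IDLE][idle seconds as a length] or
+ * [RDB_OPCODE_FREQ][1 byte LFU counter] hint depending on the maxmemory
+ * policy, then [object type byte][key as a string][value payload]. */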
+
+/* Save an AUX field. */
+ssize_t rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ ssize_t ret, len = 0;
+ if ((ret = rdbSaveType(rdb,RDB_OPCODE_AUX)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,key,keylen)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,val,vallen)) == -1) return -1;
+ len += ret;
+ return len;
+}
+
+/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
+ * with strlen(). */
+ssize_t rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
+}
+
+/* Wrapper for strlen(key) + integer type (up to long long range). */
+ssize_t rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[LONG_STR_SIZE];
+ int vlen = ll2string(buf,sizeof(buf),val);
+ return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
+}
+
+/* Save a few default AUX fields with information about the RDB generated. */
+int rdbSaveInfoAuxFields(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
+ int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
+ int aof_base = (rdbflags & RDBFLAGS_AOF_PREAMBLE) != 0;
+
+ /* Add a few fields about the state when the RDB was created. */
+ if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;
+
+ /* Handle saving options that generate aux fields. */
+ if (rsi) {
+ if (rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db)
+ == -1) return -1;
+ if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid)
+ == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset)
+ == -1) return -1;
+ }
+ if (rdbSaveAuxFieldStrInt(rdb, "aof-base", aof_base) == -1) return -1;
+ return 1;
+}
+
+ssize_t rdbSaveSingleModuleAux(rio *rdb, int when, moduleType *mt) {
+ /* Save a module-specific aux value. */
+ RedisModuleIO io;
+ int retval = 0;
+ moduleInitIOContext(io,mt,rdb,NULL,-1);
+
+ /* We save the AUX field header in a temporary buffer so we can support the aux_save2 API.
+ * If aux_save2 is used, the buffer will be flushed the first time the module performs
+ * a write operation to the RDB, and will be discarded in case there were no writes. */
+ rio aux_save_headers_rio;
+ rioInitWithBuffer(&aux_save_headers_rio, sdsempty());
+
+ if (rdbSaveType(&aux_save_headers_rio, RDB_OPCODE_MODULE_AUX) == -1) goto error;
+
+ /* Write the "module" identifier as prefix, so that we'll be able
+ * to call the right module during loading. */
+ if (rdbSaveLen(&aux_save_headers_rio,mt->id) == -1) goto error;
+
+ /* Write the 'when' so that we can provide it on loading. Add a UINT opcode
+ * for backwards compatibility: everything after the MT needs to be prefixed
+ * by an opcode. */
+ if (rdbSaveLen(&aux_save_headers_rio,RDB_MODULE_OPCODE_UINT) == -1) goto error;
+ if (rdbSaveLen(&aux_save_headers_rio,when) == -1) goto error;
+
+ /* Then write the module-specific representation + EOF marker. */
+ if (mt->aux_save2) {
+ io.pre_flush_buffer = aux_save_headers_rio.io.buffer.ptr;
+ mt->aux_save2(&io,when);
+ if (io.pre_flush_buffer) {
+ /* aux_save did not save any data to the RDB.
+ * We will avoid saving any data related to this aux type
+ * to allow loading this RDB if the module is not present. */
+ sdsfree(io.pre_flush_buffer);
+ io.pre_flush_buffer = NULL;
+ return 0;
+ }
+ } else {
+ /* Write headers now, aux_save does not do lazy saving of the headers. */
+ retval = rdbWriteRaw(rdb, aux_save_headers_rio.io.buffer.ptr, sdslen(aux_save_headers_rio.io.buffer.ptr));
+ if (retval == -1) goto error;
+ io.bytes += retval;
+ sdsfree(aux_save_headers_rio.io.buffer.ptr);
+ mt->aux_save(&io,when);
+ }
+ retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
+ serverAssert(!io.pre_flush_buffer);
+ if (retval == -1)
+ io.error = 1;
+ else
+ io.bytes += retval;
+
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ if (io.error)
+ return -1;
+ return io.bytes;
+error:
+ sdsfree(aux_save_headers_rio.io.buffer.ptr);
+ return -1;
+}
+
+ssize_t rdbSaveFunctions(rio *rdb) {
+ dict *functions = functionsLibGet();
+ dictIterator *iter = dictGetIterator(functions);
+ dictEntry *entry = NULL;
+ ssize_t written = 0;
+ ssize_t ret;
+ while ((entry = dictNext(iter))) {
+ if ((ret = rdbSaveType(rdb, RDB_OPCODE_FUNCTION2)) < 0) goto werr;
+ written += ret;
+ functionLibInfo *li = dictGetVal(entry);
+ if ((ret = rdbSaveRawString(rdb, (unsigned char *) li->code, sdslen(li->code))) < 0) goto werr;
+ written += ret;
+ }
+ dictReleaseIterator(iter);
+ return written;
+
+werr:
+ dictReleaseIterator(iter);
+ return -1;
+}
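+
+/* The functions section is therefore a sequence of
+ * [RDB_OPCODE_FUNCTION2][library source code as a string] pairs, one per
+ * loaded library, with no terminator of its own: the section simply ends
+ * when a different opcode is read. */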
+
+ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) {
+ dictIterator *di;
+ dictEntry *de;
+ ssize_t written = 0;
+ ssize_t res;
+ static long long info_updated_time = 0;
+ char *pname = (rdbflags & RDBFLAGS_AOF_PREAMBLE) ? "AOF rewrite" : "RDB";
+
+ redisDb *db = server.db + dbid;
+ dict *d = db->dict;
+ if (dictSize(d) == 0) return 0;
+ di = dictGetSafeIterator(d);
+
+ /* Write the SELECT DB opcode */
+ if ((res = rdbSaveType(rdb,RDB_OPCODE_SELECTDB)) < 0) goto werr;
+ written += res;
+ if ((res = rdbSaveLen(rdb, dbid)) < 0) goto werr;
+ written += res;
+
+ /* Write the RESIZE DB opcode. */
+ uint64_t db_size, expires_size;
+ db_size = dictSize(db->dict);
+ expires_size = dictSize(db->expires);
+ if ((res = rdbSaveType(rdb,RDB_OPCODE_RESIZEDB)) < 0) goto werr;
+ written += res;
+ if ((res = rdbSaveLen(rdb,db_size)) < 0) goto werr;
+ written += res;
+ if ((res = rdbSaveLen(rdb,expires_size)) < 0) goto werr;
+ written += res;
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ sds keystr = dictGetKey(de);
+ robj key, *o = dictGetVal(de);
+ long long expire;
+ size_t rdb_bytes_before_key = rdb->processed_bytes;
+
+ initStaticStringObject(key,keystr);
+ expire = getExpire(db,&key);
+ if ((res = rdbSaveKeyValuePair(rdb, &key, o, expire, dbid)) < 0) goto werr;
+ written += res;
+
+ /* In fork child process, we can try to release memory back to the
+ * OS and possibly avoid or decrease COW. We give the dismiss
+ * mechanism a hint about an estimated size of the object we stored. */
+ size_t dump_size = rdb->processed_bytes - rdb_bytes_before_key;
+ if (server.in_fork_child) dismissObject(o, dump_size);
+
+ /* Update child info every 1 second (approximately).
+ * In order to avoid calling mstime() on each iteration, we will
+ * check the diff every 1024 keys. */
+ if (((*key_counter)++ & 1023) == 0) {
+ long long now = mstime();
+ if (now - info_updated_time >= 1000) {
+ sendChildInfo(CHILD_INFO_TYPE_CURRENT_INFO, *key_counter, pname);
+ info_updated_time = now;
+ }
+ }
+ }
+
+ dictReleaseIterator(di);
+ return written;
+
+werr:
+ dictReleaseIterator(di);
+ return -1;
+}
+
+/* Produces a dump of the database in RDB format sending it to the specified
+ * Redis I/O channel. On success C_OK is returned, otherwise C_ERR
+ * is returned and part of the output, or all the output, can be
+ * missing because of I/O errors.
+ *
+ * When the function returns C_ERR and if 'error' is not NULL, the
+ * integer pointed by 'error' is set to the value of errno just after the I/O
+ * error. */
+int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
+ char magic[10];
+ uint64_t cksum;
+ long key_counter = 0;
+ int j;
+
+ if (server.rdb_checksum)
+ rdb->update_cksum = rioGenericUpdateChecksum;
+ snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION);
+ if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
+ if (rdbSaveInfoAuxFields(rdb,rdbflags,rsi) == -1) goto werr;
+ if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, REDISMODULE_AUX_BEFORE_RDB) == -1) goto werr;
+
+ /* save functions */
+ if (!(req & SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS) && rdbSaveFunctions(rdb) == -1) goto werr;
+
+ /* save all databases, skip this if we're in functions-only mode */
+ if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA)) {
+ for (j = 0; j < server.dbnum; j++) {
+ if (rdbSaveDb(rdb, j, rdbflags, &key_counter) == -1) goto werr;
+ }
+ }
+
+ if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, REDISMODULE_AUX_AFTER_RDB) == -1) goto werr;
+
+ /* EOF opcode */
+ if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;
+
+ /* CRC64 checksum. It will be zero if checksum computation is disabled, the
+ * loading code skips the check in this case. */
+ cksum = rdb->cksum;
+ memrev64ifbe(&cksum);
+ if (rioWrite(rdb,&cksum,8) == 0) goto werr;
+ return C_OK;
+
+werr:
+ if (error) *error = errno;
+ return C_ERR;
+}
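+
+/* Overall file layout produced by rdbSaveRio() (a sketch): the 9 byte magic
+ * "REDIS" + 4 digit RDB_VERSION, the aux fields ("redis-ver", "redis-bits",
+ * "ctime", "used-mem", optional replication info, "aof-base"), module AUX
+ * data (before-RDB), function libraries, one section per non-empty database
+ * (SELECTDB + RESIZEDB + key-value pairs), module AUX data (after-RDB), the
+ * EOF opcode and finally the 8 byte little endian CRC64 (zero when
+ * checksumming is disabled). */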
+
+/* This is just a wrapper to rdbSaveRio() that additionally adds a prefix
+ * and a suffix to the generated RDB dump. The prefix is:
+ *
+ * $EOF:<40 bytes unguessable hex string>\r\n
+ *
+ * While the suffix is the 40 bytes hex string we announced in the prefix.
+ * This way processes receiving the payload can understand when it ends
+ * without doing any processing of the content. */
+int rdbSaveRioWithEOFMark(int req, rio *rdb, int *error, rdbSaveInfo *rsi) {
+ char eofmark[RDB_EOF_MARK_SIZE];
+
+ startSaving(RDBFLAGS_REPLICATION);
+ getRandomHexChars(eofmark,RDB_EOF_MARK_SIZE);
+ if (error) *error = 0;
+ if (rioWrite(rdb,"$EOF:",5) == 0) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
+ if (rioWrite(rdb,"\r\n",2) == 0) goto werr;
+ if (rdbSaveRio(req,rdb,error,RDBFLAGS_NONE,rsi) == C_ERR) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
+ stopSaving(1);
+ return C_OK;
+
+werr: /* Write error. */
+ /* Set 'error' only if not already set by rdbSaveRio() call. */
+ if (error && *error == 0) *error = errno;
+ stopSaving(0);
+ return C_ERR;
+}
+
+static int rdbSaveInternal(int req, const char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+ rio rdb;
+ int error = 0;
+ int saved_errno;
+ char *err_op; /* For a detailed log */
+
+ FILE *fp = fopen(filename,"w");
+ if (!fp) {
+ saved_errno = errno;
+ char *str_err = strerror(errno);
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Failed opening the temp RDB file %s (in server root dir %s) "
+ "for saving: %s",
+ filename,
+ cwdp ? cwdp : "unknown",
+ str_err);
+ errno = saved_errno;
+ return C_ERR;
+ }
+
+ rioInitWithFile(&rdb,fp);
+
+ if (server.rdb_save_incremental_fsync) {
+ rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);
+ if (!(rdbflags & RDBFLAGS_KEEP_CACHE)) rioSetReclaimCache(&rdb,1);
+ }
+
+ if (rdbSaveRio(req,&rdb,&error,rdbflags,rsi) == C_ERR) {
+ errno = error;
+ err_op = "rdbSaveRio";
+ goto werr;
+ }
+
+ /* Make sure data will not remain on the OS's output buffers */
+ if (fflush(fp)) { err_op = "fflush"; goto werr; }
+ if (fsync(fileno(fp))) { err_op = "fsync"; goto werr; }
+ if (!(rdbflags & RDBFLAGS_KEEP_CACHE) && reclaimFilePageCache(fileno(fp), 0, 0) == -1) {
+ serverLog(LL_NOTICE,"Unable to reclaim cache after saving RDB: %s", strerror(errno));
+ }
+ if (fclose(fp)) { fp = NULL; err_op = "fclose"; goto werr; }
+
+ return C_OK;
+
+werr:
+ saved_errno = errno;
+ serverLog(LL_WARNING,"Write error while saving DB to the disk(%s): %s", err_op, strerror(errno));
+ if (fp) fclose(fp);
+ unlink(filename);
+ errno = saved_errno;
+ return C_ERR;
+}
+
+/* Save DB to the file. Similar to rdbSave() but this function won't use a
+ * temporary file and won't update the metrics. */
+int rdbSaveToFile(const char *filename) {
+ startSaving(RDBFLAGS_NONE);
+
+ if (rdbSaveInternal(SLAVE_REQ_NONE,filename,NULL,RDBFLAGS_NONE) != C_OK) {
+ int saved_errno = errno;
+ stopSaving(0);
+ errno = saved_errno;
+ return C_ERR;
+ }
+
+ stopSaving(1);
+ return C_OK;
+}
+
+/* Save the DB on disk. Return C_ERR on error, C_OK on success. */
+int rdbSave(int req, char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ char tmpfile[256];
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+
+ startSaving(RDBFLAGS_NONE);
+ snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
+
+ if (rdbSaveInternal(req,tmpfile,rsi,rdbflags) != C_OK) {
+ stopSaving(0);
+ return C_ERR;
+ }
+
+ /* Use RENAME to make sure the DB file is changed atomically only
+ * if the generated DB file is ok. */
+ if (rename(tmpfile,filename) == -1) {
+ char *str_err = strerror(errno);
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Error moving temp DB file %s on the final "
+ "destination %s (in server root dir %s): %s",
+ tmpfile,
+ filename,
+ cwdp ? cwdp : "unknown",
+ str_err);
+ unlink(tmpfile);
+ stopSaving(0);
+ return C_ERR;
+ }
+ if (fsyncFileDir(filename) != 0) {
+ serverLog(LL_WARNING,
+ "Failed to fsync directory while saving DB: %s", strerror(errno));
+ stopSaving(0);
+ return C_ERR;
+ }
+
+ serverLog(LL_NOTICE,"DB saved on disk");
+ server.dirty = 0;
+ server.lastsave = time(NULL);
+ server.lastbgsave_status = C_OK;
+ stopSaving(1);
+ return C_OK;
+}
+
+int rdbSaveBackground(int req, char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ pid_t childpid;
+
+ if (hasActiveChildProcess()) return C_ERR;
+ server.stat_rdb_saves++;
+
+ server.dirty_before_bgsave = server.dirty;
+ server.lastbgsave_try = time(NULL);
+
+ if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
+ int retval;
+
+ /* Child */
+ redisSetProcTitle("redis-rdb-bgsave");
+ redisSetCpuAffinity(server.bgsave_cpulist);
+ retval = rdbSave(req, filename,rsi,rdbflags);
+ if (retval == C_OK) {
+ sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
+ }
+ exitFromChild((retval == C_OK) ? 0 : 1);
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ server.lastbgsave_status = C_ERR;
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+ return C_ERR;
+ }
+ serverLog(LL_NOTICE,"Background saving started by pid %ld",(long) childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_type = RDB_CHILD_TYPE_DISK;
+ return C_OK;
+ }
+ return C_OK; /* unreached */
+}
+
+/* Note that we may call this function from the signal handler 'sigShutdownHandler',
+ * so we need to guarantee that all functions we call are async-signal-safe.
+ * If we call this function from a signal handler, we won't call bg_unlink, which
+ * is not async-signal-safe. */
+void rdbRemoveTempFile(pid_t childpid, int from_signal) {
+ char tmpfile[256];
+ char pid[32];
+
+ /* Generate temp rdb file name using async-signal safe functions. */
+ ll2string(pid, sizeof(pid), childpid);
+ redis_strlcpy(tmpfile, "temp-", sizeof(tmpfile));
+ redis_strlcat(tmpfile, pid, sizeof(tmpfile));
+ redis_strlcat(tmpfile, ".rdb", sizeof(tmpfile));
+
+ if (from_signal) {
+ /* bg_unlink is not async-signal-safe, but in this case we don't really
+ * need to close the fd, it'll be released when the process exits. */
+ int fd = open(tmpfile, O_RDONLY|O_NONBLOCK);
+ UNUSED(fd);
+ unlink(tmpfile);
+ } else {
+ bg_unlink(tmpfile);
+ }
+}
+
+/* This function is called by rdbLoadObject() when the code is in RDB-check
+ * mode and we find a module value of type 2 that can be parsed without
+ * the need of the actual module. The value is parsed for errors, finally
+ * a dummy redis object is returned just to conform to the API. */
+robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) {
+ uint64_t opcode;
+ while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) {
+ if (opcode == RDB_MODULE_OPCODE_SINT ||
+ opcode == RDB_MODULE_OPCODE_UINT)
+ {
+ uint64_t len;
+ if (rdbLoadLenByRef(rdb,NULL,&len) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading integer from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_STRING) {
+ robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+ if (o == NULL) {
+ rdbReportCorruptRDB(
+ "Error reading string from module %s value", modulename);
+ }
+ decrRefCount(o);
+ } else if (opcode == RDB_MODULE_OPCODE_FLOAT) {
+ float val;
+ if (rdbLoadBinaryFloatValue(rdb,&val) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading float from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) {
+ double val;
+ if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) {
+ rdbReportCorruptRDB(
+ "Error reading double from module %s value", modulename);
+ }
+ }
+ }
+ return createStringObject("module-dummy-value",18);
+}
+
+/* callback for ziplistPairsConvertAndValidateIntegrity.
+ * Check that the ziplist doesn't have duplicate hash field names.
+ * The ziplist element pointed to by 'p' will be converted and stored into a listpack. */
+static int _ziplistPairsEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+
+ struct {
+ long count;
+ dict *fields;
+ unsigned char **lp;
+ } *data = userdata;
+
+ if (data->fields == NULL) {
+ data->fields = dictCreate(&hashDictType);
+ dictExpand(data->fields, head_count/2);
+ }
+
+ if (!ziplistGet(p, &str, &slen, &vll))
+ return 0;
+
+ /* Even records are field names, add to dict and check that's not a dup */
+ if (((data->count) & 1) == 0) {
+ sds field = str? sdsnewlen(str, slen): sdsfromlonglong(vll);
+ if (dictAdd(data->fields, field, NULL) != DICT_OK) {
+ /* Duplicate, return an error */
+ sdsfree(field);
+ return 0;
+ }
+ }
+
+ if (str) {
+ *(data->lp) = lpAppend(*(data->lp), (unsigned char*)str, slen);
+ } else {
+ *(data->lp) = lpAppendInteger(*(data->lp), vll);
+ }
+
+ (data->count)++;
+ return 1;
+}
+
+/* Validate the integrity of the data structure while converting it to
+ * listpack and storing it at 'lp'.
+ * The function is safe to call on non-validated ziplists, it returns 0
+ * when it encounters an integrity validation issue. */
+int ziplistPairsConvertAndValidateIntegrity(unsigned char *zl, size_t size, unsigned char **lp) {
+ /* Keep track of the field names to locate duplicate ones */
+ struct {
+ long count;
+ dict *fields; /* Initialisation at the first callback. */
+ unsigned char **lp;
+ } data = {0, NULL, lp};
+
+ int ret = ziplistValidateIntegrity(zl, size, 1, _ziplistPairsEntryConvertAndValidate, &data);
+
+ /* make sure we have an even number of records. */
+ if (data.count & 1)
+ ret = 0;
+
+ if (data.fields) dictRelease(data.fields);
+ return ret;
+}
+
+/* callback for ziplistValidateIntegrity.
+ * The ziplist element pointed by 'p' will be converted and stored into listpack. */
+static int _ziplistEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ UNUSED(head_count);
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+ unsigned char **lp = (unsigned char**)userdata;
+
+ if (!ziplistGet(p, &str, &slen, &vll)) return 0;
+
+ if (str)
+ *lp = lpAppend(*lp, (unsigned char*)str, slen);
+ else
+ *lp = lpAppendInteger(*lp, vll);
+
+ return 1;
+}
+
+/* callback for ziplistValidateIntegrity.
+ * The ziplist element pointed by 'p' will be converted and stored into quicklist. */
+static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int head_count, void *userdata) {
+ UNUSED(head_count);
+ unsigned char *str;
+ unsigned int slen;
+ long long vll;
+ char longstr[32] = {0};
+ quicklist *ql = (quicklist*)userdata;
+
+ if (!ziplistGet(p, &str, &slen, &vll)) return 0;
+ if (!str) {
+ /* Write the longval as a string so we can re-add it */
+ slen = ll2string(longstr, sizeof(longstr), vll);
+ str = (unsigned char *)longstr;
+ }
+ quicklistPushTail(ql, str, slen);
+ return 1;
+}
+
+/* callback to check that the listpack doesn't have duplicate records */
+static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+ struct {
+ int pairs;
+ long count;
+ dict *fields;
+ } *data = userdata;
+
+ if (data->fields == NULL) {
+ data->fields = dictCreate(&hashDictType);
+ dictExpand(data->fields, data->pairs ? head_count/2 : head_count);
+ }
+
+ /* If we're checking pairs, then even records are field names. Otherwise
+ * we're checking all elements. Add to dict and check that's not a dup */
+ if (!data->pairs || ((data->count) & 1) == 0) {
+ unsigned char *str;
+ int64_t slen;
+ unsigned char buf[LP_INTBUF_SIZE];
+
+ str = lpGet(p, &slen, buf);
+ sds field = sdsnewlen(str, slen);
+ if (dictAdd(data->fields, field, NULL) != DICT_OK) {
+ /* Duplicate, return an error */
+ sdsfree(field);
+ return 0;
+ }
+ }
+
+ (data->count)++;
+ return 1;
+}
+
+/* Validate the integrity of the listpack structure.
+ * When `deep` is 0, only the integrity of the header is validated.
+ * When `deep` is 1, we scan all the entries one by one.
+ * When `pairs` is 0, all elements need to be unique (it's a set).
+ * When `pairs` is 1, the field names (every other element) need to be
+ * unique (it's a key-value map). */
+int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) {
+ if (!deep)
+ return lpValidateIntegrity(lp, size, 0, NULL, NULL);
+
+ /* Keep track of the field names to locate duplicate ones */
+ struct {
+ int pairs;
+ long count;
+ dict *fields; /* Initialisation at the first callback. */
+ } data = {pairs, 0, NULL};
+
+ int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data);
+
+ /* make sure we have an even number of records. */
+ if (pairs && data.count & 1)
+ ret = 0;
+
+ if (data.fields) dictRelease(data.fields);
+ return ret;
+}
+
+/* Load a Redis object of the specified type from the specified file.
+ * On success a newly allocated object is returned, otherwise NULL.
+ * When the function returns NULL and 'error' is not NULL, the
+ * integer pointed to by 'error' is set to the type of error that occurred. */
+robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
+ robj *o = NULL, *ele, *dec;
+ uint64_t len;
+ unsigned int i;
+
+ /* Set default error of load object, it will be set to 0 on success. */
+ if (error) *error = RDB_LOAD_ERR_OTHER;
+
+ int deep_integrity_validation = server.sanitize_dump_payload == SANITIZE_DUMP_YES;
+ if (server.sanitize_dump_payload == SANITIZE_DUMP_CLIENTS) {
+        /* Skip sanitization when loading an RDB or AOF, or when handling a
+         * RESTORE command from either the master or a client using an ACL
+         * user with the skip-sanitize-payload flag. */
+ int skip = server.loading ||
+ (server.current_client && (server.current_client->flags & CLIENT_MASTER));
+ if (!skip && server.current_client && server.current_client->user)
+ skip = !!(server.current_client->user->flags & USER_FLAG_SANITIZE_PAYLOAD_SKIP);
+ deep_integrity_validation = !skip;
+ }
+
+ if (rdbtype == RDB_TYPE_STRING) {
+ /* Read string value */
+ if ((o = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
+ o = tryObjectEncodingEx(o, 0);
+ } else if (rdbtype == RDB_TYPE_LIST) {
+ /* Read list value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_listpack_size,
+ server.list_compress_depth);
+
+ /* Load every single element of the list */
+ while(len--) {
+ if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ dec = getDecodedObject(ele);
+ size_t len = sdslen(dec->ptr);
+ quicklistPushTail(o->ptr, dec->ptr, len);
+ decrRefCount(dec);
+ decrRefCount(ele);
+ }
+
+ listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
+ } else if (rdbtype == RDB_TYPE_SET) {
+ /* Read Set value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ /* Use a regular set when there are too many entries. */
+ size_t max_entries = server.set_max_intset_entries;
+ if (max_entries >= 1<<30) max_entries = 1<<30;
+ if (len > max_entries) {
+ o = createSetObject();
+ /* It's faster to expand the dict to the right size asap in order
+ * to avoid rehashing */
+ if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr,len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ o = createIntsetObject();
+ }
+
+ /* Load every single element of the set */
+ size_t maxelelen = 0, sumelelen = 0;
+ for (i = 0; i < len; i++) {
+ long long llval;
+ sds sdsele;
+
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ size_t elelen = sdslen(sdsele);
+ sumelelen += elelen;
+ if (elelen > maxelelen) maxelelen = elelen;
+
+ if (o->encoding == OBJ_ENCODING_INTSET) {
+ /* Fetch integer value from element. */
+ if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) {
+ uint8_t success;
+ o->ptr = intsetAdd(o->ptr,llval,&success);
+ if (!success) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else if (setTypeSize(o) < server.set_max_listpack_entries &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, sumelelen))
+ {
+ /* We checked if it's safe to add one large element instead
+ * of many small ones. It's OK since lpSafeToAdd doesn't
+ * care about individual elements, only the total size. */
+ setTypeConvert(o, OBJ_ENCODING_LISTPACK);
+ } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* This will also be called when the set was just converted
+ * to a listpack encoded set. */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ if (setTypeSize(o) < server.set_max_listpack_entries &&
+ elelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(o->ptr, elelen))
+ {
+ unsigned char *p = lpFirst(o->ptr);
+ if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
+ } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+ (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* This will also be called when the set was just converted
+ * to a regular hash table encoded set. */
+ if (o->encoding == OBJ_ENCODING_HT) {
+ if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else {
+ sdsfree(sdsele);
+ }
+ }
+ } else if (rdbtype == RDB_TYPE_ZSET_2 || rdbtype == RDB_TYPE_ZSET) {
+ /* Read sorted set value. */
+ uint64_t zsetlen;
+ size_t maxelelen = 0, totelelen = 0;
+ zset *zs;
+
+ if ((zsetlen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (zsetlen == 0) goto emptykey;
+
+ o = createZsetObject();
+ zs = o->ptr;
+
+ if (zsetlen > DICT_HT_INITIAL_SIZE && dictTryExpand(zs->dict,zsetlen) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)zsetlen);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load every single element of the sorted set. */
+ while(zsetlen--) {
+ sds sdsele;
+ double score;
+ zskiplistNode *znode;
+
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (rdbtype == RDB_TYPE_ZSET_2) {
+ if (rdbLoadBinaryDoubleValue(rdb,&score) == -1) {
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ } else {
+ if (rdbLoadDoubleValue(rdb,&score) == -1) {
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ }
+
+ if (isnan(score)) {
+ rdbReportCorruptRDB("Zset with NAN score detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+
+ /* Don't care about integer-encoded strings. */
+ if (sdslen(sdsele) > maxelelen) maxelelen = sdslen(sdsele);
+ totelelen += sdslen(sdsele);
+
+ znode = zslInsert(zs->zsl,score,sdsele);
+ if (dictAdd(zs->dict,sdsele,&znode->score) != DICT_OK) {
+ rdbReportCorruptRDB("Duplicate zset fields detected");
+ decrRefCount(o);
+ /* no need to free 'sdsele', will be released by zslFree together with 'o' */
+ return NULL;
+ }
+ }
+
+ /* Convert *after* loading, since sorted sets are not stored ordered. */
+ if (zsetLength(o) <= server.zset_max_listpack_entries &&
+ maxelelen <= server.zset_max_listpack_value &&
+ lpSafeToAdd(NULL, totelelen))
+ {
+ zsetConvert(o,OBJ_ENCODING_LISTPACK);
+ }
+ } else if (rdbtype == RDB_TYPE_HASH) {
+ uint64_t len;
+ int ret;
+ sds field, value;
+ dict *dupSearchDict = NULL;
+
+ len = rdbLoadLen(rdb, NULL);
+ if (len == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createHashObject();
+
+ /* Too many entries? Use a hash table right from the start. */
+ if (len > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ else if (deep_integrity_validation) {
+            /* In this mode, we need to guarantee that the server won't crash
+             * later when the listpack is converted to a dict.
+             * Create a set (dict with no values) to use for a dup search.
+             * We can dismiss it as soon as we convert the listpack to a hash
+             * table. */
+ dupSearchDict = dictCreate(&hashDictType);
+ }
+
+        /* Load every field and value into the listpack */
+ while (o->encoding == OBJ_ENCODING_LISTPACK && len > 0) {
+ len--;
+ /* Load raw strings */
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ return NULL;
+ }
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ sdsfree(field);
+ decrRefCount(o);
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ return NULL;
+ }
+
+ if (dupSearchDict) {
+ sds field_dup = sdsdup(field);
+ if (dictAdd(dupSearchDict, field_dup, NULL) != DICT_OK) {
+ rdbReportCorruptRDB("Hash with dup elements");
+ dictRelease(dupSearchDict);
+ decrRefCount(o);
+ sdsfree(field_dup);
+ sdsfree(field);
+ sdsfree(value);
+ return NULL;
+ }
+ }
+
+ /* Convert to hash table if size threshold is exceeded */
+ if (sdslen(field) > server.hash_max_listpack_value ||
+ sdslen(value) > server.hash_max_listpack_value ||
+ !lpSafeToAdd(o->ptr, sdslen(field)+sdslen(value)))
+ {
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ ret = dictAdd((dict*)o->ptr, field, value);
+ if (ret == DICT_ERR) {
+ rdbReportCorruptRDB("Duplicate hash fields detected");
+ if (dupSearchDict) dictRelease(dupSearchDict);
+ sdsfree(value);
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+ break;
+ }
+
+ /* Add pair to listpack */
+ o->ptr = lpAppend(o->ptr, (unsigned char*)field, sdslen(field));
+ o->ptr = lpAppend(o->ptr, (unsigned char*)value, sdslen(value));
+
+ sdsfree(field);
+ sdsfree(value);
+ }
+
+ if (dupSearchDict) {
+ /* We no longer need this, from now on the entries are added
+ * to a dict so the check is performed implicitly. */
+ dictRelease(dupSearchDict);
+ dupSearchDict = NULL;
+ }
+
+ if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE) {
+ if (dictTryExpand(o->ptr,len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* Load remaining fields and values into the hash table */
+ while (o->encoding == OBJ_ENCODING_HT && len > 0) {
+ len--;
+ /* Load encoded strings */
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ decrRefCount(o);
+ return NULL;
+ }
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Add pair to hash table */
+ ret = dictAdd((dict*)o->ptr, field, value);
+ if (ret == DICT_ERR) {
+ rdbReportCorruptRDB("Duplicate hash fields detected");
+ sdsfree(value);
+ sdsfree(field);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ /* All pairs should be read by now */
+ serverAssert(len == 0);
+ } else if (rdbtype == RDB_TYPE_LIST_QUICKLIST || rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if (len == 0) goto emptykey;
+
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_listpack_size,
+ server.list_compress_depth);
+ uint64_t container = QUICKLIST_NODE_CONTAINER_PACKED;
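+        /* The container type defaults to PACKED: the legacy
+         * RDB_TYPE_LIST_QUICKLIST format has no per-node container byte,
+         * while RDB_TYPE_LIST_QUICKLIST_2 reads it for each node below. */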
+ while (len--) {
+ unsigned char *lp;
+ size_t encoded_len;
+
+ if (rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ if ((container = rdbLoadLen(rdb,NULL)) == RDB_LENERR) {
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (container != QUICKLIST_NODE_CONTAINER_PACKED && container != QUICKLIST_NODE_CONTAINER_PLAIN) {
+ rdbReportCorruptRDB("Quicklist integrity check failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+
+ unsigned char *data =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len);
+ if (data == NULL || (encoded_len == 0)) {
+ zfree(data);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (container == QUICKLIST_NODE_CONTAINER_PLAIN) {
+ quicklistAppendPlainNode(o->ptr, data, encoded_len);
+ continue;
+ }
+
+ if (rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {
+ lp = data;
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrity(lp, encoded_len, deep_integrity_validation, NULL, NULL)) {
+ rdbReportCorruptRDB("Listpack integrity check failed.");
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+ } else {
+ lp = lpNew(encoded_len);
+ if (!ziplistValidateIntegrity(data, encoded_len, 1,
+ _ziplistEntryConvertAndValidate, &lp))
+ {
+ rdbReportCorruptRDB("Ziplist integrity check failed.");
+ decrRefCount(o);
+ zfree(data);
+ zfree(lp);
+ return NULL;
+ }
+ zfree(data);
+ lp = lpShrinkToFit(lp);
+ }
+
+            /* Silently skip empty listpacks; if we end up with an empty quicklist we'll fail later. */
+ if (lpLength(lp) == 0) {
+ zfree(lp);
+ continue;
+ } else {
+ quicklistAppendListpack(o->ptr, lp);
+ }
+ }
+
+ if (quicklistCount(o->ptr) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
+ } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
+ rdbtype == RDB_TYPE_LIST_ZIPLIST ||
+ rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_SET_LISTPACK ||
+ rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
+ rdbtype == RDB_TYPE_ZSET_LISTPACK ||
+ rdbtype == RDB_TYPE_HASH_ZIPLIST ||
+ rdbtype == RDB_TYPE_HASH_LISTPACK)
+ {
+ size_t encoded_len;
+ unsigned char *encoded =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len);
+ if (encoded == NULL) return NULL;
+
+ o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */
+
+        /* Fix the object encoding, and make sure to convert the encoded
+         * data type into the base type if, according to the current
+         * configuration, there are too many elements in the encoded data
+         * type. Note that we only check the length and not the max element
+         * size, as that would be an O(N) scan. Eventually everything will
+         * get converted anyway. */
+ switch(rdbtype) {
+ case RDB_TYPE_HASH_ZIPMAP:
+ /* Since we don't keep zipmaps anymore, the rdb loading for these
+ * is O(n) anyway, use `deep` validation. */
+ if (!zipmapValidateIntegrity(encoded, encoded_len, 1)) {
+ rdbReportCorruptRDB("Zipmap integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+            /* Convert to a listpack-encoded hash. This code can be removed
+             * when loading dumps created by Redis 2.4 is no longer supported. */
+ {
+ unsigned char *lp = lpNew(0);
+ unsigned char *zi = zipmapRewind(o->ptr);
+ unsigned char *fstr, *vstr;
+ unsigned int flen, vlen;
+ unsigned int maxlen = 0;
+ dict *dupSearchDict = dictCreate(&hashDictType);
+
+ while ((zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen)) != NULL) {
+ if (flen > maxlen) maxlen = flen;
+ if (vlen > maxlen) maxlen = vlen;
+
+ /* search for duplicate records */
+ sds field = sdstrynewlen(fstr, flen);
+ if (!field || dictAdd(dupSearchDict, field, NULL) != DICT_OK ||
+ !lpSafeToAdd(lp, (size_t)flen + vlen)) {
+ rdbReportCorruptRDB("Hash zipmap with dup elements, or big length (%u)", flen);
+ dictRelease(dupSearchDict);
+ sdsfree(field);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ lp = lpAppend(lp, fstr, flen);
+ lp = lpAppend(lp, vstr, vlen);
+ }
+
+ dictRelease(dupSearchDict);
+ zfree(o->ptr);
+ o->ptr = lp;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries ||
+ maxlen > server.hash_max_listpack_value)
+ {
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ }
+ }
+ break;
+ case RDB_TYPE_LIST_ZIPLIST:
+ {
+ quicklist *ql = quicklistNew(server.list_max_listpack_size,
+ server.list_compress_depth);
+
+ if (!ziplistValidateIntegrity(encoded, encoded_len, 1,
+ _listZiplistEntryConvertAndValidate, ql))
+ {
+ rdbReportCorruptRDB("List ziplist integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ quicklistRelease(ql);
+ return NULL;
+ }
+
+ if (ql->len == 0) {
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ quicklistRelease(ql);
+ goto emptykey;
+ }
+
+ zfree(encoded);
+ o->type = OBJ_LIST;
+ o->ptr = ql;
+ o->encoding = OBJ_ENCODING_QUICKLIST;
+ break;
+ }
+ case RDB_TYPE_SET_INTSET:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!intsetValidateIntegrity(encoded, encoded_len, deep_integrity_validation)) {
+ rdbReportCorruptRDB("Intset integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_INTSET;
+ if (intsetLen(o->ptr) > server.set_max_intset_entries)
+ setTypeConvert(o,OBJ_ENCODING_HT);
+ break;
+ case RDB_TYPE_SET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) {
+ rdbReportCorruptRDB("Set listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+
+ if (setTypeSize(o) == 0) {
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ goto emptykey;
+ }
+ if (setTypeSize(o) > server.set_max_listpack_entries)
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ break;
+ case RDB_TYPE_ZSET_ZIPLIST:
+ {
+ unsigned char *lp = lpNew(encoded_len);
+ if (!ziplistPairsConvertAndValidateIntegrity(encoded, encoded_len, &lp)) {
+ rdbReportCorruptRDB("Zset ziplist integrity check failed.");
+ zfree(lp);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ zfree(o->ptr);
+ o->type = OBJ_ZSET;
+ o->ptr = lp;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (zsetLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (zsetLength(o) > server.zset_max_listpack_entries)
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
+ else
+ o->ptr = lpShrinkToFit(o->ptr);
+ break;
+ }
+ case RDB_TYPE_ZSET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
+ rdbReportCorruptRDB("Zset listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_ZSET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (zsetLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (zsetLength(o) > server.zset_max_listpack_entries)
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
+ break;
+ case RDB_TYPE_HASH_ZIPLIST:
+ {
+ unsigned char *lp = lpNew(encoded_len);
+ if (!ziplistPairsConvertAndValidateIntegrity(encoded, encoded_len, &lp)) {
+ rdbReportCorruptRDB("Hash ziplist integrity check failed.");
+ zfree(lp);
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+
+ zfree(o->ptr);
+ o->ptr = lp;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (hashTypeLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ else
+ o->ptr = lpShrinkToFit(o->ptr);
+ break;
+ }
+ case RDB_TYPE_HASH_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
+ rdbReportCorruptRDB("Hash listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (hashTypeLength(o) == 0) {
+ decrRefCount(o);
+ goto emptykey;
+ }
+
+ if (hashTypeLength(o) > server.hash_max_listpack_entries)
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ break;
+ default:
+ /* totally unreachable */
+ rdbReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
+ break;
+ }
+ } else if (rdbtype == RDB_TYPE_STREAM_LISTPACKS ||
+ rdbtype == RDB_TYPE_STREAM_LISTPACKS_2 ||
+ rdbtype == RDB_TYPE_STREAM_LISTPACKS_3)
+ {
+ o = createStreamObject();
+ stream *s = o->ptr;
+ uint64_t listpacks = rdbLoadLen(rdb,NULL);
+ if (listpacks == RDB_LENERR) {
+ rdbReportReadError("Stream listpacks len loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+
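+        /* The stream is serialized as: the rax listpacks first, then the
+         * stream metadata (length and IDs), and finally the consumer groups
+         * with their PELs and consumers. */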
+ while(listpacks--) {
+ /* Get the master ID, the one we'll use as key of the radix tree
+ * node: the entries inside the listpack itself are delta-encoded
+             * relative to this ID. */
+ sds nodekey = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (nodekey == NULL) {
+ rdbReportReadError("Stream master ID loading failed: invalid encoding or I/O error.");
+ decrRefCount(o);
+ return NULL;
+ }
+ if (sdslen(nodekey) != sizeof(streamID)) {
+ rdbReportCorruptRDB("Stream node key entry is not the "
+ "size of a stream ID");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load the listpack. */
+ size_t lp_size;
+ unsigned char *lp =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&lp_size);
+ if (lp == NULL) {
+ rdbReportReadError("Stream listpacks loading failed.");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ return NULL;
+ }
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!streamValidateListpackIntegrity(lp, lp_size, deep_integrity_validation)) {
+ rdbReportCorruptRDB("Stream listpack integrity check failed.");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+
+ unsigned char *first = lpFirst(lp);
+ if (first == NULL) {
+ /* Serialized listpacks should never be empty, since on
+ * deletion we should remove the radix tree key if the
+ * resulting listpack is empty. */
+ rdbReportCorruptRDB("Empty listpack inside stream");
+ sdsfree(nodekey);
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+
+ /* Insert the key in the radix tree. */
+ int retval = raxTryInsert(s->rax,
+ (unsigned char*)nodekey,sizeof(streamID),lp,NULL);
+ sdsfree(nodekey);
+ if (!retval) {
+ rdbReportCorruptRDB("Listpack re-added with existing key");
+ decrRefCount(o);
+ zfree(lp);
+ return NULL;
+ }
+ }
+ /* Load total number of items inside the stream. */
+ s->length = rdbLoadLen(rdb,NULL);
+
+ /* Load the last entry ID. */
+ s->last_id.ms = rdbLoadLen(rdb,NULL);
+ s->last_id.seq = rdbLoadLen(rdb,NULL);
+
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {
+ /* Load the first entry ID. */
+ s->first_id.ms = rdbLoadLen(rdb,NULL);
+ s->first_id.seq = rdbLoadLen(rdb,NULL);
+
+ /* Load the maximal deleted entry ID. */
+ s->max_deleted_entry_id.ms = rdbLoadLen(rdb,NULL);
+ s->max_deleted_entry_id.seq = rdbLoadLen(rdb,NULL);
+
+ /* Load the offset. */
+ s->entries_added = rdbLoadLen(rdb,NULL);
+ } else {
+ /* During migration the offset can be initialized to the stream's
+ * length. At this point, we also don't care about tombstones
+ * because CG offsets will be later initialized as well. */
+ s->max_deleted_entry_id.ms = 0;
+ s->max_deleted_entry_id.seq = 0;
+ s->entries_added = s->length;
+
+ /* Since the rax is already loaded, we can find the first entry's
+ * ID. */
+ streamGetEdgeID(s,1,1,&s->first_id);
+ }
+
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream object metadata loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (s->length && !raxSize(s->rax)) {
+ rdbReportCorruptRDB("Stream length inconsistent with rax entries");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Consumer groups loading */
+ uint64_t cgroups_count = rdbLoadLen(rdb,NULL);
+ if (cgroups_count == RDB_LENERR) {
+ rdbReportReadError("Stream cgroup count loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(cgroups_count--) {
+ /* Get the consumer group name and ID. We can then create the
+ * consumer group ASAP and populate its structure as
+ * we read more data. */
+ streamID cg_id;
+ sds cgname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cgname == NULL) {
+ rdbReportReadError(
+ "Error reading the consumer group name from Stream");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ cg_id.ms = rdbLoadLen(rdb,NULL);
+ cg_id.seq = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream cgroup ID loading failed.");
+ sdsfree(cgname);
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Load group offset. */
+ uint64_t cg_offset;
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {
+ cg_offset = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream cgroup offset loading failed.");
+ sdsfree(cgname);
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ cg_offset = streamEstimateDistanceFromFirstEverEntry(s,&cg_id);
+ }
+
+ streamCG *cgroup = streamCreateCG(s,cgname,sdslen(cgname),&cg_id,cg_offset);
+ if (cgroup == NULL) {
+ rdbReportCorruptRDB("Duplicated consumer group name %s",
+ cgname);
+ decrRefCount(o);
+ sdsfree(cgname);
+ return NULL;
+ }
+ sdsfree(cgname);
+
+            /* Load the global PEL for this consumer group, but don't yet
+             * populate the NACK structures with the message owner, since
+             * the consumers of this group and their messages are read in a
+             * later step. For now leave them unresolved and populate them
+             * later. */
+ uint64_t pel_size = rdbLoadLen(rdb,NULL);
+ if (pel_size == RDB_LENERR) {
+ rdbReportReadError("Stream PEL size loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ if (rioRead(rdb,rawid,sizeof(rawid)) == 0) {
+ rdbReportReadError("Stream PEL ID loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
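+                /* Create the NACK with no owner for now: the owning consumer
+                 * is assigned later, when the consumer PELs of this group
+                 * are loaded. */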
+ streamNACK *nack = streamCreateNACK(NULL);
+ nack->delivery_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ nack->delivery_count = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream PEL NACK loading failed.");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ if (!raxTryInsert(cgroup->pel,rawid,sizeof(rawid),nack,NULL)) {
+ rdbReportCorruptRDB("Duplicated global PEL entry "
+ "loading stream consumer group");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ }
+
+ /* Now that we loaded our global PEL, we need to load the
+ * consumers and their local PELs. */
+ uint64_t consumers_num = rdbLoadLen(rdb,NULL);
+ if (consumers_num == RDB_LENERR) {
+ rdbReportReadError("Stream consumers num loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(consumers_num--) {
+ sds cname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cname == NULL) {
+ rdbReportReadError(
+ "Error reading the consumer name from Stream group.");
+ decrRefCount(o);
+ return NULL;
+ }
+ streamConsumer *consumer = streamCreateConsumer(cgroup,cname,NULL,0,
+ SCC_NO_NOTIFY|SCC_NO_DIRTIFY);
+ sdsfree(cname);
+ if (!consumer) {
+ rdbReportCorruptRDB("Duplicate stream consumer detected.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ consumer->seen_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream short read reading seen time.");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_3) {
+ consumer->active_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Stream short read reading active time.");
+ decrRefCount(o);
+ return NULL;
+ }
+ } else {
+ /* That's the best estimate we got */
+ consumer->active_time = consumer->seen_time;
+ }
+
+ /* Load the PEL about entries owned by this specific
+ * consumer. */
+ pel_size = rdbLoadLen(rdb,NULL);
+ if (pel_size == RDB_LENERR) {
+ rdbReportReadError(
+ "Stream consumer PEL num loading failed.");
+ decrRefCount(o);
+ return NULL;
+ }
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ if (rioRead(rdb,rawid,sizeof(rawid)) == 0) {
+ rdbReportReadError(
+ "Stream short read reading PEL streamID.");
+ decrRefCount(o);
+ return NULL;
+ }
+ streamNACK *nack = raxFind(cgroup->pel,rawid,sizeof(rawid));
+ if (nack == raxNotFound) {
+ rdbReportCorruptRDB("Consumer entry not found in "
+ "group global PEL");
+ decrRefCount(o);
+ return NULL;
+ }
+
+ /* Set the NACK consumer, that was left to NULL when
+ * loading the global PEL. Then set the same shared
+ * NACK structure also in the consumer-specific PEL. */
+ nack->consumer = consumer;
+ if (!raxTryInsert(consumer->pel,rawid,sizeof(rawid),nack,NULL)) {
+ rdbReportCorruptRDB("Duplicated consumer PEL entry "
+                                            "loading a stream consumer "
+ "group");
+ decrRefCount(o);
+ streamFreeNACK(nack);
+ return NULL;
+ }
+ }
+ }
+
+ /* Verify that each PEL eventually got a consumer assigned to it. */
+ if (deep_integrity_validation) {
+ raxIterator ri_cg_pel;
+ raxStart(&ri_cg_pel,cgroup->pel);
+ raxSeek(&ri_cg_pel,"^",NULL,0);
+ while(raxNext(&ri_cg_pel)) {
+ streamNACK *nack = ri_cg_pel.data;
+ if (!nack->consumer) {
+ raxStop(&ri_cg_pel);
+ rdbReportCorruptRDB("Stream CG PEL entry without consumer");
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+ raxStop(&ri_cg_pel);
+ }
+ }
+ } else if (rdbtype == RDB_TYPE_MODULE_PRE_GA) {
+ rdbReportCorruptRDB("Pre-release module format not supported");
+ return NULL;
+ } else if (rdbtype == RDB_TYPE_MODULE_2) {
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) {
+ rdbReportReadError("Short read module id");
+ return NULL;
+ }
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+
+ if (rdbCheckMode) {
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+ return rdbLoadCheckModuleValue(rdb,name);
+ }
+
+ if (mt == NULL) {
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+ rdbReportCorruptRDB("The RDB file contains module data I can't load: no matching module type '%s'", name);
+ return NULL;
+ }
+ RedisModuleIO io;
+ robj keyobj;
+ initStaticStringObject(keyobj,key);
+ moduleInitIOContext(io,mt,rdb,&keyobj,dbid);
+ /* Call the rdb_load method of the module providing the 10 bit
+ * encoding version in the lower 10 bits of the module ID. */
+ void *ptr = mt->rdb_load(&io,moduleid&1023);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+
+ /* Module v2 serialization has an EOF mark at the end. */
+ uint64_t eof = rdbLoadLen(rdb,NULL);
+ if (eof == RDB_LENERR) {
+ if (ptr) {
+ o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
+ decrRefCount(o);
+ }
+ return NULL;
+ }
+ if (eof != RDB_MODULE_OPCODE_EOF) {
+ rdbReportCorruptRDB("The RDB file contains module data for the module '%s' that is not terminated by "
+ "the proper module value EOF marker", moduleTypeModuleName(mt));
+ if (ptr) {
+ o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
+ decrRefCount(o);
+ }
+ return NULL;
+ }
+
+ if (ptr == NULL) {
+ rdbReportCorruptRDB("The RDB file contains module data for the module type '%s', that the responsible "
+                                "module is not able to load. Check the module log above for additional clues.",
+ moduleTypeModuleName(mt));
+ return NULL;
+ }
+ o = createModuleObject(mt,ptr);
+ } else {
+ rdbReportReadError("Unknown RDB encoding type %d",rdbtype);
+ return NULL;
+ }
+ if (error) *error = 0;
+ return o;
+
+emptykey:
+ if (error) *error = RDB_LOAD_ERR_EMPTY_KEY;
+ return NULL;
+}
+
+/* Mark that we are loading in the global state and set up the fields
+ * needed to provide loading stats. */
+void startLoading(size_t size, int rdbflags, int async) {
+ /* Load the DB */
+ server.loading = 1;
+ if (async == 1) server.async_loading = 1;
+ server.loading_start_time = time(NULL);
+ server.loading_loaded_bytes = 0;
+ server.loading_total_bytes = size;
+ server.loading_rdb_used_mem = 0;
+ server.rdb_last_load_keys_expired = 0;
+ server.rdb_last_load_keys_loaded = 0;
+ blockingOperationStarts();
+
+ /* Fire the loading modules start event. */
+ int subevent;
+ if (rdbflags & RDBFLAGS_AOF_PREAMBLE)
+ subevent = REDISMODULE_SUBEVENT_LOADING_AOF_START;
+ else if(rdbflags & RDBFLAGS_REPLICATION)
+ subevent = REDISMODULE_SUBEVENT_LOADING_REPL_START;
+ else
+ subevent = REDISMODULE_SUBEVENT_LOADING_RDB_START;
+ moduleFireServerEvent(REDISMODULE_EVENT_LOADING,subevent,NULL);
+}
+
+/* Mark that we are loading in the global state and set up the fields
+ * needed to provide loading stats.
+ * 'filename' is optional and is used by rdb-check in case of an error. */
+void startLoadingFile(size_t size, char* filename, int rdbflags) {
+ rdbFileBeingLoaded = filename;
+ startLoading(size, rdbflags, 0);
+}
+
+/* Refresh the absolute loading progress info */
+void loadingAbsProgress(off_t pos) {
+ server.loading_loaded_bytes = pos;
+ if (server.stat_peak_memory < zmalloc_used_memory())
+ server.stat_peak_memory = zmalloc_used_memory();
+}
+
+/* Refresh the incremental loading progress info */
+void loadingIncrProgress(off_t size) {
+ server.loading_loaded_bytes += size;
+ if (server.stat_peak_memory < zmalloc_used_memory())
+ server.stat_peak_memory = zmalloc_used_memory();
+}
+
+/* Update the file name currently being loaded */
+void updateLoadingFileName(char* filename) {
+ rdbFileBeingLoaded = filename;
+}
+
+/* Loading finished */
+void stopLoading(int success) {
+ server.loading = 0;
+ server.async_loading = 0;
+ blockingOperationEnds();
+ rdbFileBeingLoaded = NULL;
+
+ /* Fire the loading modules end event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_LOADING,
+ success?
+ REDISMODULE_SUBEVENT_LOADING_ENDED:
+ REDISMODULE_SUBEVENT_LOADING_FAILED,
+ NULL);
+}
+
+void startSaving(int rdbflags) {
+ /* Fire the persistence modules start event. */
+ int subevent;
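+    /* getpid() != server.pid means we are running in a forked child, that is,
+     * a background save; otherwise the save is performed synchronously by the
+     * main process. */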
+ if (rdbflags & RDBFLAGS_AOF_PREAMBLE && getpid() != server.pid)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START;
+ else if (rdbflags & RDBFLAGS_AOF_PREAMBLE)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_AOF_START;
+ else if (getpid()!=server.pid)
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START;
+ else
+ subevent = REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START;
+ moduleFireServerEvent(REDISMODULE_EVENT_PERSISTENCE,subevent,NULL);
+}
+
+void stopSaving(int success) {
+ /* Fire the persistence modules end event. */
+ moduleFireServerEvent(REDISMODULE_EVENT_PERSISTENCE,
+ success?
+ REDISMODULE_SUBEVENT_PERSISTENCE_ENDED:
+ REDISMODULE_SUBEVENT_PERSISTENCE_FAILED,
+ NULL);
+}
+
+/* Track loading progress in order to serve clients from time to time
+   and, if needed, calculate the RDB checksum. */
+void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
+ if (server.rdb_checksum)
+ rioGenericUpdateChecksum(r, buf, len);
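+    /* The division below detects when processed_bytes crosses a multiple of
+     * loading_process_events_interval_bytes, so events are served roughly
+     * once per interval of loaded bytes. */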
+ if (server.loading_process_events_interval_bytes &&
+ (r->processed_bytes + len)/server.loading_process_events_interval_bytes > r->processed_bytes/server.loading_process_events_interval_bytes)
+ {
+ if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER)
+ replicationSendNewlineToMaster();
+ loadingAbsProgress(r->processed_bytes);
+ processEventsWhileBlocked();
+ processModuleLoadingProgressEvent(0);
+ }
+ if (server.repl_state == REPL_STATE_TRANSFER && rioCheckType(r) == RIO_TYPE_CONN) {
+ atomicIncr(server.stat_net_repl_input_bytes, len);
+ }
+}
+
+/* Load a function library payload from the rdb.
+ * The err output parameter is optional and will be set with a relevant error
+ * message on failure; it is the caller's responsibility to free the error
+ * message.
+ *
+ * The lib_ctx argument is also optional. If NULL is given, only verify the
+ * rdb structure without performing the actual library loading. */
+int rdbFunctionLoad(rio *rdb, int ver, functionsLibCtx* lib_ctx, int rdbflags, sds *err) {
+ UNUSED(ver);
+ sds error = NULL;
+ sds final_payload = NULL;
+ int res = C_ERR;
+ if (!(final_payload = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL))) {
+ error = sdsnew("Failed loading library payload");
+ goto done;
+ }
+
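+    /* If no library context was given, reading the payload above is all that
+     * is needed to verify the rdb structure; otherwise load the library into
+     * the given context. */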
+ if (lib_ctx) {
+ sds library_name = NULL;
+ if (!(library_name = functionsCreateWithLibraryCtx(final_payload, rdbflags & RDBFLAGS_ALLOW_DUP, &error, lib_ctx, 0))) {
+ if (!error) {
+ error = sdsnew("Failed creating the library");
+ }
+ goto done;
+ }
+ sdsfree(library_name);
+ }
+
+ res = C_OK;
+
+done:
+ if (final_payload) sdsfree(final_payload);
+ if (error) {
+ if (err) {
+ *err = error;
+ } else {
+ serverLog(LL_WARNING, "Failed creating function, %s", error);
+ sdsfree(error);
+ }
+ }
+ return res;
+}
+
+/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
+ * otherwise C_ERR is returned and 'errno' is set accordingly. */
+int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
+ functionsLibCtx* functions_lib_ctx = functionsLibCtxGetCurrent();
+ rdbLoadingCtx loading_ctx = { .dbarray = server.db, .functions_lib_ctx = functions_lib_ctx };
+ int retval = rdbLoadRioWithLoadingCtx(rdb,rdbflags,rsi,&loading_ctx);
+ return retval;
+}
+
+
+/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
+ * otherwise C_ERR is returned.
+ * The rdb_loading_ctx argument holds the objects into which the rdb is loaded:
+ * currently it only allows setting the db array and the functions lib ctx that
+ * will receive the data (in the future it might contain more such objects). */
+int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadingCtx *rdb_loading_ctx) {
+ uint64_t dbid = 0;
+ int type, rdbver;
+ redisDb *db = rdb_loading_ctx->dbarray+0;
+ char buf[1024];
+ int error;
+ long long empty_keys_skipped = 0;
+
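+    /* Install the progress callback (which also updates the checksum) and
+     * limit the chunk size processed per rio read, so that events can be
+     * served periodically while loading. */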
+ rdb->update_cksum = rdbLoadProgressCallback;
+ rdb->max_processing_chunk = server.loading_process_events_interval_bytes;
+ if (rioRead(rdb,buf,9) == 0) goto eoferr;
+ buf[9] = '\0';
+ if (memcmp(buf,"REDIS",5) != 0) {
+ serverLog(LL_WARNING,"Wrong signature trying to load DB from file");
+ return C_ERR;
+ }
+ rdbver = atoi(buf+5);
+ if (rdbver < 1 || rdbver > RDB_VERSION) {
+ serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver);
+ return C_ERR;
+ }
+
+ /* Key-specific attributes, set by opcodes before the key type. */
+ long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
+ long long lru_clock = LRU_CLOCK();
+
+ while(1) {
+ sds key;
+ robj *val;
+
+ /* Read type. */
+ if ((type = rdbLoadType(rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
+ if (type == RDB_OPCODE_EXPIRETIME) {
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
+ expiretime = rdbLoadTime(rdb);
+ expiretime *= 1000;
+ if (rioGetReadError(rdb)) goto eoferr;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+             * with RDB v3. Like EXPIRETIME but with more precision. */
+ expiretime = rdbLoadMillisecondTime(rdb,rdbver);
+ if (rioGetReadError(rdb)) goto eoferr;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_FREQ) {
+ /* FREQ: LFU frequency. */
+ uint8_t byte;
+ if (rioRead(rdb,&byte,1) == 0) goto eoferr;
+ lfu_freq = byte;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_IDLE) {
+ /* IDLE: LRU idle time. */
+ uint64_t qword;
+ if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
+ lru_idle = qword;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
+ break;
+ } else if (type == RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
+ if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
+ if (dbid >= (unsigned)server.dbnum) {
+ serverLog(LL_WARNING,
+ "FATAL: Data file was created with a Redis "
+ "server configured to handle more than %d "
+ "databases. Exiting\n", server.dbnum);
+ exit(1);
+ }
+ db = rdb_loading_ctx->dbarray+dbid;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_RESIZEDB) {
+            /* RESIZEDB: Hint about the number of keys in the currently
+             * selected database, in order to avoid useless rehashing. */
+ uint64_t db_size, expires_size;
+ if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ dictExpand(db->dict,db_size);
+ dictExpand(db->expires,expires_size);
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_AUX) {
+            /* AUX: generic string-string fields. Used to add state to the RDB
+             * in a backward compatible way. Implementations of RDB loading
+ * are required to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(rdb)) == NULL) {
+ decrRefCount(auxkey);
+ goto eoferr;
+ }
+
+ if (((char*)auxkey->ptr)[0] == '%') {
+                /* All the fields with a name starting with '%' are considered
+ * information fields and are logged at startup with a log
+ * level of NOTICE. */
+ serverLog(LL_NOTICE,"RDB '%s': %s",
+ (char*)auxkey->ptr,
+ (char*)auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) {
+ if (rsi) rsi->repl_stream_db = atoi(auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-id")) {
+ if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) {
+ memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1);
+ rsi->repl_id_is_set = 1;
+ }
+ } else if (!strcasecmp(auxkey->ptr,"repl-offset")) {
+ if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10);
+ } else if (!strcasecmp(auxkey->ptr,"lua")) {
+ /* Won't load the script back in memory anymore. */
+ } else if (!strcasecmp(auxkey->ptr,"redis-ver")) {
+ serverLog(LL_NOTICE,"Loading RDB produced by version %s",
+ (char*)auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"ctime")) {
+ time_t age = time(NULL)-strtol(auxval->ptr,NULL,10);
+ if (age < 0) age = 0;
+ serverLog(LL_NOTICE,"RDB age %ld seconds",
+ (unsigned long) age);
+ } else if (!strcasecmp(auxkey->ptr,"used-mem")) {
+ long long usedmem = strtoll(auxval->ptr,NULL,10);
+ serverLog(LL_NOTICE,"RDB memory usage when created %.2f Mb",
+ (double) usedmem / (1024*1024));
+ server.loading_rdb_used_mem = usedmem;
+ } else if (!strcasecmp(auxkey->ptr,"aof-preamble")) {
+ long long haspreamble = strtoll(auxval->ptr,NULL,10);
+ if (haspreamble) serverLog(LL_NOTICE,"RDB has an AOF tail");
+ } else if (!strcasecmp(auxkey->ptr, "aof-base")) {
+ long long isbase = strtoll(auxval->ptr, NULL, 10);
+ if (isbase) serverLog(LL_NOTICE, "RDB is base AOF");
+ } else if (!strcasecmp(auxkey->ptr,"redis-bits")) {
+ /* Just ignored. */
+ } else {
+ /* We ignore fields we don't understand, as by AUX field
+ * contract. */
+ serverLog(LL_DEBUG,"Unrecognized RDB AUX field: '%s'",
+ (char*)auxkey->ptr);
+ }
+
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_MODULE_AUX) {
+ /* Load module data that is not related to the Redis key space.
+             * Such data can potentially be stored both before and after the
+ * RDB keys-values section. */
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ int when_opcode = rdbLoadLen(rdb,NULL);
+ int when = rdbLoadLen(rdb,NULL);
+ if (rioGetReadError(rdb)) goto eoferr;
+ if (when_opcode != RDB_MODULE_OPCODE_UINT) {
+ rdbReportReadError("bad when_opcode");
+ goto eoferr;
+ }
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+
+ if (!rdbCheckMode && mt == NULL) {
+ /* Unknown module. */
+ serverLog(LL_WARNING,"The RDB file contains AUX module data I can't load: no matching module '%s'", name);
+ exit(1);
+ } else if (!rdbCheckMode && mt != NULL) {
+ if (!mt->aux_load) {
+ /* Module doesn't support AUX. */
+ serverLog(LL_WARNING,"The RDB file contains module AUX data, but the module '%s' doesn't seem to support it.", name);
+ exit(1);
+ }
+
+ RedisModuleIO io;
+ moduleInitIOContext(io,mt,rdb,NULL,-1);
+ /* Call the rdb_load method of the module providing the 10 bit
+ * encoding version in the lower 10 bits of the module ID. */
+ int rc = mt->aux_load(&io,moduleid&1023, when);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ if (rc != REDISMODULE_OK || io.error) {
+ moduleTypeNameByID(name,moduleid);
+                    serverLog(LL_WARNING,"The RDB file contains module AUX data for the module type '%s', that the responsible module is not able to load. Check the module log above for additional clues.", name);
+ goto eoferr;
+ }
+ uint64_t eof = rdbLoadLen(rdb,NULL);
+ if (eof != RDB_MODULE_OPCODE_EOF) {
+ serverLog(LL_WARNING,"The RDB file contains module AUX data for the module '%s' that is not terminated by the proper module value EOF marker", name);
+ goto eoferr;
+ }
+ continue;
+ } else {
+ /* RDB check mode. */
+ robj *aux = rdbLoadCheckModuleValue(rdb,name);
+ decrRefCount(aux);
+ continue; /* Read next opcode. */
+ }
+ } else if (type == RDB_OPCODE_FUNCTION_PRE_GA) {
+ rdbReportCorruptRDB("Pre-release function format not supported.");
+ exit(1);
+ } else if (type == RDB_OPCODE_FUNCTION2) {
+ sds err = NULL;
+ if (rdbFunctionLoad(rdb, rdbver, rdb_loading_ctx->functions_lib_ctx, rdbflags, &err) != C_OK) {
+ serverLog(LL_WARNING,"Failed loading library, %s", err);
+ sdsfree(err);
+ goto eoferr;
+ }
+ continue;
+ }
+
+ /* Read key */
+ if ((key = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL)
+ goto eoferr;
+ /* Read value */
+ val = rdbLoadObject(type,rdb,key,db->id,&error);
+
+ /* Check if the key already expired. This function is used when loading
+ * an RDB file from disk, either at startup, or when an RDB was
+ * received from the master. In the latter case, the master is
+         * responsible for key expiry. If we expired keys here, the
+         * snapshot taken by the master may not be reflected on the slave.
+         * Similarly, if the base AOF is in RDB format, we want to load all
+         * the keys as they are, since the log of operations in the incr AOF
+         * is assumed to apply to the exact keyspace state. */
+ if (val == NULL) {
+            /* Since we used to have a bug that could lead to empty keys
+             * (see #8453), we'd rather not fail when an empty key is
+             * encountered in an RDB file; instead we silently discard it
+             * and continue loading. */
+ if (error == RDB_LOAD_ERR_EMPTY_KEY) {
+ if(empty_keys_skipped++ < 10)
+ serverLog(LL_NOTICE, "rdbLoadObject skipping empty key: %s", key);
+ sdsfree(key);
+ } else {
+ sdsfree(key);
+ goto eoferr;
+ }
+ } else if (iAmMaster() &&
+ !(rdbflags&RDBFLAGS_AOF_PREAMBLE) &&
+ expiretime != -1 && expiretime < now)
+ {
+ if (rdbflags & RDBFLAGS_FEED_REPL) {
+ /* Caller should have created replication backlog,
+ * and now this path only works when rebooting,
+ * so we don't have replicas yet. */
+ serverAssert(server.repl_backlog != NULL && listLength(server.slaves) == 0);
+ robj keyobj;
+ initStaticStringObject(keyobj,key);
+ robj *argv[2];
+ argv[0] = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+ argv[1] = &keyobj;
+ replicationFeedSlaves(server.slaves,dbid,argv,2);
+ }
+ sdsfree(key);
+ decrRefCount(val);
+ server.rdb_last_load_keys_expired++;
+ } else {
+ robj keyobj;
+ initStaticStringObject(keyobj,key);
+
+ /* Add the new object in the hash table */
+ int added = dbAddRDBLoad(db,key,val);
+ server.rdb_last_load_keys_loaded++;
+ if (!added) {
+ if (rdbflags & RDBFLAGS_ALLOW_DUP) {
+ /* This flag is useful for DEBUG RELOAD special modes.
+ * When it's set we allow new keys to replace the current
+ * keys with the same name. */
+ dbSyncDelete(db,&keyobj);
+ dbAddRDBLoad(db,key,val);
+ } else {
+ serverLog(LL_WARNING,
+ "RDB has duplicated key '%s' in DB %d",key,db->id);
+ serverPanic("Duplicated key found in RDB file");
+ }
+ }
+
+ /* Set the expire time if needed */
+ if (expiretime != -1) {
+ setExpire(NULL,db,&keyobj,expiretime);
+ }
+
+ /* Set usage information (for eviction). */
+ objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000);
+
+ /* call key space notification on key loaded for modules only */
+ moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id);
+ }
+
+ /* Loading the database more slowly is useful in order to test
+ * certain edge cases. */
+ if (server.key_load_delay)
+ debugDelay(server.key_load_delay);
+
+        /* Reset the key-specific state populated by opcodes before the key,
+         * so that we start from scratch again. */
+ expiretime = -1;
+ lfu_freq = -1;
+ lru_idle = -1;
+ }
+ /* Verify the checksum if RDB version is >= 5 */
+ if (rdbver >= 5) {
+ uint64_t cksum, expected = rdb->cksum;
+
+ if (rioRead(rdb,&cksum,8) == 0) goto eoferr;
+ if (server.rdb_checksum && !server.skip_checksum_validation) {
+ memrev64ifbe(&cksum);
+ if (cksum == 0) {
+ serverLog(LL_NOTICE,"RDB file was saved with checksum disabled: no check performed.");
+ } else if (cksum != expected) {
+ serverLog(LL_WARNING,"Wrong RDB checksum expected: (%llx) but "
+ "got (%llx). Aborting now.",
+ (unsigned long long)expected,
+ (unsigned long long)cksum);
+ rdbReportCorruptRDB("RDB CRC error");
+ return C_ERR;
+ }
+ }
+ }
+
+ if (empty_keys_skipped) {
+ serverLog(LL_NOTICE,
+ "Done loading RDB, keys loaded: %lld, keys expired: %lld, empty keys skipped: %lld.",
+ server.rdb_last_load_keys_loaded, server.rdb_last_load_keys_expired, empty_keys_skipped);
+ } else {
+ serverLog(LL_NOTICE,
+ "Done loading RDB, keys loaded: %lld, keys expired: %lld.",
+ server.rdb_last_load_keys_loaded, server.rdb_last_load_keys_expired);
+ }
+ return C_OK;
+
+    /* Unexpected end of file is handled here by calling rdbReportReadError():
+     * in most cases this aborts Redis, but if we are loading the RDB file
+     * from a socket during the initial SYNC (diskless replica mode), the
+     * error is reported to the caller so that it can retry. */
+eoferr:
+ serverLog(LL_WARNING,
+ "Short read or OOM loading DB. Unrecoverable error, aborting now.");
+ rdbReportReadError("Unexpected EOF reading RDB file");
+ return C_ERR;
+}
+
+/* Like rdbLoadRio() but takes a filename instead of a rio stream. The
+ * file is opened for reading and a rio stream object is created in order
+ * to do the actual loading. Moreover, the ETA displayed in the INFO
+ * output is initialized and finalized.
+ *
+ * If you pass an 'rsi' structure initialized with RDB_SAVE_INFO_INIT, the
+ * loading code will fill the information fields in the structure. */
+int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags) {
+ FILE *fp;
+ rio rdb;
+ int retval;
+ struct stat sb;
+ int rdb_fd;
+
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ if (errno == ENOENT) return RDB_NOT_EXIST;
+
+ serverLog(LL_WARNING,"Fatal error: can't open the RDB file %s for reading: %s", filename, strerror(errno));
+ return RDB_FAILED;
+ }
+
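+    /* If fstat() fails we simply report a total size of 0: loading still
+     * proceeds, only the progress/ETA information is affected. */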
+ if (fstat(fileno(fp), &sb) == -1)
+ sb.st_size = 0;
+
+ startLoadingFile(sb.st_size, filename, rdbflags);
+ rioInitWithFile(&rdb,fp);
+
+ retval = rdbLoadRio(&rdb,rdbflags,rsi);
+
+ fclose(fp);
+ stopLoading(retval==C_OK);
+ /* Reclaim the cache backed by rdb */
+ if (retval == C_OK && !(rdbflags & RDBFLAGS_KEEP_CACHE)) {
+ /* TODO: maybe we could combine the fopen and open into one in the future */
+ rdb_fd = open(filename, O_RDONLY);
+ if (rdb_fd > 0) bioCreateCloseJob(rdb_fd, 0, 1);
+ }
+ return (retval==C_OK) ? RDB_OK : RDB_FAILED;
+}
+
+/* A background saving child (BGSAVE) terminated its work. Handle this.
+ * This function covers the case of actual BGSAVEs. */
+static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
+ if (!bysignal && exitcode == 0) {
+ serverLog(LL_NOTICE,
+ "Background saving terminated with success");
+ server.dirty = server.dirty - server.dirty_before_bgsave;
+ server.lastsave = time(NULL);
+ server.lastbgsave_status = C_OK;
+ } else if (!bysignal && exitcode != 0) {
+ serverLog(LL_WARNING, "Background saving error");
+ server.lastbgsave_status = C_ERR;
+ } else {
+ mstime_t latency;
+
+ serverLog(LL_WARNING,
+ "Background saving terminated by signal %d", bysignal);
+ latencyStartMonitor(latency);
+ rdbRemoveTempFile(server.child_pid, 0);
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
+ /* SIGUSR1 is whitelisted, so we have a way to kill a child without
+ * triggering an error condition. */
+ if (bysignal != SIGUSR1)
+ server.lastbgsave_status = C_ERR;
+ }
+}
+
+/* A background saving child (BGSAVE) terminated its work. Handle this.
+ * This function covers the case of RDB -> Slaves socket transfers for
+ * diskless replication. */
+static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
+ if (!bysignal && exitcode == 0) {
+ serverLog(LL_NOTICE,
+ "Background RDB transfer terminated with success");
+ } else if (!bysignal && exitcode != 0) {
+ serverLog(LL_WARNING, "Background transfer error");
+ } else {
+ serverLog(LL_WARNING,
+ "Background transfer terminated by signal %d", bysignal);
+ }
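+    /* Tear down the parent side of the diskless transfer: the child exit
+     * pipe, the rdb pipe read end and its event handler, and the per-replica
+     * connection and buffer bookkeeping. */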
+ if (server.rdb_child_exit_pipe!=-1)
+ close(server.rdb_child_exit_pipe);
+ aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
+ close(server.rdb_pipe_read);
+ server.rdb_child_exit_pipe = -1;
+ server.rdb_pipe_read = -1;
+ zfree(server.rdb_pipe_conns);
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ zfree(server.rdb_pipe_buff);
+ server.rdb_pipe_buff = NULL;
+ server.rdb_pipe_bufflen = 0;
+}
+
+/* When a background RDB saving/transfer terminates, call the right handler. */
+void backgroundSaveDoneHandler(int exitcode, int bysignal) {
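+    /* Save the child type: it is reset to RDB_CHILD_TYPE_NONE below, but
+     * updateSlavesWaitingBgsave() still needs it. */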
+ int type = server.rdb_child_type;
+ switch(server.rdb_child_type) {
+ case RDB_CHILD_TYPE_DISK:
+ backgroundSaveDoneHandlerDisk(exitcode,bysignal);
+ break;
+ case RDB_CHILD_TYPE_SOCKET:
+ backgroundSaveDoneHandlerSocket(exitcode,bysignal);
+ break;
+ default:
+ serverPanic("Unknown RDB child type.");
+ break;
+ }
+
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
+ server.rdb_save_time_start = -1;
+ /* Possibly there are slaves waiting for a BGSAVE in order to be served
+ * (the first stage of SYNC is a bulk transfer of dump.rdb) */
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, type);
+}
+
+/* Kill the RDB saving child using SIGUSR1 (so that the parent knows the
+ * child did not exit because of an error, but because we asked it to), and
+ * perform the needed cleanup. */
+void killRDBChild(void) {
+ kill(server.child_pid, SIGUSR1);
+    /* Because we are not using waitpid here (as we do in killAppendOnlyChild
+     * and TerminateModuleForkChild), all the cleanup operations are done by
+     * checkChildrenDone, which will later find that the process was killed.
+ * This includes:
+ * - resetChildState
+ * - rdbRemoveTempFile */
+}
+
+/* Spawn an RDB child that writes the RDB to the sockets of the slaves
+ * that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
+int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
+ listNode *ln;
+ listIter li;
+ pid_t childpid;
+ int pipefds[2], rdb_pipe_write, safe_to_exit_pipe;
+
+ if (hasActiveChildProcess()) return C_ERR;
+
+ /* Even if the previous fork child exited, don't start a new one until we
+     * have drained the pipe. */
+ if (server.rdb_pipe_conns) return C_ERR;
+
+    /* Before forking, create a pipe that is used to transfer the rdb bytes to
+     * the parent; we can't let the child write directly to the sockets, since
+     * in case of TLS the parent must handle a continuous TLS state when the
+     * child terminates and the parent takes over. */
+ if (anetPipe(pipefds, O_NONBLOCK, 0) == -1) return C_ERR;
+ server.rdb_pipe_read = pipefds[0]; /* read end */
+ rdb_pipe_write = pipefds[1]; /* write end */
+
+ /* create another pipe that is used by the parent to signal to the child
+ * that it can exit. */
+ if (anetPipe(pipefds, 0, 0) == -1) {
+ close(rdb_pipe_write);
+ close(server.rdb_pipe_read);
+ return C_ERR;
+ }
+ safe_to_exit_pipe = pipefds[0]; /* read end */
+ server.rdb_child_exit_pipe = pipefds[1]; /* write end */
+
+ /* Collect the connections of the replicas we want to transfer
+     * the RDB to, which are in WAIT_BGSAVE_START state. */
+ server.rdb_pipe_conns = zmalloc(sizeof(connection *)*listLength(server.slaves));
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ /* Check slave has the exact requirements */
+ if (slave->slave_req != req)
+ continue;
+ server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn;
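+            /* This moves the slave to WAIT_BGSAVE_END; the fork error
+             * path below reverts it if the child can't be created. */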
+ replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
+ }
+ }
+
+ /* Create the child process. */
+ if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
+ /* Child */
+ int retval, dummy;
+ rio rdb;
+
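+        /* The child streams the RDB payload into the pipe; the parent
+         * reads it and relays it to the replica sockets (see
+         * rdbPipeReadHandler). */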
+ rioInitWithFd(&rdb,rdb_pipe_write);
+
+ /* Close the reading part, so that if the parent crashes, the child will
+ * get a write error and exit. */
+ close(server.rdb_pipe_read);
+
+ redisSetProcTitle("redis-rdb-to-slaves");
+ redisSetCpuAffinity(server.bgsave_cpulist);
+
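+        /* The RDB is written delimited by an EOF mark, since the total
+         * payload length is not known in advance when streaming. */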
+ retval = rdbSaveRioWithEOFMark(req,&rdb,NULL,rsi);
+ if (retval == C_OK && rioFlush(&rdb) == 0)
+ retval = C_ERR;
+
+ if (retval == C_OK) {
+ sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
+ }
+
+ rioFreeFd(&rdb);
+ /* wake up the reader, tell it we're done. */
+ close(rdb_pipe_write);
+        /* Close our copy of the exit pipe write end, so that the read below
+         * returns when the parent closes its own copy. */
+        close(server.rdb_child_exit_pipe);
+        /* Hold exit until the parent tells us it's safe. We're not expecting
+         * to read anything, just to get an error when the pipe is closed. */
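+        /* pipefds[] is reused here merely as a scratch buffer for the
+         * one byte read. */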
+ dummy = read(safe_to_exit_pipe, pipefds, 1);
+ UNUSED(dummy);
+ exitFromChild((retval == C_OK) ? 0 : 1);
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+
+ /* Undo the state change. The caller will perform cleanup on
+ * all the slaves in BGSAVE_START state, but an early call to
+ * replicationSetupSlaveForFullResync() turned it into BGSAVE_END */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ }
+ }
+ close(rdb_pipe_write);
+ close(server.rdb_pipe_read);
+ zfree(server.rdb_pipe_conns);
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ } else {
+ serverLog(LL_NOTICE,"Background RDB transfer started by pid %ld",
+ (long) childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
+            /* Close the parent's copy of the write end, so that the parent
+             * sees EOF on rdb_pipe_read when the child closes its copy. */
+            close(rdb_pipe_write);
+ if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
+ serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
+ }
+ }
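+        /* The parent only needs the write end of the exit pipe
+         * (rdb_child_exit_pipe); the read end belongs to the child. */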
+ close(safe_to_exit_pipe);
+ return (childpid == -1) ? C_ERR : C_OK;
+ }
+ return C_OK; /* Unreached. */
+}
+
+void saveCommand(client *c) {
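+    /* SAVE performs a blocking snapshot in the calling process, so refuse
+     * it while an RDB child is already writing one. */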
+ if (server.child_type == CHILD_TYPE_RDB) {
+ addReplyError(c,"Background save already in progress");
+ return;
+ }
+
+ server.stat_rdb_saves++;
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK) {
+ addReply(c,shared.ok);
+ } else {
+ addReplyErrorObject(c,shared.err);
+ }
+}
+
+/* BGSAVE [SCHEDULE] */
+void bgsaveCommand(client *c) {
+ int schedule = 0;
+
+    /* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite
+     * is in progress. Instead of returning an error, a BGSAVE gets scheduled. */
+ if (c->argc > 1) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
+ schedule = 1;
+ } else {
+ addReplyErrorObject(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+
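+    /* Depending on the current state, the BGSAVE below is either started
+     * right away, scheduled for later (SCHEDULE option or inside MULTI/EXEC),
+     * or refused with an error. */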
+ if (server.child_type == CHILD_TYPE_RDB) {
+ addReplyError(c,"Background save already in progress");
+ } else if (hasActiveChildProcess() || server.in_exec) {
+ if (schedule || server.in_exec) {
+ server.rdb_bgsave_scheduled = 1;
+ addReplyStatus(c,"Background saving scheduled");
+ } else {
+ addReplyError(c,
+ "Another child process is active (AOF?): can't BGSAVE right now. "
+ "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
+ "possible.");
+ }
+ } else if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK) {
+ addReplyStatus(c,"Background saving started");
+ } else {
+ addReplyErrorObject(c,shared.err);
+ }
+}
+
+/* Populate the rdbSaveInfo structure used to persist the replication
+ * information inside the RDB file. Currently the structure explicitly
+ * contains just the currently selected DB from the master stream. Note
+ * that if the rdbSave*() family functions receive a NULL rsi structure,
+ * the Replication ID/offset is not saved either. The function populates
+ * 'rsi', which is normally stack-allocated in the caller, and returns the
+ * populated pointer if the instance has a valid master client. Otherwise
+ * NULL is returned, and the RDB saving will not persist any replication
+ * related information. */
+rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
+ rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT;
+ *rsi = rsi_init;
+
+    /* If the instance is a master, we can populate the replication info
+     * only when repl_backlog is not NULL. If the repl_backlog is NULL,
+     * it means that the instance isn't part of any replication chain. In
+     * this scenario the replication info is useless, because when a slave
+     * connects to us, the NULL repl_backlog will trigger a full
+     * synchronization; at the same time we will use a new replid and clear
+     * replid2. */
+ if (!server.masterhost && server.repl_backlog) {
+        /* Note that when server.slaveseldb is -1, it means that this master
+         * didn't apply any write commands after a full synchronization.
+         * So we can let repl_stream_db be 0: this allows a restarted slave
+         * to reload the replication ID/offset. It's safe because the next
+         * write command must generate a SELECT statement. */
+ rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
+ return rsi;
+ }
+
+ /* If the instance is a slave we need a connected master
+ * in order to fetch the currently selected DB. */
+ if (server.master) {
+ rsi->repl_stream_db = server.master->db->id;
+ return rsi;
+ }
+
+ /* If we have a cached master we can use it in order to populate the
+ * replication selected DB info inside the RDB file: the slave can
+ * increment the master_repl_offset only from data arriving from the
+ * master, so if we are disconnected the offset in the cached master
+ * is valid. */
+ if (server.cached_master) {
+ rsi->repl_stream_db = server.cached_master->db->id;
+ return rsi;
+ }
+ return NULL;
+}